diff --git a/BUILD.bazel b/BUILD.bazel index 376d3fe2767e5b34e916d3223e90b04fc418ea3d..885571722c52669669a7fab5dff6f87bfc7808f3 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -11,7 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +package( + default_visibility = [ + "//visibility:public", + ], +) -load("//build/kleaf:common_kernels.bzl", "define_common_kernels") +load("//build/kernel/kleaf:common_kernels.bzl", "define_common_kernels") +# This uses android/abi_gki_aarch64* in kmi_configs. If the list of +# glob(["android/abi_gki_aarch64*"]) differs from +# KMI_SYMBOL_LIST + ADDITIONAL_KMI_SYMBOL_LISTS in build.config.gki.aarch64, +# or TRIM_NONLISTED_KMI changes, override kmi_configs here. define_common_kernels() diff --git a/Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32 b/Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32 deleted file mode 100644 index 73498ff666bd74a92269bf5e1006d34571869233..0000000000000000000000000000000000000000 --- a/Documentation/ABI/testing/sysfs-bus-iio-lptimer-stm32 +++ /dev/null @@ -1,62 +0,0 @@ -What: /sys/bus/iio/devices/iio:deviceX/in_count0_preset -KernelVersion: 4.13 -Contact: fabrice.gasnier@st.com -Description: - Reading returns the current preset value. Writing sets the - preset value. Encoder counts continuously from 0 to preset - value, depending on direction (up/down). - -What: /sys/bus/iio/devices/iio:deviceX/in_count_quadrature_mode_available -KernelVersion: 4.13 -Contact: fabrice.gasnier@st.com -Description: - Reading returns the list possible quadrature modes. - -What: /sys/bus/iio/devices/iio:deviceX/in_count0_quadrature_mode -KernelVersion: 4.13 -Contact: fabrice.gasnier@st.com -Description: - Configure the device counter quadrature modes: - - - non-quadrature: - Encoder IN1 input servers as the count input (up - direction). - - - quadrature: - Encoder IN1 and IN2 inputs are mixed to get direction - and count. - -What: /sys/bus/iio/devices/iio:deviceX/in_count_polarity_available -KernelVersion: 4.13 -Contact: fabrice.gasnier@st.com -Description: - Reading returns the list possible active edges. - -What: /sys/bus/iio/devices/iio:deviceX/in_count0_polarity -KernelVersion: 4.13 -Contact: fabrice.gasnier@st.com -Description: - Configure the device encoder/counter active edge: - - - rising-edge - - falling-edge - - both-edges - - In non-quadrature mode, device counts up on active edge. - - In quadrature mode, encoder counting scenarios are as follows: - - +---------+----------+--------------------+--------------------+ - | Active | Level on | IN1 signal | IN2 signal | - | edge | opposite +----------+---------+----------+---------+ - | | signal | Rising | Falling | Rising | Falling | - +---------+----------+----------+---------+----------+---------+ - | Rising | High -> | Down | - | Up | - | - | edge | Low -> | Up | - | Down | - | - +---------+----------+----------+---------+----------+---------+ - | Falling | High -> | - | Up | - | Down | - | edge | Low -> | - | Down | - | Up | - +---------+----------+----------+---------+----------+---------+ - | Both | High -> | Down | Up | Up | Down | - | edges | Low -> | Up | Down | Down | Up | - +---------+----------+----------+---------+----------+---------+ diff --git a/Documentation/ABI/testing/sysfs-fs-erofs b/Documentation/ABI/testing/sysfs-fs-erofs new file mode 100644 index 0000000000000000000000000000000000000000..a9512594dc4ca0d41e670fad6b61bea726a95ee2 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-fs-erofs @@ -0,0 +1,7 @@ +What: /sys/fs/erofs/features/ +Date: November 2021 +Contact: "Huang Jianan" +Description: Shows all enabled kernel features. + Supported features: + zero_padding, compr_cfgs, big_pcluster, chunked_file, + device_table, compr_head2, sb_chksum. diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 525d41af23aa940b2a444ffe9f2dd8a7abb7984d..9b583dd0298b7926d7b2ab9bd33a846711d109e3 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -55,8 +55,9 @@ Description: Controls the in-place-update policy. 0x04 F2FS_IPU_UTIL 0x08 F2FS_IPU_SSR_UTIL 0x10 F2FS_IPU_FSYNC - 0x20 F2FS_IPU_ASYNC, + 0x20 F2FS_IPU_ASYNC 0x40 F2FS_IPU_NOCACHE + 0x80 F2FS_IPU_HONOR_OPU_WRITE ==== ================= Refer segment.h for details. @@ -98,6 +99,33 @@ Description: Controls the issue rate of discard commands that consist of small checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. +What: /sys/fs/f2fs//max_discard_request +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the number of discards a thread will issue at a time. + Higher number will allow the discard thread to finish its work + faster, at the cost of higher latency for incomming I/O. + +What: /sys/fs/f2fs//min_discard_issue_time +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the interval the discard thread will wait between + issuing discard requests when there are discards to be issued and + no I/O aware interruptions occur. + +What: /sys/fs/f2fs//mid_discard_issue_time +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the interval the discard thread will wait between + issuing discard requests when there are discards to be issued and + an I/O aware interruption occurs. + +What: /sys/fs/f2fs//max_discard_issue_time +Date: December 2021 +Contact: "Konstantin Vyshetsky" +Description: Controls the interval the discard thread will wait when there are + no discard operations to be issued. + What: /sys/fs/f2fs//discard_granularity Date: July 2017 Contact: "Chao Yu" @@ -112,6 +140,11 @@ Contact: "Jaegeuk Kim" Description: Set timeout to issue discard commands during umount. Default: 5 secs +What: /sys/fs/f2fs//pending_discard +Date: November 2021 +Contact: "Jaegeuk Kim" +Description: Shows the number of pending discard commands in the queue. + What: /sys/fs/f2fs//max_victim_search Date: January 2014 Contact: "Jaegeuk Kim" @@ -264,11 +297,16 @@ Description: Shows current reserved blocks in system, it may be temporarily What: /sys/fs/f2fs//gc_urgent Date: August 2017 Contact: "Jaegeuk Kim" -Description: Do background GC agressively when set. When gc_urgent = 1, - background thread starts to do GC by given gc_urgent_sleep_time - interval. When gc_urgent = 2, F2FS will lower the bar of - checking idle in order to process outstanding discard commands - and GC a little bit aggressively. It is set to 0 by default. +Description: Do background GC aggressively when set. Set to 0 by default. + gc urgent high(1): does GC forcibly in a period of given + gc_urgent_sleep_time and ignores I/O idling check. uses greedy + GC approach and turns SSR mode on. + gc urgent low(2): lowers the bar of checking I/O idling in + order to process outstanding discard commands and GC a + little bit aggressively. uses cost benefit GC approach. + gc urgent mid(3): does GC forcibly in a period of given + gc_urgent_sleep_time and executes a mid level of I/O idling check. + uses cost benefit GC approach. What: /sys/fs/f2fs//gc_urgent_sleep_time Date: August 2017 @@ -425,6 +463,7 @@ Description: Show status of f2fs superblock in real time. 0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP 0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted 0x2000 SBI_IS_RESIZEFS resizefs is in process + 0x4000 SBI_IS_FREEZING freefs is in process ====== ===================== ================================= What: /sys/fs/f2fs//ckpt_thread_ioprio @@ -498,7 +537,7 @@ Date: July 2021 Contact: "Daeho Jeong" Description: Show how many segments have been reclaimed by GC during a specific GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy, - 3: GC idle AT, 4: GC urgent high, 5: GC urgent low) + 3: GC idle AT, 4: GC urgent high, 5: GC urgent low 6: GC urgent mid) You can re-initialize this value to "0". What: /sys/fs/f2fs//gc_segment_mode @@ -512,3 +551,32 @@ Date: July 2021 Contact: "Daeho Jeong" Description: You can control the multiplier value of bdi device readahead window size between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option. + +What: /sys/fs/f2fs//max_fragment_chunk +Date: August 2021 +Contact: "Daeho Jeong" +Description: With "mode=fragment:block" mount options, we can scatter block allocation. + f2fs will allocate 1.. blocks in a chunk and make a hole + in the length of 1.. by turns. This value can be set + between 1..512 and the default value is 4. + +What: /sys/fs/f2fs//max_fragment_hole +Date: August 2021 +Contact: "Daeho Jeong" +Description: With "mode=fragment:block" mount options, we can scatter block allocation. + f2fs will allocate 1.. blocks in a chunk and make a hole + in the length of 1.. by turns. This value can be set + between 1..512 and the default value is 4. + +What: /sys/fs/f2fs//gc_urgent_high_remaining +Date: December 2021 +Contact: "Daeho Jeong" +Description: You can set the trial count limit for GC urgent high mode with this value. + If GC thread gets to the limit, the mode will turn back to GC normal mode. + By default, the value is zero, which means there is no limit like before. + +What: /sys/fs/f2fs//max_roll_forward_node_blocks +Date: January 2022 +Contact: "Jaegeuk Kim" +Description: Controls max # of node block writes to be used for roll forward + recovery. This can limit the roll forward recovery time. diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst index f2b3439edcc2cc772c7f92cd3510060df6b5e8b8..860fe651d6453eed1652f721cb048c8b3840be33 100644 --- a/Documentation/accounting/psi.rst +++ b/Documentation/accounting/psi.rst @@ -92,7 +92,8 @@ Triggers can be set on more than one psi metric and more than one trigger for the same psi metric can be specified. However for each trigger a separate file descriptor is required to be able to poll it separately from others, therefore for each trigger a separate open() syscall should be made even -when opening the same psi interface file. +when opening the same psi interface file. Write operations to a file descriptor +with an already existing psi trigger will fail with EBUSY. Monitors activate only when system enters stall state for the monitored psi metric and deactivates upon exit from the stall state. While system is diff --git a/Documentation/admin-guide/binderfs.rst b/Documentation/admin-guide/binderfs.rst index 8243af9b3510e8c428f36c2879bc9bcb7e645e12..9919879dcba3569f540791c14a8594efa529dac5 100644 --- a/Documentation/admin-guide/binderfs.rst +++ b/Documentation/admin-guide/binderfs.rst @@ -72,3 +72,16 @@ that the `rm() `_ tool can be used to delete them. Note that the ``binder-control`` device cannot be deleted since this would make the binderfs instance unuseable. The ``binder-control`` device will be deleted when the binderfs instance is unmounted and all references to it have been dropped. + +Binder features +--------------- + +Assuming an instance of binderfs has been mounted at ``/dev/binderfs``, the +features supported by the binder driver can be located under +``/dev/binderfs/features/``. The presence of individual files can be tested +to determine whether a particular feature is supported by the driver. + +Example:: + + cat /dev/binderfs/features/oneway_spam_detection + 1 diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index e05e581af5cfe617f38112907aadea8d03f2a9d6..6bd97cd50d6256fbb024278e5f233372b43adce6 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -60,8 +60,8 @@ privileged data touched during the speculative execution. Spectre variant 1 attacks take advantage of speculative execution of conditional branches, while Spectre variant 2 attacks use speculative execution of indirect branches to leak privileged memory. -See :ref:`[1] ` :ref:`[5] ` :ref:`[7] ` -:ref:`[10] ` :ref:`[11] `. +See :ref:`[1] ` :ref:`[5] ` :ref:`[6] ` +:ref:`[7] ` :ref:`[10] ` :ref:`[11] `. Spectre variant 1 (Bounds Check Bypass) --------------------------------------- @@ -131,6 +131,19 @@ steer its indirect branch speculations to gadget code, and measure the speculative execution's side effects left in level 1 cache to infer the victim's data. +Yet another variant 2 attack vector is for the attacker to poison the +Branch History Buffer (BHB) to speculatively steer an indirect branch +to a specific Branch Target Buffer (BTB) entry, even if the entry isn't +associated with the source address of the indirect branch. Specifically, +the BHB might be shared across privilege levels even in the presence of +Enhanced IBRS. + +Currently the only known real-world BHB attack vector is via +unprivileged eBPF. Therefore, it's highly recommended to not enable +unprivileged eBPF, especially when eIBRS is used (without retpolines). +For a full mitigation against BHB attacks, it's recommended to use +retpolines (or eIBRS combined with retpolines). + Attack scenarios ---------------- @@ -364,13 +377,15 @@ The possible values in this file are: - Kernel status: - ==================================== ================================= - 'Not affected' The processor is not vulnerable - 'Vulnerable' Vulnerable, no mitigation - 'Mitigation: Full generic retpoline' Software-focused mitigation - 'Mitigation: Full AMD retpoline' AMD-specific software mitigation - 'Mitigation: Enhanced IBRS' Hardware-focused mitigation - ==================================== ================================= + ======================================== ================================= + 'Not affected' The processor is not vulnerable + 'Mitigation: None' Vulnerable, no mitigation + 'Mitigation: Retpolines' Use Retpoline thunks + 'Mitigation: LFENCE' Use LFENCE instructions + 'Mitigation: Enhanced IBRS' Hardware-focused mitigation + 'Mitigation: Enhanced IBRS + Retpolines' Hardware-focused + Retpolines + 'Mitigation: Enhanced IBRS + LFENCE' Hardware-focused + LFENCE + ======================================== ================================= - Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is used to protect against Spectre variant 2 attacks when calling firmware (x86 only). @@ -468,7 +483,7 @@ Spectre variant 2 before invoking any firmware code to prevent Spectre variant 2 exploits using the firmware. - Using kernel address space randomization (CONFIG_RANDOMIZE_SLAB=y + Using kernel address space randomization (CONFIG_RANDOMIZE_BASE=y and CONFIG_SLAB_FREELIST_RANDOM=y in the kernel configuration) makes attacks on the kernel generally more difficult. @@ -584,12 +599,13 @@ kernel command line. Specific mitigations can also be selected manually: - retpoline - replace indirect branches - retpoline,generic - google's original retpoline - retpoline,amd - AMD-specific minimal thunk + retpoline auto pick between generic,lfence + retpoline,generic Retpolines + retpoline,lfence LFENCE; indirect branch + retpoline,amd alias for retpoline,lfence + eibrs enhanced IBRS + eibrs,retpoline enhanced IBRS + Retpolines + eibrs,lfence enhanced IBRS + LFENCE Not specifying this option is equivalent to spectre_v2=auto. @@ -730,7 +746,7 @@ AMD white papers: .. _spec_ref6: -[6] `Software techniques for managing speculation on AMD processors `_. +[6] `Software techniques for managing speculation on AMD processors `_. ARM white papers: diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 69804b20833fbd966fdce163ec9f34c16bc32491..ce97bf5aafd9133a55d0306997263941984243ef 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -936,6 +936,10 @@ can be useful when debugging issues that require an SLB miss to occur. + disable_dma32= [KNL] + Dynamically disable ZONE_DMA32 on kernels compiled with + CONFIG_ZONE_DMA32=y. + stress_slb [PPC] Limits the number of kernel SLB entries, and flushes them frequently to increase the rate of SLB faults @@ -1651,6 +1655,8 @@ architectures force reset to be always executed i8042.unlock [HW] Unlock (ignore) the keylock i8042.kbdreset [HW] Reset device connected to KBD port + i8042.probe_defer + [HW] Allow deferred probing upon i8042 probe errors i810= [HW,DRM] @@ -2013,6 +2019,9 @@ 1 - Bypass the IOMMU for DMA. unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH. + ioremap_guard [ARM64] enable the KVM MMIO guard functionality + if available. + io7= [HW] IO7 for Marvel-based Alpha systems See comment before marvel_specify_io7 in arch/alpha/kernel/core_marvel.c. @@ -2302,17 +2311,17 @@ kvm-arm.mode= [KVM,ARM] Select one of KVM/arm64's modes of operation. - none: Forcefully disable KVM and run in nVHE mode, - preventing KVM from ever initialising. + none: Forcefully disable KVM. nvhe: Standard nVHE-based mode, without support for protected guests. protected: nVHE-based mode with support for guests whose state is kept private from the host. - Not valid if the kernel is running in EL2. - Defaults to VHE/nVHE based on hardware support. + Defaults to VHE/nVHE based on hardware support. Setting + mode to "protected" will disable kexec and hibernation + for the host. kvm-arm.vgic_v3_group0_trap= [KVM,ARM] Trap guest accesses to GICv3 group-0 @@ -2343,8 +2352,12 @@ Default is 1 (enabled) kvm-intel.emulate_invalid_guest_state= - [KVM,Intel] Enable emulation of invalid guest states - Default is 0 (disabled) + [KVM,Intel] Disable emulation of invalid guest state. + Ignored if kvm-intel.enable_unrestricted_guest=1, as + guest state is never invalid for unrestricted guests. + This param doesn't apply to nested guests (L2), as KVM + never emulates invalid L2 guest state. + Default is 1 (enabled) kvm-intel.flexpriority= [KVM,Intel] Disable FlexPriority feature (TPR shadow). @@ -5041,8 +5054,12 @@ Specific mitigations can also be selected manually: retpoline - replace indirect branches - retpoline,generic - google's original retpoline - retpoline,amd - AMD-specific minimal thunk + retpoline,generic - Retpolines + retpoline,lfence - LFENCE; indirect branch + retpoline,amd - alias for retpoline,lfence + eibrs - enhanced IBRS + eibrs,retpoline - enhanced IBRS + Retpolines + eibrs,lfence - enhanced IBRS + LFENCE Not specifying this option is equivalent to spectre_v2=auto. @@ -6084,6 +6101,13 @@ improve timer resolution at the expense of processing more timer interrupts. + xen.balloon_boot_timeout= [XEN] + The time (in seconds) to wait before giving up to boot + in case initial ballooning fails to free enough memory. + Applies only when running as HVM or PVH guest and + started with less memory configured than allowed at + max. Default is 180. + xen.event_eoi_delay= [XEN] How long to delay EOI handling in case of event storms (jiffies). Default is 10. diff --git a/Documentation/admin-guide/mm/index.rst b/Documentation/admin-guide/mm/index.rst index cd727cfc1b0404db5e422a41a7dd2b3193d122a9..2a2b5bd8299a8b6b751289732e733116bcb3ff04 100644 --- a/Documentation/admin-guide/mm/index.rst +++ b/Documentation/admin-guide/mm/index.rst @@ -31,6 +31,7 @@ the Linux memory management. idle_page_tracking ksm memory-hotplug + multigen_lru nommu-mmap numa_memory_policy numaperf diff --git a/Documentation/admin-guide/mm/multigen_lru.rst b/Documentation/admin-guide/mm/multigen_lru.rst new file mode 100644 index 0000000000000000000000000000000000000000..3d9a6ef842298f99e09393bafa9b23a22bc2ac56 --- /dev/null +++ b/Documentation/admin-guide/mm/multigen_lru.rst @@ -0,0 +1,152 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Multi-Gen LRU +============= +The multi-gen LRU is an alternative LRU implementation that optimizes +page reclaim and improves performance under memory pressure. Page +reclaim decides the kernel's caching policy and ability to overcommit +memory. It directly impacts the kswapd CPU usage and RAM efficiency. + +Quick start +=========== +Build the kernel with the following configurations. + +* ``CONFIG_LRU_GEN=y`` +* ``CONFIG_LRU_GEN_ENABLED=y`` + +All set! + +Runtime options +=============== +``/sys/kernel/mm/lru_gen/`` contains stable ABIs described in the +following subsections. + +Kill switch +----------- +``enable`` accepts different values to enable or disable the following +components. Its default value depends on ``CONFIG_LRU_GEN_ENABLED``. +All the components should be enabled unless some of them have +unforeseen side effects. Writing to ``enable`` has no effect when a +component is not supported by the hardware, and valid values will be +accepted even when the main switch is off. + +====== =============================================================== +Values Components +====== =============================================================== +0x0001 The main switch for the multi-gen LRU. +0x0002 Clearing the accessed bit in leaf page table entries in large + batches, when MMU sets it (e.g., on x86). This behavior can + theoretically worsen lock contention (mmap_lock). If it is + disabled, the multi-gen LRU will suffer a minor performance + degradation. +0x0004 Clearing the accessed bit in non-leaf page table entries as + well, when MMU sets it (e.g., on x86). This behavior was not + verified on x86 varieties other than Intel and AMD. If it is + disabled, the multi-gen LRU will suffer a negligible + performance degradation. +[yYnN] Apply to all the components above. +====== =============================================================== + +E.g., +:: + + echo y >/sys/kernel/mm/lru_gen/enabled + cat /sys/kernel/mm/lru_gen/enabled + 0x0007 + echo 5 >/sys/kernel/mm/lru_gen/enabled + cat /sys/kernel/mm/lru_gen/enabled + 0x0005 + +Thrashing prevention +-------------------- +Personal computers are more sensitive to thrashing because it can +cause janks (lags when rendering UI) and negatively impact user +experience. The multi-gen LRU offers thrashing prevention to the +majority of laptop and desktop users who do not have ``oomd``. + +Users can write ``N`` to ``min_ttl_ms`` to prevent the working set of +``N`` milliseconds from getting evicted. The OOM killer is triggered +if this working set cannot be kept in memory. In other words, this +option works as an adjustable pressure relief valve, and when open, it +terminates applications that are hopefully not being used. + +Based on the average human detectable lag (~100ms), ``N=1000`` usually +eliminates intolerable janks due to thrashing. Larger values like +``N=3000`` make janks less noticeable at the risk of premature OOM +kills. + +The default value ``0`` means disabled. + +Experimental features +===================== +``/sys/kernel/debug/lru_gen`` accepts commands described in the +following subsections. Multiple command lines are supported, so does +concatenation with delimiters ``,`` and ``;``. + +``/sys/kernel/debug/lru_gen_full`` provides additional stats for +debugging. ``CONFIG_LRU_GEN_STATS=y`` keeps historical stats from +evicted generations in this file. + +Working set estimation +---------------------- +Working set estimation measures how much memory an application +requires in a given time interval, and it is usually done with little +impact on the performance of the application. E.g., data centers want +to optimize job scheduling (bin packing) to improve memory +utilizations. When a new job comes in, the job scheduler needs to find +out whether each server it manages can allocate a certain amount of +memory for this new job before it can pick a candidate. To do so, this +job scheduler needs to estimate the working sets of the existing jobs. + +When it is read, ``lru_gen`` returns a histogram of numbers of pages +accessed over different time intervals for each memcg and node. +``MAX_NR_GENS`` decides the number of bins for each histogram. +:: + + memcg memcg_id memcg_path + node node_id + min_gen_nr age_in_ms nr_anon_pages nr_file_pages + ... + max_gen_nr age_in_ms nr_anon_pages nr_file_pages + +Each generation contains an estimated number of pages that have been +accessed within ``age_in_ms`` non-cumulatively. E.g., ``min_gen_nr`` +contains the coldest pages and ``max_gen_nr`` contains the hottest +pages, since ``age_in_ms`` of the former is the largest and that of +the latter is the smallest. + +Users can write ``+ memcg_id node_id max_gen_nr +[can_swap[full_scan]]`` to ``lru_gen`` to create a new generation +``max_gen_nr+1``. ``can_swap`` defaults to the swap setting and, if it +is set to ``1``, it forces the scan of anon pages when swap is off. +``full_scan`` defaults to ``1`` and, if it is set to ``0``, it reduces +the overhead as well as the coverage when scanning page tables. + +A typical use case is that a job scheduler writes to ``lru_gen`` at a +certain time interval to create new generations, and it ranks the +servers it manages based on the sizes of their cold memory defined by +this time interval. + +Proactive reclaim +----------------- +Proactive reclaim induces memory reclaim when there is no memory +pressure and usually targets cold memory only. E.g., when a new job +comes in, the job scheduler wants to proactively reclaim memory on the +server it has selected to improve the chance of successfully landing +this new job. + +Users can write ``- memcg_id node_id min_gen_nr [swappiness +[nr_to_reclaim]]`` to ``lru_gen`` to evict generations less than or +equal to ``min_gen_nr``. Note that ``min_gen_nr`` should be less than +``max_gen_nr-1`` as ``max_gen_nr`` and ``max_gen_nr-1`` are not fully +aged and therefore cannot be evicted. ``swappiness`` overrides the +default value in ``/proc/sys/vm/swappiness``. ``nr_to_reclaim`` limits +the number of pages to evict. + +A typical use case is that a job scheduler writes to ``lru_gen`` +before it tries to land a new job on a server, and if it fails to +materialize the cold memory without impacting the existing jobs on +this server, it retries on the next server according to the ranking +result obtained from the working set estimation step described +earlier. diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index d4b32cc32bb79030d9f685a969c7934ad7312180..7d5e8a67c775f16dcc63bacdd50dbad80248d111 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -1457,11 +1457,22 @@ unprivileged_bpf_disabled ========================= Writing 1 to this entry will disable unprivileged calls to ``bpf()``; -once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` will return -``-EPERM``. +once disabled, calling ``bpf()`` without ``CAP_SYS_ADMIN`` or ``CAP_BPF`` +will return ``-EPERM``. Once set to 1, this can't be cleared from the +running kernel anymore. -Once set, this can't be cleared. +Writing 2 to this entry will also disable unprivileged calls to ``bpf()``, +however, an admin can still change this setting later on, if needed, by +writing 0 or 1 to this entry. +If ``BPF_UNPRIV_DEFAULT_OFF`` is enabled in the kernel config, then this +entry will default to 2 instead of 0. + += ============================================================= +0 Unprivileged calls to ``bpf()`` are enabled +1 Unprivileged calls to ``bpf()`` are disabled without recovery +2 Unprivileged calls to ``bpf()`` are disabled += ============================================================= watchdog ======== diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 9c321c7ddc18452f381553ed79b5d5310b69f9a0..c7d82184fae02f09e061b61ba9adc43bdf7c7f29 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -970,7 +970,7 @@ how much memory needs to be free before kswapd goes back to sleep. The unit is in fractions of 10,000. The default value of 10 means the distances between watermarks are 0.1% of the available memory in the -node/system. The maximum value is 1000, or 10% of memory. +node/system. The maximum value is 3000, or 30% of memory. A high rate of threads entering direct reclaim (allocstall) or kswapd going to sleep prematurely (kswapd_low_wmark_hit_quickly) can indicate diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 328e0c454fbd4606b636b344eb0bb8d8401fad58..749ae970c31955a6ddd54807d4f7a0700cf85295 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -235,7 +235,15 @@ infrastructure: | DPB | [3-0] | y | +------------------------------+---------+---------+ - 6) ID_AA64MMFR2_EL1 - Memory model feature register 2 + 6) ID_AA64MMFR0_EL1 - Memory model feature register 0 + + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | ECV | [63-60] | y | + +------------------------------+---------+---------+ + + 7) ID_AA64MMFR2_EL1 - Memory model feature register 2 +------------------------------+---------+---------+ | Name | bits | visible | @@ -243,7 +251,7 @@ infrastructure: | AT | [35-32] | y | +------------------------------+---------+---------+ - 7) ID_AA64ZFR0_EL1 - SVE feature ID register 0 + 8) ID_AA64ZFR0_EL1 - SVE feature ID register 0 +------------------------------+---------+---------+ | Name | bits | visible | @@ -267,6 +275,23 @@ infrastructure: | SVEVer | [3-0] | y | +------------------------------+---------+---------+ + 8) ID_AA64MMFR1_EL1 - Memory model feature register 1 + + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | AFP | [47-44] | y | + +------------------------------+---------+---------+ + + 9) ID_AA64ISAR2_EL1 - Instruction set attribute register 2 + + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | RPRES | [7-4] | y | + +------------------------------+---------+---------+ + + Appendix I: Example ------------------- diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index bbd9cf54db6c70f173b0e658dffadc8d46e6fd3a..fcb01338b3129bdeb2891fa09b7eea3d9219075e 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -245,6 +245,23 @@ HWCAP2_MTE Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described by Documentation/arm64/memory-tagging-extension.rst. +HWCAP2_ECV + + Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001. + +HWCAP2_AFP + + Functionality implied by ID_AA64MFR1_EL1.AFP == 0b0001. + +HWCAP2_RPRES + + Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001. + +HWCAP2_MTE3 + + Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described + by Documentation/arm64/memory-tagging-extension.rst. + 4. Unused AT_HWCAP bits ----------------------- diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst index 7b99c8f428eb6e623cdd41734cea9111c88980f7..7e812a51e5065a385dcffefa21f4fe7937174c7b 100644 --- a/Documentation/arm64/memory-tagging-extension.rst +++ b/Documentation/arm64/memory-tagging-extension.rst @@ -76,6 +76,9 @@ configurable behaviours: with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting address is unknown). +- *Asymmetric* - Reads are handled as for synchronous mode while writes + are handled as for asynchronous mode. + The user can select the above modes, per thread, using the ``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags`` contains any number of the following values in the ``PR_MTE_TCF_MASK`` @@ -139,18 +142,25 @@ tag checking mode as the CPU's preferred tag checking mode. The preferred tag checking mode for each CPU is controlled by ``/sys/devices/system/cpu/cpu/mte_tcf_preferred``, to which a -privileged user may write the value ``async`` or ``sync``. The default -preferred mode for each CPU is ``async``. +privileged user may write the value ``async``, ``sync`` or ``asymm``. The +default preferred mode for each CPU is ``async``. To allow a program to potentially run in the CPU's preferred tag checking mode, the user program may set multiple tag check fault mode bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL, -flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking -mode is in the task's set of provided tag checking modes (this will -always be the case at present because the kernel only supports two -tag checking modes, but future kernels may support more modes), that -mode will be selected. Otherwise, one of the modes in the task's mode -set will be selected in a currently unspecified manner. +flags, 0, 0, 0)`` system call. If both synchronous and asynchronous +modes are requested then asymmetric mode may also be selected by the +kernel. If the CPU's preferred tag checking mode is in the task's set +of provided tag checking modes, that mode will be selected. Otherwise, +one of the modes in the task's mode will be selected by the kernel +from the task's mode set using the preference order: + + 1. Asynchronous + 2. Asymmetric + 3. Synchronous + +Note that there is no way for userspace to request multiple modes and +also disable asymmetric mode. Initial process state --------------------- diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 71951024729294a70f55d1fa5153390c970c7b64..5a30b1857420d9d821526c6cf25899a7ef0673a3 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -92,12 +92,16 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/Documentation/block/inline-encryption.rst b/Documentation/block/inline-encryption.rst index 7f9b40d6b416b11c23ee0e2b806a2b12fadc8098..2e2a2c62709622248adbac2fdeb688dec2d62f2e 100644 --- a/Documentation/block/inline-encryption.rst +++ b/Documentation/block/inline-encryption.rst @@ -1,5 +1,7 @@ .. SPDX-License-Identifier: GPL-2.0 +.. _inline_encryption: + ================= Inline Encryption ================= diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst index 075ef8e17b64ca5f922cd7f63ec35cfffb2bb927..7614a1fc30fac4cccd511d194e661395fefa63ce 100644 --- a/Documentation/dev-tools/kasan.rst +++ b/Documentation/dev-tools/kasan.rst @@ -11,46 +11,56 @@ designed to find out-of-bound and use-after-free bugs. KASAN has three modes: 2. software tag-based KASAN (similar to userspace HWASan), 3. hardware tag-based KASAN (based on hardware memory tagging). -Software KASAN modes (1 and 2) use compile-time instrumentation to insert -validity checks before every memory access, and therefore require a compiler +Generic KASAN is mainly used for debugging due to a large memory overhead. +Software tag-based KASAN can be used for dogfood testing as it has a lower +memory overhead that allows using it with real workloads. Hardware tag-based +KASAN comes with low memory and performance overheads and, therefore, can be +used in production. Either as an in-field memory bug detector or as a security +mitigation. + +Software KASAN modes (#1 and #2) use compile-time instrumentation to insert +validity checks before every memory access and, therefore, require a compiler version that supports that. -Generic KASAN is supported in both GCC and Clang. With GCC it requires version +Generic KASAN is supported in GCC and Clang. With GCC, it requires version 8.3.0 or later. Any supported Clang version is compatible, but detection of out-of-bounds accesses for global variables is only supported since Clang 11. -Tag-based KASAN is only supported in Clang. +Software tag-based KASAN mode is only supported in Clang. -Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and +The hardware KASAN mode (#3) relies on hardware to perform the checks but +still requires a compiler version that supports memory tagging instructions. +This mode is supported in GCC 10+ and Clang 12+. + +Both software KASAN modes work with SLUB and SLAB memory allocators, +while the hardware tag-based KASAN currently only supports SLUB. + +Currently, generic KASAN is supported for the x86_64, arm, arm64, xtensa, s390, and riscv architectures, and tag-based KASAN modes are supported only for arm64. Usage ----- -To enable KASAN configure kernel with:: - - CONFIG_KASAN = y - -and choose between CONFIG_KASAN_GENERIC (to enable generic KASAN), -CONFIG_KASAN_SW_TAGS (to enable software tag-based KASAN), and -CONFIG_KASAN_HW_TAGS (to enable hardware tag-based KASAN). +To enable KASAN, configure the kernel with:: -For software modes, you also need to choose between CONFIG_KASAN_OUTLINE and -CONFIG_KASAN_INLINE. Outline and inline are compiler instrumentation types. -The former produces smaller binary while the latter is 1.1 - 2 times faster. + CONFIG_KASAN=y -Both software KASAN modes work with both SLUB and SLAB memory allocators, -while the hardware tag-based KASAN currently only support SLUB. +and choose between ``CONFIG_KASAN_GENERIC`` (to enable generic KASAN), +``CONFIG_KASAN_SW_TAGS`` (to enable software tag-based KASAN), and +``CONFIG_KASAN_HW_TAGS`` (to enable hardware tag-based KASAN). -For better error reports that include stack traces, enable CONFIG_STACKTRACE. +For software modes, also choose between ``CONFIG_KASAN_OUTLINE`` and +``CONFIG_KASAN_INLINE``. Outline and inline are compiler instrumentation types. +The former produces a smaller binary while the latter is 1.1-2 times faster. -To augment reports with last allocation and freeing stack of the physical page, -it is recommended to enable also CONFIG_PAGE_OWNER and boot with page_owner=on. +To include alloc and free stack traces of affected slab objects into reports, +enable ``CONFIG_STACKTRACE``. To include alloc and free stack traces of affected +physical pages, enable ``CONFIG_PAGE_OWNER`` and boot with ``page_owner=on``. Error reports ~~~~~~~~~~~~~ -A typical out-of-bounds access generic KASAN report looks like this:: +A typical KASAN report looks like this:: ================================================================== BUG: KASAN: slab-out-of-bounds in kmalloc_oob_right+0xa8/0xbc [test_kasan] @@ -123,90 +133,96 @@ A typical out-of-bounds access generic KASAN report looks like this:: ffff8801f44ec400: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ================================================================== -The header of the report provides a short summary of what kind of bug happened -and what kind of access caused it. It's followed by a stack trace of the bad -access, a stack trace of where the accessed memory was allocated (in case bad -access happens on a slab object), and a stack trace of where the object was -freed (in case of a use-after-free bug report). Next comes a description of -the accessed slab object and information about the accessed memory page. +The report header summarizes what kind of bug happened and what kind of access +caused it. It is followed by a stack trace of the bad access, a stack trace of +where the accessed memory was allocated (in case a slab object was accessed), +and a stack trace of where the object was freed (in case of a use-after-free +bug report). Next comes a description of the accessed slab object and the +information about the accessed memory page. -In the last section the report shows memory state around the accessed address. -Internally KASAN tracks memory state separately for each memory granule, which +In the end, the report shows the memory state around the accessed address. +Internally, KASAN tracks memory state separately for each memory granule, which is either 8 or 16 aligned bytes depending on KASAN mode. Each number in the memory state section of the report shows the state of one of the memory granules that surround the accessed address. -For generic KASAN the size of each memory granule is 8. The state of each +For generic KASAN, the size of each memory granule is 8. The state of each granule is encoded in one shadow byte. Those 8 bytes can be accessible, -partially accessible, freed or be a part of a redzone. KASAN uses the following -encoding for each shadow byte: 0 means that all 8 bytes of the corresponding +partially accessible, freed, or be a part of a redzone. KASAN uses the following +encoding for each shadow byte: 00 means that all 8 bytes of the corresponding memory region are accessible; number N (1 <= N <= 7) means that the first N bytes are accessible, and other (8 - N) bytes are not; any negative value indicates that the entire 8-byte word is inaccessible. KASAN uses different negative values to distinguish between different kinds of inaccessible memory like redzones or freed memory (see mm/kasan/kasan.h). -In the report above the arrows point to the shadow byte 03, which means that -the accessed address is partially accessible. For tag-based KASAN modes this -last report section shows the memory tags around the accessed address -(see the `Implementation details`_ section). +In the report above, the arrow points to the shadow byte ``03``, which means +that the accessed address is partially accessible. + +For tag-based KASAN modes, this last report section shows the memory tags around +the accessed address (see the `Implementation details`_ section). + +Note that KASAN bug titles (like ``slab-out-of-bounds`` or ``use-after-free``) +are best-effort: KASAN prints the most probable bug type based on the limited +information it has. The actual type of the bug might be different. + +Generic KASAN also reports up to two auxiliary call stack traces. These stack +traces point to places in code that interacted with the object but that are not +directly present in the bad access stack trace. Currently, this includes +call_rcu() and workqueue queuing. Boot parameters ~~~~~~~~~~~~~~~ +KASAN is affected by the generic ``panic_on_warn`` command line parameter. +When it is enabled, KASAN panics the kernel after printing a bug report. + +By default, KASAN prints a bug report only for the first invalid memory access. +With ``kasan_multi_shot``, KASAN prints a report on every invalid access. This +effectively disables ``panic_on_warn`` for KASAN reports. + +Alternatively, independent of ``panic_on_warn`` the ``kasan.fault=`` boot +parameter can be used to control panic and reporting behaviour: + +- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN + report or also panic the kernel (default: ``report``). The panic happens even + if ``kasan_multi_shot`` is enabled. + Hardware tag-based KASAN mode (see the section about various modes below) is intended for use in production as a security mitigation. Therefore, it supports -boot parameters that allow to disable KASAN competely or otherwise control -particular KASAN features. +additional boot parameters that allow disabling KASAN or controlling features: - ``kasan=off`` or ``=on`` controls whether KASAN is enabled (default: ``on``). -- ``kasan.mode=sync`` or ``=async`` controls whether KASAN is configured in - synchronous or asynchronous mode of execution (default: ``sync``). +- ``kasan.mode=sync``, ``=async`` or ``=asymm`` controls whether KASAN + is configured in synchronous, asynchronous or asymmetric mode of + execution (default: ``sync``). Synchronous mode: a bad access is detected immediately when a tag check fault occurs. Asynchronous mode: a bad access detection is delayed. When a tag check fault occurs, the information is stored in hardware (in the TFSR_EL1 register for arm64). The kernel periodically checks the hardware and only reports tag faults during these checks. + Asymmetric mode: a bad access is detected synchronously on reads and + asynchronously on writes. + +- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc + allocations (default: ``on``). - ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack traces collection (default: ``on``). -- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN - report or also panic the kernel (default: ``report``). Note, that tag - checking gets disabled after the first reported bug. - -For developers -~~~~~~~~~~~~~~ - -Software KASAN modes use compiler instrumentation to insert validity checks. -Such instrumentation might be incompatible with some part of the kernel, and -therefore needs to be disabled. To disable instrumentation for specific files -or directories, add a line similar to the following to the respective kernel -Makefile: - -- For a single file (e.g. main.o):: - - KASAN_SANITIZE_main.o := n - -- For all files in one directory:: - - KASAN_SANITIZE := n - - Implementation details ---------------------- Generic KASAN ~~~~~~~~~~~~~ -From a high level perspective, KASAN's approach to memory error detection is -similar to that of kmemcheck: use shadow memory to record whether each byte of -memory is safe to access, and use compile-time instrumentation to insert checks -of shadow memory on each memory access. +Software KASAN modes use shadow memory to record whether each byte of memory is +safe to access and use compile-time instrumentation to insert shadow memory +checks before each memory access. -Generic KASAN dedicates 1/8th of kernel memory to its shadow memory (e.g. 16TB +Generic KASAN dedicates 1/8th of kernel memory to its shadow memory (16TB to cover 128TB on x86_64) and uses direct mapping with a scale and offset to translate a memory address to its corresponding shadow address. @@ -215,113 +231,107 @@ address:: static inline void *kasan_mem_to_shadow(const void *addr) { - return ((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET; } where ``KASAN_SHADOW_SCALE_SHIFT = 3``. Compile-time instrumentation is used to insert memory access checks. Compiler -inserts function calls (__asan_load*(addr), __asan_store*(addr)) before each -memory access of size 1, 2, 4, 8 or 16. These functions check whether memory -access is valid or not by checking corresponding shadow memory. +inserts function calls (``__asan_load*(addr)``, ``__asan_store*(addr)``) before +each memory access of size 1, 2, 4, 8, or 16. These functions check whether +memory accesses are valid or not by checking corresponding shadow memory. -GCC 5.0 has possibility to perform inline instrumentation. Instead of making -function calls GCC directly inserts the code to check the shadow memory. -This option significantly enlarges kernel but it gives x1.1-x2 performance -boost over outline instrumented kernel. - -Generic KASAN also reports the last 2 call stacks to creation of work that -potentially has access to an object. Call stacks for the following are shown: -call_rcu() and workqueue queuing. +With inline instrumentation, instead of making function calls, the compiler +directly inserts the code to check shadow memory. This option significantly +enlarges the kernel, but it gives an x1.1-x2 performance boost over the +outline-instrumented kernel. -Generic KASAN is the only mode that delays the reuse of freed object via +Generic KASAN is the only mode that delays the reuse of freed objects via quarantine (see mm/kasan/quarantine.c for implementation). Software tag-based KASAN ~~~~~~~~~~~~~~~~~~~~~~~~ -Software tag-based KASAN requires software memory tagging support in the form -of HWASan-like compiler instrumentation (see HWASan documentation for details). - -Software tag-based KASAN is currently only implemented for arm64 architecture. +Software tag-based KASAN uses a software memory tagging approach to checking +access validity. It is currently only implemented for the arm64 architecture. Software tag-based KASAN uses the Top Byte Ignore (TBI) feature of arm64 CPUs -to store a pointer tag in the top byte of kernel pointers. Like generic KASAN -it uses shadow memory to store memory tags associated with each 16-byte memory -cell (therefore it dedicates 1/16th of the kernel memory for shadow memory). +to store a pointer tag in the top byte of kernel pointers. It uses shadow memory +to store memory tags associated with each 16-byte memory cell (therefore, it +dedicates 1/16th of the kernel memory for shadow memory). -On each memory allocation software tag-based KASAN generates a random tag, tags -the allocated memory with this tag, and embeds this tag into the returned +On each memory allocation, software tag-based KASAN generates a random tag, tags +the allocated memory with this tag, and embeds the same tag into the returned pointer. Software tag-based KASAN uses compile-time instrumentation to insert checks -before each memory access. These checks make sure that tag of the memory that -is being accessed is equal to tag of the pointer that is used to access this -memory. In case of a tag mismatch software tag-based KASAN prints a bug report. +before each memory access. These checks make sure that the tag of the memory +that is being accessed is equal to the tag of the pointer that is used to access +this memory. In case of a tag mismatch, software tag-based KASAN prints a bug +report. -Software tag-based KASAN also has two instrumentation modes (outline, that -emits callbacks to check memory accesses; and inline, that performs the shadow +Software tag-based KASAN also has two instrumentation modes (outline, which +emits callbacks to check memory accesses; and inline, which performs the shadow memory checks inline). With outline instrumentation mode, a bug report is -simply printed from the function that performs the access check. With inline -instrumentation a brk instruction is emitted by the compiler, and a dedicated -brk handler is used to print bug reports. +printed from the function that performs the access check. With inline +instrumentation, a ``brk`` instruction is emitted by the compiler, and a +dedicated ``brk`` handler is used to print bug reports. Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through -pointers with 0xFF pointer tag aren't checked). The value 0xFE is currently +pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently reserved to tag freed memory regions. -Software tag-based KASAN currently only supports tagging of -kmem_cache_alloc/kmalloc and page_alloc memory. +Software tag-based KASAN currently only supports tagging of slab, page_alloc, +and vmalloc memory. Hardware tag-based KASAN ~~~~~~~~~~~~~~~~~~~~~~~~ -Hardware tag-based KASAN is similar to the software mode in concept, but uses +Hardware tag-based KASAN is similar to the software mode in concept but uses hardware memory tagging support instead of compiler instrumentation and shadow memory. Hardware tag-based KASAN is currently only implemented for arm64 architecture and based on both arm64 Memory Tagging Extension (MTE) introduced in ARMv8.5 -Instruction Set Architecture, and Top Byte Ignore (TBI). +Instruction Set Architecture and Top Byte Ignore (TBI). Special arm64 instructions are used to assign memory tags for each allocation. Same tags are assigned to pointers to those allocations. On every memory -access, hardware makes sure that tag of the memory that is being accessed is -equal to tag of the pointer that is used to access this memory. In case of a -tag mismatch a fault is generated and a report is printed. +access, hardware makes sure that the tag of the memory that is being accessed is +equal to the tag of the pointer that is used to access this memory. In case of a +tag mismatch, a fault is generated, and a report is printed. Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through -pointers with 0xFF pointer tag aren't checked). The value 0xFE is currently +pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently reserved to tag freed memory regions. -Hardware tag-based KASAN currently only supports tagging of -kmem_cache_alloc/kmalloc and page_alloc memory. +Hardware tag-based KASAN currently only supports tagging of slab, page_alloc, +and VM_ALLOC-based vmalloc memory. -If the hardware doesn't support MTE (pre ARMv8.5), hardware tag-based KASAN -won't be enabled. In this case all boot parameters are ignored. +If the hardware does not support MTE (pre ARMv8.5), hardware tag-based KASAN +will not be enabled. In this case, all KASAN boot parameters are ignored. -Note, that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being -enabled. Even when kasan.mode=off is provided, or when the hardware doesn't +Note that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being +enabled. Even when ``kasan.mode=off`` is provided or when the hardware does not support MTE (but supports TBI). -Hardware tag-based KASAN only reports the first found bug. After that MTE tag +Hardware tag-based KASAN only reports the first found bug. After that, MTE tag checking gets disabled. -What memory accesses are sanitised by KASAN? --------------------------------------------- +Shadow memory +------------- -The kernel maps memory in a number of different parts of the address -space. This poses something of a problem for KASAN, which requires -that all addresses accessed by instrumented code have a valid shadow -region. +The contents of this section are only applicable to software KASAN modes. -The range of kernel virtual addresses is large: there is not enough -real memory to support a real shadow region for every address that -could be accessed by the kernel. +The kernel maps memory in several different parts of the address space. +The range of kernel virtual addresses is large: there is not enough real +memory to support a real shadow region for every address that could be +accessed by the kernel. Therefore, KASAN only maps real shadow for certain +parts of the address space. -By default -~~~~~~~~~~ +Default behaviour +~~~~~~~~~~~~~~~~~ By default, architectures only map real memory over the shadow region for the linear mapping (and potentially other small areas). For all @@ -330,10 +340,9 @@ page is mapped over the shadow area. This read-only shadow page declares all memory accesses as permitted. This presents a problem for modules: they do not live in the linear -mapping, but in a dedicated module space. By hooking in to the module -allocator, KASAN can temporarily map real shadow memory to cover -them. This allows detection of invalid accesses to module globals, for -example. +mapping but in a dedicated module space. By hooking into the module +allocator, KASAN temporarily maps real shadow memory to cover them. +This allows detection of invalid accesses to module globals, for example. This also creates an incompatibility with ``VMAP_STACK``: if the stack lives in vmalloc space, it will be shadowed by the read-only page, and @@ -344,9 +353,10 @@ CONFIG_KASAN_VMALLOC ~~~~~~~~~~~~~~~~~~~~ With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the -cost of greater memory usage. Currently this is only supported on x86. +cost of greater memory usage. Currently, this is supported on x86, +arm64, riscv, s390, and powerpc. -This works by hooking into vmalloc and vmap, and dynamically +This works by hooking into vmalloc and vmap and dynamically allocating real shadow memory to back the mappings. Most mappings in vmalloc space are small, requiring less than a full @@ -365,28 +375,76 @@ memory. To avoid the difficulties around swapping mappings around, KASAN expects that the part of the shadow region that covers the vmalloc space will -not be covered by the early shadow page, but will be left -unmapped. This will require changes in arch-specific code. +not be covered by the early shadow page but will be left unmapped. +This will require changes in arch-specific code. -This allows ``VMAP_STACK`` support on x86, and can simplify support of +This allows ``VMAP_STACK`` support on x86 and can simplify support of architectures that do not have a fixed module region. -CONFIG_KASAN_KUNIT_TEST and CONFIG_KASAN_MODULE_TEST ----------------------------------------------------- +For developers +-------------- + +Ignoring accesses +~~~~~~~~~~~~~~~~~ + +Software KASAN modes use compiler instrumentation to insert validity checks. +Such instrumentation might be incompatible with some parts of the kernel, and +therefore needs to be disabled. + +Other parts of the kernel might access metadata for allocated objects. +Normally, KASAN detects and reports such accesses, but in some cases (e.g., +in memory allocators), these accesses are valid. + +For software KASAN modes, to disable instrumentation for a specific file or +directory, add a ``KASAN_SANITIZE`` annotation to the respective kernel +Makefile: + +- For a single file (e.g., main.o):: -KASAN tests consist of two parts: + KASAN_SANITIZE_main.o := n + +- For all files in one directory:: + + KASAN_SANITIZE := n + +For software KASAN modes, to disable instrumentation on a per-function basis, +use the KASAN-specific ``__no_sanitize_address`` function attribute or the +generic ``noinstr`` one. + +Note that disabling compiler instrumentation (either on a per-file or a +per-function basis) makes KASAN ignore the accesses that happen directly in +that code for software KASAN modes. It does not help when the accesses happen +indirectly (through calls to instrumented functions) or with the hardware +tag-based mode that does not use compiler instrumentation. + +For software KASAN modes, to disable KASAN reports in a part of the kernel code +for the current task, annotate this part of the code with a +``kasan_disable_current()``/``kasan_enable_current()`` section. This also +disables the reports for indirect accesses that happen through function calls. + +For tag-based KASAN modes (include the hardware one), to disable access +checking, use ``kasan_reset_tag()`` or ``page_kasan_tag_reset()``. Note that +temporarily disabling access checking via ``page_kasan_tag_reset()`` requires +saving and restoring the per-page KASAN tag via +``page_kasan_tag``/``page_kasan_tag_set``. + +Tests +~~~~~ + +There are KASAN tests that allow verifying that KASAN works and can detect +certain types of memory corruptions. The tests consist of two parts: 1. Tests that are integrated with the KUnit Test Framework. Enabled with ``CONFIG_KASAN_KUNIT_TEST``. These tests can be run and partially verified -automatically in a few different ways, see the instructions below. +automatically in a few different ways; see the instructions below. 2. Tests that are currently incompatible with KUnit. Enabled with ``CONFIG_KASAN_MODULE_TEST`` and can only be run as a module. These tests can -only be verified manually, by loading the kernel module and inspecting the +only be verified manually by loading the kernel module and inspecting the kernel log for KASAN reports. -Each KUnit-compatible KASAN test prints a KASAN report if an error is detected. -Then the test prints its number and status. +Each KUnit-compatible KASAN test prints one of multiple KASAN reports if an +error is detected. Then the test prints its number and status. When a test passes:: @@ -400,11 +458,10 @@ When a test fails due to a failed ``kmalloc``:: When a test fails due to a missing KASAN report:: - # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:629 - Expected kasan_data->report_expected == kasan_data->report_found, but - kasan_data->report_expected == 1 - kasan_data->report_found == 0 - not ok 28 - kmalloc_double_kzfree + # kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:974 + KASAN failure expected in "kfree_sensitive(ptr)", but none occurred + not ok 44 - kmalloc_double_kzfree + At the end the cumulative status of all KASAN tests is printed. On success:: @@ -414,30 +471,24 @@ Or, if one of the tests failed:: not ok 1 - kasan - There are a few ways to run KUnit-compatible KASAN tests. 1. Loadable module -~~~~~~~~~~~~~~~~~~ -With ``CONFIG_KUNIT`` enabled, ``CONFIG_KASAN_KUNIT_TEST`` can be built as -a loadable module and run on any architecture that supports KASAN by loading -the module with insmod or modprobe. The module is called ``test_kasan``. + With ``CONFIG_KUNIT`` enabled, KASAN-KUnit tests can be built as a loadable + module and run by loading ``test_kasan.ko`` with ``insmod`` or ``modprobe``. 2. Built-In -~~~~~~~~~~~ -With ``CONFIG_KUNIT`` built-in, ``CONFIG_KASAN_KUNIT_TEST`` can be built-in -on any architecure that supports KASAN. These and any other KUnit tests enabled -will run and print the results at boot as a late-init call. + With ``CONFIG_KUNIT`` built-in, KASAN-KUnit tests can be built-in as well. + In this case, the tests will run at boot as a late-init call. 3. Using kunit_tool -~~~~~~~~~~~~~~~~~~~ -With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, it's also -possible use ``kunit_tool`` to see the results of these and other KUnit tests -in a more readable way. This will not print the KASAN reports of the tests that -passed. Use `KUnit documentation `_ -for more up-to-date information on ``kunit_tool``. + With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, it is also + possible to use ``kunit_tool`` to see the results of KUnit tests in a more + readable way. This will not print the KASAN reports of the tests that passed. + See `KUnit documentation `_ + for more up-to-date information on ``kunit_tool``. .. _KUnit: https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html diff --git a/Documentation/devicetree/bindings/Makefile b/Documentation/devicetree/bindings/Makefile index f50420099a553e0f40b6e1e47b2a9fb35a5f8c02..68764fe4fd3cb1c0208259ee6c45860b9f54c5f4 100644 --- a/Documentation/devicetree/bindings/Makefile +++ b/Documentation/devicetree/bindings/Makefile @@ -48,7 +48,7 @@ define rule_chkdt $(call cmd,mk_schema) endef -DT_DOCS = $(shell $(find_cmd) | sed -e 's|^$(srctree)/||') +DT_DOCS = $(patsubst $(srctree)/%,%,$(shell $(find_cmd))) override DTC_FLAGS := \ -Wno-avoid_unnecessary_addr_size \ diff --git a/Documentation/devicetree/bindings/arm/omap/omap.txt b/Documentation/devicetree/bindings/arm/omap/omap.txt index e77635c5422c6a03995bc45e02049d6b44e4b190..fa8b31660cadd1e2cfd0c7cc8c19bb971be3808e 100644 --- a/Documentation/devicetree/bindings/arm/omap/omap.txt +++ b/Documentation/devicetree/bindings/arm/omap/omap.txt @@ -119,6 +119,9 @@ Boards (incomplete list of examples): - OMAP3 BeagleBoard : Low cost community board compatible = "ti,omap3-beagle", "ti,omap3430", "ti,omap3" +- OMAP3 BeagleBoard A to B4 : Early BeagleBoard revisions A to B4 with a timer quirk + compatible = "ti,omap3-beagle-ab4", "ti,omap3-beagle", "ti,omap3430", "ti,omap3" + - OMAP3 Tobi with Overo : Commercial expansion board with daughter board compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3430", "ti,omap3" diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml index 0da42ab8fd3a52b2f9fef7b1ff47919a273c3003..8a67bb889f18a851cc0c3d6e471bc25c9bc13f39 100644 --- a/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml +++ b/Documentation/devicetree/bindings/display/amlogic,meson-dw-hdmi.yaml @@ -10,6 +10,9 @@ title: Amlogic specific extensions to the Synopsys Designware HDMI Controller maintainers: - Neil Armstrong +allOf: + - $ref: /schemas/sound/name-prefix.yaml# + description: | The Amlogic Meson Synopsys Designware Integration is composed of - A Synopsys DesignWare HDMI Controller IP @@ -99,6 +102,8 @@ properties: "#sound-dai-cells": const: 0 + sound-name-prefix: true + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml index a8d202c9d004c67f7009e17d8928cbb415e45355..b8cb1b4dae1ff02d836441380bd1be7ae98675e0 100644 --- a/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml +++ b/Documentation/devicetree/bindings/display/amlogic,meson-vpu.yaml @@ -78,6 +78,10 @@ properties: interrupts: maxItems: 1 + amlogic,canvas: + description: should point to a canvas provider node + $ref: /schemas/types.yaml#/definitions/phandle + power-domains: maxItems: 1 description: phandle to the associated power domain @@ -106,6 +110,7 @@ required: - port@1 - "#address-cells" - "#size-cells" + - amlogic,canvas additionalProperties: false @@ -118,6 +123,7 @@ examples: interrupts = <3>; #address-cells = <1>; #size-cells = <0>; + amlogic,canvas = <&canvas>; /* CVBS VDAC output port */ port@0 { diff --git a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml index f8622bd0f61ee62d232af4e5417f30e8fb564d61..f0e0345da498fddc346a7c3a3b3adda6d0807f5d 100644 --- a/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml +++ b/Documentation/devicetree/bindings/display/bridge/ti,sn65dsi86.yaml @@ -18,7 +18,7 @@ properties: const: ti,sn65dsi86 reg: - const: 0x2d + enum: [ 0x2c, 0x2d ] enable-gpios: maxItems: 1 diff --git a/Documentation/devicetree/bindings/iommu/google,s2mpu.yaml b/Documentation/devicetree/bindings/iommu/google,s2mpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..18bc10032ecfd4829128758a114016083f5a31ab --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/google,s2mpu.yaml @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/iommu/google,s2mpu.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: S2MPU device tree bindings + +maintainers: + - David Brazdil + +properties: + reg: + maxItems: 1 + + compatible: + contains: + const: google,s2mpu + + power-domain-id: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + ID of the power domain that the S2MPU belongs to. If not specified, + the device is assumed to be always powered. + +required: + - compatible + - reg + +additionalProperties: true diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt index 0968b40aef1e8147a63087f8f360107285fb900d..e3501bfa22e904fe52b7944a37ab541eaf531fd2 100644 --- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt +++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt @@ -31,7 +31,7 @@ tcan4x5x: tcan4x5x@0 { #address-cells = <1>; #size-cells = <1>; spi-max-frequency = <10000000>; - bosch,mram-cfg = <0x0 0 0 32 0 0 1 1>; + bosch,mram-cfg = <0x0 0 0 16 0 0 1 1>; interrupt-parent = <&gpio1>; interrupts = <14 IRQ_TYPE_LEVEL_LOW>; device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml index 6dd72faebd8961115521ae819ccbba0adccf4fc9..a054accc20bd446c0c57638cd7bc71b265f49158 100644 --- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml @@ -91,6 +91,14 @@ properties: compensate for the board being designed with the lanes swapped. + enet-phy-lane-no-swap: + $ref: /schemas/types.yaml#/definitions/flag + description: + If set, indicates that PHY will disable swap of the + TX/RX lanes. This property allows the PHY to work correcly after + e.g. wrong bootstrap configuration caused by issues in PCB + layout design. + eee-broken-100tx: $ref: /schemas/types.yaml#definitions/flag description: diff --git a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt index 093edda0c8dfcaa4a3b1d7fbb731ba7e84577117..6cd83d920155fa85e8ff859c2f795513aa1ea513 100644 --- a/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt +++ b/Documentation/devicetree/bindings/regulator/samsung,s5m8767.txt @@ -13,6 +13,14 @@ common regulator binding documented in: Required properties of the main device node (the parent!): + - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used + for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines. + + [1] If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional + property is specified, then all the eight voltage values for the + 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified. + +Optional properties of the main device node (the parent!): - s5m8767,pmic-buck2-dvs-voltage: A set of 8 voltage values in micro-volt (uV) units for buck2 when changing voltage using gpio dvs. Refer to [1] below for additional information. @@ -25,26 +33,13 @@ Required properties of the main device node (the parent!): units for buck4 when changing voltage using gpio dvs. Refer to [1] below for additional information. - - s5m8767,pmic-buck-ds-gpios: GPIO specifiers for three host gpio's used - for selecting GPIO DVS lines. It is one-to-one mapped to dvs gpio lines. - - [1] If none of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional - property is specified, the 's5m8767,pmic-buck[2/3/4]-dvs-voltage' - property should specify atleast one voltage level (which would be a - safe operating voltage). - - If either of the 's5m8767,pmic-buck[2/3/4]-uses-gpio-dvs' optional - property is specified, then all the eight voltage values for the - 's5m8767,pmic-buck[2/3/4]-dvs-voltage' should be specified. - -Optional properties of the main device node (the parent!): - s5m8767,pmic-buck2-uses-gpio-dvs: 'buck2' can be controlled by gpio dvs. - s5m8767,pmic-buck3-uses-gpio-dvs: 'buck3' can be controlled by gpio dvs. - s5m8767,pmic-buck4-uses-gpio-dvs: 'buck4' can be controlled by gpio dvs. Additional properties required if either of the optional properties are used: - - s5m8767,pmic-buck234-default-dvs-idx: Default voltage setting selected from + - s5m8767,pmic-buck-default-dvs-idx: Default voltage setting selected from the possible 8 options selectable by the dvs gpios. The value of this property should be between 0 and 7. If not specified or if out of range, the default value of this property is set to 0. diff --git a/Documentation/devicetree/bindings/reserved-memory/google,open-dice.yaml b/Documentation/devicetree/bindings/reserved-memory/google,open-dice.yaml new file mode 100644 index 0000000000000000000000000000000000000000..257a0b51994a5477a9494395a319de0608a7f154 --- /dev/null +++ b/Documentation/devicetree/bindings/reserved-memory/google,open-dice.yaml @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/reserved-memory/google,open-dice.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Open Profile for DICE Device Tree Bindings + +description: | + This binding represents a reserved memory region containing data + generated by the Open Profile for DICE protocol. + + See https://pigweed.googlesource.com/open-dice/ + +maintainers: + - David Brazdil + +allOf: + - $ref: "reserved-memory.yaml" + +properties: + compatible: + const: google,open-dice + + reg: + description: page-aligned region of memory containing DICE data + +required: + - compatible + - reg + - no-map + +unevaluatedProperties: false + +examples: + - | + reserved-memory { + #address-cells = <2>; + #size-cells = <1>; + + dice: dice@12340000 { + compatible = "google,open-dice"; + reg = <0x00 0x12340000 0x2000>; + no-map; + }; + }; diff --git a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml index 164f71598c5956fa16b21284ba1e2c3d3523dca8..1b3954aa71c157069b9ec34a02c9d008a56d552b 100644 --- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml +++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml @@ -199,12 +199,11 @@ patternProperties: contribution: $ref: /schemas/types.yaml#/definitions/uint32 - minimum: 0 - maximum: 100 description: - The percentage contribution of the cooling devices at the - specific trip temperature referenced in this map - to this thermal zone + The cooling contribution to the thermal zone of the referred + cooling device at the referred trip point. The contribution is + a ratio of the sum of all cooling contributions within a + thermal zone. required: - trip diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml index 76cb9586ee00cab46858e1004089ab505c8c9a0c..93cd77a6e92c0132a51d353c3d5a75bdb032159a 100644 --- a/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml +++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.yaml @@ -39,8 +39,8 @@ properties: samsung,syscon-phandle: $ref: /schemas/types.yaml#/definitions/phandle description: - Phandle to the PMU system controller node (in case of Exynos5250 - and Exynos5420). + Phandle to the PMU system controller node (in case of Exynos5250, + Exynos5420 and Exynos7). required: - compatible @@ -58,6 +58,7 @@ allOf: enum: - samsung,exynos5250-wdt - samsung,exynos5420-wdt + - samsung,exynos7-wdt then: required: - samsung,syscon-phandle diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst index ee268d445d38b64b173098630a1db287f7bcd1b0..d2e1d8b58e7dc13b7285ece750917952a180a559 100644 --- a/Documentation/driver-api/dmaengine/dmatest.rst +++ b/Documentation/driver-api/dmaengine/dmatest.rst @@ -143,13 +143,14 @@ Part 5 - Handling channel allocation Allocating Channels ------------------- -Channels are required to be configured prior to starting the test run. -Attempting to run the test without configuring the channels will fail. +Channels do not need to be configured prior to starting a test run. Attempting +to run the test without configuring the channels will result in testing any +channels that are available. Example:: % echo 1 > /sys/module/dmatest/parameters/run - dmatest: Could not start test, no channels configured + dmatest: No channels configured, continue with any Channels are registered using the "channel" parameter. Channels can be requested by their name, once requested, the channel is registered and a pending thread is added to the test list. diff --git a/Documentation/driver-api/firewire.rst b/Documentation/driver-api/firewire.rst index 94a2d7f01d99924e0d338b57e09387ecb230c41a..d3cfa73cbb2b47e624961f3e15aadb6528dfec37 100644 --- a/Documentation/driver-api/firewire.rst +++ b/Documentation/driver-api/firewire.rst @@ -19,7 +19,7 @@ of kernel interfaces is available via exported symbols in `firewire-core` module Firewire char device data structures ==================================== -.. include:: /ABI/stable/firewire-cdev +.. include:: ../ABI/stable/firewire-cdev :literal: .. kernel-doc:: include/uapi/linux/firewire-cdev.h @@ -28,7 +28,7 @@ Firewire char device data structures Firewire device probing and sysfs interfaces ============================================ -.. include:: /ABI/stable/sysfs-bus-firewire +.. include:: ../ABI/stable/sysfs-bus-firewire :literal: .. kernel-doc:: drivers/firewire/core-device.c diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index bf145171c2bf8f89095dcf8d316ad7d14a6a16e2..7119aa213be71c396c599c215e17688688db4d33 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -19,9 +19,10 @@ It is designed as a better filesystem solution for the following scenarios: immutable and bit-for-bit identical to the official golden image for their releases due to security and other considerations and - - hope to save some extra storage space with guaranteed end-to-end performance - by using reduced metadata and transparent file compression, especially - for those embedded devices with limited memory (ex, smartphone); + - hope to minimize extra storage space with guaranteed end-to-end performance + by using compact layout, transparent file compression and direct access, + especially for those embedded devices with limited memory and high-density + hosts with numerous containers; Here is the main features of EROFS: @@ -50,8 +51,10 @@ Here is the main features of EROFS: - Support POSIX.1e ACLs by using xattrs; - - Support transparent file compression as an option: - LZ4 algorithm with 4 KB fixed-sized output compression for high performance. + - Support transparent data compression as an option: + LZ4 algorithm with the fixed-sized output compression for high performance; + + - Multiple device support for multi-layer container images. The following git tree provides the file system user-space tools under development (ex, formatting tool mkfs.erofs): @@ -84,8 +87,20 @@ cache_strategy=%s Select a strategy for cached decompression from now on: It still does in-place I/O decompression for the rest compressed physical clusters. ========== ============================================= +dax={always,never} Use direct access (no page cache). See + Documentation/filesystems/dax.rst. +dax A legacy option which is an alias for ``dax=always``. +device=%s Specify a path to an extra device to be used together. =================== ========================================================= +Sysfs Entries +============= + +Information about mounted erofs file systems can be found in /sys/fs/erofs. +Each mounted filesystem will have a directory in /sys/fs/erofs based on its +device name (i.e., /sys/fs/erofs/sda). +(see also Documentation/ABI/testing/sysfs-fs-erofs) + On-disk details =============== @@ -113,31 +128,31 @@ may not. All metadatas can be now observed in two different spaces (views): :: - |-> aligned with 8B - |-> followed closely - + meta_blkaddr blocks |-> another slot - _____________________________________________________________________ - | ... | inode | xattrs | extents | data inline | ... | inode ... - |________|_______|(optional)|(optional)|__(optional)_|_____|__________ - |-> aligned with the inode slot size - . . - . . - . . - . . - . . - . . - .____________________________________________________|-> aligned with 4B - | xattr_ibody_header | shared xattrs | inline xattrs | - |____________________|_______________|_______________| - |-> 12 bytes <-|->x * 4 bytes<-| . - . . . - . . . - . . . - ._______________________________.______________________. - | id | id | id | id | ... | id | ent | ... | ent| ... | - |____|____|____|____|______|____|_____|_____|____|_____| - |-> aligned with 4B - |-> aligned with 4B + |-> aligned with 8B + |-> followed closely + + meta_blkaddr blocks |-> another slot + _____________________________________________________________________ + | ... | inode | xattrs | extents | data inline | ... | inode ... + |________|_______|(optional)|(optional)|__(optional)_|_____|__________ + |-> aligned with the inode slot size + . . + . . + . . + . . + . . + . . + .____________________________________________________|-> aligned with 4B + | xattr_ibody_header | shared xattrs | inline xattrs | + |____________________|_______________|_______________| + |-> 12 bytes <-|->x * 4 bytes<-| . + . . . + . . . + . . . + ._______________________________.______________________. + | id | id | id | id | ... | id | ent | ... | ent| ... | + |____|____|____|____|______|____|_____|_____|____|_____| + |-> aligned with 4B + |-> aligned with 4B Inode could be 32 or 64 bytes, which can be distinguished from a common field which all inode versions have -- i_format:: @@ -153,13 +168,14 @@ may not. All metadatas can be now observed in two different spaces (views): Xattrs, extents, data inline are followed by the corresponding inode with proper alignment, and they could be optional for different data mappings. - _currently_ total 4 valid data mappings are supported: + _currently_ total 5 data layouts are supported: == ==================================================================== 0 flat file data without data inline (no extent); 1 fixed-sized output data compression (with non-compacted indexes); 2 flat file data with tail packing data inline (no extent); - 3 fixed-sized output data compression (with compacted indexes, v5.3+). + 3 fixed-sized output data compression (with compacted indexes, v5.3+); + 4 chunk-based file (v5.15+). == ==================================================================== The size of the optional xattrs is indicated by i_xattr_count in inode @@ -175,13 +191,13 @@ may not. All metadatas can be now observed in two different spaces (views): Each share xattr can also be directly found by the following formula: xattr offset = xattr_blkaddr * block_size + 4 * xattr_id - :: +:: - |-> aligned by 4 bytes - + xattr_blkaddr blocks |-> aligned with 4 bytes - _________________________________________________________________________ - | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ... - |________|_____________|_____________|_____|______________|_______________ + |-> aligned by 4 bytes + + xattr_blkaddr blocks |-> aligned with 4 bytes + _________________________________________________________________________ + | ... | xattr_entry | xattr data | ... | xattr_entry | xattr data ... + |________|_____________|_____________|_____|______________|_______________ Directories ----------- @@ -193,48 +209,88 @@ algorithm (could refer to the related source code). :: - ___________________________ - / | - / ______________|________________ - / / | nameoff1 | nameoffN-1 - ____________.______________._______________v________________v__________ - | dirent | dirent | ... | dirent | filename | filename | ... | filename | - |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____| - \ ^ - \ | * could have - \ | trailing '\0' - \________________________| nameoff0 - - Directory block + ___________________________ + / | + / ______________|________________ + / / | nameoff1 | nameoffN-1 + ____________.______________._______________v________________v__________ + | dirent | dirent | ... | dirent | filename | filename | ... | filename | + |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____| + \ ^ + \ | * could have + \ | trailing '\0' + \________________________| nameoff0 + Directory block Note that apart from the offset of the first filename, nameoff0 also indicates the total number of directory entries in this block since it is no need to introduce another on-disk field at all. -Compression ------------ -Currently, EROFS supports 4KB fixed-sized output transparent file compression, -as illustrated below:: - - |---- Variant-Length Extent ----|-------- VLE --------|----- VLE ----- - clusterofs clusterofs clusterofs - | | | logical data - _________v_______________________________v_____________________v_______________ - ... | . | | . | | . | ... - ____|____.________|_____________|________.____|_____________|__.__________|____ - |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-| - size size size size size - . . . . - . . . . - . . . . - _______._____________._____________._____________._____________________ - ... | | | | ... physical data - _______|_____________|_____________|_____________|_____________________ - |-> cluster <-|-> cluster <-|-> cluster <-| - size size size - -Currently each on-disk physical cluster can contain 4KB (un)compressed data -at most. For each logical cluster, there is a corresponding on-disk index to -describe its cluster type, physical cluster address, etc. - -See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details. +Chunk-based file +---------------- +In order to support chunk-based data deduplication, a new inode data layout has +been supported since Linux v5.15: Files are split in equal-sized data chunks +with ``extents`` area of the inode metadata indicating how to get the chunk +data: these can be simply as a 4-byte block address array or in the 8-byte +chunk index form (see struct erofs_inode_chunk_index in erofs_fs.h for more +details.) + +By the way, chunk-based files are all uncompressed for now. + +Data compression +---------------- +EROFS implements LZ4 fixed-sized output compression which generates fixed-sized +compressed data blocks from variable-sized input in contrast to other existing +fixed-sized input solutions. Relatively higher compression ratios can be gotten +by using fixed-sized output compression since nowadays popular data compression +algorithms are mostly LZ77-based and such fixed-sized output approach can be +benefited from the historical dictionary (aka. sliding window). + +In details, original (uncompressed) data is turned into several variable-sized +extents and in the meanwhile, compressed into physical clusters (pclusters). +In order to record each variable-sized extent, logical clusters (lclusters) are +introduced as the basic unit of compress indexes to indicate whether a new +extent is generated within the range (HEAD) or not (NONHEAD). Lclusters are now +fixed in block size, as illustrated below:: + + |<- variable-sized extent ->|<- VLE ->| + clusterofs clusterofs clusterofs + | | | + _________v_________________________________v_______________________v________ + ... | . | | . | | . ... + ____|____._________|______________|________.___ _|______________|__.________ + |-> lcluster <-|-> lcluster <-|-> lcluster <-|-> lcluster <-| + (HEAD) (NONHEAD) (HEAD) (NONHEAD) . + . CBLKCNT . . + . . . + . . . + _______._____________________________.______________._________________ + ... | | | | ... + _______|______________|______________|______________|_________________ + |-> big pcluster <-|-> pcluster <-| + +A physical cluster can be seen as a container of physical compressed blocks +which contains compressed data. Previously, only lcluster-sized (4KB) pclusters +were supported. After big pcluster feature is introduced (available since +Linux v5.13), pcluster can be a multiple of lcluster size. + +For each HEAD lcluster, clusterofs is recorded to indicate where a new extent +starts and blkaddr is used to seek the compressed data. For each NONHEAD +lcluster, delta0 and delta1 are available instead of blkaddr to indicate the +distance to its HEAD lcluster and the next HEAD lcluster. A PLAIN lcluster is +also a HEAD lcluster except that its data is uncompressed. See the comments +around "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details. + +If big pcluster is enabled, pcluster size in lclusters needs to be recorded as +well. Let the delta0 of the first NONHEAD lcluster store the compressed block +count with a special flag as a new called CBLKCNT NONHEAD lcluster. It's easy +to understand its delta0 is constantly 1, as illustrated below:: + + __________________________________________________________ + | HEAD | NONHEAD | NONHEAD | ... | NONHEAD | HEAD | HEAD | + |__:___|_(CBLKCNT)_|_________|_____|_________|__:___|____:_| + |<----- a big pcluster (with CBLKCNT) ------>|<-- -->| + a lcluster-sized pcluster (without CBLKCNT) ^ + +If another HEAD follows a HEAD lcluster, there is no room to record CBLKCNT, +but it's easy to know the size of such pcluster is 1 lcluster as well. diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 09de6ebbbdfa2d6c9d28d1db002bc7d3a69a7bc7..5fda320354a6420dd292e0a470e775595a701260 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -197,10 +197,30 @@ fault_type=%d Support configuring fault injection type, should be FAULT_DISCARD 0x000002000 FAULT_WRITE_IO 0x000004000 FAULT_SLAB_ALLOC 0x000008000 + FAULT_DQUOT_INIT 0x000010000 + FAULT_LOCK_OP 0x000020000 =================== =========== mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random writes towards main area. + "fragment:segment" and "fragment:block" are newly added here. + These are developer options for experiments to simulate filesystem + fragmentation/after-GC situation itself. The developers use these + modes to understand filesystem fragmentation/after-GC condition well, + and eventually get some insights to handle them better. + In "fragment:segment", f2fs allocates a new segment in ramdom + position. With this, we can simulate the after-GC condition. + In "fragment:block", we can scatter block allocation with + "max_fragment_chunk" and "max_fragment_hole" sysfs nodes. + We added some randomness to both chunk and hole size to make + it close to realistic IO pattern. So, in this mode, f2fs will allocate + 1.. blocks in a chunk and make a hole in the + length of 1.. by turns. With this, the newly + allocated blocks will be scattered throughout the whole partition. + Note that "fragment:block" implicitly enables "fragment:segment" + option for more randomness. + Please, use these options for your experiments and we strongly + recommend to re-format the filesystem after using these options. io_bits=%u Set the bit size of write IO requests. It should be set with "mode=lfs". usrquota Enable plain user disk quota accounting. diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst index 44b67ebd6e40d6e7bc59afd9961e44d84f03eb41..6ccd5efb25b779d13157d0279a9e16328a0f1476 100644 --- a/Documentation/filesystems/fscrypt.rst +++ b/Documentation/filesystems/fscrypt.rst @@ -77,11 +77,11 @@ Side-channel attacks fscrypt is only resistant to side-channel attacks, such as timing or electromagnetic attacks, to the extent that the underlying Linux -Cryptographic API algorithms are. If a vulnerable algorithm is used, -such as a table-based implementation of AES, it may be possible for an -attacker to mount a side channel attack against the online system. -Side channel attacks may also be mounted against applications -consuming decrypted data. +Cryptographic API algorithms or inline encryption hardware are. If a +vulnerable algorithm is used, such as a table-based implementation of +AES, it may be possible for an attacker to mount a side channel attack +against the online system. Side channel attacks may also be mounted +against applications consuming decrypted data. Unauthorized file access ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -176,11 +176,11 @@ Master Keys Each encrypted directory tree is protected by a *master key*. Master keys can be up to 64 bytes long, and must be at least as long as the -greater of the key length needed by the contents and filenames -encryption modes being used. For example, if AES-256-XTS is used for -contents encryption, the master key must be 64 bytes (512 bits). Note -that the XTS mode is defined to require a key twice as long as that -required by the underlying block cipher. +greater of the security strength of the contents and filenames +encryption modes being used. For example, if any AES-256 mode is +used, the master key must be at least 256 bits, i.e. 32 bytes. A +stricter requirement applies if the key is used by a v1 encryption +policy and AES-256-XTS is used; such keys must be 64 bytes. To "unlock" an encrypted directory tree, userspace must provide the appropriate master key. There can be any number of master keys, each @@ -1047,8 +1047,8 @@ astute users may notice some differences in behavior: may be used to overwrite the source files but isn't guaranteed to be effective on all filesystems and storage devices. -- Direct I/O is not supported on encrypted files. Attempts to use - direct I/O on such files will fall back to buffered I/O. +- Direct I/O is supported on encrypted files only under some + circumstances. For details, see `Direct I/O support`_. - The fallocate operations FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE are not supported on encrypted files and will @@ -1063,11 +1063,6 @@ astute users may notice some differences in behavior: - DAX (Direct Access) is not supported on encrypted files. -- The st_size of an encrypted symlink will not necessarily give the - length of the symlink target as required by POSIX. It will actually - give the length of the ciphertext, which will be slightly longer - than the plaintext due to NUL-padding and an extra 2-byte overhead. - - The maximum length of an encrypted symlink is 2 bytes shorter than the maximum length of an unencrypted symlink. For example, on an EXT4 filesystem with a 4K block size, unencrypted symlinks can be up @@ -1140,6 +1135,71 @@ where applications may later write sensitive data. It is recommended that systems implementing a form of "verified boot" take advantage of this by validating all top-level encryption policies prior to access. +Inline encryption support +========================= + +By default, fscrypt uses the kernel crypto API for all cryptographic +operations (other than HKDF, which fscrypt partially implements +itself). The kernel crypto API supports hardware crypto accelerators, +but only ones that work in the traditional way where all inputs and +outputs (e.g. plaintexts and ciphertexts) are in memory. fscrypt can +take advantage of such hardware, but the traditional acceleration +model isn't particularly efficient and fscrypt hasn't been optimized +for it. + +Instead, many newer systems (especially mobile SoCs) have *inline +encryption hardware* that can encrypt/decrypt data while it is on its +way to/from the storage device. Linux supports inline encryption +through a set of extensions to the block layer called *blk-crypto*. +blk-crypto allows filesystems to attach encryption contexts to bios +(I/O requests) to specify how the data will be encrypted or decrypted +in-line. For more information about blk-crypto, see +:ref:`Documentation/block/inline-encryption.rst `. + +On supported filesystems (currently ext4 and f2fs), fscrypt can use +blk-crypto instead of the kernel crypto API to encrypt/decrypt file +contents. To enable this, set CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y in +the kernel configuration, and specify the "inlinecrypt" mount option +when mounting the filesystem. + +Note that the "inlinecrypt" mount option just specifies to use inline +encryption when possible; it doesn't force its use. fscrypt will +still fall back to using the kernel crypto API on files where the +inline encryption hardware doesn't have the needed crypto capabilities +(e.g. support for the needed encryption algorithm and data unit size) +and where blk-crypto-fallback is unusable. (For blk-crypto-fallback +to be usable, it must be enabled in the kernel configuration with +CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y.) + +Currently fscrypt always uses the filesystem block size (which is +usually 4096 bytes) as the data unit size. Therefore, it can only use +inline encryption hardware that supports that data unit size. + +Inline encryption doesn't affect the ciphertext or other aspects of +the on-disk format, so users may freely switch back and forth between +using "inlinecrypt" and not using "inlinecrypt". + +Direct I/O support +================== + +For direct I/O on an encrypted file to work, the following conditions +must be met (in addition to the conditions for direct I/O on an +unencrypted file): + +* The file must be using inline encryption. Usually this means that + the filesystem must be mounted with ``-o inlinecrypt`` and inline + encryption hardware must be present. However, a software fallback + is also available. For details, see `Inline encryption support`_. + +* The I/O request must be fully aligned to the filesystem block size. + This means that the file position the I/O is targeting, the lengths + of all I/O segments, and the memory addresses of all I/O buffers + must be multiples of this value. Note that the filesystem block + size may be greater than the logical block size of the block device. + +If either of the above conditions is not met, then direct I/O on the +encrypted file will fall back to buffered I/O. + Implementation details ====================== @@ -1189,6 +1249,13 @@ keys`_ and `DIRECT_KEY policies`_. Data path changes ----------------- +When inline encryption is used, filesystems just need to associate +encryption contexts with bios to specify how the block layer or the +inline encryption hardware will encrypt/decrypt the file contents. + +When inline encryption isn't used, filesystems must encrypt/decrypt +the file contents themselves, as described below: + For the read path (->readpage()) of regular files, filesystems can read the ciphertext into the page cache and decrypt it in-place. The page lock must be held until decryption has finished, to prevent the @@ -1202,18 +1269,6 @@ buffer. Some filesystems, such as UBIFS, already use temporary buffers regardless of encryption. Other filesystems, such as ext4 and F2FS, have to allocate bounce pages specially for encryption. -Fscrypt is also able to use inline encryption hardware instead of the -kernel crypto API for en/decryption of file contents. When possible, -and if directed to do so (by specifying the 'inlinecrypt' mount option -for an ext4/F2FS filesystem), it adds encryption contexts to bios and -uses blk-crypto to perform the en/decryption instead of making use of -the above read/write path changes. Of course, even if directed to -make use of inline encryption, fscrypt will only be able to do so if -either hardware inline encryption support is available for the -selected encryption algorithm or CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK -is selected. If neither is the case, fscrypt will fall back to using -the above mentioned read/write path changes for en/decryption. - Filename hashing and encoding ----------------------------- @@ -1235,12 +1290,12 @@ the user-supplied name to get the ciphertext. Lookups without the key are more complicated. The raw ciphertext may contain the ``\0`` and ``/`` characters, which are illegal in -filenames. Therefore, readdir() must base64-encode the ciphertext for -presentation. For most filenames, this works fine; on ->lookup(), the -filesystem just base64-decodes the user-supplied name to get back to -the raw ciphertext. +filenames. Therefore, readdir() must base64url-encode the ciphertext +for presentation. For most filenames, this works fine; on ->lookup(), +the filesystem just base64url-decodes the user-supplied name to get +back to the raw ciphertext. -However, for very long filenames, base64 encoding would cause the +However, for very long filenames, base64url encoding would cause the filename length to exceed NAME_MAX. To prevent this, readdir() actually presents long filenames in an abbreviated form which encodes a strong "hash" of the ciphertext filename, along with the optional diff --git a/Documentation/filesystems/incfs.rst b/Documentation/filesystems/incfs.rst index 03ae39ec72dc924f0601a24ba25a20bf2e515ea0..19db303b832722aa0b8a36e28080e0b337f58c3f 100644 --- a/Documentation/filesystems/incfs.rst +++ b/Documentation/filesystems/incfs.rst @@ -7,7 +7,7 @@ incfs: A stacked incremental filesystem for Linux /sys/fs interface ================= -Please update Documentation/ABI/testing/sys-fs-incfs if you update this +Please update Documentation/ABI/testing/sysfs-fs-incfs if you update this section. incfs creates the following files in /sys/fs. diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 1b506a51d1482191ec7bad882989ea65e19d91c1..e1bef77aaf9945d5487cbda16426e47e7adbb38b 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -424,14 +424,14 @@ with the memory region, as the case would be with BSS (uninitialized data). The "pathname" shows the name associated file for this mapping. If the mapping is not associated with a file: - ======= ==================================== + ============= ==================================== [heap] the heap of the program [stack] the stack of the main process [vdso] the "virtual dynamic shared object", the kernel system call handler [anon:] an anonymous mapping that has been named by userspace - ======= ==================================== + ============= ==================================== or if empty, the mapping is anonymous. @@ -464,7 +464,6 @@ Memory Area, or VMA) there is a series of lines such as the following:: Locked: 0 kB THPeligible: 0 VmFlags: rd ex mr mw me dw - Name: name from userspace The first of these lines shows the same information as is displayed for the mapping in /proc/PID/maps. Following lines show the size of the mapping @@ -558,9 +557,6 @@ be vanished or the reverse -- new added. Interpretation of their meaning might change in future as well. So each consumer of these flags has to follow each specific kernel version for the exact semantic. -The "Name" field will only be present on a mapping that has been named by -userspace, and will show the name passed in by userspace. - This file is only present if the CONFIG_MMU kernel configuration option is enabled. diff --git a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst index 9b17dc77d18c5c0c916007cf8cc2f160ba56969c..da0e46496fc4db82086fc05709f9bd04220cd268 100644 --- a/Documentation/firmware-guide/acpi/dsd/data-node-references.rst +++ b/Documentation/firmware-guide/acpi/dsd/data-node-references.rst @@ -5,7 +5,7 @@ Referencing hierarchical data nodes =================================== -:Copyright: |copy| 2018 Intel Corporation +:Copyright: |copy| 2018, 2021 Intel Corporation :Author: Sakari Ailus ACPI in general allows referring to device objects in the tree only. @@ -52,12 +52,14 @@ the ANOD object which is also the final target node of the reference. Name (NOD0, Package() { ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { + Package () { "reg", 0 }, Package () { "random-property", 3 }, } }) Name (NOD1, Package() { ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"), Package () { + Package () { "reg", 1 }, Package () { "anothernode", "ANOD" }, } }) @@ -74,7 +76,11 @@ the ANOD object which is also the final target node of the reference. Name (_DSD, Package () { ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), Package () { - Package () { "reference", ^DEV0, "node@1", "anothernode" }, + Package () { + "reference", Package () { + ^DEV0, "node@1", "anothernode" + } + }, } }) } diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 7272a4bd74dd05a89ba7668e69980c906596665b..28841609aa4f8d589d3952d7381d3ceaefa1f7fa 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -273,24 +273,6 @@ Contact: Daniel Vetter, Noralf Tronnes Level: Advanced -Garbage collect fbdev scrolling acceleration --------------------------------------------- - -Scroll acceleration is disabled in fbcon by hard-wiring p->scrollmode = -SCROLL_REDRAW. There's a ton of code this will allow us to remove: -- lots of code in fbcon.c -- a bunch of the hooks in fbcon_ops, maybe the remaining hooks could be called - directly instead of the function table (with a switch on p->rotate) -- fb_copyarea is unused after this, and can be deleted from all drivers - -Note that not all acceleration code can be deleted, since clearing and cursor -support is still accelerated, which might be good candidates for further -deletion projects. - -Contact: Daniel Vetter - -Level: Intermediate - idr_init_base() --------------- diff --git a/Documentation/hwmon/lm90.rst b/Documentation/hwmon/lm90.rst index 3da8c6e06a365d0f440116530d542d8bddbe0faf..05391fb4042d96d1a00fa386cf638d569204772c 100644 --- a/Documentation/hwmon/lm90.rst +++ b/Documentation/hwmon/lm90.rst @@ -265,6 +265,16 @@ Supported chips: https://www.ti.com/litv/pdf/sbos686 + * Texas Instruments TMP461 + + Prefix: 'tmp461' + + Addresses scanned: I2C 0x48 through 0x4F + + Datasheet: Publicly available at TI website + + https://www.ti.com/lit/gpn/tmp461 + Author: Jean Delvare diff --git a/Documentation/kbuild/gcc-plugins.rst b/Documentation/kbuild/gcc-plugins.rst index 4b1c10f88e304ee8e03bb0855e7491c4c123538c..3349966f213dcff110e855fcc42efbb638a9cae6 100644 --- a/Documentation/kbuild/gcc-plugins.rst +++ b/Documentation/kbuild/gcc-plugins.rst @@ -11,16 +11,13 @@ compiler [1]_. They are useful for runtime instrumentation and static analysis. We can analyse, change and add further code during compilation via callbacks [2]_, GIMPLE [3]_, IPA [4]_ and RTL passes [5]_. -The GCC plugin infrastructure of the kernel supports all gcc versions from -4.5 to 6.0, building out-of-tree modules, cross-compilation and building in a -separate directory. -Plugin source files have to be compilable by both a C and a C++ compiler as well -because gcc versions 4.5 and 4.6 are compiled by a C compiler, -gcc-4.7 can be compiled by a C or a C++ compiler, -and versions 4.8+ can only be compiled by a C++ compiler. +The GCC plugin infrastructure of the kernel supports building out-of-tree +modules, cross-compilation and building in a separate directory. +Plugin source files have to be compilable by a C++ compiler. -Currently the GCC plugin infrastructure supports only the x86, arm, arm64 and -powerpc architectures. +Currently the GCC plugin infrastructure supports only some architectures. +Grep "select HAVE_GCC_PLUGINS" to find out which architectures support +GCC plugins. This infrastructure was ported from grsecurity [6]_ and PaX [7]_. @@ -47,20 +44,13 @@ Files This is a compatibility header for GCC plugins. It should be always included instead of individual gcc headers. -**$(src)/scripts/gcc-plugin.sh** - - This script checks the availability of the included headers in - gcc-common.h and chooses the proper host compiler to build the plugins - (gcc-4.7 can be built by either gcc or g++). - **$(src)/scripts/gcc-plugins/gcc-generate-gimple-pass.h, $(src)/scripts/gcc-plugins/gcc-generate-ipa-pass.h, $(src)/scripts/gcc-plugins/gcc-generate-simple_ipa-pass.h, $(src)/scripts/gcc-plugins/gcc-generate-rtl-pass.h** These headers automatically generate the registration structures for - GIMPLE, SIMPLE_IPA, IPA and RTL passes. They support all gcc versions - from 4.5 to 6.0. + GIMPLE, SIMPLE_IPA, IPA and RTL passes. They should be preferred to creating the structures by hand. @@ -68,21 +58,25 @@ Usage ===== You must install the gcc plugin headers for your gcc version, -e.g., on Ubuntu for gcc-4.9:: +e.g., on Ubuntu for gcc-10:: - apt-get install gcc-4.9-plugin-dev + apt-get install gcc-10-plugin-dev Or on Fedora:: dnf install gcc-plugin-devel -Enable a GCC plugin based feature in the kernel config:: +Enable the GCC plugin infrastructure and some plugin(s) you want to use +in the kernel config:: - CONFIG_GCC_PLUGIN_CYC_COMPLEXITY = y + CONFIG_GCC_PLUGINS=y + CONFIG_GCC_PLUGIN_CYC_COMPLEXITY=y + CONFIG_GCC_PLUGIN_LATENT_ENTROPY=y + ... -To compile only the plugin(s):: +To compile the minimum tool set including the plugin(s):: - make gcc-plugins + make scripts or just run the kernel make and compile the whole kernel with the cyclomatic complexity GCC plugin. @@ -91,7 +85,8 @@ the cyclomatic complexity GCC plugin. 4. How to add a new GCC plugin ============================== -The GCC plugins are in $(src)/scripts/gcc-plugins/. You can use a file or a directory -here. It must be added to $(src)/scripts/gcc-plugins/Makefile, -$(src)/scripts/Makefile.gcc-plugins and $(src)/arch/Kconfig. +The GCC plugins are in scripts/gcc-plugins/. You need to put plugin source files +right under scripts/gcc-plugins/. Creating subdirectories is not supported. +It must be added to scripts/gcc-plugins/Makefile, scripts/Makefile.gcc-plugins +and a relevant Kconfig file. See the cyc_complexity_plugin.c (CONFIG_GCC_PLUGIN_CYC_COMPLEXITY) GCC plugin. diff --git a/Documentation/kbuild/kbuild.rst b/Documentation/kbuild/kbuild.rst index 2d1fc03d346ea7c044f945017e6ca74a50b58092..ef19b9c1352362e7d7c01773650b20d9fd7a5bd2 100644 --- a/Documentation/kbuild/kbuild.rst +++ b/Documentation/kbuild/kbuild.rst @@ -77,6 +77,17 @@ HOSTLDLIBS ---------- Additional libraries to link against when building host programs. +.. _userkbuildflags: + +USERCFLAGS +---------- +Additional options used for $(CC) when compiling userprogs. + +USERLDFLAGS +----------- +Additional options used for $(LD) when linking userprogs. userprogs are linked +with CC, so $(USERLDFLAGS) should include "-Wl," prefix as applicable. + KBUILD_KCONFIG -------------- Set the top-level Kconfig file to the value of this environment diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index 21c847890d03c4a337ec2824a1a152ede3b300f7..578f7e1621a5c50e14eb3a482694427b43fc7005 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -60,8 +60,71 @@ They can be enabled individually. The full list of the parameters: :: OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump READELF=llvm-readelf \ HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar HOSTLD=ld.lld -Currently, the integrated assembler is disabled by default. You can pass -``LLVM_IAS=1`` to enable it. +The integrated assembler is enabled by default. You can pass ``LLVM_IAS=0`` to +disable it. + +Omitting CROSS_COMPILE +---------------------- + +As explained above, ``CROSS_COMPILE`` is used to set ``--target=``. + +If ``CROSS_COMPILE`` is not specified, the ``--target=`` is inferred +from ``ARCH``. + +That means if you use only LLVM tools, ``CROSS_COMPILE`` becomes unnecessary. + +For example, to cross-compile the arm64 kernel:: + + make ARCH=arm64 LLVM=1 + +If ``LLVM_IAS=0`` is specified, ``CROSS_COMPILE`` is also used to derive +``--prefix=`` to search for the GNU assembler and linker. :: + + make ARCH=arm64 LLVM=1 LLVM_IAS=0 CROSS_COMPILE=aarch64-linux-gnu- + +Supported Architectures +----------------------- + +LLVM does not target all of the architectures that Linux supports and +just because a target is supported in LLVM does not mean that the kernel +will build or work without any issues. Below is a general summary of +architectures that currently work with ``CC=clang`` or ``LLVM=1``. Level +of support corresponds to "S" values in the MAINTAINERS files. If an +architecture is not present, it either means that LLVM does not target +it or there are known issues. Using the latest stable version of LLVM or +even the development tree will generally yield the best results. +An architecture's ``defconfig`` is generally expected to work well, +certain configurations may have problems that have not been uncovered +yet. Bug reports are always welcome at the issue tracker below! + +.. list-table:: + :widths: 10 10 10 + :header-rows: 1 + + * - Architecture + - Level of support + - ``make`` command + * - arm + - Supported + - ``LLVM=1`` + * - arm64 + - Supported + - ``LLVM=1`` + * - mips + - Maintained + - ``CC=clang`` + * - powerpc + - Maintained + - ``CC=clang`` + * - riscv + - Maintained + - ``CC=clang`` + * - s390 + - Maintained + - ``CC=clang`` + * - x86 + - Supported + - ``LLVM=1`` Getting Help ------------ diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index 0d5dd5413af03fcda52614ca044fedb7d2cfca7f..1db205dc1d0feb12ed89ac94b833220cd5082ada 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -852,6 +852,8 @@ The syntax is quite similar. The difference is to use "userprogs" instead of When linking bpfilter_umh, it will be passed the extra option -static. + From command line, :ref:`USERCFLAGS and USERLDFLAGS ` will also be used. + 5.4 When userspace programs are actually built ---------------------------------------------- diff --git a/Documentation/locking/locktypes.rst b/Documentation/locking/locktypes.rst index ddada4a53749364b86a9d11f11d2dcf4b7470ca6..4fd7b70fcde19737652a75a754c1ab6b5d99f46c 100644 --- a/Documentation/locking/locktypes.rst +++ b/Documentation/locking/locktypes.rst @@ -439,11 +439,9 @@ preemption. The following substitution works on both kernels:: spin_lock(&p->lock); p->count += this_cpu_read(var2); -On a non-PREEMPT_RT kernel migrate_disable() maps to preempt_disable() -which makes the above code fully equivalent. On a PREEMPT_RT kernel migrate_disable() ensures that the task is pinned on the current CPU which in turn guarantees that the per-CPU access to var1 and var2 are staying on -the same CPU. +the same CPU while the task remains preemptible. The migrate_disable() substitution is not valid for the following scenario:: @@ -456,9 +454,8 @@ scenario:: p = this_cpu_ptr(&var1); p->val = func2(); -While correct on a non-PREEMPT_RT kernel, this breaks on PREEMPT_RT because -here migrate_disable() does not protect against reentrancy from a -preempting task. A correct substitution for this case is:: +This breaks because migrate_disable() does not protect against reentrancy from +a preempting task. A correct substitution for this case is:: func() { diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst index adc314639085b273d9e99dd3fab97f387e4bffaf..413dca513e1db43e7bf6e6ab9c01235d76c9f58e 100644 --- a/Documentation/networking/bonding.rst +++ b/Documentation/networking/bonding.rst @@ -196,11 +196,12 @@ ad_actor_sys_prio ad_actor_system In an AD system, this specifies the mac-address for the actor in - protocol packet exchanges (LACPDUs). The value cannot be NULL or - multicast. It is preferred to have the local-admin bit set for this - mac but driver does not enforce it. If the value is not given then - system defaults to using the masters' mac address as actors' system - address. + protocol packet exchanges (LACPDUs). The value cannot be a multicast + address. If the all-zeroes MAC is specified, bonding will internally + use the MAC of the bond itself. It is preferred to have the + local-admin bit set for this mac but driver does not enforce it. If + the value is not given then system defaults to using the masters' + mac address as actors' system address. This parameter has effect only in 802.3ad mode and is available through SysFs interface. diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst index f1d5233e5e510929ba6d3f4f8cd049dc8767677c..0a233b17c664e202b245e65061cb28fd0f8621ea 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst @@ -440,6 +440,22 @@ NOTE: For 82599-based network connections, if you are enabling jumbo frames in a virtual function (VF), jumbo frames must first be enabled in the physical function (PF). The VF MTU setting cannot be larger than the PF MTU. +NBASE-T Support +--------------- +The ixgbe driver supports NBASE-T on some devices. However, the advertisement +of NBASE-T speeds is suppressed by default, to accommodate broken network +switches which cannot cope with advertised NBASE-T speeds. Use the ethtool +command to enable advertising NBASE-T speeds on devices which support it:: + + ethtool -s eth? advertise 0x1800000001028 + +On Linux systems with INTERFACES(5), this can be specified as a pre-up command +in /etc/network/interfaces so that the interface is always brought up with +NBASE-T support, e.g.:: + + iface eth? inet dhcp + pre-up ethtool -s eth? advertise 0x1800000001028 || true + Generic Receive Offload, aka GRO -------------------------------- The driver supports the in-kernel software implementation of GRO. GRO has diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst index 2afccc63856ee07357d6d800d9f200b3f6549d94..1cfbf1add2fc946a69254c2cf627dd598d583069 100644 --- a/Documentation/networking/ipvs-sysctl.rst +++ b/Documentation/networking/ipvs-sysctl.rst @@ -37,8 +37,7 @@ conn_reuse_mode - INTEGER 0: disable any special handling on port reuse. The new connection will be delivered to the same real server that was - servicing the previous connection. This will effectively - disable expire_nodest_conn. + servicing the previous connection. bit 1: enable rescheduling of new connections when it is safe. That is, whenever expire_nodest_conn and for TCP sockets, when diff --git a/Documentation/sound/hd-audio/models.rst b/Documentation/sound/hd-audio/models.rst index 0ea967d34583878dc48dba1493b4157945fd727d..d25335993e55309d8c7799178231a251ec890e42 100644 --- a/Documentation/sound/hd-audio/models.rst +++ b/Documentation/sound/hd-audio/models.rst @@ -326,6 +326,8 @@ usi-headset Headset support on USI machines dual-codecs Lenovo laptops with dual codecs +alc285-hp-amp-init + HP laptops which require speaker amplifier initialization (ALC285) ALC680 ====== diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst index 2a5aa48eff6c78dd59ebbf23930f8ce135dd9e87..9df29a935757afe25590f36cc2746eeabf8646bb 100644 --- a/Documentation/trace/events.rst +++ b/Documentation/trace/events.rst @@ -198,6 +198,15 @@ The glob (~) accepts a wild card character (\*,?) and character classes prev_comm ~ "*sh*" prev_comm ~ "ba*sh" +If the field is a pointer that points into user space (for example +"filename" from sys_enter_openat), then you have to append ".ustring" to the +field name:: + + filename.ustring ~ "password" + +As the kernel will have to know how to retrieve the memory that the pointer +is at from user space. + 5.2 Setting filters ------------------- @@ -230,6 +239,16 @@ Currently the caret ('^') for an error always appears at the beginning of the filter string; the error message should still be useful though even without more accurate position info. +5.2.1 Filter limitations +------------------------ + +If a filter is placed on a string pointer ``(char *)`` that does not point +to a string on the ring buffer, but instead points to kernel or user space +memory, then, for safety reasons, at most 1024 bytes of the content is +copied onto a temporary buffer to do the compare. If the copy of the memory +faults (the pointer points to memory that should not be accessed), then the +string compare will be treated as not matching. + 5.3 Clearing filters -------------------- diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index f99be8062bc82861b1043bfddc31f91588838e02..1ef750ee72ceeca6c4eadb1aa7bfde1989b5d3f2 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -1762,6 +1762,21 @@ using the same key and variable from yet another event:: # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger +Expressions support the use of addition, subtraction, multiplication and +division operators (+-\*/). + +Note if division by zero cannot be detected at parse time (i.e. the +divisor is not a constant), the result will be -1. + +Numeric constants can also be used directly in an expression:: + + # echo 'hist:keys=next_pid:timestamp_secs=common_timestamp/1000000 ...' >> event/trigger + +or assigned to a variable and referenced in a subsequent expression:: + + # echo 'hist:keys=next_pid:us_per_sec=1000000 ...' >> event/trigger + # echo 'hist:keys=next_pid:timestamp_secs=common_timestamp/$us_per_sec ...' >> event/trigger + 2.2.2 Synthetic Events ---------------------- diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 8881f006d677810ead50e56edb149b682bccbcf1..13a53dc40dce68cfd5680ef82456b8aff4344d03 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -5227,6 +5227,11 @@ Valid values for 'type' are: to ignore the request, or to gather VM memory core dump and/or reset/shutdown of the VM. +Valid flags are: + + - KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (arm64 only) -- the guest issued + a SYSTEM_RESET2 call according to v1.1 of the PSCI specification. + :: /* KVM_EXIT_IOAPIC_EOI */ @@ -5328,6 +5333,13 @@ Note that KVM does not skip the faulting instruction as it does for KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state if it decides to decode and emulate the instruction. +This feature isn't available to protected VMs, as userspace does not +have access to the state that is required to perform the emulation. +Instead, a data abort exception is directly injected in the guest. +Note that although KVM_CAP_ARM_NISV_TO_USER will be reported if +queried outside of a protected VM context, the feature will not be +exposed if queried on a protected VM file descriptor. + :: /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */ diff --git a/Documentation/virt/kvm/arm/index.rst b/Documentation/virt/kvm/arm/index.rst index 78a9b670aafee61ca9bdc974ec41dd777248ea59..e77a0ee2e2d432c2cd1bde547db897261eb6dc9a 100644 --- a/Documentation/virt/kvm/arm/index.rst +++ b/Documentation/virt/kvm/arm/index.rst @@ -11,3 +11,4 @@ ARM psci pvtime ptp_kvm + mmio-guard diff --git a/Documentation/virt/kvm/arm/mmio-guard.rst b/Documentation/virt/kvm/arm/mmio-guard.rst new file mode 100644 index 0000000000000000000000000000000000000000..8b3c852c5d92e3003d19e62373d927d49a52f81b --- /dev/null +++ b/Documentation/virt/kvm/arm/mmio-guard.rst @@ -0,0 +1,74 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============== +KVM MMIO guard +============== + +KVM implements device emulation by handling translation faults to any +IPA range that is not contained in a memory slot. Such a translation +fault is in most cases passed on to userspace (or in rare cases to the +host kernel) with the address, size and possibly data of the access +for emulation. + +Should the guest exit with an address that is not one that corresponds +to an emulatable device, userspace may take measures that are not the +most graceful as far as the guest is concerned (such as terminating it +or delivering a fatal exception). + +There is also an element of trust: by forwarding the request to +userspace, the kernel assumes that the guest trusts userspace to do +the right thing. + +The KVM MMIO guard offers a way to mitigate this last point: a guest +can request that only certain regions of the IPA space are valid as +MMIO. Only these regions will be handled as an MMIO, and any other +will result in an exception being delivered to the guest. + +This relies on a set of hypercalls defined in the KVM-specific range, +using the HVC64 calling convention. + +* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO + + ============== ======== ================================ + Function ID: (uint32) 0xC6000002 + Arguments: none + Return Values: (int64) NOT_SUPPORTED(-1) on error, or + (uint64) Protection Granule (PG) size in + bytes (r0) + ============== ======== ================================ + +* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL + + ============== ======== ============================== + Function ID: (uint32) 0xC6000003 + Arguments: none + Return Values: (int64) NOT_SUPPORTED(-1) on error, or + RET_SUCCESS(0) (r0) + ============== ======== ============================== + +* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP + + ============== ======== ==================================== + Function ID: (uint32) 0xC6000004 + Arguments: (uint64) The base of the PG-sized IPA range + that is allowed to be accessed as + MMIO. Must be aligned to the PG size + (r1) + (uint64) Index in the MAIR_EL1 register + providing the memory attribute that + is used by the guest (r2) + Return Values: (int64) NOT_SUPPORTED(-1) on error, or + RET_SUCCESS(0) (r0) + ============== ======== ==================================== + +* ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP + + ============== ======== ====================================== + Function ID: (uint32) 0xC6000005 + Arguments: (uint64) PG-sized IPA range aligned to the PG + size which has been previously mapped. + Must be aligned to the PG size and + have been previously mapped (r1) + Return Values: (int64) NOT_SUPPORTED(-1) on error, or + RET_SUCCESS(0) (r0) + ============== ======== ====================================== diff --git a/Documentation/vm/index.rst b/Documentation/vm/index.rst index eff5fbd492d08fa68208771dcf8bf4751bec32af..9f80cc8a1cd9eb81efda1cb5246d324a4eb24917 100644 --- a/Documentation/vm/index.rst +++ b/Documentation/vm/index.rst @@ -41,6 +41,7 @@ descriptions of data structures and algorithms. ksm memory-model mmu_notifier + multigen_lru numa overcommit-accounting page_migration diff --git a/Documentation/vm/multigen_lru.rst b/Documentation/vm/multigen_lru.rst new file mode 100644 index 0000000000000000000000000000000000000000..8f6498002576c9f235ad9fd2a75863a0895d8f73 --- /dev/null +++ b/Documentation/vm/multigen_lru.rst @@ -0,0 +1,160 @@ +.. SPDX-License-Identifier: GPL-2.0 + +============= +Multi-Gen LRU +============= +The multi-gen LRU is an alternative LRU implementation that optimizes +page reclaim and improves performance under memory pressure. Page +reclaim decides the kernel's caching policy and ability to overcommit +memory. It directly impacts the kswapd CPU usage and RAM efficiency. + +Design overview +=============== +Objectives +---------- +The design objectives are: + +* Good representation of access recency +* Try to profit from spatial locality +* Fast paths to make obvious choices +* Simple self-correcting heuristics + +The representation of access recency is at the core of all LRU +implementations. In the multi-gen LRU, each generation represents a +group of pages with similar access recency. Generations establish a +common frame of reference and therefore help make better choices, +e.g., between different memcgs on a computer or different computers in +a data center (for job scheduling). + +Exploiting spatial locality improves efficiency when gathering the +accessed bit. A rmap walk targets a single page and does not try to +profit from discovering a young PTE. A page table walk can sweep all +the young PTEs in an address space, but the address space can be too +large to make a profit. The key is to optimize both methods and use +them in combination. + +Fast paths reduce code complexity and runtime overhead. Unmapped pages +do not require TLB flushes; clean pages do not require writeback. +These facts are only helpful when other conditions, e.g., access +recency, are similar. With generations as a common frame of reference, +additional factors stand out. But obvious choices might not be good +choices; thus self-correction is required. + +The benefits of simple self-correcting heuristics are self-evident. +Again, with generations as a common frame of reference, this becomes +attainable. Specifically, pages in the same generation can be +categorized based on additional factors, and a feedback loop can +statistically compare the refault percentages across those categories +and infer which of them are better choices. + +Assumptions +----------- +The protection of hot pages and the selection of cold pages are based +on page access channels and patterns. There are two access channels: + +* Accesses through page tables +* Accesses through file descriptors + +The protection of the former channel is by design stronger because: + +1. The uncertainty in determining the access patterns of the former + channel is higher due to the approximation of the accessed bit. +2. The cost of evicting the former channel is higher due to the TLB + flushes required and the likelihood of encountering the dirty bit. +3. The penalty of underprotecting the former channel is higher because + applications usually do not prepare themselves for major page + faults like they do for blocked I/O. E.g., GUI applications + commonly use dedicated I/O threads to avoid blocking the rendering + threads. + +There are also two access patterns: + +* Accesses exhibiting temporal locality +* Accesses not exhibiting temporal locality + +For the reasons listed above, the former channel is assumed to follow +the former pattern unless ``VM_SEQ_READ`` or ``VM_RAND_READ`` is +present, and the latter channel is assumed to follow the latter +pattern unless outlying refaults have been observed. + +Workflow overview +================= +Evictable pages are divided into multiple generations for each +``lruvec``. The youngest generation number is stored in +``lrugen->max_seq`` for both anon and file types as they are aged on +an equal footing. The oldest generation numbers are stored in +``lrugen->min_seq[]`` separately for anon and file types as clean file +pages can be evicted regardless of swap constraints. These three +variables are monotonically increasing. + +Generation numbers are truncated into ``order_base_2(MAX_NR_GENS+1)`` +bits in order to fit into the gen counter in ``page->flags``. Each +truncated generation number is an index to ``lrugen->lists[]``. The +sliding window technique is used to track at least ``MIN_NR_GENS`` and +at most ``MAX_NR_GENS`` generations. The gen counter stores a value +within ``[1, MAX_NR_GENS]`` while a page is on one of +``lrugen->lists[]``; otherwise it stores zero. + +Each generation is divided into multiple tiers. Tiers represent +different ranges of numbers of accesses through file descriptors. A +page accessed ``N`` times through file descriptors is in tier +``order_base_2(N)``. In contrast to moving across generations, which +requires the LRU lock, moving across tiers only requires operations on +``page->flags`` and therefore has a negligible cost. A feedback loop +modeled after the PID controller monitors refaults over all the tiers +from anon and file types and decides which tiers from which types to +evict or protect. + +There are two conceptually independent procedures: the aging and the +eviction. They form a closed-loop system, i.e., the page reclaim. + +Aging +----- +The aging produces young generations. Given an ``lruvec``, it +increments ``max_seq`` when ``max_seq-min_seq+1`` approaches +``MIN_NR_GENS``. The aging promotes hot pages to the youngest +generation when it finds them accessed through page tables; the +demotion of cold pages happens consequently when it increments +``max_seq``. The aging uses page table walks and rmap walks to find +young PTEs. For the former, it iterates ``lruvec_memcg()->mm_list`` +and calls ``walk_page_range()`` with each ``mm_struct`` on this list +to scan PTEs. On finding a young PTE, it clears the accessed bit and +updates the gen counter of the page mapped by this PTE to +``(max_seq%MAX_NR_GENS)+1``. After each iteration of this list, it +increments ``max_seq``. For the latter, when the eviction walks the +rmap and finds a young PTE, the aging scans the adjacent PTEs and +follows the same steps just described. + +Eviction +-------- +The eviction consumes old generations. Given an ``lruvec``, it +increments ``min_seq`` when ``lrugen->lists[]`` indexed by +``min_seq%MAX_NR_GENS`` becomes empty. To select a type and a tier to +evict from, it first compares ``min_seq[]`` to select the older type. +If both types are equally old, it selects the one whose first tier has +a lower refault percentage. The first tier contains single-use +unmapped clean pages, which are the best bet. The eviction sorts a +page according to the gen counter if the aging has found this page +accessed through page tables and updated the gen counter. It also +moves a page to the next generation, i.e., ``min_seq+1``, if this page +was accessed multiple times through file descriptors and the feedback +loop has detected outlying refaults from the tier this page is in. To +do this, the feedback loop uses the first tier as the baseline, for +the reason stated earlier. + +Summary +------- +The multi-gen LRU can be disassembled into the following parts: + +* Generations +* Page table walks +* Rmap walks +* Bloom filters +* The PID controller + +The aging and the eviction is a producer-consumer model; specifically, +the latter drives the former by the sliding window over generations. +Within the aging, rmap walks drive page table walks by inserting hot +densely populated page tables to the Bloom filters. Within the +eviction, the PID controller uses refaults as the feedback to select +types to evict and tiers to protect. diff --git a/MAINTAINERS b/MAINTAINERS index 12bf45f55f6a4a9230f1e6aa86f07f12c0bd39f9..3dc294427f6550572e9ef2063c8f6af2a96be055 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4294,8 +4294,9 @@ W: https://clangbuiltlinux.github.io/ B: https://github.com/ClangBuiltLinux/linux/issues C: irc://chat.freenode.net/clangbuiltlinux F: Documentation/kbuild/llvm.rst +F: include/linux/compiler-clang.h +F: scripts/Makefile.clang F: scripts/clang-tools/ -F: scripts/lld-version.sh K: \b(?i:clang|llvm)\b CLEANCACHE API @@ -6847,6 +6848,13 @@ F: include/linux/firewire.h F: include/uapi/linux/firewire*.h F: tools/firewire/ +FIRMWARE FRAMEWORK FOR ARMV8-A +M: Sudeep Holla +L: linux-arm-kernel@lists.infradead.org +S: Maintained +F: drivers/firmware/arm_ffa/ +F: include/linux/arm_ffa.h + FIRMWARE LOADER (request_firmware) M: Luis Chamberlain L: linux-kernel@vger.kernel.org @@ -7310,7 +7318,6 @@ L: linux-hardening@vger.kernel.org S: Maintained F: Documentation/kbuild/gcc-plugins.rst F: scripts/Makefile.gcc-plugins -F: scripts/gcc-plugin.sh F: scripts/gcc-plugins/ GCOV BASED KERNEL PROFILING diff --git a/Makefile b/Makefile index 26619d1c71299583c04d16295ce787d31f37f530..e4517d58b89f0bd5bc7d852d30ac8db2c3945f20 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 70 +SUBLEVEL = 107 EXTRAVERSION = NAME = Dare mighty things @@ -354,14 +354,14 @@ __build_one_by_one: else # !mixed-build -include scripts/Kbuild.include +include $(srctree)/scripts/Kbuild.include # Read KERNELRELEASE from include/config/kernel.release (if it exists) KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null) KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION) export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION -include scripts/subarch.include +include $(srctree)/scripts/subarch.include # Cross compiling and selecting different set of gcc/bin-utils # --------------------------------------------------------------------------- @@ -429,11 +429,12 @@ HOSTCC = gcc HOSTCXX = g++ endif -export KBUILD_USERCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ - -O2 -fomit-frame-pointer -std=gnu89 -export KBUILD_USERLDFLAGS := +KBUILD_USERHOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes \ + -O2 -fomit-frame-pointer -std=gnu89 +KBUILD_USERCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(USERCFLAGS) +KBUILD_USERLDFLAGS := $(USERLDFLAGS) -KBUILD_HOSTCFLAGS := $(KBUILD_USERCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS) +KBUILD_HOSTCFLAGS := $(KBUILD_USERHOSTCFLAGS) $(HOST_LFS_CFLAGS) $(HOSTCFLAGS) KBUILD_HOSTCXXFLAGS := -Wall -O2 $(HOST_LFS_CFLAGS) $(HOSTCXXFLAGS) KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS) KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS) @@ -526,6 +527,7 @@ export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AW export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE +export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS KBUILD_LDFLAGS export KBUILD_CFLAGS CFLAGS_KERNEL CFLAGS_MODULE @@ -574,30 +576,19 @@ ifdef building_out_of_srctree { echo "# this is build directory, ignore it"; echo "*"; } > .gitignore endif -ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) -ifneq ($(CROSS_COMPILE),) -CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) -GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) -CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) -GCC_TOOLCHAIN := $(realpath $(GCC_TOOLCHAIN_DIR)/..) -endif -ifneq ($(GCC_TOOLCHAIN),) -CLANG_FLAGS += --gcc-toolchain=$(GCC_TOOLCHAIN) -endif -ifneq ($(LLVM_IAS),1) -CLANG_FLAGS += -no-integrated-as -endif -CLANG_FLAGS += -Werror=unknown-warning-option -KBUILD_CFLAGS += $(CLANG_FLAGS) -KBUILD_AFLAGS += $(CLANG_FLAGS) -export CLANG_FLAGS -endif - # The expansion should be delayed until arch/$(SRCARCH)/Makefile is included. # Some architectures define CROSS_COMPILE in arch/$(SRCARCH)/Makefile. # CC_VERSION_TEXT is referenced from Kconfig (so it needs export), # and from include/config/auto.conf.cmd to detect the compiler upgrade. -CC_VERSION_TEXT = $(shell $(CC) --version 2>/dev/null | head -n 1) +CC_VERSION_TEXT = $(subst $(pound),,$(shell LC_ALL=C $(CC) --version 2>/dev/null | head -n 1)) + +ifneq ($(findstring clang,$(CC_VERSION_TEXT)),) +include $(srctree)/scripts/Makefile.clang +endif + +# Include this also for config targets because some architectures need +# cc-cross-prefix to determine CROSS_COMPILE. +include $(srctree)/scripts/Makefile.compiler ifdef config-build # =========================================================================== @@ -607,7 +598,7 @@ ifdef config-build # Read arch specific Makefile to set KBUILD_DEFCONFIG as needed. # KBUILD_DEFCONFIG may point out an alternative default configuration # used for 'make defconfig' -include arch/$(SRCARCH)/Makefile +include $(srctree)/arch/$(SRCARCH)/Makefile export KBUILD_DEFCONFIG KBUILD_KCONFIG CC_VERSION_TEXT config: outputmakefile scripts_basic FORCE @@ -695,7 +686,7 @@ RETPOLINE_VDSO_CFLAGS := $(call cc-option,$(RETPOLINE_VDSO_CFLAGS_GCC),$(call cc export RETPOLINE_CFLAGS export RETPOLINE_VDSO_CFLAGS -include arch/$(SRCARCH)/Makefile +include $(srctree)/arch/$(SRCARCH)/Makefile ifdef need-config ifdef may-sync-config @@ -846,7 +837,7 @@ else DEBUG_CFLAGS += -g endif -ifneq ($(LLVM_IAS),1) +ifndef CONFIG_AS_IS_LLVM KBUILD_AFLAGS += -Wa,-gdwarf-2 endif @@ -1826,6 +1817,16 @@ clean-dirs := $(KBUILD_EXTMOD) clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps \ $(KBUILD_EXTMOD)/compile_commands.json $(KBUILD_EXTMOD)/.thinlto-cache +PHONY += prepare +# now expand this into a simple variable to reduce the cost of shell evaluations +prepare: CC_VERSION_TEXT := $(CC_VERSION_TEXT) +prepare: + @if [ "$(CC_VERSION_TEXT)" != $(CONFIG_CC_VERSION_TEXT) ]; then \ + echo >&2 "warning: the compiler differs from the one used to build the kernel"; \ + echo >&2 " The kernel was built by: "$(CONFIG_CC_VERSION_TEXT); \ + echo >&2 " You are using: $(CC_VERSION_TEXT)"; \ + fi + PHONY += help help: @echo ' Building external modules.' @@ -1839,7 +1840,7 @@ help: @echo '' # no-op for external module builds -PHONY += prepare modules_prepare +PHONY += modules_prepare endif # KBUILD_EXTMOD diff --git a/android/abi_gki_aarch64.xml b/android/abi_gki_aarch64.xml new file mode 100644 index 0000000000000000000000000000000000000000..be34a71279005db12dc5c0ddab1ab38df285a350 --- /dev/null +++ b/android/abi_gki_aarch64.xml @@ -0,0 +1,124999 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/android/abi_gki_aarch64_db845c b/android/abi_gki_aarch64_db845c index 7e7d1c6734ea4598413879c912d2e4e04e2f2194..69be1905a22919a19034ed260bcecfcfb4dd143c 100644 --- a/android/abi_gki_aarch64_db845c +++ b/android/abi_gki_aarch64_db845c @@ -851,7 +851,9 @@ dma_buf_put dma_buf_unmap_attachment dma_get_sgtable_attrs + down_read find_vma + up_read wait_for_completion_interruptible # required by gpio-regulator.ko @@ -1672,19 +1674,22 @@ # required by ufs_qcom.ko phy_set_mode_ext + ufshcd_dme_configure_adapt ufshcd_dme_get_attr ufshcd_dme_set_attr ufshcd_dump_regs ufshcd_get_local_unipro_ver ufshcd_get_pwr_dev_param + ufshcd_init_pwr_dev_param ufshcd_pltfrm_init - ufshcd_pltfrm_resume - ufshcd_pltfrm_runtime_idle - ufshcd_pltfrm_runtime_resume - ufshcd_pltfrm_runtime_suspend ufshcd_pltfrm_shutdown - ufshcd_pltfrm_suspend ufshcd_remove + ufshcd_resume_complete + ufshcd_runtime_resume + ufshcd_runtime_suspend + ufshcd_suspend_prepare + ufshcd_system_resume + ufshcd_system_suspend ufshcd_uic_hibern8_enter ufshcd_uic_hibern8_exit diff --git a/android/abi_gki_aarch64_fips140 b/android/abi_gki_aarch64_fips140 index b4810fec641909e18874286d51adc0223f686b8a..361966d13262699419ef54354a87e61f2f249184 100644 --- a/android/abi_gki_aarch64_fips140 +++ b/android/abi_gki_aarch64_fips140 @@ -6,12 +6,14 @@ # required by fips140.ko add_random_ready_callback aead_register_instance - arm64_const_caps_ready + ahash_register_instance + arch_timer_read_counter bcmp cancel_work_sync __cfi_slowpath + complete_all + completion_done cpu_have_feature - cpu_hwcap_keys crypto_aead_decrypt crypto_aead_encrypt crypto_aead_setauthsize @@ -19,7 +21,6 @@ crypto_ahash_finup crypto_ahash_setkey crypto_alg_list - crypto_alg_mod_lookup crypto_alg_sem crypto_alloc_aead crypto_alloc_base @@ -28,7 +29,6 @@ crypto_alloc_skcipher crypto_attr_alg_name crypto_check_attr_type - crypto_cipher_decrypt_one crypto_cipher_encrypt_one crypto_cipher_setkey crypto_destroy_tfm @@ -42,15 +42,19 @@ crypto_inst_setname crypto_put_default_null_skcipher crypto_register_aead + crypto_register_aeads + crypto_register_ahash + crypto_register_ahashes crypto_register_alg + crypto_register_algs crypto_register_rng crypto_register_rngs crypto_register_shash crypto_register_shashes + crypto_register_skcipher crypto_register_skciphers crypto_register_template crypto_register_templates - crypto_remove_final crypto_remove_spawns crypto_req_done crypto_rng_reset @@ -64,8 +68,8 @@ crypto_skcipher_decrypt crypto_skcipher_encrypt crypto_skcipher_setkey - crypto_spawn_tfm crypto_spawn_tfm2 + crypto_spawn_tfm crypto_unregister_aead crypto_unregister_alg crypto_unregister_rng @@ -88,9 +92,9 @@ kfree_sensitive __kmalloc kmalloc_caches - kmalloc_order_trace kmem_cache_alloc_trace kmemdup + ktime_get __list_add_valid __list_del_entry_valid memcpy @@ -103,6 +107,9 @@ preempt_schedule_notrace printk queue_work_on + ___ratelimit + _raw_spin_lock + _raw_spin_unlock refcount_warn_saturate scatterwalk_ffwd scatterwalk_map_and_copy @@ -124,7 +131,6 @@ strlcpy strlen strncmp - synchronize_rcu_tasks system_wq __traceiter_android_vh_aes_decrypt __traceiter_android_vh_aes_encrypt @@ -137,27 +143,5 @@ tracepoint_probe_register up_write wait_for_completion - -# needed by fips140.ko but not identified by the tooling -# TODO(b/189327973): [GKI: ABI] Build of fips140.ko module fails to identify some symbols - __crypto_memneq - __crypto_xor - aes_decrypt - aes_encrypt - aes_expandkey - ce_aes_expandkey - crypto_aes_inv_sbox - crypto_aes_sbox - crypto_aes_set_key - crypto_ft_tab - crypto_inc - crypto_it_tab - crypto_sha1_finup - crypto_sha1_update - gf128mul_lle - sha1_transform - sha224_final - sha256 - sha256_block_data_order - sha256_final - sha256_update + xa_load + xa_store diff --git a/android/abi_gki_aarch64_generic b/android/abi_gki_aarch64_generic index 419bbb187d9bf87342332dd971b5c583010e661b..fc32aec4dd1e09e799a27d9fafd3cbbed43fd3c2 100644 --- a/android/abi_gki_aarch64_generic +++ b/android/abi_gki_aarch64_generic @@ -2,8 +2,11 @@ add_cpu add_timer add_timer_on + add_uevent_var add_wait_queue adjust_managed_page_count + aes_encrypt + aes_expandkey alarm_cancel alarm_init alarm_start_relative @@ -19,9 +22,12 @@ __alloc_percpu __alloc_percpu_gfp __alloc_skb + alloc_skb_with_frags alloc_workqueue + amba_bustype amba_driver_register amba_driver_unregister + android_debug_symbol android_rvh_probe_register anon_inode_getfd __arch_clear_user @@ -29,6 +35,8 @@ __arch_copy_in_user __arch_copy_to_user arch_timer_read_counter + argv_free + argv_split arm64_const_caps_ready arm64_use_ng_mappings __arm_smccc_smc @@ -40,6 +48,12 @@ autoremove_wake_function available_idle_cpu backlight_device_set_brightness + badblocks_check + badblocks_clear + badblocks_exit + badblocks_init + badblocks_set + badblocks_show bcmp bdget_disk bdput @@ -48,24 +62,43 @@ bio_alloc_bioset bio_associate_blkg bio_chain + bio_crypt_set_ctx bio_endio bio_init bio_put __bitmap_andnot __bitmap_clear __bitmap_equal + bitmap_find_next_zero_area_off + bitmap_from_arr32 __bitmap_or bitmap_parse bitmap_parselist bitmap_print_to_pagebuf + __bitmap_set bitmap_to_arr32 __bitmap_weight + blk_abort_request blk_alloc_queue + blk_check_plugged blk_cleanup_queue + blk_crypto_init_key blkdev_get_by_dev + blkdev_get_by_path blkdev_put blk_execute_rq_nowait blk_get_request + blk_ksm_init_passthrough + blk_mq_alloc_tag_set + blk_mq_complete_request + blk_mq_end_request + blk_mq_free_tag_set + blk_mq_init_queue_data + blk_mq_start_request + blk_mq_start_stopped_hw_queues + blk_mq_stop_hw_queues + blk_mq_unique_tag + blk_mq_update_nr_hw_queues blk_put_request blk_queue_flag_clear blk_queue_flag_set @@ -75,6 +108,7 @@ blk_queue_max_discard_sectors blk_queue_max_write_zeroes_sectors blk_queue_physical_block_size + blk_queue_write_cache blk_rq_map_user blk_rq_map_user_iov blk_rq_unmap_user @@ -85,6 +119,7 @@ blocking_notifier_chain_unregister bpf_trace_run1 bpf_trace_run10 + bpf_trace_run11 bpf_trace_run12 bpf_trace_run2 bpf_trace_run3 @@ -93,6 +128,7 @@ bpf_trace_run6 bpf_trace_run7 bpf_trace_run8 + bpf_trace_run9 build_skb bus_find_device bus_for_each_dev @@ -115,6 +151,7 @@ cdev_init __cfg80211_alloc_event_skb __cfg80211_alloc_reply_skb + cfg80211_calculate_bitrate cfg80211_chandef_create cfg80211_ch_switch_notify cfg80211_connect_done @@ -123,23 +160,30 @@ cfg80211_external_auth_request cfg80211_find_elem_match cfg80211_get_bss + cfg80211_gtk_rekey_notify cfg80211_ibss_joined cfg80211_inform_bss_frame_data cfg80211_mgmt_tx_status cfg80211_michael_mic_failure cfg80211_new_sta + cfg80211_pmksa_candidate_notify cfg80211_port_authorized cfg80211_put_bss cfg80211_ready_on_channel cfg80211_remain_on_channel_expired cfg80211_roamed cfg80211_rx_mgmt_khz + cfg80211_rx_unprot_mlme_mgmt cfg80211_scan_done cfg80211_sched_scan_results + cfg80211_sched_scan_stopped cfg80211_sched_scan_stopped_rtnl __cfg80211_send_event_skb + cfg80211_stop_iface + cfg80211_tdls_oper_request cfg80211_unlink_bss cfg80211_unregister_wdev + cfg80211_update_owe_info_event cfg80211_vendor_cmd_reply __cfi_slowpath __check_object_size @@ -155,6 +199,7 @@ clk_get __clk_get_hw __clk_get_name + clk_get_parent clk_get_rate clk_hw_get_name clk_hw_get_parent @@ -172,6 +217,9 @@ clocks_calc_mult_shift __clocksource_register_scale cma_alloc + cma_for_each_area + cma_get_name + cma_get_size cma_release compat_alloc_user_space complete @@ -186,7 +234,13 @@ component_match_add_release component_unbind_all config_ep_by_speed + configfs_register_subsystem + configfs_unregister_subsystem + config_group_init config_group_init_type_name + config_item_init_type_name + config_item_put + console_set_on_cmdline console_suspend_enabled console_trylock console_unlock @@ -194,6 +248,7 @@ consume_skb contig_page_data _copy_from_iter + _copy_from_iter_full _copy_to_iter __cpu_active_mask cpu_all_bits @@ -216,6 +271,7 @@ cpufreq_get_policy cpufreq_policy_transition_delay_us cpufreq_quick_get + cpufreq_quick_get_max cpufreq_register_driver cpufreq_register_governor cpufreq_register_notifier @@ -230,6 +286,7 @@ cpuhp_tasks_frozen cpu_hwcap_keys cpu_hwcaps + cpuidle_governor_latency_req cpuidle_pause_and_lock cpuidle_resume_and_unlock cpu_latency_qos_add_request @@ -242,12 +299,15 @@ cpu_number __cpu_online_mask cpu_pm_register_notifier + cpu_pm_unregister_notifier __cpu_possible_mask + __cpu_present_mask cpupri_find_fitness cpu_scale cpus_read_lock cpus_read_unlock cpu_subsys + cpu_topology crc32_le crc8 crc8_populate_msb @@ -258,23 +318,37 @@ crypto_alloc_aead crypto_alloc_base crypto_alloc_shash + crypto_alloc_skcipher crypto_alloc_sync_skcipher crypto_comp_compress crypto_comp_decompress crypto_destroy_tfm + __crypto_memneq crypto_register_alg + crypto_register_notifier crypto_register_scomp + crypto_register_shash crypto_shash_digest + crypto_shash_final crypto_shash_finup crypto_shash_setkey + crypto_shash_update + crypto_skcipher_decrypt crypto_skcipher_encrypt crypto_skcipher_setkey crypto_unregister_alg + crypto_unregister_notifier crypto_unregister_scomp + crypto_unregister_shash + csum_ipv6_magic + csum_partial + csum_tcpudp_nofold _ctype + datagram_poll debugfs_attr_read debugfs_attr_write debugfs_create_atomic_t + debugfs_create_blob debugfs_create_bool debugfs_create_dir debugfs_create_file @@ -295,34 +369,48 @@ del_timer_sync desc_to_gpio destroy_workqueue + dev_alloc_name dev_close _dev_crit dev_driver_string _dev_emerg _dev_err + dev_err_probe devfreq_add_device devfreq_add_governor + devfreq_monitor_resume + devfreq_monitor_start + devfreq_monitor_stop + devfreq_monitor_suspend devfreq_recommended_opp devfreq_register_opp_notifier devfreq_remove_device devfreq_unregister_opp_notifier + devfreq_update_interval dev_fwnode dev_get_by_name + dev_get_stats + device_add device_add_disk device_add_groups device_create device_create_file device_create_with_groups + device_del device_destroy device_find_child device_for_each_child + device_get_child_node_count device_get_dma_attr + device_get_next_child_node device_initialize device_init_wakeup device_link_add device_link_del + device_match_fwnode device_property_present device_property_read_string + device_property_read_string_array device_property_read_u32_array device_register device_remove_file @@ -339,13 +427,18 @@ devm_blk_ksm_init devm_clk_get devm_clk_get_optional + devm_clk_hw_register + devm_clk_hw_register_clkdev devm_clk_put + devm_device_add_group devm_device_add_groups + devm_device_remove_group __devm_drm_dev_alloc devm_drm_panel_bridge_add_typed devm_extcon_dev_allocate devm_extcon_dev_register devm_free_irq + devm_fwnode_pwm_get devm_gen_pool_create devm_gpiochip_add_data_with_key devm_gpiod_get @@ -369,8 +462,10 @@ devm_kmemdup devm_kstrdup devm_kstrdup_const + devm_led_classdev_register_ext devm_mfd_add_devices devm_nvmem_register + devm_of_clk_add_hw_provider __devm_of_phy_provider_register devm_of_platform_populate devm_phy_create @@ -383,12 +478,14 @@ devm_platform_ioremap_resource devm_platform_ioremap_resource_byname devm_power_supply_register + devm_pwm_get devm_regmap_add_irq_chip __devm_regmap_init __devm_regmap_init_i2c __devm_regmap_init_spi devm_regulator_bulk_get devm_regulator_get + devm_regulator_get_exclusive devm_regulator_get_optional devm_regulator_put devm_regulator_register @@ -398,6 +495,7 @@ devm_snd_soc_register_component devm_thermal_of_cooling_device_register devm_thermal_zone_of_sensor_register + devm_thermal_zone_of_sensor_unregister devm_usb_get_phy_by_phandle _dev_notice dev_pm_domain_attach_by_name @@ -416,8 +514,13 @@ dev_pm_opp_put dev_pm_opp_put_regulators dev_pm_opp_set_regulators + dev_pm_qos_add_notifier + dev_pm_qos_add_request dev_pm_qos_read_value + dev_pm_qos_remove_notifier + dev_pm_qos_remove_request dev_pm_qos_update_request + dev_printk dev_printk_emit dev_queue_xmit devres_add @@ -431,6 +534,7 @@ disk_end_io_acct disk_start_io_acct dma_alloc_attrs + dma_alloc_noncoherent dma_async_device_register dma_async_device_unregister dma_async_tx_descriptor_init @@ -453,6 +557,8 @@ dma_buf_unmap_attachment dma_buf_vmap dma_buf_vunmap + dma_direct_alloc + dma_direct_free dmaengine_unmap_put dma_fence_add_callback dma_fence_context_alloc @@ -463,7 +569,10 @@ dma_fence_remove_callback dma_fence_signal dma_fence_signal_locked + dma_fence_wait_timeout dma_free_attrs + dma_free_noncoherent + dma_get_sgtable_attrs dma_get_slave_caps dma_get_slave_channel dma_heap_add @@ -491,6 +600,7 @@ dma_unmap_page_attrs dma_unmap_resource dma_unmap_sg_attrs + do_exit do_SAK do_wait_intr_irq down @@ -499,9 +609,12 @@ down_read down_read_trylock down_timeout + down_trylock down_write drain_workqueue + driver_create_file driver_register + driver_remove_file driver_unregister drm_add_modes_noedid drm_atomic_add_affected_connectors @@ -571,6 +684,7 @@ drm_connector_list_iter_end drm_connector_list_iter_next drm_connector_register + drm_connector_set_panel_orientation drm_connector_unregister drm_crtc_arm_vblank_event drm_crtc_cleanup @@ -590,6 +704,7 @@ drm_dev_register drm_dev_unregister drm_display_mode_to_videomode + drm_dsc_pps_payload_pack drm_encoder_cleanup drm_encoder_init __drm_err @@ -657,6 +772,7 @@ drm_poll drm_prime_gem_destroy drm_printf + __drm_printfn_debug __drm_printfn_info __drm_printfn_seq_file drm_property_blob_get @@ -696,9 +812,12 @@ dw_pcie_write __dynamic_dev_dbg __dynamic_pr_debug + em_cpu_get emergency_restart enable_irq enable_percpu_irq + errno_to_blk_status + ether_setup eth_type_trans eventfd_ctx_fdget eventfd_ctx_put @@ -722,12 +841,16 @@ filp_open_block find_extend_vma find_get_pid + find_last_bit find_next_bit find_next_zero_bit + find_pid_ns find_task_by_vpid find_vma finish_wait + firmware_request_nowarn flush_dcache_page + flush_delayed_work flush_work flush_workqueue fput @@ -750,6 +873,11 @@ full_name_hash fwnode_get_name fwnode_gpiod_get_index + fwnode_handle_get + fwnode_handle_put + fwnode_property_present + fwnode_property_read_string + fwnode_property_read_u32_array gcd generic_file_llseek generic_handle_irq @@ -773,11 +901,29 @@ get_device __get_free_pages get_governor_parent_kobj + gether_cleanup + gether_connect + gether_disconnect + gether_get_dev_addr + gether_get_host_addr + gether_get_host_addr_u8 + gether_get_ifname + gether_get_qmult + gether_register_netdev + gether_set_dev_addr + gether_set_gadget + gether_set_host_addr + gether_set_ifname + gether_set_qmult + gether_setup_name_default + get_pfnblock_flags_mask get_pid_task get_random_bytes + get_random_bytes_arch get_random_u32 get_sg_io_hdr __get_task_comm + get_task_cred get_thermal_instance get_unused_fd_flags get_user_pages @@ -822,15 +968,27 @@ handle_nested_irq handle_simple_irq handle_sysrq + hashlen_string have_governor_per_policy hex2bin hex_dump_to_buffer + hex_to_bin + hid_add_device + hid_allocate_device + hid_debug + hid_destroy_device + hid_input_report + hid_parse_report hrtimer_active hrtimer_cancel hrtimer_forward hrtimer_init + hrtimer_init_sleeper + hrtimer_sleeper_start_expires hrtimer_start_range_ns hrtimer_try_to_cancel + hwrng_register + hwrng_unregister i2c_adapter_type i2c_add_numbered_adapter i2c_bus_type @@ -844,27 +1002,38 @@ i2c_new_dummy_device i2c_put_adapter i2c_register_driver + i2c_smbus_read_byte + i2c_smbus_read_byte_data + i2c_smbus_write_byte_data i2c_smbus_xfer i2c_transfer i2c_transfer_buffer_flags i2c_unregister_device i2c_verify_client ida_alloc_range + ida_destroy ida_free idr_alloc + idr_alloc_cyclic idr_destroy idr_find idr_for_each + idr_get_next idr_preload idr_remove ieee80211_channel_to_freq_khz ieee80211_freq_khz_to_channel ieee80211_get_channel_khz + ieee80211_hdrlen iio_device_unregister import_iovec + in6_pton + in_aton inc_zone_page_state + in_egroup_p init_dummy_netdev init_net + init_pid_ns init_pseudo __init_rwsem __init_swait_queue_head @@ -872,6 +1041,7 @@ init_timer_key init_wait_entry __init_waitqueue_head + input_alloc_absinfo input_allocate_device input_close_device input_event @@ -880,6 +1050,7 @@ input_mt_destroy_slots input_mt_init_slots input_mt_report_slot_state + input_mt_sync_frame input_open_device input_register_device input_register_handle @@ -890,7 +1061,12 @@ input_unregister_device input_unregister_handle input_unregister_handler + interval_tree_insert + interval_tree_iter_first + interval_tree_iter_next + interval_tree_remove int_sqrt + int_to_scsilun iomem_resource iommu_alloc_resv_region iommu_attach_group @@ -927,14 +1103,21 @@ iommu_unmap iommu_unregister_device_fault_handler __ioremap + io_schedule io_schedule_timeout iounmap + iov_iter_bvec + ip_compute_csum + ip_send_check iput + __ipv6_addr_type __irq_alloc_descs irq_create_mapping_affinity irq_create_of_mapping __irq_domain_add + irq_domain_get_irq_data irq_domain_remove + irq_domain_set_info irq_domain_xlate_twocell irq_find_mapping irq_get_irq_data @@ -956,9 +1139,14 @@ jiffies_to_usecs kasan_flag_enabled kasprintf + kernel_bind + kernel_connect kernel_cpustat + kernel_getsockname kernel_kobj + kernel_recvmsg kernel_restart + kernel_sendmsg kern_mount kern_unmount __kfifo_alloc @@ -968,6 +1156,7 @@ __kfifo_init __kfifo_in_r __kfifo_out + __kfifo_out_peek __kfifo_skip_r __kfifo_to_user_r kfree @@ -978,22 +1167,27 @@ kimage_voffset __kmalloc kmalloc_caches + kmalloc_order kmalloc_order_trace kmem_cache_alloc kmem_cache_alloc_trace kmem_cache_create + kmem_cache_create_usercopy kmem_cache_destroy kmem_cache_free kmemdup kobject_add kobject_create_and_add kobject_del + kobject_get kobject_init kobject_init_and_add kobject_put kobject_uevent + kobject_uevent_env kobj_sysfs_ops krealloc + ksize kstat kstrdup kstrndup @@ -1006,6 +1200,7 @@ kstrtos8 kstrtou16 kstrtou8 + kstrtou8_from_user kstrtouint kstrtouint_from_user kstrtoull @@ -1022,6 +1217,7 @@ kthread_flush_worker __kthread_init_worker kthread_mod_delayed_work + kthread_queue_delayed_work kthread_queue_work kthread_should_stop kthread_stop @@ -1032,6 +1228,7 @@ ktime_get_raw_ts64 ktime_get_real_seconds ktime_get_real_ts64 + ktime_get_snapshot ktime_get_ts64 ktime_get_with_offset kvfree @@ -1044,20 +1241,24 @@ list_sort __local_bh_enable_ip __lock_page + lock_sock_nested log_abnormal_wakeup_reason __log_post_read_mmio __log_read_mmio + log_threaded_irq_wakeup_reason __log_write_mmio loops_per_jiffy lzo1x_1_compress lzo1x_decompress_safe lzorle1x_1_compress + mac_pton mbox_chan_received_data mbox_controller_register mbox_controller_unregister mbox_free_channel mbox_request_channel mbox_send_message + memchr memcmp memcpy __memcpy_fromio @@ -1066,9 +1267,16 @@ memdup_user_nul memmove memparse + mempool_alloc + mempool_alloc_slab + mempool_create + mempool_destroy + mempool_free + mempool_free_slab memremap memset memset64 + __memset_io memstart_addr memunmap mfd_add_devices @@ -1099,6 +1307,7 @@ module_put __msecs_to_jiffies msleep + msleep_interruptible __mutex_init mutex_is_locked mutex_lock @@ -1115,20 +1324,29 @@ __netdev_alloc_skb netdev_state_change netdev_update_features + netif_carrier_off + netif_carrier_on netif_napi_add __netif_napi_del netif_receive_skb + netif_receive_skb_list netif_rx netif_rx_ni + netif_schedule_queue + netif_tx_stop_all_queues netif_tx_wake_queue + netlink_broadcast __netlink_kernel_create netlink_kernel_release netlink_unicast + net_ratelimit + nla_append nla_memcpy __nla_parse nla_put nla_put_64bit nla_put_nohdr + nla_reserve nla_strlcpy __nlmsg_put no_llseek @@ -1136,6 +1354,7 @@ noop_llseek nr_cpu_ids nr_irqs + ns_capable nsec_to_clock_t ns_to_timespec64 __num_online_cpus @@ -1147,13 +1366,16 @@ of_clk_add_provider of_clk_get of_clk_get_by_name + of_clk_hw_simple_get of_clk_src_onecell_get of_count_phandle_with_args + of_cpu_node_to_id of_device_get_match_data of_device_is_available of_device_is_compatible of_dma_controller_free of_dma_controller_register + of_dma_is_coherent of_drm_find_bridge of_drm_find_panel of_find_backlight_by_node @@ -1166,6 +1388,7 @@ of_find_node_by_phandle of_find_node_by_type of_find_node_opts_by_path + of_find_node_with_property of_find_property of_fwnode_ops of_genpd_add_provider_simple @@ -1175,10 +1398,17 @@ of_get_named_gpio_flags of_get_next_available_child of_get_next_child + of_get_next_parent of_get_property of_get_regulator_init_data + of_graph_get_next_endpoint + of_graph_get_port_parent + of_graph_get_remote_endpoint + of_graph_is_present + of_graph_parse_endpoint of_iomap of_irq_find_parent + of_irq_get of_irq_get_byname of_irq_parse_one of_machine_is_compatible @@ -1207,19 +1437,27 @@ of_property_read_variable_u8_array of_prop_next_string of_prop_next_u32 + of_pwm_xlate_with_flags of_reserved_mem_device_init_by_idx of_reserved_mem_device_release of_reserved_mem_lookup of_root of_thermal_get_ntrips + of_thermal_get_trip_points + of_thermal_is_trip_valid of_translate_address of_usb_host_tpl_support page_endio + page_frag_alloc + __page_frag_cache_drain + page_frag_free page_mapping - __page_pinner_migration_failed + __page_pinner_put_page panic panic_notifier_list param_array_ops + param_get_int + param_get_string param_ops_bool param_ops_byte param_ops_charp @@ -1228,11 +1466,15 @@ param_ops_string param_ops_uint param_ops_ulong + param_set_copystring + param_set_int pause_cpus pci_alloc_irq_vectors_affinity pci_assign_resource pci_clear_master pci_disable_device + pci_disable_msi + pcie_capability_read_word pci_enable_device pci_enable_wake pci_find_bus @@ -1240,12 +1482,18 @@ pci_find_ext_capability pci_free_irq_vectors pci_get_device + pci_iomap + pci_irq_vector pci_load_and_free_saved_state pci_load_saved_state + pci_msi_mask_irq + pci_msi_unmask_irq pci_read_config_dword pci_read_config_word __pci_register_driver + pci_release_region pci_release_regions + pci_request_region pci_rescan_bus pci_restore_msi_state pci_restore_state @@ -1258,14 +1506,21 @@ pci_write_config_dword pci_write_config_word PDE_DATA + pelt_load_avg_max __per_cpu_offset + perf_aux_output_begin + perf_aux_output_end + perf_aux_output_flag + perf_event_addr_filters_sync perf_event_create_kernel_counter perf_event_disable perf_event_enable perf_event_pause + perf_event_read_local perf_event_read_value perf_event_release_kernel perf_event_update_userpage + perf_get_aux perf_pmu_migrate_context perf_pmu_register perf_pmu_unregister @@ -1289,6 +1544,12 @@ pin_user_pages pin_user_pages_fast pin_user_pages_remote + pktgen_xfrm_outer_mode_output + pkvm_iommu_finalize + pkvm_iommu_resume + pkvm_iommu_s2mpu_register + pkvm_iommu_suspend + pkvm_iommu_sysmmu_sync_register platform_bus_type platform_device_add platform_device_add_data @@ -1306,6 +1567,7 @@ platform_find_device_by_driver platform_get_irq platform_get_irq_byname + platform_get_irq_optional platform_get_resource platform_get_resource_byname platform_irq_count @@ -1329,6 +1591,7 @@ __pm_runtime_use_autosuspend __pm_stay_awake pm_stay_awake + pm_system_wakeup pm_wakeup_dev_event pm_wakeup_ws_event power_supply_changed @@ -1342,6 +1605,8 @@ power_supply_set_property power_supply_unregister power_supply_unreg_notifier + prandom_bytes + prandom_u32 preempt_schedule preempt_schedule_notrace prepare_to_wait @@ -1356,8 +1621,14 @@ proc_dostring proc_douintvec_minmax proc_mkdir + proc_mkdir_data proc_remove + proc_set_size + proto_register + proto_unregister + pskb_expand_head __pskb_pull_tail + __put_cred put_device put_disk __put_page @@ -1366,16 +1637,30 @@ __put_task_struct put_unused_fd put_vaddr_frames + pwm_apply_state + pwmchip_add + pwmchip_remove + pwm_get_chip_data + pwm_set_chip_data queue_delayed_work_on queue_work_on + radix_tree_delete_item + radix_tree_gang_lookup + radix_tree_insert + radix_tree_iter_delete + radix_tree_lookup + radix_tree_next_chunk + radix_tree_preload ___ratelimit raw_notifier_call_chain raw_notifier_chain_register _raw_read_lock _raw_read_lock_bh + _raw_read_lock_irq _raw_read_lock_irqsave _raw_read_unlock _raw_read_unlock_bh + _raw_read_unlock_irq _raw_read_unlock_irqrestore _raw_spin_lock _raw_spin_lock_bh @@ -1387,9 +1672,11 @@ _raw_spin_unlock_irq _raw_spin_unlock_irqrestore _raw_write_lock + _raw_write_lock_bh _raw_write_lock_irq _raw_write_lock_irqsave _raw_write_unlock + _raw_write_unlock_bh _raw_write_unlock_irq _raw_write_unlock_irqrestore rb_erase @@ -1402,6 +1689,8 @@ __rcu_read_unlock rdev_get_drvdata rdev_get_id + reboot_mode + refcount_dec_not_one refcount_warn_saturate __refrigerator regcache_cache_only @@ -1419,7 +1708,9 @@ register_netdev register_netdevice register_netdevice_notifier + register_netevent_notifier register_oom_notifier + register_pernet_subsys register_pm_notifier register_reboot_notifier register_restart_handler @@ -1452,6 +1743,7 @@ regulator_enable_regmap regulator_get regulator_get_optional + regulator_get_voltage regulator_get_voltage_sel_regmap regulator_is_enabled regulator_is_enabled_regmap @@ -1460,12 +1752,16 @@ regulator_notifier_call_chain regulator_put regulator_register + regulator_set_load regulator_set_voltage regulator_set_voltage_sel_regmap regulator_unregister regulatory_hint + regulatory_set_wiphy_regd + regulatory_set_wiphy_regd_sync_rtnl release_firmware __release_region + release_sock remap_pfn_range remap_vmalloc_range remove_cpu @@ -1484,6 +1780,8 @@ rfkill_init_sw_state rfkill_register rfkill_unregister + __root_device_register + root_device_unregister root_task_group rps_needed rtc_class_close @@ -1493,13 +1791,20 @@ rtc_tm_to_time64 rtc_update_irq rtc_valid_tm + rt_mutex_destroy + __rt_mutex_init + rt_mutex_lock + rt_mutex_trylock + rt_mutex_unlock rtnl_is_locked rtnl_lock + rtnl_trylock rtnl_unlock runqueues sched_clock sched_feat_keys sched_setattr_nocheck + sched_set_fifo sched_setscheduler sched_setscheduler_nocheck sched_show_task @@ -1516,21 +1821,31 @@ schedule_timeout schedule_timeout_interruptible scnprintf + scsi_add_host_with_dma scsi_autopm_get_device scsi_autopm_put_device scsi_block_requests scsi_block_when_processing_errors + scsi_build_sense_buffer + scsi_change_queue_depth scsi_command_size_tbl scsi_compat_ioctl scsi_device_get scsi_device_put + scsi_host_alloc + scsi_host_put scsi_ioctl scsi_ioctl_block_when_processing_errors + __scsi_iterate_devices scsi_normalize_sense __scsi_print_sense scsi_register_interface + scsi_remove_host + scsi_scan_host + scsi_set_sense_information scsi_unblock_requests sdev_prefix_printk + send_sig_info seq_hex_dump seq_lseek seq_open @@ -1545,18 +1860,27 @@ seq_write set_blocksize set_cpus_allowed_ptr + set_freezable set_normalized_timespec64 set_page_dirty set_page_dirty_lock __SetPageMovable + set_user_nice sg_alloc_table sg_alloc_table_from_pages + sg_copy_buffer + sg_copy_from_buffer + sg_copy_to_buffer sg_free_table sg_init_one sg_init_table + sg_miter_next + sg_miter_start + sg_miter_stop sg_next __sg_page_iter_next __sg_page_iter_start + sg_pcopy_from_buffer sg_scsi_ioctl shmem_file_setup simple_attr_open @@ -1573,11 +1897,20 @@ single_open single_open_size single_release + sk_alloc + skb_add_rx_frag + skb_checksum skb_clone + skb_coalesce_rx_frag skb_copy + skb_copy_bits + skb_copy_datagram_iter skb_copy_expand skb_dequeue skb_dequeue_tail + skb_free_datagram + __skb_get_hash + __skb_pad skb_pull skb_push skb_put @@ -1585,7 +1918,12 @@ skb_queue_purge skb_queue_tail skb_realloc_headroom + skb_recv_datagram + skb_set_owner_w + skb_store_bits skb_trim + sk_free + skip_spaces smp_call_function smp_call_function_single smp_call_on_cpu @@ -1614,15 +1952,22 @@ snd_soc_component_enable_pin snd_soc_component_force_enable_pin snd_soc_component_get_pin_status + snd_soc_component_read snd_soc_component_set_jack snd_soc_component_set_pll snd_soc_component_set_sysclk + snd_soc_component_update_bits + snd_soc_component_write snd_soc_dai_set_pll snd_soc_dai_set_sysclk snd_soc_dai_set_tdm_slot snd_soc_dapm_add_routes snd_soc_dapm_del_routes + snd_soc_dapm_disable_pin + snd_soc_dapm_disable_pin_unlocked + snd_soc_dapm_force_enable_pin_unlocked snd_soc_dapm_get_enum_double + snd_soc_dapm_get_pin_status snd_soc_dapm_get_volsw snd_soc_dapm_ignore_suspend snd_soc_dapm_kcontrol_widget @@ -1631,6 +1976,7 @@ snd_soc_dapm_put_enum_double snd_soc_dapm_put_volsw snd_soc_dapm_sync + snd_soc_dapm_sync_unlocked snd_soc_get_enum_double snd_soc_get_volsw snd_soc_get_volsw_range @@ -1639,12 +1985,15 @@ snd_soc_info_volsw snd_soc_info_volsw_range snd_soc_info_volsw_sx + snd_soc_jack_report snd_soc_lookup_component snd_soc_new_compress snd_soc_of_get_dai_link_codecs snd_soc_of_get_dai_name snd_soc_of_parse_card_name snd_soc_of_parse_daifmt + snd_soc_params_to_bclk + snd_soc_params_to_frame_size snd_soc_put_enum_double snd_soc_put_volsw snd_soc_put_volsw_range @@ -1654,23 +2003,47 @@ snd_soc_runtime_set_dai_fmt snd_soc_unregister_card snd_soc_unregister_component + snd_vendor_set_ops snprintf soc_device_register + sock_alloc_send_skb + sock_create_kern + sock_gettstamp + sock_init_data + sock_no_accept + sock_no_listen + sock_no_mmap + sock_no_sendpage + sock_no_shutdown + sock_no_socketpair + sock_queue_rcv_skb + sock_register + sock_release + sock_setsockopt + sock_unregister sock_wfree softnet_data sort __spi_alloc_controller + spi_bus_lock + spi_bus_unlock spi_controller_resume spi_controller_suspend spi_finalize_current_message spi_register_controller __spi_register_driver + spi_set_cs_timing spi_setup spi_sync + spi_sync_locked spi_unregister_controller split_page sprintf sprint_symbol + srcu_init_notifier_head + srcu_notifier_call_chain + srcu_notifier_chain_register + srcu_notifier_chain_unregister sscanf __stack_chk_fail static_key_disable @@ -1681,6 +2054,7 @@ strcasecmp strcat strchr + strchrnul strcmp strcpy strcspn @@ -1691,9 +2065,11 @@ strlen strncasecmp strncat + strnchr strncmp strncpy strncpy_from_user + strndup_user strnlen strnstr strpbrk @@ -1705,6 +2081,7 @@ submit_bio submit_bio_wait subsys_system_register + suspend_set_ops __sw_hweight32 __sw_hweight64 sync_file_create @@ -1716,6 +2093,7 @@ sysctl_sched_features sysctl_sched_latency sysfs_add_file_to_group + sysfs_add_link_to_group sysfs_create_file_ns sysfs_create_files sysfs_create_group @@ -1723,21 +2101,27 @@ sysfs_create_link sysfs_emit sysfs_emit_at + __sysfs_match_string sysfs_notify + sysfs_remove_file_from_group sysfs_remove_file_ns sysfs_remove_group sysfs_remove_groups sysfs_remove_link + sysfs_remove_link_from_group sysfs_streq sysfs_update_group sysrq_mask system_freezing_cnt system_highpri_wq system_long_wq + system_power_efficient_wq system_state system_unbound_wq system_wq sys_tz + task_active_pid_ns + __tasklet_hi_schedule tasklet_init tasklet_kill __tasklet_schedule @@ -1745,6 +2129,7 @@ __task_pid_nr_ns task_rq_lock tcpci_get_tcpm_port + tcpci_irq tcpci_register_port tcpci_unregister_port tcpm_cc_change @@ -1772,47 +2157,85 @@ time64_to_tm topology_set_thermal_pressure _totalram_pages + touch_softlockup_watchdog + __trace_bprintk + __trace_bputs trace_event_buffer_commit trace_event_buffer_reserve trace_event_ignore_this_pid trace_event_raw_init trace_event_reg trace_handle_return + __traceiter_android_rvh_arm64_serror_panic + __traceiter_android_rvh_attach_entity_load_avg + __traceiter_android_rvh_bad_mode __traceiter_android_rvh_cgroup_force_kthread_migration __traceiter_android_rvh_check_preempt_wakeup __traceiter_android_rvh_cpu_cgroup_online __traceiter_android_rvh_cpu_overutilized __traceiter_android_rvh_dequeue_task + __traceiter_android_rvh_dequeue_task_fair + __traceiter_android_rvh_detach_entity_load_avg + __traceiter_android_rvh_die_kernel_fault + __traceiter_android_rvh_do_mem_abort + __traceiter_android_rvh_do_sea + __traceiter_android_rvh_do_sp_pc_abort + __traceiter_android_rvh_do_undefinstr + __traceiter_android_rvh_enqueue_task + __traceiter_android_rvh_enqueue_task_fair __traceiter_android_rvh_find_energy_efficient_cpu __traceiter_android_rvh_irqs_disable __traceiter_android_rvh_irqs_enable + __traceiter_android_rvh_pci_d3_sleep __traceiter_android_rvh_post_init_entity_util_avg __traceiter_android_rvh_preempt_disable __traceiter_android_rvh_preempt_enable + __traceiter_android_rvh_remove_entity_load_avg __traceiter_android_rvh_sched_fork + __traceiter_android_rvh_select_task_rq_fair __traceiter_android_rvh_select_task_rq_rt __traceiter_android_rvh_set_iowait + __traceiter_android_rvh_set_task_cpu __traceiter_android_rvh_typec_tcpci_chk_contaminant __traceiter_android_rvh_typec_tcpci_get_vbus __traceiter_android_rvh_uclamp_eff_get + __traceiter_android_rvh_uclamp_rq_util_with + __traceiter_android_rvh_ufs_complete_init __traceiter_android_rvh_ufs_reprogram_all_keys + __traceiter_android_rvh_update_blocked_fair + __traceiter_android_rvh_update_load_avg + __traceiter_android_rvh_update_rt_rq_load_avg __traceiter_android_rvh_util_est_update __traceiter_android_vh_arch_set_freq_scale + __traceiter_android_vh_cma_alloc_finish + __traceiter_android_vh_cma_alloc_start __traceiter_android_vh_cpu_idle_enter __traceiter_android_vh_cpu_idle_exit + __traceiter_android_vh_dup_task_struct __traceiter_android_vh_enable_thermal_genl_check __traceiter_android_vh_ep_create_wakeup_source + __traceiter_android_vh_get_user_pages + __traceiter_android_vh___get_user_pages_remote + __traceiter_android_vh_internal_get_user_pages_fast __traceiter_android_vh_ipi_stop + __traceiter_android_vh_meminfo_proc_show __traceiter_android_vh_of_i2c_get_board_info + __traceiter_android_vh_pagecache_get_page + __traceiter_android_vh_pin_user_pages + __traceiter_android_vh_rmqueue + __traceiter_android_vh_scheduler_tick __traceiter_android_vh_setscheduler_uclamp __traceiter_android_vh_snd_compr_use_pause_in_drain + __traceiter_android_vh_sound_usb_support_cpu_suspend __traceiter_android_vh_sysrq_crash __traceiter_android_vh_thermal_pm_notify_suspend __traceiter_android_vh_timerfd_create + __traceiter_android_vh_try_grab_compound_head __traceiter_android_vh_typec_store_partner_src_caps __traceiter_android_vh_typec_tcpci_override_toggling __traceiter_android_vh_typec_tcpm_get_timer __traceiter_android_vh_typec_tcpm_log + __traceiter_android_vh_typec_tcpm_modify_src_caps __traceiter_android_vh_ufs_check_int_errors __traceiter_android_vh_ufs_compl_command __traceiter_android_vh_ufs_fill_prdt @@ -1821,6 +2244,8 @@ __traceiter_android_vh_ufs_send_tm_command __traceiter_android_vh_ufs_send_uic_command __traceiter_android_vh_ufs_update_sysfs + __traceiter_android_vh_usb_dev_resume + __traceiter_android_vh_usb_dev_suspend __traceiter_clock_set_rate __traceiter_cpu_frequency __traceiter_device_pm_callback_end @@ -1828,6 +2253,8 @@ __traceiter_dwc3_readl __traceiter_dwc3_writel __traceiter_gpu_mem_total + __traceiter_mm_vmscan_direct_reclaim_begin + __traceiter_mm_vmscan_direct_reclaim_end __traceiter_pelt_cfs_tp __traceiter_pelt_dl_tp __traceiter_pelt_irq_tp @@ -1843,41 +2270,76 @@ __traceiter_sched_util_est_se_tp __traceiter_suspend_resume trace_output_call + __tracepoint_android_rvh_arm64_serror_panic + __tracepoint_android_rvh_attach_entity_load_avg + __tracepoint_android_rvh_bad_mode __tracepoint_android_rvh_cgroup_force_kthread_migration __tracepoint_android_rvh_check_preempt_wakeup __tracepoint_android_rvh_cpu_cgroup_online __tracepoint_android_rvh_cpu_overutilized __tracepoint_android_rvh_dequeue_task + __tracepoint_android_rvh_dequeue_task_fair + __tracepoint_android_rvh_detach_entity_load_avg + __tracepoint_android_rvh_die_kernel_fault + __tracepoint_android_rvh_do_mem_abort + __tracepoint_android_rvh_do_sea + __tracepoint_android_rvh_do_sp_pc_abort + __tracepoint_android_rvh_do_undefinstr + __tracepoint_android_rvh_enqueue_task + __tracepoint_android_rvh_enqueue_task_fair __tracepoint_android_rvh_find_energy_efficient_cpu __tracepoint_android_rvh_irqs_disable __tracepoint_android_rvh_irqs_enable + __tracepoint_android_rvh_pci_d3_sleep __tracepoint_android_rvh_post_init_entity_util_avg __tracepoint_android_rvh_preempt_disable __tracepoint_android_rvh_preempt_enable + __tracepoint_android_rvh_remove_entity_load_avg __tracepoint_android_rvh_sched_fork + __tracepoint_android_rvh_select_task_rq_fair __tracepoint_android_rvh_select_task_rq_rt __tracepoint_android_rvh_set_iowait + __tracepoint_android_rvh_set_task_cpu __tracepoint_android_rvh_typec_tcpci_chk_contaminant __tracepoint_android_rvh_typec_tcpci_get_vbus __tracepoint_android_rvh_uclamp_eff_get + __tracepoint_android_rvh_uclamp_rq_util_with + __tracepoint_android_rvh_ufs_complete_init __tracepoint_android_rvh_ufs_reprogram_all_keys + __tracepoint_android_rvh_update_blocked_fair + __tracepoint_android_rvh_update_load_avg + __tracepoint_android_rvh_update_rt_rq_load_avg __tracepoint_android_rvh_util_est_update __tracepoint_android_vh_arch_set_freq_scale + __tracepoint_android_vh_cma_alloc_finish + __tracepoint_android_vh_cma_alloc_start __tracepoint_android_vh_cpu_idle_enter __tracepoint_android_vh_cpu_idle_exit + __tracepoint_android_vh_dup_task_struct __tracepoint_android_vh_enable_thermal_genl_check __tracepoint_android_vh_ep_create_wakeup_source + __tracepoint_android_vh_get_user_pages + __tracepoint_android_vh___get_user_pages_remote + __tracepoint_android_vh_internal_get_user_pages_fast __tracepoint_android_vh_ipi_stop + __tracepoint_android_vh_meminfo_proc_show __tracepoint_android_vh_of_i2c_get_board_info + __tracepoint_android_vh_pagecache_get_page + __tracepoint_android_vh_pin_user_pages + __tracepoint_android_vh_rmqueue + __tracepoint_android_vh_scheduler_tick __tracepoint_android_vh_setscheduler_uclamp __tracepoint_android_vh_snd_compr_use_pause_in_drain + __tracepoint_android_vh_sound_usb_support_cpu_suspend __tracepoint_android_vh_sysrq_crash __tracepoint_android_vh_thermal_pm_notify_suspend __tracepoint_android_vh_timerfd_create + __tracepoint_android_vh_try_grab_compound_head __tracepoint_android_vh_typec_store_partner_src_caps __tracepoint_android_vh_typec_tcpci_override_toggling __tracepoint_android_vh_typec_tcpm_get_timer __tracepoint_android_vh_typec_tcpm_log + __tracepoint_android_vh_typec_tcpm_modify_src_caps __tracepoint_android_vh_ufs_check_int_errors __tracepoint_android_vh_ufs_compl_command __tracepoint_android_vh_ufs_fill_prdt @@ -1886,6 +2348,8 @@ __tracepoint_android_vh_ufs_send_tm_command __tracepoint_android_vh_ufs_send_uic_command __tracepoint_android_vh_ufs_update_sysfs + __tracepoint_android_vh_usb_dev_resume + __tracepoint_android_vh_usb_dev_suspend __tracepoint_clock_set_rate __tracepoint_cpu_frequency __tracepoint_device_pm_callback_end @@ -1893,6 +2357,8 @@ __tracepoint_dwc3_readl __tracepoint_dwc3_writel __tracepoint_gpu_mem_total + __tracepoint_mm_vmscan_direct_reclaim_begin + __tracepoint_mm_vmscan_direct_reclaim_end __tracepoint_pelt_cfs_tp __tracepoint_pelt_dl_tp __tracepoint_pelt_irq_tp @@ -1941,6 +2407,7 @@ uart_write_wakeup uclamp_eff_value __udelay + udp4_hwcsum ufshcd_bkops_ctrl ufshcd_hold ufshcd_pltfrm_init @@ -1965,7 +2432,9 @@ unregister_netdev unregister_netdevice_notifier unregister_netdevice_queue + unregister_netevent_notifier unregister_oom_notifier + unregister_pernet_subsys unregister_pm_notifier unregister_reboot_notifier unregister_shrinker @@ -1975,26 +2444,35 @@ unregister_virtio_driver up update_devfreq + ___update_load_avg __update_load_avg_blocked_se + ___update_load_sum update_rq_clock up_read up_write usb_add_function usb_add_hcd + usb_assign_descriptors usb_copy_descriptors __usb_create_hcd usb_disabled usb_enable_autosuspend + usb_ep_alloc_request usb_ep_autoconfig usb_ep_disable usb_ep_enable + usb_ep_free_request + usb_ep_queue + usb_free_all_descriptors usb_function_register usb_function_unregister usb_gadget_set_state + usb_gstrings_attach usb_hcd_is_primary_hcd usb_hcd_platform_shutdown usb_hub_find_child usb_interface_id + usb_os_desc_prepare_interf_dir usb_otg_state_string usb_put_function_instance usb_put_hcd @@ -2008,6 +2486,7 @@ usb_unregister_notify __usecs_to_jiffies usleep_range + uuid_gen v4l2_ctrl_handler_free v4l2_ctrl_handler_init_class v4l2_ctrl_log_status @@ -2044,6 +2523,7 @@ vb2_common_vm_ops vb2_create_framevec vb2_destroy_framevec + vb2_dma_sg_memops vb2_dqbuf vb2_fop_mmap vb2_fop_poll @@ -2089,6 +2569,7 @@ virtqueue_kick_prepare virtqueue_notify vmalloc + vmalloc_to_page vmalloc_user vmap vmf_insert_pfn_prot @@ -2099,12 +2580,16 @@ vring_del_virtqueue vring_interrupt vring_new_virtqueue + vscnprintf vsnprintf vsscanf vunmap vzalloc + vzalloc_node wait_for_completion + wait_for_completion_interruptible wait_for_completion_interruptible_timeout + wait_for_completion_killable wait_for_completion_timeout wait_woken __wake_up @@ -2114,7 +2599,9 @@ wakeup_source_create wakeup_source_destroy wakeup_source_register + wakeup_source_remove wakeup_source_unregister + __wake_up_sync __warn_printk watchdog_init_timeout watchdog_register_device @@ -2125,8 +2612,22 @@ wiphy_new_nm wiphy_register wiphy_unregister + wireless_send_event woken_wake_function work_busy + __xa_alloc + xa_clear_mark + xa_destroy + xa_erase + xa_find + xa_find_after + xa_get_mark + xa_load + xa_set_mark + xa_store + __xfrm_state_destroy + xfrm_state_lookup_byspi + xfrm_stateonly_find xhci_address_device xhci_bus_resume xhci_bus_suspend diff --git a/android/abi_gki_aarch64_hikey960 b/android/abi_gki_aarch64_hikey960 index 0609f21fcd3b7c8079188e9df073b25d77ccf4b5..eb9e5e380990982b0f7f73c8ae34f204bfd5e0b3 100644 --- a/android/abi_gki_aarch64_hikey960 +++ b/android/abi_gki_aarch64_hikey960 @@ -791,7 +791,6 @@ of_get_next_available_child of_parse_phandle of_property_read_u64 - __page_pinner_migration_failed __put_page put_unused_fd rb_erase diff --git a/android/abi_gki_aarch64_virtual_device b/android/abi_gki_aarch64_virtual_device index c7028c2f267aac1fdd0b2cdd0e99fcc45c9b059e..9968c643ed6bdfb5a4626ac2cc303a2156c054b1 100644 --- a/android/abi_gki_aarch64_virtual_device +++ b/android/abi_gki_aarch64_virtual_device @@ -13,11 +13,7 @@ arm64_const_caps_ready arm64_use_ng_mappings bcmp - bio_alloc_bioset - bio_endio - blk_alloc_queue blk_cleanup_queue - blk_queue_flag_clear blk_queue_flag_set blk_queue_io_min blk_queue_io_opt @@ -25,15 +21,13 @@ blk_queue_max_discard_sectors blk_queue_max_write_zeroes_sectors blk_queue_physical_block_size - blk_queue_write_cache - blk_status_to_errno bpf_trace_run2 bpf_trace_run3 + bt_err + bt_info + bt_warn cancel_delayed_work_sync cancel_work_sync - capable - cfg80211_inform_bss_data - cfg80211_put_bss __cfi_slowpath __check_object_size __class_create @@ -55,26 +49,22 @@ cpumask_next cpu_number __cpu_online_mask - __cpu_possible_mask crypto_register_alg crypto_register_scomp crypto_unregister_alg crypto_unregister_scomp - _ctype debugfs_create_dir debugfs_create_file debugfs_remove delayed_work_timer_fn del_gendisk del_timer - del_timer_sync destroy_workqueue dev_close _dev_err device_add_disk device_create - device_initialize - device_register + device_init_wakeup device_release_driver device_unregister _dev_info @@ -86,15 +76,19 @@ devm_request_threaded_irq _dev_notice dev_queue_xmit - dev_set_name _dev_warn + dma_alloc_attrs dma_buf_export + dma_fence_context_alloc + dma_fence_init + dma_fence_release + dma_fence_signal_locked + dma_free_attrs dma_map_sg_attrs dma_set_coherent_mask dma_set_mask dma_sync_sg_for_device dma_unmap_sg_attrs - down_read down_write ether_setup ethtool_op_get_link @@ -110,20 +104,19 @@ free_netdev __free_pages free_pages - fs_bio_set get_device __get_free_pages get_random_bytes get_unused_fd_flags gic_nonsecure_priorities - hwrng_register - hwrng_unregister + hci_alloc_dev + __hci_cmd_sync + hci_free_dev + hci_recv_frame + hci_register_dev + hci_unregister_dev ida_alloc_range ida_free - idr_alloc - idr_destroy - idr_remove - __init_rwsem __init_swait_queue_head init_timer_key init_wait_entry @@ -136,12 +129,14 @@ input_register_device input_set_abs_params input_unregister_device + __ioremap iounmap jiffies jiffies_to_msecs kasan_flag_enabled kfree kfree_skb + kimage_voffset __kmalloc kmalloc_caches kmem_cache_alloc @@ -150,21 +145,14 @@ kmem_cache_destroy kmem_cache_free kmemdup - kobject_add - kobject_init - kobject_put kobject_uevent - krealloc kstrdup kstrndup - kstrtobool kstrtoint kstrtouint - kstrtoull ktime_get ktime_get_mono_fast_ns ktime_get_raw_ts64 - ktime_get_real_seconds ktime_get_real_ts64 ktime_get_ts64 ktime_get_with_offset @@ -184,7 +172,6 @@ memstart_addr misc_deregister misc_register - mod_timer module_layout module_put __msecs_to_jiffies @@ -210,15 +197,11 @@ nf_conntrack_destroy no_llseek nonseekable_open - noop_llseek nr_cpu_ids __num_online_cpus - of_find_property - of_get_property of_property_read_variable_u32_array - __page_pinner_migration_failed + __page_pinner_put_page param_ops_bool - param_ops_charp param_ops_int param_ops_uint passthru_features_check @@ -227,29 +210,32 @@ pci_read_config_byte pci_read_config_dword __pci_register_driver + pci_release_region + pci_request_region pci_set_master pci_unregister_driver __per_cpu_offset perf_trace_buf_alloc perf_trace_run_bpf_submit + platform_device_add + platform_device_alloc + platform_device_del + platform_device_put platform_device_unregister __platform_driver_register platform_driver_unregister platform_get_irq platform_get_resource pm_runtime_allow - __pm_runtime_disable - pm_runtime_enable pm_runtime_force_resume pm_runtime_force_suspend - __pm_runtime_idle __pm_runtime_resume pm_runtime_set_autosuspend_delay __pm_runtime_suspend __pm_runtime_use_autosuspend + pm_wakeup_dev_event preempt_schedule preempt_schedule_notrace - prepare_to_wait prepare_to_wait_event printk put_device @@ -281,8 +267,9 @@ __regmap_init regmap_read regmap_write + release_firmware remap_pfn_range - __request_module + request_firmware request_threaded_irq revalidate_disk_size rtnl_link_register @@ -292,11 +279,7 @@ schedule schedule_timeout scnprintf - seq_lseek - seq_open seq_printf - seq_read - seq_release serio_close serio_interrupt serio_open @@ -304,7 +287,6 @@ __serio_register_driver __serio_register_port serio_unregister_driver - set_disk_ro sg_alloc_table sg_free_table sg_init_one @@ -313,41 +295,42 @@ skb_add_rx_frag skb_clone skb_dequeue + skb_pull skb_push skb_put skb_queue_tail snd_card_free snd_card_new snd_card_register - snd_ctl_add - snd_ctl_enum_info - snd_ctl_new1 snd_ctl_sync_vmaster snd_device_new snd_jack_new snd_jack_report snd_pci_quirk_lookup snd_pcm_add_chmap_ctls + snd_pcm_hw_constraint_integer snd_pcm_new snd_pcm_period_elapsed + snd_pcm_set_managed_buffer_all snd_pcm_set_ops snprintf sort sprintf + sscanf __stack_chk_fail + strchr strcmp strcpy strlcpy strlen strncmp strncpy - strsep - submit_bio + strscpy + sync_file_create synchronize_rcu sysfs_create_group __sysfs_match_string sysfs_remove_group - sysfs_remove_link system_wq trace_event_buffer_commit trace_event_buffer_reserve @@ -363,7 +346,6 @@ __tracepoint_rwmmio_write trace_raw_output_prep trace_seq_printf - try_module_get __udelay unlock_page unregister_blkdev @@ -372,9 +354,16 @@ unregister_netdevice_queue unregister_virtio_device unregister_virtio_driver - up_read up_write - __usecs_to_jiffies + usb_alloc_urb + usb_anchor_urb + usb_control_msg + usb_deregister + usb_free_urb + usb_kill_anchored_urbs + usb_register_driver + usb_submit_urb + usb_unanchor_urb usleep_range vabits_actual vfree @@ -395,6 +384,7 @@ virtqueue_kick virtqueue_kick_prepare virtqueue_notify + vmalloc_to_page vring_create_virtqueue vring_del_virtqueue vring_interrupt @@ -408,7 +398,6 @@ amba_request_regions clk_get clk_put - __ioremap serio_unregister_port # required by armmmci.ko @@ -435,10 +424,12 @@ mmc_remove_host mmc_request_done mmc_send_tuning + of_get_property pinctrl_lookup_state pinctrl_pm_select_sleep_state pinctrl_select_default_state pinctrl_select_state + __pm_runtime_idle regulator_disable regulator_enable reset_control_assert @@ -447,6 +438,46 @@ sg_miter_start sg_miter_stop +# required by btintel.ko + bt_to_errno + hci_cmd_sync + request_firmware_direct + +# required by btusb.ko + bit_wait_timeout + btbcm_set_bdaddr + btbcm_setup_apple + btbcm_setup_patchram + device_wakeup_disable + disable_irq + disable_irq_nosync + enable_irq + gpiod_get_optional + gpiod_put + gpiod_set_value_cansleep + __hci_cmd_sync_ev + hci_recv_diag + irq_modify_status + irq_set_irq_wake + of_irq_get_byname + of_match_device + of_property_read_variable_u16_array + out_of_line_wait_on_bit_timeout + pm_system_wakeup + usb_autopm_get_interface + usb_autopm_put_interface + usb_bulk_msg + usb_driver_claim_interface + usb_driver_release_interface + usb_enable_autosuspend + usb_get_from_anchor + usb_ifnum_to_if + usb_match_id + usb_queue_reset_device + usb_scuttle_anchored_urbs + usb_set_interface + wake_up_bit + # required by clk-vexpress-osc.ko clk_hw_set_rate_range devm_clk_hw_register @@ -463,26 +494,9 @@ netdev_master_upper_dev_link rtnl_is_locked -# required by gnss-cmdline.ko - bus_find_device - device_find_child - device_match_name - platform_bus_type - strstr - -# required by gnss-serial.ko - gnss_allocate_device - gnss_deregister_device - gnss_insert_raw - gnss_put_device - gnss_register_device - serdev_device_close - serdev_device_open - serdev_device_set_baudrate - serdev_device_set_flow_control - serdev_device_wait_until_sent - serdev_device_write - serdev_device_write_wakeup +# required by goldfish_address_space.ko + memremap + memunmap # required by goldfish_battery.ko power_supply_changed @@ -490,6 +504,17 @@ power_supply_register power_supply_unregister +# required by goldfish_pipe.ko + get_user_pages_fast + set_page_dirty + tasklet_init + tasklet_kill + __tasklet_schedule + +# required by goldfish_sync.ko + dma_fence_default_wait + dma_fence_free + # required by gs_usb.ko alloc_candev_mqs alloc_can_err_skb @@ -504,35 +529,13 @@ register_candev unregister_candev usb_alloc_coherent - usb_alloc_urb - usb_anchor_urb - usb_control_msg - usb_deregister usb_free_coherent - usb_free_urb - usb_kill_anchored_urbs - usb_register_driver - usb_submit_urb - usb_unanchor_urb # required by hci_vhci.ko - bt_err _copy_from_iter_full - hci_alloc_dev - hci_free_dev - hci_recv_frame - hci_register_dev - hci_unregister_dev - skb_pull skb_queue_head skb_queue_purge -# required by ledtrig-audio.ko - led_set_brightness_nosleep - led_trigger_event - led_trigger_register - led_trigger_unregister - # required by lzo-rle.ko lzorle1x_1_compress @@ -577,6 +580,7 @@ ieee80211_scan_completed ieee80211_stop_queues ieee80211_stop_tx_ba_cb_irqsafe + ieee80211_tx_prepare_skb ieee80211_tx_status_irqsafe ieee80211_unregister_hw ieee80211_wake_queues @@ -610,93 +614,12 @@ unregister_pernet_device wiphy_apply_custom_regulatory -# required by md-mod.ko - ack_all_badblocks - alloc_page_buffers - _atomic_dec_and_lock - autoremove_wake_function - badblocks_clear - badblocks_exit - badblocks_init - badblocks_set - badblocks_show - badblocks_store - bdev_check_media_change - bdevname - bdev_read_only - bd_link_disk_holder - bd_unlink_disk_holder - bio_add_page - bio_associate_blkg - bio_put - bioset_exit - bioset_init - blkdev_get_by_dev - blkdev_put - blk_finish_plug - blk_queue_split - blk_register_region - blk_set_stacking_limits - blk_start_plug - blk_sync_queue - blk_unregister_region - bmap - fget - file_path - flush_signals - free_buffer_head - int_pow - invalidate_bdev - invalidate_mapping_pages - kernel_sigaction - kernfs_find_and_get_ns - kernfs_notify - kernfs_put - kobject_del - kobject_get - kstrtoll - kthread_create_on_node - kthread_parkme - kthread_should_park - kthread_should_stop - kthread_stop - mempool_alloc - mempool_create - mempool_destroy - mempool_exit - mempool_free - mempool_init - mempool_kfree - mempool_kmalloc - mutex_trylock - part_end_io_acct - part_start_io_acct - percpu_ref_exit - percpu_ref_init - percpu_ref_is_zero - percpu_ref_switch_to_atomic_sync - percpu_ref_switch_to_percpu - proc_create - proc_dointvec - register_reboot_notifier - register_sysctl_table - remove_proc_entry - seq_file_path - simple_strtoul - skip_spaces - strreplace - submit_bh - submit_bio_wait - sync_blockdev - sysfs_create_link - unregister_reboot_notifier - unregister_sysctl_table - vfs_fsync - wake_up_process - # required by nd_virtio.ko + bio_alloc_bioset bio_chain bio_clone_blkg_association + fs_bio_set + submit_bio # required by net_failover.ko call_netdevice_notifiers @@ -713,10 +636,24 @@ netdev_lower_state_changed netdev_pick_tx pci_bus_type + vlan_uses_dev + vlan_vid_add + vlan_vid_del + vlan_vids_add_by_dev + vlan_vids_del_by_dev + +# required by open-dice.ko + devm_memremap + devm_memunmap + of_reserved_mem_lookup + __platform_driver_probe + simple_read_from_buffer + vm_iomap_memory # required by psmouse.ko bus_register_notifier bus_unregister_notifier + del_timer_sync device_add_groups device_create_file device_remove_file @@ -735,8 +672,9 @@ input_mt_report_slot_state input_mt_sync_frame input_set_capability + kstrtobool kstrtou8 - pm_wakeup_dev_event + mod_timer ps2_begin_command ps2_cmd_aborted ps2_command @@ -750,6 +688,7 @@ serio_rescan serio_unregister_child_port strcasecmp + strsep # required by pulse8-cec.ko cec_allocate_adapter @@ -760,23 +699,19 @@ cec_s_phys_addr cec_transmit_attempt_done_ts cec_unregister_adapter - strscpy wait_for_completion_timeout # required by rtc-test.ko add_timer - device_init_wakeup devm_rtc_allocate_device - platform_device_add - platform_device_alloc - platform_device_del - platform_device_put + ktime_get_real_seconds __rtc_register_device rtc_time64_to_tm rtc_tm_to_time64 rtc_update_irq # required by slcan.ko + capable hex_asc_upper hex_to_bin msleep_interruptible @@ -786,9 +721,8 @@ tty_unregister_ldisc # required by snd-hda-codec-generic.ko - devm_led_classdev_register_ext + _ctype snd_ctl_boolean_stereo_info - strchr strlcat __sw_hweight32 @@ -801,31 +735,36 @@ get_device_system_crosststamp kvasprintf ns_to_timespec64 + __pm_runtime_disable + pm_runtime_enable pm_runtime_forbid __printk_ratelimit regcache_mark_dirty + __request_module snd_card_rw_proc_new snd_component_add + snd_ctl_add _snd_ctl_add_follower snd_ctl_add_vmaster_hook snd_ctl_apply_vmaster_followers snd_ctl_boolean_mono_info + snd_ctl_enum_info snd_ctl_find_id snd_ctl_make_virtual_master + snd_ctl_new1 snd_ctl_remove snd_device_disconnect snd_device_free snd_jack_set_key - snd_pcm_hw_constraint_integer snd_pcm_hw_constraint_minmax snd_pcm_hw_constraint_step snd_pcm_hw_limit_rates snd_pcm_lib_default_mmap - snd_pcm_set_managed_buffer_all snd_pcm_set_sync snd_pcm_std_chmaps strncat timecounter_read + try_module_get # required by snd-hda-core.ko add_uevent_var @@ -833,10 +772,17 @@ bus_unregister device_add device_del + device_initialize + dev_set_name kasprintf + kobject_add kobject_create_and_add + kobject_init + kobject_put + krealloc pm_runtime_get_if_active __pm_runtime_set_status + prepare_to_wait regcache_cache_only regcache_sync regmap_update_bits_base @@ -852,6 +798,7 @@ param_array_ops param_get_int param_ops_bint + param_ops_charp param_set_int pci_dev_put pci_disable_msi @@ -885,34 +832,11 @@ vmap vunmap -# required by tpm.ko - alloc_chrdev_region - cdev_device_add - cdev_device_del - cdev_init - compat_only_sysfs_link_entry_to_kobj - devm_add_action - efi - efi_tpm_final_log_size - hash_digest_size - idr_get_next - idr_replace - jiffies_to_usecs - memchr_inv - memremap - memunmap - of_property_match_string - pm_suspend_global_flags - securityfs_create_dir - securityfs_create_file - securityfs_remove - seq_putc - seq_write - unregister_chrdev_region - -# required by tpm_vtpm_proxy.ko - anon_inode_getfile - compat_ptr_ioctl +# required by usbip-core.ko + iov_iter_kvec + param_ops_ulong + print_hex_dump + sock_recvmsg # required by vcan.ko sock_efree @@ -923,19 +847,52 @@ devres_free of_device_is_compatible of_find_compatible_node + of_find_property of_get_next_parent of_parse_phandle of_platform_populate of_root + __usecs_to_jiffies # required by vexpress-sysreg.ko bgpio_init devm_gpiochip_add_data_with_key devm_mfd_add_devices +# required by vhci-hcd.ko + kernel_sendmsg + kernel_sock_shutdown + kstrtoll + kthread_create_on_node + kthread_should_stop + kthread_stop + platform_bus + platform_device_add_data + sockfd_lookup + sysfs_remove_link + usb_add_hcd + usb_create_hcd + usb_create_shared_hcd + usb_disabled + usb_get_dev + usb_hcd_check_unlink_urb + usb_hcd_giveback_urb + usb_hcd_is_primary_hcd + usb_hcd_link_urb_to_ep + usb_hcd_poll_rh_status + usb_hcd_resume_root_hub + usb_hcd_unlink_urb_from_ep + usb_put_dev + usb_put_hcd + usb_remove_hcd + usb_speed_string + wake_up_process + # required by virt_wifi.ko cfg80211_connect_done cfg80211_disconnected + cfg80211_inform_bss_data + cfg80211_put_bss cfg80211_scan_done __dev_get_by_index dev_printk @@ -948,18 +905,9 @@ wiphy_register wiphy_unregister -# required by virt_wifi_sim.ko - ieee80211_get_channel_khz - release_firmware - request_firmware - # required by virtio-gpu.ko __devm_request_region - dma_fence_context_alloc - dma_fence_init dma_fence_match_context - dma_fence_release - dma_fence_signal_locked dma_fence_wait_timeout dma_resv_add_excl_fence dma_resv_test_signaled_rcu @@ -1074,20 +1022,21 @@ is_vmalloc_addr kmalloc_order_trace memdup_user + noop_llseek seq_puts - sync_file_create sync_file_get_fence __traceiter_dma_fence_emit __traceiter_gpu_mem_total __tracepoint_dma_fence_emit __tracepoint_gpu_mem_total - vmalloc_to_page vmemdup_user vm_get_page_prot ww_mutex_lock_interruptible ww_mutex_unlock # required by virtio-rng.ko + hwrng_register + hwrng_unregister wait_for_completion_killable # required by virtio_blk.ko @@ -1110,9 +1059,12 @@ blk_queue_max_hw_sectors blk_queue_max_segments blk_queue_max_segment_size + blk_queue_write_cache blk_rq_map_kern __blk_rq_map_sg + blk_status_to_errno set_capacity_revalidate_and_notify + set_disk_ro string_get_size virtio_max_dma_size @@ -1121,8 +1073,6 @@ cdev_alloc cdev_del device_destroy - dma_alloc_attrs - dma_free_attrs fasync_helper freezing_slow_path hvc_alloc @@ -1136,6 +1086,8 @@ pipe_unlock __refrigerator __register_chrdev + seq_lseek + seq_read single_open single_release __splice_from_pipe @@ -1144,9 +1096,9 @@ # required by virtio_mmio.ko device_for_each_child + device_register devm_platform_ioremap_resource platform_device_register_full - sscanf # required by virtio_net.ko bpf_dispatcher_xdp_func @@ -1223,9 +1175,7 @@ pci_iomap_range pci_irq_get_affinity pci_irq_vector - pci_release_region pci_release_selected_regions - pci_request_region pci_request_selected_regions pci_vfs_assigned synchronize_irq @@ -1239,14 +1189,8 @@ nvdimm_pmem_region_create # required by virtio_snd.ko - snd_ctl_notify snd_pcm_format_physical_width - snd_pcm_lib_free_pages snd_pcm_lib_ioctl - snd_pcm_lib_malloc_pages - snd_pcm_lib_preallocate_pages - _snd_pcm_stream_lock_irqsave - snd_pcm_stream_unlock_irqrestore wait_for_completion_interruptible_timeout # required by vmw_vsock_virtio_transport.ko @@ -1288,8 +1232,12 @@ __alloc_percpu bdget_disk bdput + bio_endio + blk_alloc_queue + blk_queue_flag_clear __class_register class_unregister + __cpu_possible_mask crypto_alloc_base crypto_comp_compress crypto_comp_decompress @@ -1297,16 +1245,23 @@ crypto_has_alg disk_end_io_acct disk_start_io_acct + down_read flush_dcache_page free_percpu fsync_bdev + idr_alloc + idr_destroy idr_find idr_for_each + idr_remove + __init_rwsem kstrtou16 + kstrtoull memset64 mutex_is_locked page_endio sysfs_streq + up_read vzalloc # required by zsmalloc.ko @@ -1329,3 +1284,4 @@ register_shrinker __SetPageMovable unregister_shrinker + diff --git a/android/abi_gki_modules_exports b/android/abi_gki_modules_exports new file mode 100644 index 0000000000000000000000000000000000000000..88b5a4b3adce4c4a30ff1446288c261fec5f157f --- /dev/null +++ b/android/abi_gki_modules_exports @@ -0,0 +1 @@ +[abi_symbol_list] diff --git a/android/abi_gki_modules_protected b/android/abi_gki_modules_protected new file mode 100644 index 0000000000000000000000000000000000000000..88b5a4b3adce4c4a30ff1446288c261fec5f157f --- /dev/null +++ b/android/abi_gki_modules_protected @@ -0,0 +1 @@ +[abi_symbol_list] diff --git a/android/gki_system_dlkm_modules b/android/gki_system_dlkm_modules new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/arch/Kconfig b/arch/Kconfig index 26916e0fe179763a66a53287432853dce3e3320f..d39f92e9cf51e0de0d30aa88571baebe90b03ce8 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -642,9 +642,7 @@ config ARCH_SUPPORTS_LTO_CLANG_THIN config HAS_LTO_CLANG def_bool y # Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510 - depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD - depends on $(success,test $(LLVM) -eq 1) - depends on $(success,test $(LLVM_IAS) -eq 1) + depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD && AS_IS_LLVM depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm) depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm) depends on ARCH_SUPPORTS_LTO_CLANG @@ -1146,6 +1144,9 @@ config RELR config ARCH_HAS_MEM_ENCRYPT bool +config ARCH_HAS_CC_PLATFORM + bool + config HAVE_SPARSE_SYSCALL_NR bool help @@ -1179,6 +1180,15 @@ config ARCH_SPLIT_ARG64 If a 32-bit architecture requires 64-bit arguments to be split into pairs of 32-bit arguments, select this option. +config ARCH_HAS_NONLEAF_PMD_YOUNG + bool + depends on PGTABLE_LEVELS > 2 + help + Architectures that select this option are capable of setting the + accessed bit in non-leaf PMD entries when using them as part of linear + address translations. Page table walkers that clear the accessed bit + may use this capability to reduce their search space. + source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index f3a58c1fdd55e39b35e76590ac9e845b74bd79ea..cb64e4797d2a89f2b15f8f44bab6be2203258e92 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -166,7 +166,6 @@ smp_callin(void) DBGS(("smp_callin: commencing CPU %d current %p active_mm %p\n", cpuid, current, current->active_mm)); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index ee7b01bb7346c9c57843e68a3fff34ad703601fa..95b996b6c4f12f5c173d26ecf3bb073d2e01fffe 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -480,3 +480,5 @@ 548 common pidfd_getfd sys_pidfd_getfd 549 common faccessat2 sys_faccessat2 550 common process_madvise sys_process_madvise +# 557 reserved for memfd_secret +558 common process_mrelease sys_process_mrelease diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 52906d3145371b4a5a0bbaba5d1d9239480465da..db0e104d68355b6dd0a35524960983291ccda9c4 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -189,7 +189,6 @@ void start_kernel_secondary(void) pr_info("## CPU%u LIVE ##: Executing Code...\n", cpu); local_irq_enable(); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index e485c48134414959da5befdc78b7a7368805c5cb..31a9758c91b56cf03b14db0632e927e5da9afbaf 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -88,6 +88,7 @@ config ARM select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL + select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_IDE if PCI || ISA || PCMCIA diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 8986a91a6f31b82c2ef81f7b7a9b4c425425a9a6..dd1cf70353986045b55accb19ee4cff56e8b3d91 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -400,12 +400,12 @@ choice Say Y here if you want kernel low-level debugging support on i.MX25. - config DEBUG_IMX21_IMX27_UART - bool "i.MX21 and i.MX27 Debug UART" - depends on SOC_IMX21 || SOC_IMX27 + config DEBUG_IMX27_UART + bool "i.MX27 Debug UART" + depends on SOC_IMX27 help Say Y here if you want kernel low-level debugging support - on i.MX21 or i.MX27. + on i.MX27. config DEBUG_IMX28_UART bool "i.MX28 Debug UART" @@ -1523,7 +1523,7 @@ config DEBUG_IMX_UART_PORT int "i.MX Debug UART Port Selection" depends on DEBUG_IMX1_UART || \ DEBUG_IMX25_UART || \ - DEBUG_IMX21_IMX27_UART || \ + DEBUG_IMX27_UART || \ DEBUG_IMX31_UART || \ DEBUG_IMX35_UART || \ DEBUG_IMX50_UART || \ @@ -1591,12 +1591,12 @@ config DEBUG_LL_INCLUDE default "debug/icedcc.S" if DEBUG_ICEDCC default "debug/imx.S" if DEBUG_IMX1_UART || \ DEBUG_IMX25_UART || \ - DEBUG_IMX21_IMX27_UART || \ + DEBUG_IMX27_UART || \ DEBUG_IMX31_UART || \ DEBUG_IMX35_UART || \ DEBUG_IMX50_UART || \ DEBUG_IMX51_UART || \ - DEBUG_IMX53_UART ||\ + DEBUG_IMX53_UART || \ DEBUG_IMX6Q_UART || \ DEBUG_IMX6SL_UART || \ DEBUG_IMX6SX_UART || \ diff --git a/arch/arm/Makefile b/arch/arm/Makefile index e15f76ca2887cbd7fe3ecfcb0c214682d75f2484..0e5a8765e60b3f7afaec708ce767d213c6e0bcc2 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -60,15 +60,15 @@ KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes # testing for a specific architecture or later rather impossible. -arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m -Wa,-march=armv7-m -arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 $(call cc-option,-march=armv7-a,-march=armv5t -Wa$(comma)-march=armv7-a) -arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6,-march=armv5t -Wa$(comma)-march=armv6) +arch-$(CONFIG_CPU_32v7M) =-D__LINUX_ARM_ARCH__=7 -march=armv7-m +arch-$(CONFIG_CPU_32v7) =-D__LINUX_ARM_ARCH__=7 -march=armv7-a +arch-$(CONFIG_CPU_32v6) =-D__LINUX_ARM_ARCH__=6 -march=armv6 # Only override the compiler option if ARMv6. The ARMv6K extensions are # always available in ARMv7 ifeq ($(CONFIG_CPU_32v6),y) -arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 $(call cc-option,-march=armv6k,-march=armv5t -Wa$(comma)-march=armv6k) +arch-$(CONFIG_CPU_32v6K) =-D__LINUX_ARM_ARCH__=6 -march=armv6k endif -arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 $(call cc-option,-march=armv5te,-march=armv4t) +arch-$(CONFIG_CPU_32v5) =-D__LINUX_ARM_ARCH__=5 -march=armv5te arch-$(CONFIG_CPU_32v4T) =-D__LINUX_ARM_ARCH__=4 -march=armv4t arch-$(CONFIG_CPU_32v4) =-D__LINUX_ARM_ARCH__=4 -march=armv4 arch-$(CONFIG_CPU_32v3) =-D__LINUX_ARM_ARCH__=3 -march=armv3m @@ -82,7 +82,7 @@ tune-$(CONFIG_CPU_ARM720T) =-mtune=arm7tdmi tune-$(CONFIG_CPU_ARM740T) =-mtune=arm7tdmi tune-$(CONFIG_CPU_ARM9TDMI) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM940T) =-mtune=arm9tdmi -tune-$(CONFIG_CPU_ARM946E) =$(call cc-option,-mtune=arm9e,-mtune=arm9tdmi) +tune-$(CONFIG_CPU_ARM946E) =-mtune=arm9e tune-$(CONFIG_CPU_ARM920T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM922T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_ARM925T) =-mtune=arm9tdmi @@ -90,11 +90,11 @@ tune-$(CONFIG_CPU_ARM926T) =-mtune=arm9tdmi tune-$(CONFIG_CPU_FA526) =-mtune=arm9tdmi tune-$(CONFIG_CPU_SA110) =-mtune=strongarm110 tune-$(CONFIG_CPU_SA1100) =-mtune=strongarm1100 -tune-$(CONFIG_CPU_XSCALE) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_XSC3) =$(call cc-option,-mtune=xscale,-mtune=strongarm110) -Wa,-mcpu=xscale -tune-$(CONFIG_CPU_FEROCEON) =$(call cc-option,-mtune=marvell-f,-mtune=xscale) -tune-$(CONFIG_CPU_V6) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) -tune-$(CONFIG_CPU_V6K) =$(call cc-option,-mtune=arm1136j-s,-mtune=strongarm) +tune-$(CONFIG_CPU_XSCALE) =-mtune=xscale +tune-$(CONFIG_CPU_XSC3) =-mtune=xscale +tune-$(CONFIG_CPU_FEROCEON) =-mtune=xscale +tune-$(CONFIG_CPU_V6) =-mtune=arm1136j-s +tune-$(CONFIG_CPU_V6K) =-mtune=arm1136j-s # Evaluate tune cc-option calls now tune-y := $(tune-y) diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c index aa075d8372ea20bd030d43663b61acba3e758d4e..74255e819831424216778c92fab3a1a92d6b3ab4 100644 --- a/arch/arm/boot/compressed/decompress.c +++ b/arch/arm/boot/compressed/decompress.c @@ -47,7 +47,10 @@ extern char * strchrnul(const char *, int); #endif #ifdef CONFIG_KERNEL_XZ +/* Prevent KASAN override of string helpers in decompressor */ +#undef memmove #define memmove memmove +#undef memcpy #define memcpy memcpy #include "../../../../lib/decompress_unxz.c" #endif diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S index c0e7a745103e22b8618618e971ba3ab41b95e5af..230030c1308538f2cef2e9f7118d6e691ab8bd59 100644 --- a/arch/arm/boot/compressed/efi-header.S +++ b/arch/arm/boot/compressed/efi-header.S @@ -9,16 +9,22 @@ #include .macro __nop -#ifdef CONFIG_EFI_STUB - @ This is almost but not quite a NOP, since it does clobber the - @ condition flags. But it is the best we can do for EFI, since - @ PE/COFF expects the magic string "MZ" at offset 0, while the - @ ARM/Linux boot protocol expects an executable instruction - @ there. - .inst MZ_MAGIC | (0x1310 << 16) @ tstne r0, #0x4d000 -#else AR_CLASS( mov r0, r0 ) M_CLASS( nop.w ) + .endm + + .macro __initial_nops +#ifdef CONFIG_EFI_STUB + @ This is a two-instruction NOP, which happens to bear the + @ PE/COFF signature "MZ" in the first two bytes, so the kernel + @ is accepted as an EFI binary. Booting via the UEFI stub + @ will not execute those instructions, but the ARM/Linux + @ boot protocol does, so we need some NOPs here. + .inst MZ_MAGIC | (0xe225 << 16) @ eor r5, r5, 0x4d000 + eor r5, r5, 0x4d000 @ undo previous insn +#else + __nop + __nop #endif .endm diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index df7e4f8c828735bae523f5e9c628123f8161bd9b..da236cf4719e222352ed19adb4c5eed97644ad9e 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -197,7 +197,8 @@ start: * were patching the initial instructions of the kernel, i.e * had started to exploit this "patch area". */ - .rept 7 + __initial_nops + .rept 5 __nop .endr #ifndef CONFIG_THUMB2_KERNEL diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index ce66ffd5a1bbc4e7ab6aa4b3e4b20cd1bef3c8ef..7e8151681597c18ad90e3b10c64f74b3f623aa5b 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -731,6 +731,7 @@ dtb-$(CONFIG_ARCH_OMAP3) += \ logicpd-som-lv-37xx-devkit.dtb \ omap3430-sdp.dtb \ omap3-beagle.dtb \ + omap3-beagle-ab4.dtb \ omap3-beagle-xm.dtb \ omap3-beagle-xm-ab.dtb \ omap3-cm-t3517.dtb \ diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi index 9b1a24cc5e91f2f52c32eeb248831f60d13040e0..df3c8d1d8f641230f9b17b8dcde7f58336918eb1 100644 --- a/arch/arm/boot/dts/armada-38x.dtsi +++ b/arch/arm/boot/dts/armada-38x.dtsi @@ -168,7 +168,7 @@ }; uart0: serial@12000 { - compatible = "marvell,armada-38x-uart"; + compatible = "marvell,armada-38x-uart", "ns16550a"; reg = <0x12000 0x100>; reg-shift = <2>; interrupts = ; @@ -178,7 +178,7 @@ }; uart1: serial@12100 { - compatible = "marvell,armada-38x-uart"; + compatible = "marvell,armada-38x-uart", "ns16550a"; reg = <0x12100 0x100>; reg-shift = <2>; interrupts = ; diff --git a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi index 910eacc8ad3bd25fb1eb0f84b6c9916bc66d3838..a362714ae9fc0307b083dc837c416e59c520ecc9 100644 --- a/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi +++ b/arch/arm/boot/dts/aspeed-g6-pinctrl.dtsi @@ -118,7 +118,7 @@ }; pinctrl_fwqspid_default: fwqspid_default { - function = "FWQSPID"; + function = "FWSPID"; groups = "FWQSPID"; }; diff --git a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts index 9a18453d7842876500a9e2f522f8dae616eb5746..0a53f21a8903256601598bda69fce063b85bc7bd 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts +++ b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts @@ -71,7 +71,6 @@ isc: isc@f0008000 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_isc_base &pinctrl_isc_data_8bit &pinctrl_isc_data_9_10 &pinctrl_isc_data_11_12>; - status = "okay"; }; qspi1: spi@f0024000 { diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts index 3ca97b47c69ceb62e15574b27ff9c7c3f96d0cda..7e5c598e7e68f8457f930ae50d00be7e6485e6aa 100644 --- a/arch/arm/boot/dts/at91-tse850-3.dts +++ b/arch/arm/boot/dts/at91-tse850-3.dts @@ -262,7 +262,7 @@ &macb1 { status = "okay"; - phy-mode = "rgmii"; + phy-mode = "rmii"; #address-cells = <1>; #size-cells = <0>; diff --git a/arch/arm/boot/dts/bcm-nsp.dtsi b/arch/arm/boot/dts/bcm-nsp.dtsi index 605b6d2f4a569c8b858396866f8a25ea50d133b9..1dae02bb82c2d65dccd9d1e7c688d60ef15fe6cf 100644 --- a/arch/arm/boot/dts/bcm-nsp.dtsi +++ b/arch/arm/boot/dts/bcm-nsp.dtsi @@ -77,7 +77,7 @@ interrupt-affinity = <&cpu0>, <&cpu1>; }; - mpcore@19000000 { + mpcore-bus@19000000 { compatible = "simple-bus"; ranges = <0x00000000 0x19000000 0x00023000>; #address-cells = <1>; @@ -219,7 +219,7 @@ status = "disabled"; }; - sdio: sdhci@21000 { + sdio: mmc@21000 { compatible = "brcm,sdhci-iproc-cygnus"; reg = <0x21000 0x100>; interrupts = ; diff --git a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts index 5395e8c2484e01796f20f6234fe60ea0ecefdad2..167538518a1ec5f1cd0b2b348cc19d6ae06dd675 100644 --- a/arch/arm/boot/dts/bcm2711-rpi-4-b.dts +++ b/arch/arm/boot/dts/bcm2711-rpi-4-b.dts @@ -54,8 +54,8 @@ regulator-always-on; regulator-settling-time-us = <5000>; gpios = <&expgpio 4 GPIO_ACTIVE_HIGH>; - states = <1800000 0x1 - 3300000 0x0>; + states = <1800000 0x1>, + <3300000 0x0>; status = "okay"; }; @@ -255,15 +255,16 @@ }; &pcie0 { - pci@1,0 { + pci@0,0 { + device_type = "pci"; #address-cells = <3>; #size-cells = <2>; ranges; reg = <0 0 0 0 0>; - usb@1,0 { - reg = <0x10000 0 0 0 0>; + usb@0,0 { + reg = <0 0 0 0 0>; resets = <&reset RASPBERRYPI_FIRMWARE_RESET_ID_USB>; }; }; diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi index 3d040f6e2a20f3523481377d6c94a5b623dc3dd3..e46a3f4ad350a54815d6570aec357566fd387a16 100644 --- a/arch/arm/boot/dts/bcm2711.dtsi +++ b/arch/arm/boot/dts/bcm2711.dtsi @@ -290,6 +290,7 @@ hvs: hvs@7e400000 { compatible = "brcm,bcm2711-hvs"; + reg = <0x7e400000 0x8000>; interrupts = ; }; @@ -480,11 +481,17 @@ #address-cells = <3>; #interrupt-cells = <1>; #size-cells = <2>; - interrupts = , + interrupts = , ; interrupt-names = "pcie", "msi"; interrupt-map-mask = <0x0 0x0 0x0 0x7>; interrupt-map = <0 0 0 1 &gicv2 GIC_SPI 143 + IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 2 &gicv2 GIC_SPI 144 + IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 3 &gicv2 GIC_SPI 145 + IRQ_TYPE_LEVEL_HIGH>, + <0 0 0 4 &gicv2 GIC_SPI 146 IRQ_TYPE_LEVEL_HIGH>; msi-controller; msi-parent = <&pcie0>; @@ -514,8 +521,8 @@ compatible = "brcm,genet-mdio-v5"; reg = <0xe14 0x8>; reg-names = "mdio"; - #address-cells = <0x0>; - #size-cells = <0x1>; + #address-cells = <0x1>; + #size-cells = <0x0>; }; }; }; @@ -549,6 +556,8 @@ , ; + gpio-ranges = <&gpio 0 0 58>; + gpclk0_gpio49: gpclk0_gpio49 { pin-gpclk { pins = "gpio49"; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 0f3be55201a5bea78b2d75b4455ec7a3e6c5ebfc..ffdf7c4fba465c14660318d355f61156303f6f12 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -126,6 +126,8 @@ interrupt-controller; #interrupt-cells = <2>; + gpio-ranges = <&gpio 0 0 54>; + /* Defines common pin muxing groups * * While each pin can have its mux selected diff --git a/arch/arm/boot/dts/bcm4708-netgear-r6250.dts b/arch/arm/boot/dts/bcm4708-netgear-r6250.dts index 61c7b137607e5d31d73c6eef76b7960636038585..7900aac4f35a96de935a7a9031f4f4ba95ec4e13 100644 --- a/arch/arm/boot/dts/bcm4708-netgear-r6250.dts +++ b/arch/arm/boot/dts/bcm4708-netgear-r6250.dts @@ -20,7 +20,7 @@ bootargs = "console=ttyS0,115200 earlycon"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>, <0x88000000 0x08000000>; diff --git a/arch/arm/boot/dts/bcm4709-asus-rt-ac87u.dts b/arch/arm/boot/dts/bcm4709-asus-rt-ac87u.dts index 6c6bb7b17d27ab0ded09f7dbdca4ba8b5fc719c8..7546c8d07bcd7a0c4fc204fa4a01e576e1aa1c57 100644 --- a/arch/arm/boot/dts/bcm4709-asus-rt-ac87u.dts +++ b/arch/arm/boot/dts/bcm4709-asus-rt-ac87u.dts @@ -19,7 +19,7 @@ bootargs = "console=ttyS0,115200"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>, <0x88000000 0x08000000>; diff --git a/arch/arm/boot/dts/bcm4709-buffalo-wxr-1900dhp.dts b/arch/arm/boot/dts/bcm4709-buffalo-wxr-1900dhp.dts index d29e7f80ea6aa144e9dfee27fbdfe867a6befd06..beae9eab9cb8c48f3b35a28bcd5e521cd5e70e46 100644 --- a/arch/arm/boot/dts/bcm4709-buffalo-wxr-1900dhp.dts +++ b/arch/arm/boot/dts/bcm4709-buffalo-wxr-1900dhp.dts @@ -19,7 +19,7 @@ bootargs = "console=ttyS0,115200"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>, <0x88000000 0x18000000>; diff --git a/arch/arm/boot/dts/bcm4709-linksys-ea9200.dts b/arch/arm/boot/dts/bcm4709-linksys-ea9200.dts index 38fbefdf2e4e410d2a0c105569843d1f1f2ec3fd..ee94455a7236dc8d71ae245124e8187bcd18eb70 100644 --- a/arch/arm/boot/dts/bcm4709-linksys-ea9200.dts +++ b/arch/arm/boot/dts/bcm4709-linksys-ea9200.dts @@ -16,7 +16,7 @@ bootargs = "console=ttyS0,115200"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>, <0x88000000 0x08000000>; diff --git a/arch/arm/boot/dts/bcm4709-netgear-r7000.dts b/arch/arm/boot/dts/bcm4709-netgear-r7000.dts index 7989a53597d4fdd73f495b1d21d2aaa6377e16e2..56d309dbc6b0dd1f6a4bd303a5df0ba2e5c2c694 100644 --- a/arch/arm/boot/dts/bcm4709-netgear-r7000.dts +++ b/arch/arm/boot/dts/bcm4709-netgear-r7000.dts @@ -19,7 +19,7 @@ bootargs = "console=ttyS0,115200"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>, <0x88000000 0x08000000>; diff --git a/arch/arm/boot/dts/bcm4709-netgear-r8000.dts b/arch/arm/boot/dts/bcm4709-netgear-r8000.dts index 87b655be674c540d9d288082eef71c072568c8e4..184e3039aa864000baa7857d864ba2e3f9a99589 100644 --- a/arch/arm/boot/dts/bcm4709-netgear-r8000.dts +++ b/arch/arm/boot/dts/bcm4709-netgear-r8000.dts @@ -30,7 +30,7 @@ bootargs = "console=ttyS0,115200"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>, <0x88000000 0x08000000>; diff --git a/arch/arm/boot/dts/bcm4709-tplink-archer-c9-v1.dts b/arch/arm/boot/dts/bcm4709-tplink-archer-c9-v1.dts index f806be5da723779ebc9c40781191e711013cb926..c2a266a439d0504dd76a0404b9eb840c873a1c36 100644 --- a/arch/arm/boot/dts/bcm4709-tplink-archer-c9-v1.dts +++ b/arch/arm/boot/dts/bcm4709-tplink-archer-c9-v1.dts @@ -15,7 +15,7 @@ bootargs = "console=ttyS0,115200 earlycon"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; }; diff --git a/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts b/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts index 2666195b6ffebdac68905f37312d7b3e346a01f1..3d415d874bd391b90a126804eaad1321911685d6 100644 --- a/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts +++ b/arch/arm/boot/dts/bcm47094-luxul-xwc-2000.dts @@ -16,7 +16,7 @@ bootargs = "earlycon"; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>, <0x88000000 0x18000000>; diff --git a/arch/arm/boot/dts/bcm53016-meraki-mr32.dts b/arch/arm/boot/dts/bcm53016-meraki-mr32.dts index 3b978dc8997a45113622a23235a5ad6f973c3f72..577a4dc604d93e89c3be314c2a6f495ba65de569 100644 --- a/arch/arm/boot/dts/bcm53016-meraki-mr32.dts +++ b/arch/arm/boot/dts/bcm53016-meraki-mr32.dts @@ -20,7 +20,7 @@ bootargs = " console=ttyS0,115200n8 earlycon"; }; - memory { + memory@0 { reg = <0x00000000 0x08000000>; device_type = "memory"; }; @@ -195,3 +195,25 @@ }; }; }; + +&srab { + status = "okay"; + + ports { + port@0 { + reg = <0>; + label = "poe"; + }; + + port@5 { + reg = <5>; + label = "cpu"; + ethernet = <&gmac0>; + + fixed-link { + speed = <1000>; + duplex-full; + }; + }; + }; +}; diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi index 72b0df6910bd51c07d078a383f49221b087c39cb..9fdad20c40d170bf6ac6cf79aea8dfd9fb9680f5 100644 --- a/arch/arm/boot/dts/bcm5301x.dtsi +++ b/arch/arm/boot/dts/bcm5301x.dtsi @@ -242,6 +242,8 @@ gpio-controller; #gpio-cells = <2>; + interrupt-controller; + #interrupt-cells = <2>; }; pcie0: pcie@12000 { @@ -408,7 +410,7 @@ i2c0: i2c@18009000 { compatible = "brcm,iproc-i2c"; reg = <0x18009000 0x50>; - interrupts = ; + interrupts = ; #address-cells = <1>; #size-cells = <0>; clock-frequency = <100000>; diff --git a/arch/arm/boot/dts/bcm94708.dts b/arch/arm/boot/dts/bcm94708.dts index 3d13e46c69494579dbbbc34d65d0fd78af14eaa3..d9eb2040b96316e0cf15ca687d6043fb98e6b6a0 100644 --- a/arch/arm/boot/dts/bcm94708.dts +++ b/arch/arm/boot/dts/bcm94708.dts @@ -38,7 +38,7 @@ model = "NorthStar SVK (BCM94708)"; compatible = "brcm,bcm94708", "brcm,bcm4708"; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; }; diff --git a/arch/arm/boot/dts/bcm94709.dts b/arch/arm/boot/dts/bcm94709.dts index 5017b7b259cbe20463753cd8e8303de09e6258aa..618c812eef73e11f8efa6bdb04f62786edf8efb0 100644 --- a/arch/arm/boot/dts/bcm94709.dts +++ b/arch/arm/boot/dts/bcm94709.dts @@ -38,7 +38,7 @@ model = "NorthStar SVK (BCM94709)"; compatible = "brcm,bcm94709", "brcm,bcm4709", "brcm,bcm4708"; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; }; diff --git a/arch/arm/boot/dts/exynos4210-i9100.dts b/arch/arm/boot/dts/exynos4210-i9100.dts index 7777bf51a6e642c64775b175bfcc5426dd045235..ecc9d4dc707e45c5cc58e18b18b597f89f402d0e 100644 --- a/arch/arm/boot/dts/exynos4210-i9100.dts +++ b/arch/arm/boot/dts/exynos4210-i9100.dts @@ -765,7 +765,7 @@ compatible = "brcm,bcm4330-bt"; shutdown-gpios = <&gpl0 4 GPIO_ACTIVE_HIGH>; - reset-gpios = <&gpl1 0 GPIO_ACTIVE_HIGH>; + reset-gpios = <&gpl1 0 GPIO_ACTIVE_LOW>; device-wakeup-gpios = <&gpx3 1 GPIO_ACTIVE_HIGH>; host-wakeup-gpios = <&gpx2 6 GPIO_ACTIVE_HIGH>; }; diff --git a/arch/arm/boot/dts/gemini-nas4220b.dts b/arch/arm/boot/dts/gemini-nas4220b.dts index 13112a8a5dd88dbd7274e860ad3c17b4d295761b..6544c730340fa4cefd916c22d37342c3080084a9 100644 --- a/arch/arm/boot/dts/gemini-nas4220b.dts +++ b/arch/arm/boot/dts/gemini-nas4220b.dts @@ -84,7 +84,7 @@ partitions { compatible = "redboot-fis"; /* Eraseblock at 0xfe0000 */ - fis-index-block = <0x1fc>; + fis-index-block = <0x7f>; }; }; diff --git a/arch/arm/boot/dts/imx23-evk.dts b/arch/arm/boot/dts/imx23-evk.dts index 8cbaf1c8117456a11125de5f1f9986e1c9078e7d..3b609d987d88300f68f1402057dfc9b47b8bb146 100644 --- a/arch/arm/boot/dts/imx23-evk.dts +++ b/arch/arm/boot/dts/imx23-evk.dts @@ -79,7 +79,6 @@ MX23_PAD_LCD_RESET__GPIO_1_18 MX23_PAD_PWM3__GPIO_1_29 MX23_PAD_PWM4__GPIO_1_30 - MX23_PAD_SSP1_DETECT__SSP1_DETECT >; fsl,drive-strength = ; fsl,voltage = ; diff --git a/arch/arm/boot/dts/imx53-m53menlo.dts b/arch/arm/boot/dts/imx53-m53menlo.dts index d3082b9774e4094f4c9afe99dff9704cc8969d07..4f88e96d81ddbdd42078409d0a94e43f3e194566 100644 --- a/arch/arm/boot/dts/imx53-m53menlo.dts +++ b/arch/arm/boot/dts/imx53-m53menlo.dts @@ -56,6 +56,7 @@ panel { compatible = "edt,etm0700g0dh6"; pinctrl-0 = <&pinctrl_display_gpio>; + pinctrl-names = "default"; enable-gpios = <&gpio6 0 GPIO_ACTIVE_HIGH>; port { @@ -76,8 +77,7 @@ regulator-name = "vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; - gpio = <&gpio1 2 GPIO_ACTIVE_HIGH>; - enable-active-high; + gpio = <&gpio1 2 0>; }; }; diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index 9148a01ed6d9f085c76923d0d3f1532299d0c448..ebc0892e37c7a0fd7360c9052f876718d6b771e0 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -5,6 +5,7 @@ #include #include #include +#include #include / { @@ -275,6 +276,7 @@ led-cur = /bits/ 8 <0x20>; max-cur = /bits/ 8 <0x60>; reg = <0>; + color = ; }; chan@1 { @@ -282,6 +284,7 @@ led-cur = /bits/ 8 <0x20>; max-cur = /bits/ 8 <0x60>; reg = <1>; + color = ; }; chan@2 { @@ -289,6 +292,7 @@ led-cur = /bits/ 8 <0x20>; max-cur = /bits/ 8 <0x60>; reg = <2>; + color = ; }; chan@3 { @@ -296,6 +300,7 @@ led-cur = /bits/ 8 <0x0>; max-cur = /bits/ 8 <0x0>; reg = <3>; + color = ; }; }; diff --git a/arch/arm/boot/dts/imx6qdl-pico.dtsi b/arch/arm/boot/dts/imx6qdl-pico.dtsi index 5de4ccb979163dd7bc54b26658a3f69571382559..f7a56d6b160c8f75c5412be21449f5db58d84df7 100644 --- a/arch/arm/boot/dts/imx6qdl-pico.dtsi +++ b/arch/arm/boot/dts/imx6qdl-pico.dtsi @@ -176,7 +176,18 @@ pinctrl-0 = <&pinctrl_enet>; phy-mode = "rgmii-id"; phy-reset-gpios = <&gpio1 26 GPIO_ACTIVE_LOW>; + phy-handle = <&phy>; status = "okay"; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + phy: ethernet-phy@1 { + reg = <1>; + qca,clk-out-frequency = <125000000>; + }; + }; }; &hdmi { diff --git a/arch/arm/boot/dts/imx6qdl-udoo.dtsi b/arch/arm/boot/dts/imx6qdl-udoo.dtsi index d07d8f83456d241a288be60ce729579b9dded12f..ccfa8e320be62e26eea26a970eca940e440e2752 100644 --- a/arch/arm/boot/dts/imx6qdl-udoo.dtsi +++ b/arch/arm/boot/dts/imx6qdl-udoo.dtsi @@ -5,6 +5,8 @@ * Author: Fabio Estevam */ +#include + / { aliases { backlight = &backlight; @@ -226,6 +228,7 @@ MX6QDL_PAD_SD3_DAT1__SD3_DATA1 0x17059 MX6QDL_PAD_SD3_DAT2__SD3_DATA2 0x17059 MX6QDL_PAD_SD3_DAT3__SD3_DATA3 0x17059 + MX6QDL_PAD_SD3_DAT5__GPIO7_IO00 0x1b0b0 >; }; @@ -304,7 +307,7 @@ &usdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usdhc3>; - non-removable; + cd-gpios = <&gpio7 0 GPIO_ACTIVE_LOW>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi index c070893c509ee2dbbab43adbcdb417028c3776b0..5bad982bc5a0543c02b71e535dce181b8b6207fd 100644 --- a/arch/arm/boot/dts/imx6qdl-wandboard.dtsi +++ b/arch/arm/boot/dts/imx6qdl-wandboard.dtsi @@ -289,6 +289,7 @@ ethphy: ethernet-phy@1 { reg = <1>; + qca,clk-out-frequency = <125000000>; }; }; }; diff --git a/arch/arm/boot/dts/imx6ull-pinfunc.h b/arch/arm/boot/dts/imx6ull-pinfunc.h index eb025a9d4759255e57a3a3ba9b491167809278a2..7328d4ef8559f021aa087e5159f3275aef345452 100644 --- a/arch/arm/boot/dts/imx6ull-pinfunc.h +++ b/arch/arm/boot/dts/imx6ull-pinfunc.h @@ -82,6 +82,6 @@ #define MX6ULL_PAD_CSI_DATA04__ESAI_TX_FS 0x01F4 0x0480 0x0000 0x9 0x0 #define MX6ULL_PAD_CSI_DATA05__ESAI_TX_CLK 0x01F8 0x0484 0x0000 0x9 0x0 #define MX6ULL_PAD_CSI_DATA06__ESAI_TX5_RX0 0x01FC 0x0488 0x0000 0x9 0x0 -#define MX6ULL_PAD_CSI_DATA07__ESAI_T0 0x0200 0x048C 0x0000 0x9 0x0 +#define MX6ULL_PAD_CSI_DATA07__ESAI_TX0 0x0200 0x048C 0x0000 0x9 0x0 #endif /* __DTS_IMX6ULL_PINFUNC_H */ diff --git a/arch/arm/boot/dts/imx7ulp.dtsi b/arch/arm/boot/dts/imx7ulp.dtsi index b7ea37ad4e55c45f5615766f3fa30bf7d9eab4a4..bcec98b96411437c831d9e91ddaf2f63d657ea9e 100644 --- a/arch/arm/boot/dts/imx7ulp.dtsi +++ b/arch/arm/boot/dts/imx7ulp.dtsi @@ -259,7 +259,7 @@ interrupts = ; clocks = <&pcc2 IMX7ULP_CLK_WDG1>; assigned-clocks = <&pcc2 IMX7ULP_CLK_WDG1>; - assigned-clocks-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>; + assigned-clock-parents = <&scg1 IMX7ULP_CLK_FIRC_BUS_CLK>; timeout-sec = <40>; }; diff --git a/arch/arm/boot/dts/ls1021a-tsn.dts b/arch/arm/boot/dts/ls1021a-tsn.dts index 9d8f0c2a8aba3d24b0b4a1b24ee287541c25333d..aca78b5eddf20996bd38f110b5072494cb67202d 100644 --- a/arch/arm/boot/dts/ls1021a-tsn.dts +++ b/arch/arm/boot/dts/ls1021a-tsn.dts @@ -251,7 +251,7 @@ flash@0 { /* Rev. A uses 64MB flash, Rev. B & C use 32MB flash */ - compatible = "jedec,spi-nor", "s25fl256s1", "s25fl512s"; + compatible = "jedec,spi-nor"; spi-max-frequency = <20000000>; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi index 827373ef1a547ef6e5f74bd49787a3237853bc76..37026b2fa6497139fea7bca04e5f73c7476d3f7f 100644 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@ -331,39 +331,6 @@ #thermal-sensor-cells = <1>; }; - thermal-zones { - cpu_thermal: cpu-thermal { - polling-delay-passive = <1000>; - polling-delay = <5000>; - - thermal-sensors = <&tmu 0>; - - trips { - cpu_alert: cpu-alert { - temperature = <85000>; - hysteresis = <2000>; - type = "passive"; - }; - cpu_crit: cpu-crit { - temperature = <95000>; - hysteresis = <2000>; - type = "critical"; - }; - }; - - cooling-maps { - map0 { - trip = <&cpu_alert>; - cooling-device = - <&cpu0 THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu1 THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - }; - }; - }; - }; - dspi0: spi@2100000 { compatible = "fsl,ls1021a-v1.0-dspi"; #address-cells = <1>; @@ -1018,4 +985,37 @@ big-endian; }; }; + + thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <1000>; + polling-delay = <5000>; + + thermal-sensors = <&tmu 0>; + + trips { + cpu_alert: cpu-alert { + temperature = <85000>; + hysteresis = <2000>; + type = "passive"; + }; + cpu_crit: cpu-crit { + temperature = <95000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&cpu_alert>; + cooling-device = + <&cpu0 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + }; + }; + }; + }; }; diff --git a/arch/arm/boot/dts/meson.dtsi b/arch/arm/boot/dts/meson.dtsi index 7649dd1e0b9ee5cd6aa67d0a8ef48a3f80e08a84..c928ae312e19c0c329efb8d68a4ca1f4a5561d21 100644 --- a/arch/arm/boot/dts/meson.dtsi +++ b/arch/arm/boot/dts/meson.dtsi @@ -42,14 +42,14 @@ }; uart_A: serial@84c0 { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x84c0 0x18>; interrupts = ; status = "disabled"; }; uart_B: serial@84dc { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x84dc 0x18>; interrupts = ; status = "disabled"; @@ -87,7 +87,7 @@ }; uart_C: serial@8700 { - compatible = "amlogic,meson6-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart"; reg = <0x8700 0x18>; interrupts = ; status = "disabled"; @@ -203,7 +203,7 @@ }; uart_AO: serial@4c0 { - compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart", "amlogic,meson-uart"; + compatible = "amlogic,meson6-uart", "amlogic,meson-ao-uart"; reg = <0x4c0 0x18>; interrupts = ; status = "disabled"; diff --git a/arch/arm/boot/dts/meson8.dtsi b/arch/arm/boot/dts/meson8.dtsi index 740a6c816266cc6a20469f592f547474cc351a2c..08533116a39ce0a80264cafe325cdac5d3abc15a 100644 --- a/arch/arm/boot/dts/meson8.dtsi +++ b/arch/arm/boot/dts/meson8.dtsi @@ -598,27 +598,27 @@ }; &uart_AO { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart", "amlogic,meson-ao-uart"; + clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_A { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_B { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_C { - compatible = "amlogic,meson8-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &usb0 { diff --git a/arch/arm/boot/dts/meson8b.dtsi b/arch/arm/boot/dts/meson8b.dtsi index 2401cdf5f7511be3bd16e4b77db46e3e88f7152f..f6eb7c803174e7c74c52244d3a5913113d248f41 100644 --- a/arch/arm/boot/dts/meson8b.dtsi +++ b/arch/arm/boot/dts/meson8b.dtsi @@ -586,27 +586,27 @@ }; &uart_AO { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_CLK81>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart", "amlogic,meson-ao-uart"; + clocks = <&xtal>, <&clkc CLKID_CLK81>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_A { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART0>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_B { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART1>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &uart_C { - compatible = "amlogic,meson8b-uart", "amlogic,meson-uart"; - clocks = <&clkc CLKID_CLK81>, <&xtal>, <&clkc CLKID_UART2>; - clock-names = "baud", "xtal", "pclk"; + compatible = "amlogic,meson8b-uart"; + clocks = <&xtal>, <&clkc CLKID_UART0>, <&clkc CLKID_CLK81>; + clock-names = "xtal", "pclk", "baud"; }; &usb0 { diff --git a/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi b/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi index 7f6aefd134514b9108865b8e086088b7c9809cf5..e7534fe9c53cf960b216d06d27d69f17316a23ca 100644 --- a/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi +++ b/arch/arm/boot/dts/omap-gpmc-smsc9221.dtsi @@ -29,7 +29,7 @@ compatible = "smsc,lan9221","smsc,lan9115"; bank-width = <2>; - gpmc,mux-add-data; + gpmc,mux-add-data = <0>; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <42>; gpmc,cs-wr-off-ns = <36>; diff --git a/arch/arm/boot/dts/omap3-beagle-ab4.dts b/arch/arm/boot/dts/omap3-beagle-ab4.dts new file mode 100644 index 0000000000000000000000000000000000000000..990ff2d8468684469dbe5704ebe6a75ef32bd66c --- /dev/null +++ b/arch/arm/boot/dts/omap3-beagle-ab4.dts @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0-only +/dts-v1/; + +#include "omap3-beagle.dts" + +/ { + model = "TI OMAP3 BeagleBoard A to B4"; + compatible = "ti,omap3-beagle-ab4", "ti,omap3-beagle", "ti,omap3430", "ti,omap3"; +}; + +/* + * Workaround for capacitor C70 issue, see "Boards revision A and < B5" + * section at https://elinux.org/BeagleBoard_Community + */ + +/* Unusable as clocksource because of unreliable oscillator */ +&counter32k { + status = "disabled"; +}; + +/* Unusable as clockevent because of unreliable oscillator, allow to idle */ +&timer1_target { + /delete-property/ti,no-reset-on-init; + /delete-property/ti,no-idle; + timer@0 { + /delete-property/ti,timer-alwon; + }; +}; + +/* Preferred always-on timer for clocksource */ +&timer12_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + /* Always clocked by secure_32k_fck */ + }; +}; + +/* Preferred timer for clockevent */ +&timer2_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + assigned-clocks = <&gpt2_fck>; + assigned-clock-parents = <&sys_ck>; + }; +}; diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts index f9f34b8458e91c871de4a2e2d1093a306ba99cfd..0548b391334fdbf992beb9a3ae41c3edb7b741b4 100644 --- a/arch/arm/boot/dts/omap3-beagle.dts +++ b/arch/arm/boot/dts/omap3-beagle.dts @@ -304,39 +304,6 @@ phys = <0 &hsusb2_phy>; }; -/* Unusable as clocksource because of unreliable oscillator */ -&counter32k { - status = "disabled"; -}; - -/* Unusable as clockevent because if unreliable oscillator, allow to idle */ -&timer1_target { - /delete-property/ti,no-reset-on-init; - /delete-property/ti,no-idle; - timer@0 { - /delete-property/ti,timer-alwon; - }; -}; - -/* Preferred always-on timer for clocksource */ -&timer12_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - /* Always clocked by secure_32k_fck */ - }; -}; - -/* Preferred timer for clockevent */ -&timer2_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - assigned-clocks = <&gpt2_fck>; - assigned-clock-parents = <&sys_ck>; - }; -}; - &twl_gpio { ti,use-leds; /* pullups: BIT(1) */ diff --git a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi index 2c19d6e255bdc9c4c15cefcb4e12ecd85c2de28d..6883ccb45600b38a752353ad8118eb57695150c1 100644 --- a/arch/arm/boot/dts/omap3-devkit8000-common.dtsi +++ b/arch/arm/boot/dts/omap3-devkit8000-common.dtsi @@ -158,6 +158,24 @@ status = "disabled"; }; +/* Unusable as clockevent because if unreliable oscillator, allow to idle */ +&timer1_target { + /delete-property/ti,no-reset-on-init; + /delete-property/ti,no-idle; + timer@0 { + /delete-property/ti,timer-alwon; + }; +}; + +/* Preferred timer for clockevent */ +&timer12_target { + ti,no-reset-on-init; + ti,no-idle; + timer@0 { + /* Always clocked by secure_32k_fck */ + }; +}; + &twl_gpio { ti,use-leds; /* diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts index c2995a280729d27900eeda63af011ce94d8b995a..162d0726b00801c50f4b029ef011f6d24cced686 100644 --- a/arch/arm/boot/dts/omap3-devkit8000.dts +++ b/arch/arm/boot/dts/omap3-devkit8000.dts @@ -14,36 +14,3 @@ display2 = &tv0; }; }; - -/* Unusable as clocksource because of unreliable oscillator */ -&counter32k { - status = "disabled"; -}; - -/* Unusable as clockevent because if unreliable oscillator, allow to idle */ -&timer1_target { - /delete-property/ti,no-reset-on-init; - /delete-property/ti,no-idle; - timer@0 { - /delete-property/ti,timer-alwon; - }; -}; - -/* Preferred always-on timer for clocksource */ -&timer12_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - /* Always clocked by secure_32k_fck */ - }; -}; - -/* Preferred timer for clockevent */ -&timer2_target { - ti,no-reset-on-init; - ti,no-idle; - timer@0 { - assigned-clocks = <&gpt2_fck>; - assigned-clock-parents = <&sys_ck>; - }; -}; diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 7b8c18e6605e406339776d88b6e3b1b35213fac4..80c9e5e34136aa54caa924d100fb36c964a70cc3 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -515,7 +515,7 @@ compatible = "bosch,bma180"; reg = <0x41>; pinctrl-names = "default"; - pintcrl-0 = <&bma180_pins>; + pinctrl-0 = <&bma180_pins>; interrupt-parent = <&gpio4>; interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; /* GPIO_115 */ }; diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts index 32335d4ce478b77b4224f05f266c322641a76277..d40c3d2c4914e400cf1365397632dc3d59ebf0f8 100644 --- a/arch/arm/boot/dts/omap3-n900.dts +++ b/arch/arm/boot/dts/omap3-n900.dts @@ -8,6 +8,7 @@ #include "omap34xx.dtsi" #include +#include /* * Default secure signed bootloader (Nokia X-Loader) does not enable L3 firewall @@ -630,63 +631,92 @@ }; lp5523: lp5523@32 { + #address-cells = <1>; + #size-cells = <0>; compatible = "national,lp5523"; reg = <0x32>; clock-mode = /bits/ 8 <0>; /* LP55XX_CLOCK_AUTO */ - enable-gpio = <&gpio2 9 GPIO_ACTIVE_HIGH>; /* 41 */ + enable-gpios = <&gpio2 9 GPIO_ACTIVE_HIGH>; /* 41 */ - chan0 { + led@0 { + reg = <0>; chan-name = "lp5523:kb1"; led-cur = /bits/ 8 <50>; max-cur = /bits/ 8 <100>; + color = ; + function = LED_FUNCTION_KBD_BACKLIGHT; }; - chan1 { + led@1 { + reg = <1>; chan-name = "lp5523:kb2"; led-cur = /bits/ 8 <50>; max-cur = /bits/ 8 <100>; + color = ; + function = LED_FUNCTION_KBD_BACKLIGHT; }; - chan2 { + led@2 { + reg = <2>; chan-name = "lp5523:kb3"; led-cur = /bits/ 8 <50>; max-cur = /bits/ 8 <100>; + color = ; + function = LED_FUNCTION_KBD_BACKLIGHT; }; - chan3 { + led@3 { + reg = <3>; chan-name = "lp5523:kb4"; led-cur = /bits/ 8 <50>; max-cur = /bits/ 8 <100>; + color = ; + function = LED_FUNCTION_KBD_BACKLIGHT; }; - chan4 { + led@4 { + reg = <4>; chan-name = "lp5523:b"; led-cur = /bits/ 8 <50>; max-cur = /bits/ 8 <100>; + color = ; + function = LED_FUNCTION_STATUS; }; - chan5 { + led@5 { + reg = <5>; chan-name = "lp5523:g"; led-cur = /bits/ 8 <50>; max-cur = /bits/ 8 <100>; + color = ; + function = LED_FUNCTION_STATUS; }; - chan6 { + led@6 { + reg = <6>; chan-name = "lp5523:r"; led-cur = /bits/ 8 <50>; max-cur = /bits/ 8 <100>; + color = ; + function = LED_FUNCTION_STATUS; }; - chan7 { + led@7 { + reg = <7>; chan-name = "lp5523:kb5"; led-cur = /bits/ 8 <50>; max-cur = /bits/ 8 <100>; + color = ; + function = LED_FUNCTION_KBD_BACKLIGHT; }; - chan8 { + led@8 { + reg = <8>; chan-name = "lp5523:kb6"; led-cur = /bits/ 8 <50>; max-cur = /bits/ 8 <100>; + color = ; + function = LED_FUNCTION_KBD_BACKLIGHT; }; }; diff --git a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi index e5da3bc6f1050e79e2573deb187b3ead25218a06..218a10c0d8159ed393b0e8f6122af96da5effaa5 100644 --- a/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi +++ b/arch/arm/boot/dts/omap3-overo-tobiduo-common.dtsi @@ -22,7 +22,7 @@ compatible = "smsc,lan9221","smsc,lan9115"; bank-width = <2>; - gpmc,mux-add-data; + gpmc,mux-add-data = <0>; gpmc,cs-on-ns = <0>; gpmc,cs-rd-off-ns = <42>; gpmc,cs-wr-off-ns = <36>; diff --git a/arch/arm/boot/dts/omap3430-sdp.dts b/arch/arm/boot/dts/omap3430-sdp.dts index c5b903718414992c105b2895b24e9c44476ab9fc..7d530ae3483b806e2c125facb086eabab6fff3a7 100644 --- a/arch/arm/boot/dts/omap3430-sdp.dts +++ b/arch/arm/boot/dts/omap3430-sdp.dts @@ -101,7 +101,7 @@ nand@1,0 { compatible = "ti,omap2-nand"; - reg = <0 0 4>; /* CS0, offset 0, IO size 4 */ + reg = <1 0 4>; /* CS1, offset 0, IO size 4 */ interrupt-parent = <&gpmc>; interrupts = <0 IRQ_TYPE_NONE>, /* fifoevent */ <1 IRQ_TYPE_NONE>; /* termcount */ diff --git a/arch/arm/boot/dts/qcom-apq8064.dtsi b/arch/arm/boot/dts/qcom-apq8064.dtsi index e36d590e83732ad057bd4d38e9f64440095e9a4a..72c4a9fc41a2013f62a64f29dc91f9ba503e30c7 100644 --- a/arch/arm/boot/dts/qcom-apq8064.dtsi +++ b/arch/arm/boot/dts/qcom-apq8064.dtsi @@ -198,7 +198,7 @@ clock-frequency = <19200000>; }; - pxo_board { + pxo_board: pxo_board { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <27000000>; @@ -1148,7 +1148,7 @@ }; gpu: adreno-3xx@4300000 { - compatible = "qcom,adreno-3xx"; + compatible = "qcom,adreno-320.2", "qcom,adreno"; reg = <0x04300000 0x20000>; reg-names = "kgsl_3d0_reg_memory"; interrupts = ; @@ -1163,7 +1163,6 @@ <&mmcc GFX3D_AHB_CLK>, <&mmcc GFX3D_AXI_CLK>, <&mmcc MMSS_IMEM_AHB_CLK>; - qcom,chipid = <0x03020002>; iommus = <&gfx3d 0 &gfx3d 1 @@ -1306,7 +1305,7 @@ reg-names = "dsi_pll", "dsi_phy", "dsi_phy_regulator"; clock-names = "iface_clk", "ref"; clocks = <&mmcc DSI_M_AHB_CLK>, - <&cxo_board>; + <&pxo_board>; }; diff --git a/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts b/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts index 282b89ce3d451b4233fd21ab49bdb62070b2607d..33545cf40f3ab1c16b962dbd049aac37b8ee0202 100644 --- a/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts +++ b/arch/arm/boot/dts/qcom-ipq8064-rb3011.dts @@ -19,12 +19,12 @@ stdout-path = "serial0:115200n8"; }; - memory@0 { + memory@42000000 { reg = <0x42000000 0x3e000000>; device_type = "memory"; }; - mdio0: mdio@0 { + mdio0: mdio-0 { status = "okay"; compatible = "virtual,mdio-gpio"; gpios = <&qcom_pinmux 1 GPIO_ACTIVE_HIGH>, @@ -91,7 +91,7 @@ }; }; - mdio1: mdio@1 { + mdio1: mdio-1 { status = "okay"; compatible = "virtual,mdio-gpio"; gpios = <&qcom_pinmux 11 GPIO_ACTIVE_HIGH>, diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index 51f5f904f9eb967b317b824b2b1d1df4d5e46c19..5f7426fb4e41986a2c5d11020bc63b81035a2476 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -1528,8 +1528,8 @@ #phy-cells = <0>; qcom,dsi-phy-index = <0>; - clocks = <&mmcc MDSS_AHB_CLK>; - clock-names = "iface"; + clocks = <&mmcc MDSS_AHB_CLK>, <&xo_board>; + clock-names = "iface", "ref"; }; }; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 7de8b006ca13aad537e06aa5fa41a57b1f14906a..2f17bf35d7a65f5a68bcd0bc3287cc46ca090053 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -640,8 +640,8 @@ interrupts = ; assigned-clocks = <&cru SCLK_HDMI_PHY>; assigned-clock-parents = <&hdmi_phy>; - clocks = <&cru SCLK_HDMI_HDCP>, <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_CEC>; - clock-names = "isfr", "iahb", "cec"; + clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>, <&cru SCLK_HDMI_CEC>; + clock-names = "iahb", "isfr", "cec"; pinctrl-names = "default"; pinctrl-0 = <&hdmii2c_xfer &hdmi_hpd &hdmi_cec>; resets = <&cru SRST_HDMI_P>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 0d89ad274268baec38f05ef8d624b3f9863233fe..9051fb4a267d4f9daac2db4d034945605172bbb4 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -990,7 +990,7 @@ status = "disabled"; }; - crypto: cypto-controller@ff8a0000 { + crypto: crypto@ff8a0000 { compatible = "rockchip,rk3288-crypto"; reg = <0x0 0xff8a0000 0x0 0x4000>; interrupts = ; diff --git a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts index 2b645642b9352c13d2d0797d68a2053ec35df597..2a745522404d6b065ef71bbde83e648875d95014 100644 --- a/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts +++ b/arch/arm/boot/dts/socfpga_arria10_socdk_qspi.dts @@ -12,7 +12,7 @@ flash0: n25q00@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00aa"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_arria5_socdk.dts b/arch/arm/boot/dts/socfpga_arria5_socdk.dts index 90e676e7019f23377dd05bbf70b45f7af0d47ea5..1b02d46496a852786705a5875e5e5adc20d9540d 100644 --- a/arch/arm/boot/dts/socfpga_arria5_socdk.dts +++ b/arch/arm/boot/dts/socfpga_arria5_socdk.dts @@ -119,7 +119,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts index 6f138b2b26163ada37d0c1cb0462ed70d44a4b4e..51bb436784e241470d7aba219b38323a37357007 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts @@ -124,7 +124,7 @@ flash0: n25q00@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; /* chip select */ spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts index c155ff02eb6e035a7c7e526635a80a5feda9b4d8..cae9ddd5ed38bbd57efe8371724e59aa49fb0ac2 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts @@ -169,7 +169,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts index 8d5d3996f6f27122412d68072767d621fa11ae8f..ca18b959e6559ebdd4515d71877211022559be19 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_socrates.dts @@ -80,7 +80,7 @@ flash: flash@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q256a"; + compatible = "micron,n25q256a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; m25p,fast-read; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts index 99a71757cdf46330418b65aeae3646668229c378..3f7aa7bf0863aa1150b64ad4a58e120a1bdf7fc1 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts @@ -116,7 +116,7 @@ flash0: n25q512a@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q512a"; + compatible = "micron,n25q512a", "jedec,spi-nor"; reg = <0>; spi-max-frequency = <100000000>; diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts index a060718758b6755a154c1842f95301d1a8c93806..25874e1b9c82987e090c7229cad34a07edb3c674 100644 --- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts +++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts @@ -224,7 +224,7 @@ n25q128@0 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q128"; + compatible = "micron,n25q128", "jedec,spi-nor"; reg = <0>; /* chip select */ spi-max-frequency = <100000000>; m25p,fast-read; @@ -241,7 +241,7 @@ n25q00@1 { #address-cells = <1>; #size-cells = <1>; - compatible = "n25q00"; + compatible = "micron,mt25qu02g", "jedec,spi-nor"; reg = <1>; /* chip select */ spi-max-frequency = <100000000>; m25p,fast-read; diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi index f266b7b034823a1063f4107fc6c0eda9e3e29767..cc88ebe7a60ced1725da1f3939f2fa0e462ef918 100644 --- a/arch/arm/boot/dts/spear3xx.dtsi +++ b/arch/arm/boot/dts/spear3xx.dtsi @@ -47,7 +47,7 @@ }; gmac: eth@e0800000 { - compatible = "st,spear600-gmac"; + compatible = "snps,dwmac-3.40a"; reg = <0xe0800000 0x8000>; interrupts = <23 22>; interrupt-names = "macirq", "eth_wake_irq"; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts index 27722c42b61c417e4bae239a14f36f73c8ddeee5..446d93c1c78241e388904ee6004c24e2f094a63c 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-skomer.dts @@ -154,10 +154,6 @@ cap-sd-highspeed; cap-mmc-highspeed; /* All direction control is used */ - st,sig-dir-cmd; - st,sig-dir-dat0; - st,sig-dir-dat2; - st,sig-dir-dat31; st,sig-pin-fbclk; full-pwr-cycle; vmmc-supply = <&ab8500_ldo_aux3_reg>; @@ -262,10 +258,10 @@ }; ab8500_ldo_aux2 { - /* Supplies the Cypress TMA140 touchscreen only with 3.3V */ + /* Supplies the Cypress TMA140 touchscreen only with 3.0V */ regulator-name = "AUX2"; - regulator-min-microvolt = <3300000>; - regulator-max-microvolt = <3300000>; + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3000000>; }; ab8500_ldo_aux3 { @@ -284,9 +280,9 @@ ab8500_ldo_aux5 { regulator-name = "AUX5"; + /* Intended for 1V8 for touchscreen but actually left unused */ regulator-min-microvolt = <1050000>; regulator-max-microvolt = <2790000>; - regulator-always-on; }; ab8500_ldo_aux6 { diff --git a/arch/arm/boot/dts/stm32f429-disco.dts b/arch/arm/boot/dts/stm32f429-disco.dts index 075ac57d0bf4af7ed5c5fea8233725963fd00a08..6435e099c632627708b6b5ad648643752db3a9e7 100644 --- a/arch/arm/boot/dts/stm32f429-disco.dts +++ b/arch/arm/boot/dts/stm32f429-disco.dts @@ -192,7 +192,7 @@ display: display@1{ /* Connect panel-ilitek-9341 to ltdc */ - compatible = "st,sf-tc240t-9370-t"; + compatible = "st,sf-tc240t-9370-t", "ilitek,ili9341"; reg = <1>; spi-3wire; spi-max-frequency = <10000000>; diff --git a/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi index dee4d32ab32c4423468d730b493dd3aef01443ed..ccf66adbbf62363c5567a48d9f865f47c2a17160 100644 --- a/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi +++ b/arch/arm/boot/dts/stm32mp15-pinctrl.dtsi @@ -1091,7 +1091,7 @@ }; }; - sai2a_pins_c: sai2a-4 { + sai2a_pins_c: sai2a-2 { pins { pinmux = , /* SAI2_SCK_A */ , /* SAI2_SD_A */ @@ -1102,7 +1102,7 @@ }; }; - sai2a_sleep_pins_c: sai2a-5 { + sai2a_sleep_pins_c: sai2a-2 { pins { pinmux = , /* SAI2_SCK_A */ , /* SAI2_SD_A */ @@ -1147,14 +1147,14 @@ }; }; - sai2b_pins_c: sai2a-4 { + sai2b_pins_c: sai2b-2 { pins1 { pinmux = ; /* SAI2_SD_B */ bias-disable; }; }; - sai2b_sleep_pins_c: sai2a-sleep-5 { + sai2b_sleep_pins_c: sai2b-sleep-2 { pins { pinmux = ; /* SAI2_SD_B */ }; diff --git a/arch/arm/boot/dts/stm32mp151.dtsi b/arch/arm/boot/dts/stm32mp151.dtsi index b479016fef008dc9a100f6b1cd2014d91330afeb..7a0ef01de969e58787863cdb8c635e0d5dc7971e 100644 --- a/arch/arm/boot/dts/stm32mp151.dtsi +++ b/arch/arm/boot/dts/stm32mp151.dtsi @@ -811,7 +811,7 @@ #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x4 0x1c>; + reg = <0x4 0x20>; clocks = <&rcc SAI1_K>; clock-names = "sai_ck"; dmas = <&dmamux1 87 0x400 0x01>; @@ -821,7 +821,7 @@ sai1b: audio-controller@4400a024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI1_K>; clock-names = "sai_ck"; dmas = <&dmamux1 88 0x400 0x01>; @@ -842,7 +842,7 @@ sai2a: audio-controller@4400b004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x4 0x1c>; + reg = <0x4 0x20>; clocks = <&rcc SAI2_K>; clock-names = "sai_ck"; dmas = <&dmamux1 89 0x400 0x01>; @@ -852,7 +852,7 @@ sai2b: audio-controller@4400b024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI2_K>; clock-names = "sai_ck"; dmas = <&dmamux1 90 0x400 0x01>; @@ -873,7 +873,7 @@ sai3a: audio-controller@4400c004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x04 0x1c>; + reg = <0x04 0x20>; clocks = <&rcc SAI3_K>; clock-names = "sai_ck"; dmas = <&dmamux1 113 0x400 0x01>; @@ -883,7 +883,7 @@ sai3b: audio-controller@4400c024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI3_K>; clock-names = "sai_ck"; dmas = <&dmamux1 114 0x400 0x01>; @@ -1250,7 +1250,7 @@ sai4a: audio-controller@50027004 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-a"; - reg = <0x04 0x1c>; + reg = <0x04 0x20>; clocks = <&rcc SAI4_K>; clock-names = "sai_ck"; dmas = <&dmamux1 99 0x400 0x01>; @@ -1260,7 +1260,7 @@ sai4b: audio-controller@50027024 { #sound-dai-cells = <0>; compatible = "st,stm32-sai-sub-b"; - reg = <0x24 0x1c>; + reg = <0x24 0x20>; clocks = <&rcc SAI4_K>; clock-names = "sai_ck"; dmas = <&dmamux1 100 0x400 0x01>; diff --git a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi index a9eb82b2f1704783dcac55342d94d691aa4434b9..5af32140e128bad7ce49aed337ac0b83b7168915 100644 --- a/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi +++ b/arch/arm/boot/dts/stm32mp15xx-dhcor-som.dtsi @@ -198,7 +198,7 @@ compatible = "jedec,spi-nor"; reg = <0>; spi-rx-bus-width = <4>; - spi-max-frequency = <108000000>; + spi-max-frequency = <50000000>; #address-cells = <1>; #size-cells = <1>; }; diff --git a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts index 9ba62774e89a152365eb71e058e1f4a38910dd42..488933b87ad5a7bb774e4acbbb0aed85c747797b 100644 --- a/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts +++ b/arch/arm/boot/dts/sun7i-a20-olinuxino-lime2.dts @@ -112,7 +112,7 @@ pinctrl-names = "default"; pinctrl-0 = <&gmac_rgmii_pins>; phy-handle = <&phy1>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi index c458f5fb124fb248b385a99589febaa9b1c3d352..46f4242e9f95d5718df40842a390c051788b19fc 100644 --- a/arch/arm/boot/dts/sun8i-a33.dtsi +++ b/arch/arm/boot/dts/sun8i-a33.dtsi @@ -46,7 +46,7 @@ #include / { - cpu0_opp_table: opp_table0 { + cpu0_opp_table: opp-table-cpu { compatible = "operating-points-v2"; opp-shared; @@ -164,7 +164,7 @@ io-channels = <&ths>; }; - mali_opp_table: gpu-opp-table { + mali_opp_table: opp-table-gpu { compatible = "operating-points-v2"; opp-144000000 { diff --git a/arch/arm/boot/dts/sun8i-a83t.dtsi b/arch/arm/boot/dts/sun8i-a83t.dtsi index c010b27fdb6a662a0d912d7100afd1f693f4d2b4..a746e449b0baec6b689e0fa09085e40855de842b 100644 --- a/arch/arm/boot/dts/sun8i-a83t.dtsi +++ b/arch/arm/boot/dts/sun8i-a83t.dtsi @@ -200,7 +200,7 @@ status = "disabled"; }; - cpu0_opp_table: opp_table0 { + cpu0_opp_table: opp-table-cluster0 { compatible = "operating-points-v2"; opp-shared; @@ -253,7 +253,7 @@ }; }; - cpu1_opp_table: opp_table1 { + cpu1_opp_table: opp-table-cluster1 { compatible = "operating-points-v2"; opp-shared; diff --git a/arch/arm/boot/dts/sun8i-h3.dtsi b/arch/arm/boot/dts/sun8i-h3.dtsi index 4e89701df91f8f94d59e366612842835cb52b6e2..ae4f933abb895a4562ed7588981613131c1affbf 100644 --- a/arch/arm/boot/dts/sun8i-h3.dtsi +++ b/arch/arm/boot/dts/sun8i-h3.dtsi @@ -44,7 +44,7 @@ #include / { - cpu0_opp_table: opp_table0 { + cpu0_opp_table: opp-table-cpu { compatible = "operating-points-v2"; opp-shared; @@ -112,7 +112,7 @@ }; }; - gpu_opp_table: gpu-opp-table { + gpu_opp_table: opp-table-gpu { compatible = "operating-points-v2"; opp-120000000 { diff --git a/arch/arm/boot/dts/tegra124-nyan-big.dts b/arch/arm/boot/dts/tegra124-nyan-big.dts index 1d2aac2cb6d038b50db7e48fbcdc2432e7564d13..fdc1d64dfff9dccbd9d3cc69a90e8c803de56dd1 100644 --- a/arch/arm/boot/dts/tegra124-nyan-big.dts +++ b/arch/arm/boot/dts/tegra124-nyan-big.dts @@ -13,12 +13,15 @@ "google,nyan-big-rev1", "google,nyan-big-rev0", "google,nyan-big", "google,nyan", "nvidia,tegra124"; - panel: panel { - compatible = "auo,b133xtn01"; - - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; + host1x@50000000 { + dpaux@545c0000 { + aux-bus { + panel: panel { + compatible = "auo,b133xtn01"; + backlight = <&backlight>; + }; + }; + }; }; mmc@700b0400 { /* SD Card on this bus */ diff --git a/arch/arm/boot/dts/tegra124-nyan-blaze.dts b/arch/arm/boot/dts/tegra124-nyan-blaze.dts index 677babde6460ed1eb39a1e5d2db5fc42c896e1f4..abdf4456826f8f7100519e742fadb01b110e04db 100644 --- a/arch/arm/boot/dts/tegra124-nyan-blaze.dts +++ b/arch/arm/boot/dts/tegra124-nyan-blaze.dts @@ -15,12 +15,15 @@ "google,nyan-blaze-rev0", "google,nyan-blaze", "google,nyan", "nvidia,tegra124"; - panel: panel { - compatible = "samsung,ltn140at29-301"; - - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; + host1x@50000000 { + dpaux@545c0000 { + aux-bus { + panel: panel { + compatible = "samsung,ltn140at29-301"; + backlight = <&backlight>; + }; + }; + }; }; sound { diff --git a/arch/arm/boot/dts/tegra124-venice2.dts b/arch/arm/boot/dts/tegra124-venice2.dts index e6b54ac1ebd1a4252c2386a2370795a0c9326876..84e2d24065e9ae98a635ac29d40aeb3a0a543986 100644 --- a/arch/arm/boot/dts/tegra124-venice2.dts +++ b/arch/arm/boot/dts/tegra124-venice2.dts @@ -48,6 +48,13 @@ dpaux@545c0000 { vdd-supply = <&vdd_3v3_panel>; status = "okay"; + + aux-bus { + panel: panel { + compatible = "lg,lp129qe"; + backlight = <&backlight>; + }; + }; }; }; @@ -1079,13 +1086,6 @@ }; }; - panel: panel { - compatible = "lg,lp129qe"; - power-supply = <&vdd_3v3_panel>; - backlight = <&backlight>; - ddc-i2c-bus = <&dpaux>; - }; - vdd_mux: regulator@0 { compatible = "regulator-fixed"; regulator-name = "+VDD_MUX"; diff --git a/arch/arm/boot/dts/vexpress-v2m.dtsi b/arch/arm/boot/dts/vexpress-v2m.dtsi index 2ac41ed3a57c7d6bc2c28df8d05c6850e27022b6..659dcf4004b4733956ffe32a77afdb3c15dd3ff8 100644 --- a/arch/arm/boot/dts/vexpress-v2m.dtsi +++ b/arch/arm/boot/dts/vexpress-v2m.dtsi @@ -19,7 +19,7 @@ */ / { - bus@4000000 { + bus@40000000 { motherboard { model = "V2M-P1"; arm,hbi = <0x190>; diff --git a/arch/arm/boot/dts/vexpress-v2p-ca9.dts b/arch/arm/boot/dts/vexpress-v2p-ca9.dts index 4c58479558562a5fa347bbe46bbb6fee280e8f03..1317f0f58d53dbd4a93f78104bc968dea529dd22 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca9.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca9.dts @@ -295,7 +295,7 @@ }; }; - smb: bus@4000000 { + smb: bus@40000000 { compatible = "simple-bus"; #address-cells = <2>; diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 761e88c739558be26a70be09a2ae2cec5792d044..8ff20f9b06bad4f1834307020bf58f3653f1cf47 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -107,6 +107,16 @@ .endm #endif +#if __LINUX_ARM_ARCH__ < 7 + .macro dsb, args + mcr p15, 0, r0, c7, c10, 4 + .endm + + .macro isb, args + mcr p15, 0, r0, c7, c5, 4 + .endm +#endif + .macro asm_trace_hardirqs_off, save=1 #if defined(CONFIG_TRACE_IRQFLAGS) .if \save diff --git a/arch/arm/include/asm/hypervisor.h b/arch/arm/include/asm/hypervisor.h index bd61502b97153cfb83b12ccb9e2d5c42e8df6572..8133c8c81a3527f4455212b1829ca51fe1580761 100644 --- a/arch/arm/include/asm/hypervisor.h +++ b/arch/arm/include/asm/hypervisor.h @@ -6,5 +6,6 @@ void kvm_init_hyp_services(void); bool kvm_arm_hyp_service_available(u32 func_id); +void kvm_arm_init_hyp_services(void); #endif diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h new file mode 100644 index 0000000000000000000000000000000000000000..85f9e538fb325730613f78419f52826a68b8d76c --- /dev/null +++ b/arch/arm/include/asm/spectre.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_SPECTRE_H +#define __ASM_SPECTRE_H + +enum { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +enum { + __SPECTRE_V2_METHOD_BPIALL, + __SPECTRE_V2_METHOD_ICIALLU, + __SPECTRE_V2_METHOD_SMC, + __SPECTRE_V2_METHOD_HVC, + __SPECTRE_V2_METHOD_LOOP8, +}; + +enum { + SPECTRE_V2_METHOD_BPIALL = BIT(__SPECTRE_V2_METHOD_BPIALL), + SPECTRE_V2_METHOD_ICIALLU = BIT(__SPECTRE_V2_METHOD_ICIALLU), + SPECTRE_V2_METHOD_SMC = BIT(__SPECTRE_V2_METHOD_SMC), + SPECTRE_V2_METHOD_HVC = BIT(__SPECTRE_V2_METHOD_HVC), + SPECTRE_V2_METHOD_LOOP8 = BIT(__SPECTRE_V2_METHOD_LOOP8), +}; + +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES +void spectre_v2_update_state(unsigned int state, unsigned int methods); +#else +static inline void spectre_v2_update_state(unsigned int state, + unsigned int methods) +{} +#endif + +int spectre_bhb_update_vectors(unsigned int method); + +#endif diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index a13d90206472263f2a782f213094149f387c92be..d9db752c51fe2b0c51c7bc0f4016e08a450061bb 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -200,6 +200,7 @@ extern int __get_user_64t_4(void *); register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ unsigned int __ua_flags = uaccess_save_and_enable(); \ + int __tmp_e; \ switch (sizeof(*(__p))) { \ case 1: \ if (sizeof((x)) >= 8) \ @@ -227,9 +228,10 @@ extern int __get_user_64t_4(void *); break; \ default: __e = __get_user_bad(); break; \ } \ + __tmp_e = __e; \ uaccess_restore(__ua_flags); \ x = (typeof(*(p))) __r2; \ - __e; \ + __tmp_e; \ }) #define get_user(x, p) \ diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index 4a91428c324dbd2c68f30dbfb237d832c5dbeb22..fad45c884e98890b4958ec9f71d377d14b0d4173 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -26,6 +26,19 @@ #define ARM_MMU_DISCARD(x) x #endif +/* + * ld.lld does not support NOCROSSREFS: + * https://github.com/ClangBuiltLinux/linux/issues/1609 + */ +#ifdef CONFIG_LD_IS_LLD +#define NOCROSSREFS +#endif + +/* Set start/end symbol names to the LMA for the section */ +#define ARM_LMA(sym, section) \ + sym##_start = LOADADDR(section); \ + sym##_end = LOADADDR(section) + SIZEOF(section) + #define PROC_INFO \ . = ALIGN(4); \ __proc_info_begin = .; \ @@ -110,19 +123,31 @@ * only thing that matters is their relative offsets */ #define ARM_VECTORS \ - __vectors_start = .; \ - .vectors 0xffff0000 : AT(__vectors_start) { \ - *(.vectors) \ + __vectors_lma = .; \ + OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \ + .vectors { \ + *(.vectors) \ + } \ + .vectors.bhb.loop8 { \ + *(.vectors.bhb.loop8) \ + } \ + .vectors.bhb.bpiall { \ + *(.vectors.bhb.bpiall) \ + } \ } \ - . = __vectors_start + SIZEOF(.vectors); \ - __vectors_end = .; \ + ARM_LMA(__vectors, .vectors); \ + ARM_LMA(__vectors_bhb_loop8, .vectors.bhb.loop8); \ + ARM_LMA(__vectors_bhb_bpiall, .vectors.bhb.bpiall); \ + . = __vectors_lma + SIZEOF(.vectors) + \ + SIZEOF(.vectors.bhb.loop8) + \ + SIZEOF(.vectors.bhb.bpiall); \ \ - __stubs_start = .; \ - .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { \ + __stubs_lma = .; \ + .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) { \ *(.stubs) \ } \ - . = __stubs_start + SIZEOF(.stubs); \ - __stubs_end = .; \ + ARM_LMA(__stubs, .stubs); \ + . = __stubs_lma + SIZEOF(.stubs); \ \ PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors)); diff --git a/arch/arm/include/debug/imx-uart.h b/arch/arm/include/debug/imx-uart.h index c8eb83d4b8964b1a5e0d1a01ec8b486154bab456..3edbb3c5b42bfcfc81bf60db3d7baa55ae74af6e 100644 --- a/arch/arm/include/debug/imx-uart.h +++ b/arch/arm/include/debug/imx-uart.h @@ -11,13 +11,6 @@ #define IMX1_UART_BASE_ADDR(n) IMX1_UART##n##_BASE_ADDR #define IMX1_UART_BASE(n) IMX1_UART_BASE_ADDR(n) -#define IMX21_UART1_BASE_ADDR 0x1000a000 -#define IMX21_UART2_BASE_ADDR 0x1000b000 -#define IMX21_UART3_BASE_ADDR 0x1000c000 -#define IMX21_UART4_BASE_ADDR 0x1000d000 -#define IMX21_UART_BASE_ADDR(n) IMX21_UART##n##_BASE_ADDR -#define IMX21_UART_BASE(n) IMX21_UART_BASE_ADDR(n) - #define IMX25_UART1_BASE_ADDR 0x43f90000 #define IMX25_UART2_BASE_ADDR 0x43f94000 #define IMX25_UART3_BASE_ADDR 0x5000c000 @@ -26,6 +19,13 @@ #define IMX25_UART_BASE_ADDR(n) IMX25_UART##n##_BASE_ADDR #define IMX25_UART_BASE(n) IMX25_UART_BASE_ADDR(n) +#define IMX27_UART1_BASE_ADDR 0x1000a000 +#define IMX27_UART2_BASE_ADDR 0x1000b000 +#define IMX27_UART3_BASE_ADDR 0x1000c000 +#define IMX27_UART4_BASE_ADDR 0x1000d000 +#define IMX27_UART_BASE_ADDR(n) IMX27_UART##n##_BASE_ADDR +#define IMX27_UART_BASE(n) IMX27_UART_BASE_ADDR(n) + #define IMX31_UART1_BASE_ADDR 0x43f90000 #define IMX31_UART2_BASE_ADDR 0x43f94000 #define IMX31_UART3_BASE_ADDR 0x5000c000 @@ -112,10 +112,10 @@ #ifdef CONFIG_DEBUG_IMX1_UART #define UART_PADDR IMX_DEBUG_UART_BASE(IMX1) -#elif defined(CONFIG_DEBUG_IMX21_IMX27_UART) -#define UART_PADDR IMX_DEBUG_UART_BASE(IMX21) #elif defined(CONFIG_DEBUG_IMX25_UART) #define UART_PADDR IMX_DEBUG_UART_BASE(IMX25) +#elif defined(CONFIG_DEBUG_IMX27_UART) +#define UART_PADDR IMX_DEBUG_UART_BASE(IMX27) #elif defined(CONFIG_DEBUG_IMX31_UART) #define UART_PADDR IMX_DEBUG_UART_BASE(IMX31) #elif defined(CONFIG_DEBUG_IMX35_UART) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 89e5d864e923461c40bf80f794d6f91e904bb9ab..79588b5623532376908e09fbb4a7f3702f490f7f 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -106,4 +106,6 @@ endif obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o + extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 1c9e6d1452c5b24401b5571f5e224f6e0ca346b9..c3ebe3584103b3bb96c295578d185737906f2b50 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -596,11 +596,9 @@ call_fpe: tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 reteq lr and r8, r0, #0x00000f00 @ mask out CP number - THUMB( lsr r8, r8, #8 ) mov r7, #1 - add r6, r10, #TI_USED_CP - ARM( strb r7, [r6, r8, lsr #8] ) @ set appropriate used_cp[] - THUMB( strb r7, [r6, r8] ) @ set appropriate used_cp[] + add r6, r10, r8, lsr #8 @ add used_cp[] array offset first + strb r7, [r6, #TI_USED_CP] @ set appropriate used_cp[] #ifdef CONFIG_IWMMXT @ Test if we need to give access to iWMMXt coprocessors ldr r5, [r10, #TI_FLAGS] @@ -609,7 +607,7 @@ call_fpe: bcs iwmmxt_task_enable #endif ARM( add pc, pc, r8, lsr #6 ) - THUMB( lsl r8, r8, #2 ) + THUMB( lsr r8, r8, #6 ) THUMB( add pc, r8 ) nop @@ -1007,12 +1005,11 @@ vector_\name: sub lr, lr, #\correction .endif - @ - @ Save r0, lr_ (parent PC) and spsr_ - @ (parent CPSR) - @ + @ Save r0, lr_ (parent PC) stmia sp, {r0, lr} @ save r0, lr - mrs lr, spsr + + @ Save spsr_ (parent CPSR) +2: mrs lr, spsr str lr, [sp, #8] @ save spsr @ @@ -1033,6 +1030,44 @@ vector_\name: movs pc, lr @ branch to handler in SVC mode ENDPROC(vector_\name) +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .subsection 1 + .align 5 +vector_bhb_loop8_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_ (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mov r0, #8 +3: b . + 4 + subs r0, r0, #1 + bne 3b + dsb + isb + b 2b +ENDPROC(vector_bhb_loop8_\name) + +vector_bhb_bpiall_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_ (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mcr p15, 0, r0, c7, c5, 6 @ BPIALL + @ isb not needed due to "movs pc, lr" in the vector stub + @ which gives a "context synchronisation". + b 2b +ENDPROC(vector_bhb_bpiall_\name) + .previous +#endif + .align 2 @ handler addresses follow this label 1: @@ -1041,6 +1076,10 @@ ENDPROC(vector_\name) .section .stubs, "ax", %progbits @ This must be the first word .word vector_swi +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .word vector_bhb_loop8_swi + .word vector_bhb_bpiall_swi +#endif vector_rst: ARM( swi SYS_ERROR0 ) @@ -1155,8 +1194,10 @@ vector_addrexcptn: * FIQ "NMI" handler *----------------------------------------------------------------------------- * Handle a FIQ using the SVC stack allowing FIQ act like NMI on x86 - * systems. + * systems. This must be the last vector stub, so lets place it in its own + * subsection. */ + .subsection 2 vector_stub fiq, FIQ_MODE, 4 .long __fiq_usr @ 0 (USR_26 / USR_32) @@ -1189,6 +1230,30 @@ vector_addrexcptn: W(b) vector_irq W(b) vector_fiq +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .section .vectors.bhb.loop8, "ax", %progbits +.L__vectors_bhb_loop8_start: + W(b) vector_rst + W(b) vector_bhb_loop8_und + W(ldr) pc, .L__vectors_bhb_loop8_start + 0x1004 + W(b) vector_bhb_loop8_pabt + W(b) vector_bhb_loop8_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_loop8_irq + W(b) vector_bhb_loop8_fiq + + .section .vectors.bhb.bpiall, "ax", %progbits +.L__vectors_bhb_bpiall_start: + W(b) vector_rst + W(b) vector_bhb_bpiall_und + W(ldr) pc, .L__vectors_bhb_bpiall_start + 0x1008 + W(b) vector_bhb_bpiall_pabt + W(b) vector_bhb_bpiall_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_bpiall_irq + W(b) vector_bhb_bpiall_fiq +#endif + .data .align 2 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 271cb8a1eba1eefe4469db9ae5a677385fa8c2cc..bd619da73c84ec6d6efe895df9abbe4ea908faeb 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -162,6 +162,29 @@ ENDPROC(ret_from_fork) *----------------------------------------------------------------------------- */ + .align 5 +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +ENTRY(vector_bhb_loop8_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mov r8, #8 +1: b 2f +2: subs r8, r8, #1 + bne 1b + dsb + isb + b 3f +ENDPROC(vector_bhb_loop8_swi) + + .align 5 +ENTRY(vector_bhb_bpiall_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mcr p15, 0, r8, c7, c5, 6 @ BPIALL + isb + b 3f +ENDPROC(vector_bhb_bpiall_swi) +#endif .align 5 ENTRY(vector_swi) #ifdef CONFIG_CPU_V7M @@ -169,6 +192,7 @@ ENTRY(vector_swi) #else sub sp, sp, #PT_REGS_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 +3: ARM( add r8, sp, #S_PC ) ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr THUMB( mov r8, sp ) diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 7bd30c0a4280d9a6e029c05c49e8fb2d9b373b00..22f937e6f3ffb12a7e854179b73ea2a77c0eb06b 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -154,22 +154,38 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) return 0; } -static struct undef_hook kgdb_brkpt_hook = { +static struct undef_hook kgdb_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_BREAKINST, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_brk_fn }; -static struct undef_hook kgdb_compiled_brkpt_hook = { +static struct undef_hook kgdb_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_BREAKINST & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_brk_fn +}; + +static struct undef_hook kgdb_compiled_brkpt_arm_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_COMPILED_BREAK, - .cpsr_mask = MODE_MASK, + .cpsr_mask = PSR_T_BIT | MODE_MASK, .cpsr_val = SVC_MODE, .fn = kgdb_compiled_brk_fn }; +static struct undef_hook kgdb_compiled_brkpt_thumb_hook = { + .instr_mask = 0xffff, + .instr_val = KGDB_COMPILED_BREAK & 0xffff, + .cpsr_mask = PSR_T_BIT | MODE_MASK, + .cpsr_val = PSR_T_BIT | SVC_MODE, + .fn = kgdb_compiled_brk_fn +}; + static int __kgdb_notify(struct die_args *args, unsigned long cmd) { struct pt_regs *regs = args->regs; @@ -210,8 +226,10 @@ int kgdb_arch_init(void) if (ret != 0) return ret; - register_undef_hook(&kgdb_brkpt_hook); - register_undef_hook(&kgdb_compiled_brkpt_hook); + register_undef_hook(&kgdb_brkpt_arm_hook); + register_undef_hook(&kgdb_brkpt_thumb_hook); + register_undef_hook(&kgdb_compiled_brkpt_arm_hook); + register_undef_hook(&kgdb_compiled_brkpt_thumb_hook); return 0; } @@ -224,8 +242,10 @@ int kgdb_arch_init(void) */ void kgdb_arch_exit(void) { - unregister_undef_hook(&kgdb_brkpt_hook); - unregister_undef_hook(&kgdb_compiled_brkpt_hook); + unregister_undef_hook(&kgdb_brkpt_arm_hook); + unregister_undef_hook(&kgdb_brkpt_thumb_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_arm_hook); + unregister_undef_hook(&kgdb_compiled_brkpt_thumb_hook); unregister_die_notifier(&kgdb_notifier); } diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 3b69a76d341e784075a1f8ef053f0d308177feee..1626dfc6f6ce61c6aa7b94973190a86e6887f74a 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -62,9 +62,10 @@ user_backtrace(struct frame_tail __user *tail, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct frame_tail __user *tail; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -98,9 +99,10 @@ callchain_trace(struct stackframe *fr, void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe fr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -111,18 +113,21 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return instruction_pointer(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index ba27e184402a76be90af84442108458ae88a60ac..123432bb500ed4353e27c4b6c43e87e1efb34c53 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -436,7 +436,6 @@ asmlinkage void secondary_start_kernel(void) #endif pr_debug("CPU%u: Booted secondary processor\n", cpu); - preempt_disable(); trace_hardirqs_off(); /* diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c new file mode 100644 index 0000000000000000000000000000000000000000..0dcefc36fb7a08af113ef923af1fe9c826da662d --- /dev/null +++ b/arch/arm/kernel/spectre.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +#include + +static bool _unprivileged_ebpf_enabled(void) +{ +#ifdef CONFIG_BPF_SYSCALL + return !sysctl_unprivileged_bpf_disabled; +#else + return false; +#endif +} + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +static unsigned int spectre_v2_state; +static unsigned int spectre_v2_methods; + +void spectre_v2_update_state(unsigned int state, unsigned int method) +{ + if (state > spectre_v2_state) + spectre_v2_state = state; + spectre_v2_methods |= method; +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const char *method; + + if (spectre_v2_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "%s\n", "Not affected"); + + if (spectre_v2_state != SPECTRE_MITIGATED) + return sprintf(buf, "%s\n", "Vulnerable"); + + if (_unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + switch (spectre_v2_methods) { + case SPECTRE_V2_METHOD_BPIALL: + method = "Branch predictor hardening"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + method = "I-cache invalidation"; + break; + + case SPECTRE_V2_METHOD_SMC: + case SPECTRE_V2_METHOD_HVC: + method = "Firmware call"; + break; + + case SPECTRE_V2_METHOD_LOOP8: + method = "History overwrite"; + break; + + default: + method = "Multiple mitigations"; + break; + } + + return sprintf(buf, "Mitigation: %s\n", method); +} diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 76ea4178a55cb37384c43999c59a938cdbe81bf1..db798eac743159ce288640970cdc09616868a82a 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -54,8 +54,7 @@ int notrace unwind_frame(struct stackframe *frame) frame->sp = frame->fp; frame->fp = *(unsigned long *)(fp); - frame->pc = frame->lr; - frame->lr = *(unsigned long *)(fp + 4); + frame->pc = *(unsigned long *)(fp + 4); #else /* check current frame pointer is within bounds */ if (fp < low + 12 || fp > high - 4) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 17d5a785df28b10ee5714246d3ed342192c25e75..2d9e72ad1b0f923d8afafa49fd4e5cd5d1feadb6 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -806,10 +807,59 @@ static inline void __init kuser_init(void *vectors) } #endif +#ifndef CONFIG_CPU_V7M +static void copy_from_lma(void *vma, void *lma_start, void *lma_end) +{ + memcpy(vma, lma_start, lma_end - lma_start); +} + +static void flush_vectors(void *vma, size_t offset, size_t size) +{ + unsigned long start = (unsigned long)vma + offset; + unsigned long end = start + size; + + flush_icache_range(start, end); +} + +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +int spectre_bhb_update_vectors(unsigned int method) +{ + extern char __vectors_bhb_bpiall_start[], __vectors_bhb_bpiall_end[]; + extern char __vectors_bhb_loop8_start[], __vectors_bhb_loop8_end[]; + void *vec_start, *vec_end; + + if (system_state > SYSTEM_SCHEDULING) { + pr_err("CPU%u: Spectre BHB workaround too late - system vulnerable\n", + smp_processor_id()); + return SPECTRE_VULNERABLE; + } + + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + vec_start = __vectors_bhb_loop8_start; + vec_end = __vectors_bhb_loop8_end; + break; + + case SPECTRE_V2_METHOD_BPIALL: + vec_start = __vectors_bhb_bpiall_start; + vec_end = __vectors_bhb_bpiall_end; + break; + + default: + pr_err("CPU%u: unknown Spectre BHB state %d\n", + smp_processor_id(), method); + return SPECTRE_VULNERABLE; + } + + copy_from_lma(vectors_page, vec_start, vec_end); + flush_vectors(vectors_page, 0, vec_end - vec_start); + + return SPECTRE_MITIGATED; +} +#endif + void __init early_trap_init(void *vectors_base) { -#ifndef CONFIG_CPU_V7M - unsigned long vectors = (unsigned long)vectors_base; extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; unsigned i; @@ -830,17 +880,20 @@ void __init early_trap_init(void *vectors_base) * into the vector page, mapped at 0xffff0000, and ensure these * are visible to the instruction stream. */ - memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start); - memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start); + copy_from_lma(vectors_base, __vectors_start, __vectors_end); + copy_from_lma(vectors_base + 0x1000, __stubs_start, __stubs_end); kuser_init(vectors_base); - flush_icache_range(vectors, vectors + PAGE_SIZE * 2); + flush_vectors(vectors_base, 0, PAGE_SIZE * 2); +} #else /* ifndef CONFIG_CPU_V7M */ +void __init early_trap_init(void *vectors_base) +{ /* * on V7-M there is no need to copy the vector table to a dedicated * memory area. The address is configurable and so a table in the kernel * image can be used. */ -#endif } +#endif diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index 50136828f5b54eb88fd7c7628837f4d64bed7636..f14c2360ea0b1e4efcc67a6798e9daf7283a38c0 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -40,6 +40,10 @@ SECTIONS ARM_DISCARD *(.alt.smp.init) *(.pv_table) +#ifndef CONFIG_ARM_UNWIND + *(.ARM.exidx) *(.ARM.exidx.*) + *(.ARM.extab) *(.ARM.extab.*) +#endif } . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); @@ -172,7 +176,7 @@ ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") ASSERT((_end - __bss_start) >= 12288, ".bss too small for CONFIG_XIP_DEFLATED_DATA") #endif -#ifdef CONFIG_ARM_MPU +#if defined(CONFIG_ARM_MPU) && !defined(CONFIG_COMPILE_TEST) /* * Due to PMSAv7 restriction on base address and size we have to * enforce minimal alignment restrictions. It was seen that weaker diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c index 120f9aa6fff32578d439c4880d950ded86cd43a6..3f015cb6ec2b0758cc38fcdc7e6c2128b797d68a 100644 --- a/arch/arm/mach-at91/pm.c +++ b/arch/arm/mach-at91/pm.c @@ -517,18 +517,22 @@ static const struct of_device_id ramc_ids[] __initconst = { { /*sentinel*/ } }; -static __init void at91_dt_ramc(void) +static __init int at91_dt_ramc(void) { struct device_node *np; const struct of_device_id *of_id; int idx = 0; void *standby = NULL; const struct ramc_info *ramc; + int ret; for_each_matching_node_and_match(np, ramc_ids, &of_id) { soc_pm.data.ramc[idx] = of_iomap(np, 0); - if (!soc_pm.data.ramc[idx]) - panic(pr_fmt("unable to map ramc[%d] cpu registers\n"), idx); + if (!soc_pm.data.ramc[idx]) { + pr_err("unable to map ramc[%d] cpu registers\n", idx); + ret = -ENOMEM; + goto unmap_ramc; + } ramc = of_id->data; if (!standby) @@ -538,15 +542,26 @@ static __init void at91_dt_ramc(void) idx++; } - if (!idx) - panic(pr_fmt("unable to find compatible ram controller node in dtb\n")); + if (!idx) { + pr_err("unable to find compatible ram controller node in dtb\n"); + ret = -ENODEV; + goto unmap_ramc; + } if (!standby) { pr_warn("ramc no standby function available\n"); - return; + return 0; } at91_cpuidle_device.dev.platform_data = standby; + + return 0; + +unmap_ramc: + while (idx) + iounmap(soc_pm.data.ramc[--idx]); + + return ret; } static void at91rm9200_idle(void) @@ -869,6 +884,8 @@ static void __init at91_pm_init(void (*pm_idle)(void)) void __init at91rm9200_pm_init(void) { + int ret; + if (!IS_ENABLED(CONFIG_SOC_AT91RM9200)) return; @@ -880,7 +897,9 @@ void __init at91rm9200_pm_init(void) soc_pm.data.standby_mode = AT91_PM_STANDBY; soc_pm.data.suspend_mode = AT91_PM_ULP0; - at91_dt_ramc(); + ret = at91_dt_ramc(); + if (ret) + return; /* * AT91RM9200 SDRAM low-power mode cannot be used with self-refresh. @@ -895,13 +914,17 @@ void __init sam9x60_pm_init(void) static const int modes[] __initconst = { AT91_PM_STANDBY, AT91_PM_ULP0, AT91_PM_ULP0_FAST, AT91_PM_ULP1, }; + int ret; if (!IS_ENABLED(CONFIG_SOC_SAM9X60)) return; at91_pm_modes_validate(modes, ARRAY_SIZE(modes)); at91_pm_modes_init(); - at91_dt_ramc(); + ret = at91_dt_ramc(); + if (ret) + return; + at91_pm_init(NULL); soc_pm.ws_ids = sam9x60_ws_ids; @@ -910,6 +933,8 @@ void __init sam9x60_pm_init(void) void __init at91sam9_pm_init(void) { + int ret; + if (!IS_ENABLED(CONFIG_SOC_AT91SAM9)) return; @@ -921,7 +946,10 @@ void __init at91sam9_pm_init(void) soc_pm.data.standby_mode = AT91_PM_STANDBY; soc_pm.data.suspend_mode = AT91_PM_ULP0; - at91_dt_ramc(); + ret = at91_dt_ramc(); + if (ret) + return; + at91_pm_init(at91sam9_idle); } @@ -930,12 +958,16 @@ void __init sama5_pm_init(void) static const int modes[] __initconst = { AT91_PM_STANDBY, AT91_PM_ULP0, AT91_PM_ULP0_FAST, }; + int ret; if (!IS_ENABLED(CONFIG_SOC_SAMA5)) return; at91_pm_modes_validate(modes, ARRAY_SIZE(modes)); - at91_dt_ramc(); + ret = at91_dt_ramc(); + if (ret) + return; + at91_pm_init(NULL); } @@ -945,13 +977,17 @@ void __init sama5d2_pm_init(void) AT91_PM_STANDBY, AT91_PM_ULP0, AT91_PM_ULP0_FAST, AT91_PM_ULP1, AT91_PM_BACKUP, }; + int ret; if (!IS_ENABLED(CONFIG_SOC_SAMA5D2)) return; at91_pm_modes_validate(modes, ARRAY_SIZE(modes)); at91_pm_modes_init(); - at91_dt_ramc(); + ret = at91_dt_ramc(); + if (ret) + return; + at91_pm_init(NULL); soc_pm.ws_ids = sama5d2_ws_ids; diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c index 40c74b4c4d730151ea8a805344961acc90470dcb..e24409c1f5d394842228a158c300f9c3a63b7047 100644 --- a/arch/arm/mach-imx/pm-imx6.c +++ b/arch/arm/mach-imx/pm-imx6.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -618,6 +619,7 @@ static void __init imx6_pm_common_init(const struct imx6_pm_socdata static void imx6_pm_stby_poweroff(void) { + gic_cpu_if_down(0); imx6_set_lpm(STOP_POWER_OFF); imx6q_suspend_finish(0); diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 2000fca6bd4e6ec585930729381e552e8f13c251..6098666e928d0678e8fa2cba26d51e039515ea2e 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -263,9 +263,9 @@ static int __init omapdss_init_of(void) } r = of_platform_populate(node, NULL, NULL, &pdev->dev); + put_device(&pdev->dev); if (r) { pr_err("Unable to populate DSS submodule devices\n"); - put_device(&pdev->dev); return r; } diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c index 83d595ebcf1f6112ef90f3a6f2d0df826844184c..1fd67abca055b26dd489fc736769ffdb946dcf50 100644 --- a/arch/arm/mach-omap2/omap_hwmod.c +++ b/arch/arm/mach-omap2/omap_hwmod.c @@ -749,8 +749,10 @@ static int __init _init_clkctrl_providers(void) for_each_matching_node(np, ti_clkctrl_match_table) { ret = _setup_clkctrl_provider(np); - if (ret) + if (ret) { + of_node_put(np); break; + } } return ret; @@ -3618,6 +3620,8 @@ int omap_hwmod_init_module(struct device *dev, oh->flags |= HWMOD_SWSUP_SIDLE_ACT; if (data->cfg->quirks & SYSC_QUIRK_SWSUP_MSTANDBY) oh->flags |= HWMOD_SWSUP_MSTANDBY; + if (data->cfg->quirks & SYSC_QUIRK_CLKDM_NOAUTO) + oh->flags |= HWMOD_CLKDM_NOAUTO; error = omap_hwmod_check_module(dev, oh, data, sysc_fields, rev_offs, sysc_offs, syss_offs, diff --git a/arch/arm/mach-s3c/irq-s3c24xx.c b/arch/arm/mach-s3c/irq-s3c24xx.c index 79b5f19af7a52a20b9c33b236522e045a7d2961f..19fb9bdf446b4a84db4a2978708e59677ca840ab 100644 --- a/arch/arm/mach-s3c/irq-s3c24xx.c +++ b/arch/arm/mach-s3c/irq-s3c24xx.c @@ -360,11 +360,25 @@ static inline int s3c24xx_handle_intc(struct s3c_irq_intc *intc, asmlinkage void __exception_irq_entry s3c24xx_handle_irq(struct pt_regs *regs) { do { - if (likely(s3c_intc[0])) - if (s3c24xx_handle_intc(s3c_intc[0], regs, 0)) - continue; + /* + * For platform based machines, neither ERR nor NULL can happen here. + * The s3c24xx_handle_irq() will be set as IRQ handler iff this succeeds: + * + * s3c_intc[0] = s3c24xx_init_intc() + * + * If this fails, the next calls to s3c24xx_init_intc() won't be executed. + * + * For DT machine, s3c_init_intc_of() could set the IRQ handler without + * setting s3c_intc[0] only if it was called with num_ctrl=0. There is no + * such code path, so again the s3c_intc[0] will have a valid pointer if + * set_handle_irq() is called. + * + * Therefore in s3c24xx_handle_irq(), the s3c_intc[0] is always something. + */ + if (s3c24xx_handle_intc(s3c_intc[0], regs, 0)) + continue; - if (s3c_intc[2]) + if (!IS_ERR_OR_NULL(s3c_intc[2])) if (s3c24xx_handle_intc(s3c_intc[2], regs, 64)) continue; diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index ee949255ced3f01ad4256b2b7bea3ba669006309..09ef73b99dd86a851a98bc7c75c433821c5b5bf1 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -154,8 +154,10 @@ static int __init rcar_gen2_regulator_quirk(void) return -ENODEV; for_each_matching_node_and_match(np, rcar_gen2_quirk_match, &id) { - if (!of_device_is_available(np)) + if (!of_device_is_available(np)) { + of_node_put(np); break; + } ret = of_property_read_u32(np, "reg", &addr); if (ret) /* Skip invalid entry and continue */ @@ -164,6 +166,7 @@ static int __init rcar_gen2_regulator_quirk(void) quirk = kzalloc(sizeof(*quirk), GFP_KERNEL); if (!quirk) { ret = -ENOMEM; + of_node_put(np); goto err_mem; } diff --git a/arch/arm/mach-socfpga/Kconfig b/arch/arm/mach-socfpga/Kconfig index c3bb68d57cea2e5143a61073744050c23be44233..b62ae4dafa2eb676a11e5b6ac9c01a4833dc2966 100644 --- a/arch/arm/mach-socfpga/Kconfig +++ b/arch/arm/mach-socfpga/Kconfig @@ -2,6 +2,7 @@ menuconfig ARCH_SOCFPGA bool "Altera SOCFPGA family" depends on ARCH_MULTI_V7 + select ARCH_HAS_RESET_CONTROLLER select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA select ARM_GIC @@ -18,6 +19,7 @@ menuconfig ARCH_SOCFPGA select PL310_ERRATA_727915 select PL310_ERRATA_753970 if PL310 select PL310_ERRATA_769419 + select RESET_CONTROLLER if ARCH_SOCFPGA config SOCFPGA_SUSPEND diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h index fc2608b18a0d048c62aed905f4f76da438f94426..18f01190dcfd423d3e8f9ac23408f26af1b729c8 100644 --- a/arch/arm/mach-socfpga/core.h +++ b/arch/arm/mach-socfpga/core.h @@ -33,7 +33,7 @@ extern void __iomem *sdr_ctl_base_addr; u32 socfpga_sdram_self_refresh(u32 sdr_base); extern unsigned int socfpga_sdram_self_refresh_sz; -extern char secondary_trampoline, secondary_trampoline_end; +extern char secondary_trampoline[], secondary_trampoline_end[]; extern unsigned long socfpga_cpu1start_addr; diff --git a/arch/arm/mach-socfpga/platsmp.c b/arch/arm/mach-socfpga/platsmp.c index fbb80b883e5dd2606ee0c8130e242e42439d8468..201191cf68f324ca970e93162d46500214b449a5 100644 --- a/arch/arm/mach-socfpga/platsmp.c +++ b/arch/arm/mach-socfpga/platsmp.c @@ -20,14 +20,14 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) { - int trampoline_size = &secondary_trampoline_end - &secondary_trampoline; + int trampoline_size = secondary_trampoline_end - secondary_trampoline; if (socfpga_cpu1start_addr) { /* This will put CPU #1 into reset. */ writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr + SOCFPGA_RSTMGR_MODMPURST); - memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); + memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size); writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x000000ff)); @@ -45,12 +45,12 @@ static int socfpga_boot_secondary(unsigned int cpu, struct task_struct *idle) static int socfpga_a10_boot_secondary(unsigned int cpu, struct task_struct *idle) { - int trampoline_size = &secondary_trampoline_end - &secondary_trampoline; + int trampoline_size = secondary_trampoline_end - secondary_trampoline; if (socfpga_cpu1start_addr) { writel(RSTMGR_MPUMODRST_CPU1, rst_manager_base_addr + SOCFPGA_A10_RSTMGR_MODMPURST); - memcpy(phys_to_virt(0), &secondary_trampoline, trampoline_size); + memcpy(phys_to_virt(0), secondary_trampoline, trampoline_size); writel(__pa_symbol(secondary_startup), sys_manager_base_addr + (socfpga_cpu1start_addr & 0x00000fff)); diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 02692fbe2db5c599cf5945c42fb217b712d0b212..c6bf34a33849c77a5c1fa4af189d87ac1b1c7156 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -753,7 +753,7 @@ config CPU_BIG_ENDIAN config CPU_ENDIAN_BE8 bool depends on CPU_BIG_ENDIAN - default CPU_V6 || CPU_V6K || CPU_V7 + default CPU_V6 || CPU_V6K || CPU_V7 || CPU_V7M help Support for the BE-8 (big-endian) mode on ARMv6 and ARMv7 processors. @@ -833,6 +833,7 @@ config CPU_BPREDICT_DISABLE config CPU_SPECTRE bool + select GENERIC_CPU_VULNERABILITIES config HARDEN_BRANCH_PREDICTOR bool "Harden the branch predictor against aliasing attacks" if EXPERT @@ -853,6 +854,16 @@ config HARDEN_BRANCH_PREDICTOR If unsure, say Y. +config HARDEN_BRANCH_HISTORY + bool "Harden Spectre style attacks against branch history" if EXPERT + depends on CPU_SPECTRE + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. When + taking an exception, a sequence of branches overwrites the branch + history, or branch history is invalidated. + config TLS_REG_EMUL bool select NEED_KUSER_HELPERS diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 75f3ab531bdf433aeccc7afc9739591d70e824e0..15af4dd52426278d984c401ceb6c1c06ddb5dfde 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -125,11 +125,22 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max_low, int pfn_valid(unsigned long pfn) { phys_addr_t addr = __pfn_to_phys(pfn); + unsigned long pageblock_size = PAGE_SIZE * pageblock_nr_pages; if (__phys_to_pfn(addr) != pfn) return 0; - return memblock_is_map_memory(addr); + /* + * If address less than pageblock_size bytes away from a present + * memory chunk there still will be a memory map entry for it + * because we round freed memory map to the pageblock boundaries. + */ + if (memblock_overlaps_region(&memblock.memory, + ALIGN_DOWN(addr, pageblock_size), + pageblock_size)) + return 1; + + return 0; } EXPORT_SYMBOL(pfn_valid); #endif @@ -313,14 +324,14 @@ static void __init free_unused_memmap(void) */ start = min(start, ALIGN(prev_end, PAGES_PER_SECTION)); -#else +#endif /* - * Align down here since the VM subsystem insists that the - * memmap entries are valid from the bank start aligned to - * MAX_ORDER_NR_PAGES. + * Align down here since many operations in VM subsystem + * presume that there are no holes in the memory map inside + * a pageblock */ - start = round_down(start, MAX_ORDER_NR_PAGES); -#endif + start = round_down(start, pageblock_nr_pages); + /* * If we had a previous bank, and there is a space * between the current bank and the previous, free it. @@ -329,17 +340,19 @@ static void __init free_unused_memmap(void) free_memmap(prev_end, start); /* - * Align up here since the VM subsystem insists that the - * memmap entries are valid from the bank end aligned to - * MAX_ORDER_NR_PAGES. + * Align up here since many operations in VM subsystem + * presume that there are no holes in the memory map inside + * a pageblock */ - prev_end = ALIGN(end, MAX_ORDER_NR_PAGES); + prev_end = ALIGN(end, pageblock_nr_pages); } #ifdef CONFIG_SPARSEMEM - if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) + if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION)) { + prev_end = ALIGN(end, pageblock_nr_pages); free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION)); + } #endif } diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 000e8210000bdb3039f53eeb9935807c07c0ac9a..80fb5a4a5c050a955e343486648c8ee723841881 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -284,7 +285,8 @@ static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, * Don't allow RAM to be mapped with mismatched attributes - this * causes problems with ARMv6+ */ - if (WARN_ON(pfn_valid(pfn) && mtype != MT_MEMORY_RW)) + if (WARN_ON(memblock_is_map_memory(PFN_PHYS(pfn)) && + mtype != MT_MEMORY_RW)) return NULL; area = get_vm_area_caller(size, VM_IOREMAP, caller); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index fa259825310c5aced95ac256171d907e3c0f3549..3e3001998460b807a697534c2dd7dbf4e95c7122 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -212,12 +212,14 @@ early_param("ecc", early_ecc); static int __init early_cachepolicy(char *p) { pr_warn("cachepolicy kernel parameter not supported without cp15\n"); + return 0; } early_param("cachepolicy", early_cachepolicy); static int __init noalign_setup(char *__unused) { pr_warn("noalign kernel parameter not supported without cp15\n"); + return 1; } __setup("noalign", noalign_setup); @@ -391,9 +393,9 @@ void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) FIXADDR_END); BUG_ON(idx >= __end_of_fixed_addresses); - /* we only support device mappings until pgprot_kernel has been set */ + /* We support only device mappings before pgprot_kernel is set. */ if (WARN_ON(pgprot_val(prot) != pgprot_val(FIXMAP_PAGE_IO) && - pgprot_val(pgprot_kernel) == 0)) + pgprot_val(prot) && pgprot_val(pgprot_kernel) == 0)) return; if (pgprot_val(prot)) diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index e2c743aa2eb2b88042064657e2307ab0134d31d8..d9f7dfe2a7ed3efac90d0c0fe5852b5503e7eb49 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -340,6 +340,7 @@ ENTRY(\name\()_cache_fns) .macro define_tlb_functions name:req, flags_up:req, flags_smp .type \name\()_tlb_fns, #object + .align 2 ENTRY(\name\()_tlb_fns) .long \name\()_flush_user_tlb_range .long \name\()_flush_kern_tlb_range diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 114c05ab4dd919a26064eda1f395f1c8f4119ec1..06dbfb968182de7c585e41308da09eec15360600 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -6,8 +6,35 @@ #include #include #include +#include #include +#ifdef CONFIG_ARM_PSCI +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + switch ((int)res.a0) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + + default: + return SPECTRE_VULNERABLE; + } +} +#else +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + return SPECTRE_VULNERABLE; +} +#endif + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn); @@ -36,13 +63,61 @@ static void __maybe_unused call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void cpu_v7_spectre_init(void) +static unsigned int spectre_v2_install_workaround(unsigned int method) { const char *spectre_v2_method = NULL; int cpu = smp_processor_id(); if (per_cpu(harden_branch_predictor_fn, cpu)) - return; + return SPECTRE_MITIGATED; + + switch (method) { + case SPECTRE_V2_METHOD_BPIALL: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_bpiall; + spectre_v2_method = "BPIALL"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_iciallu; + spectre_v2_method = "ICIALLU"; + break; + + case SPECTRE_V2_METHOD_HVC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_hvc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; + spectre_v2_method = "hypervisor"; + break; + + case SPECTRE_V2_METHOD_SMC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_smc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; + spectre_v2_method = "firmware"; + break; + } + + if (spectre_v2_method) + pr_info("CPU%u: Spectre v2: using %s workaround\n", + smp_processor_id(), spectre_v2_method); + + return SPECTRE_MITIGATED; +} +#else +static unsigned int spectre_v2_install_workaround(unsigned int method) +{ + pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n", + smp_processor_id()); + + return SPECTRE_VULNERABLE; +} +#endif + +static void cpu_v7_spectre_v2_init(void) +{ + unsigned int state, method = 0; switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A8: @@ -51,69 +126,133 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - per_cpu(harden_branch_predictor_fn, cpu) = - harden_branch_predictor_bpiall; - spectre_v2_method = "BPIALL"; + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; break; case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - per_cpu(harden_branch_predictor_fn, cpu) = - harden_branch_predictor_iciallu; - spectre_v2_method = "ICIALLU"; + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_ICIALLU; break; -#ifdef CONFIG_ARM_PSCI case ARM_CPU_PART_BRAHMA_B53: /* Requires no workaround */ + state = SPECTRE_UNAFFECTED; break; + default: /* Other ARM CPUs require no workaround */ - if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) + if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) { + state = SPECTRE_UNAFFECTED; break; + } + fallthrough; - /* Cortex A57/A72 require firmware workaround */ - case ARM_CPU_PART_CORTEX_A57: - case ARM_CPU_PART_CORTEX_A72: { - struct arm_smccc_res res; - arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, - ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if ((int)res.a0 != 0) - return; + /* Cortex A57/A72 require firmware workaround */ + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: + state = spectre_v2_get_cpu_fw_mitigation_state(); + if (state != SPECTRE_MITIGATED) + break; switch (arm_smccc_1_1_get_conduit()) { case SMCCC_CONDUIT_HVC: - per_cpu(harden_branch_predictor_fn, cpu) = - call_hvc_arch_workaround_1; - cpu_do_switch_mm = cpu_v7_hvc_switch_mm; - spectre_v2_method = "hypervisor"; + method = SPECTRE_V2_METHOD_HVC; break; case SMCCC_CONDUIT_SMC: - per_cpu(harden_branch_predictor_fn, cpu) = - call_smc_arch_workaround_1; - cpu_do_switch_mm = cpu_v7_smc_switch_mm; - spectre_v2_method = "firmware"; + method = SPECTRE_V2_METHOD_SMC; break; default: + state = SPECTRE_VULNERABLE; break; } } -#endif + + if (state == SPECTRE_MITIGATED) + state = spectre_v2_install_workaround(method); + + spectre_v2_update_state(state, method); +} + +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +static int spectre_bhb_method; + +static const char *spectre_bhb_method_name(int method) +{ + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + return "loop"; + + case SPECTRE_V2_METHOD_BPIALL: + return "BPIALL"; + + default: + return "unknown"; } +} - if (spectre_v2_method) - pr_info("CPU%u: Spectre v2: using %s workaround\n", - smp_processor_id(), spectre_v2_method); +static int spectre_bhb_install_workaround(int method) +{ + if (spectre_bhb_method != method) { + if (spectre_bhb_method) { + pr_err("CPU%u: Spectre BHB: method disagreement, system vulnerable\n", + smp_processor_id()); + + return SPECTRE_VULNERABLE; + } + + if (spectre_bhb_update_vectors(method) == SPECTRE_VULNERABLE) + return SPECTRE_VULNERABLE; + + spectre_bhb_method = method; + } + + pr_info("CPU%u: Spectre BHB: using %s workaround\n", + smp_processor_id(), spectre_bhb_method_name(method)); + + return SPECTRE_MITIGATED; } #else -static void cpu_v7_spectre_init(void) +static int spectre_bhb_install_workaround(int method) { + return SPECTRE_VULNERABLE; } #endif +static void cpu_v7_spectre_bhb_init(void) +{ + unsigned int state, method = 0; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A15: + case ARM_CPU_PART_BRAHMA_B15: + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_LOOP8; + break; + + case ARM_CPU_PART_CORTEX_A73: + case ARM_CPU_PART_CORTEX_A75: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; + break; + + default: + state = SPECTRE_UNAFFECTED; + break; + } + + if (state == SPECTRE_MITIGATED) + state = spectre_bhb_install_workaround(method); + + spectre_v2_update_state(state, method); +} + static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned, u32 mask, const char *msg) { @@ -142,16 +281,17 @@ static bool check_spectre_auxcr(bool *warned, u32 bit) void cpu_v7_ca8_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6))) - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); } void cpu_v7_ca15_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); } void cpu_v7_bugs_init(void) { - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index ce8b043263521b059974ada9260dd177f87b8066..1214e39aad5ec6476617eb3bec1ecaaf5240411d 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -36,6 +36,10 @@ * +-----+ * |RSVD | JIT scratchpad * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) + * | ... | caller-saved registers + * +-----+ + * | ... | arguments passed on stack + * ARM_SP during call => +-----| * | | * | ... | Function call stack * | | @@ -63,6 +67,12 @@ * * When popping registers off the stack at the end of a BPF function, we * reference them via the current ARM_FP register. + * + * Some eBPF operations are implemented via a call to a helper function. + * Such calls are "invisible" in the eBPF code, so it is up to the calling + * program to preserve any caller-saved ARM registers during the call. The + * JIT emits code to push and pop those registers onto the stack, immediately + * above the callee stack frame. */ #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \ @@ -70,6 +80,8 @@ #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) +#define CALLER_MASK (1 << ARM_R0 | 1 << ARM_R1 | 1 << ARM_R2 | 1 << ARM_R3) + enum { /* Stack layout - these are offsets from (top of stack - 4) */ BPF_R2_HI, @@ -464,6 +476,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) { + const int exclude_mask = BIT(ARM_R0) | BIT(ARM_R1); const s8 *tmp = bpf2a32[TMP_REG_1]; #if __LINUX_ARM_ARCH__ == 7 @@ -495,11 +508,17 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) emit(ARM_MOV_R(ARM_R0, rm), ctx); } + /* Push caller-saved registers on stack */ + emit(ARM_PUSH(CALLER_MASK & ~exclude_mask), ctx); + /* Call appropriate function */ emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx); emit_blx_r(ARM_IP, ctx); + /* Restore caller-saved registers from stack */ + emit(ARM_POP(CALLER_MASK & ~exclude_mask), ctx); + /* Save return value */ if (rd != ARM_R0) emit(ARM_MOV_R(rd, ARM_R0), ctx); diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index a9653117ca0dd619365b4bf30fe332736c632f24..e513d8a467760d3645e2624525ecd24459944963 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -462,7 +462,7 @@ static struct undef_hook kprobes_arm_break_hook = { #endif /* !CONFIG_THUMB2_KERNEL */ -int __init arch_init_kprobes() +int __init arch_init_kprobes(void) { arm_probes_decode_init(); #ifdef CONFIG_THUMB2_KERNEL diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index d056a548358ea024fc64f3a107849951a26838d7..ee8207e943fae3a49ed28d14a80ec7b6543e9589 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -454,3 +454,5 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 96d0e0b0aab5cf0e711721fa6cc030b6390ca26f..d29a255f8cdd8186a2a1cc035c0fa7cc4ab558ac 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -20,9 +20,11 @@ config ARM64 select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_IOREMAP_PHYS_HOOKS select ARCH_HAS_KCOV select ARCH_HAS_KEEPINITRD select ARCH_HAS_MEMBARRIER_SYNC_CORE + select ARCH_HAS_MEM_ENCRYPT select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE select ARCH_HAS_PTE_DEVMAP select ARCH_HAS_PTE_SPECIAL @@ -139,6 +141,7 @@ config ARM64 select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_JUMP_LABEL_RELATIVE select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48) + select HAVE_ARCH_KASAN_VMALLOC if HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE) select HAVE_ARCH_KFENCE @@ -176,6 +179,7 @@ config ARM64 select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_KVM select HAVE_NMI select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS @@ -194,6 +198,7 @@ config ARM64 select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING + select KASAN_VMALLOC if KASAN select MODULES_USE_ELF_RELA select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH @@ -667,6 +672,39 @@ config ARM64_ERRATUM_1508412 If unsure, say Y. +config ARM64_WORKAROUND_TSB_FLUSH_FAILURE + bool + +config ARM64_ERRATUM_2054223 + bool "Cortex-A710: 2054223: workaround TSB instruction failing to flush trace" + default y + select ARM64_WORKAROUND_TSB_FLUSH_FAILURE + help + Enable workaround for ARM Cortex-A710 erratum 2054223 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause losing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + +config ARM64_ERRATUM_2067961 + bool "Neoverse-N2: 2067961: workaround TSB instruction failing to flush trace" + default y + select ARM64_WORKAROUND_TSB_FLUSH_FAILURE + help + Enable workaround for ARM Neoverse-N2 erratum 2067961 + + Affected cores may fail to flush the trace data on a TSB instruction, when + the PE is in trace prohibited state. This will cause losing a few bytes + of the trace cached. + + Workaround is to issue two TSB consecutively on affected cores. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -1194,6 +1232,15 @@ config UNMAP_KERNEL_AT_EL0 If unsure, say Y. +config MITIGATE_SPECTRE_BRANCH_HISTORY + bool "Mitigate Spectre style attacks against branch history" if EXPERT + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. + When taking an exception from user-space, a sequence of branches + or a firmware call overwrites the branch history. + config RODATA_FULL_DEFAULT_ENABLED bool "Apply r/o permissions of VM areas also to their linear aliases" default y @@ -1275,7 +1322,8 @@ config KUSER_HELPERS config COMPAT_VDSO bool "Enable vDSO for 32-bit applications" - depends on !CPU_BIG_ENDIAN && "$(CROSS_COMPILE_COMPAT)" != "" + depends on !CPU_BIG_ENDIAN + depends on (CC_IS_CLANG && LD_IS_LLD) || "$(CROSS_COMPILE_COMPAT)" != "" select GENERIC_COMPAT_VDSO default y help @@ -1503,7 +1551,7 @@ config ARM64_PTR_AUTH depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC # Modern compilers insert a .note.gnu.property section note for PAC # which is only understood by binutils starting with version 2.33.1. - depends on LD_IS_LLD || LD_VERSION >= 233010000 || (CC_IS_GCC && GCC_VERSION < 90100) + depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100) depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) help diff --git a/arch/arm64/Makefile.postlink b/arch/arm64/Makefile.postlink index eedcf35f8d944bb350ba639c9137b1a2b8f62230..a13789148bfb62a414804d84a7eee0d3498ea413 100644 --- a/arch/arm64/Makefile.postlink +++ b/arch/arm64/Makefile.postlink @@ -3,8 +3,11 @@ # # This file is included by the generic Kbuild makefile to permit the # architecture to perform postlink actions on vmlinux and any .ko module file. -# In this case, we only need it for fips140.ko, which needs a HMAC digest to be -# injected into it. All other targets are NOPs. +# In this case, we only need it for fips140.ko, which needs some postprocessing +# for the integrity check mandated by FIPS. This involves making copies of the +# relocation sections so that the module will have access to them at +# initialization time, and calculating and injecting a HMAC digest into the +# module. All other targets are NOPs. # PHONY := __archpost @@ -15,7 +18,14 @@ include scripts/Kbuild.include CMD_FIPS140_GEN_HMAC = crypto/fips140_gen_hmac quiet_cmd_gen_hmac = HMAC $@ - cmd_gen_hmac = $(CMD_FIPS140_GEN_HMAC) $@ + cmd_gen_hmac = $(OBJCOPY) $@ \ + --dump-section=$(shell $(READELF) -SW $@|grep -Eo '\.rela\.text\S*')=$@.rela.text \ + --dump-section=$(shell $(READELF) -SW $@|grep -Eo '\.rela\.rodata\S*')=$@.rela.rodata \ + --add-section=.init.rela.text=$@.rela.text \ + --add-section=.init.rela.rodata=$@.rela.rodata \ + --set-section-flags=.init.rela.text=alloc,readonly \ + --set-section-flags=.init.rela.rodata=alloc,readonly && \ + $(CMD_FIPS140_GEN_HMAC) $@ # `@true` prevents complaints when there is nothing to be done @@ -29,7 +39,7 @@ $(objtree)/crypto/fips140.ko: FORCE @true clean: - @true + rm -f $(objtree)/crypto/fips140.ko.rela.* PHONY += FORCE clean diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi index cc321c04f1219edc0f35e3acb02bcd3aabc40298..f6d7d7f7fdabe908bb26806ae3f117c29b2b969a 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a100.dtsi @@ -343,19 +343,19 @@ }; thermal-zones { - cpu-thermal-zone { + cpu-thermal { polling-delay-passive = <0>; polling-delay = <0>; thermal-sensors = <&ths 0>; }; - ddr-thermal-zone { + ddr-thermal { polling-delay-passive = <0>; polling-delay = <0>; thermal-sensors = <&ths 2>; }; - gpu-thermal-zone { + gpu-thermal { polling-delay-passive = <0>; polling-delay = <0>; thermal-sensors = <&ths 1>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-cpu-opp.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-cpu-opp.dtsi index 578c37490d901da5e13d50954954c522cb76e31a..e39db51eb4489f06940cf161466b686b26e649b5 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-cpu-opp.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-cpu-opp.dtsi @@ -4,7 +4,7 @@ */ / { - cpu0_opp_table: opp_table0 { + cpu0_opp_table: opp-table-cpu { compatible = "operating-points-v2"; opp-shared; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-cpu-opp.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5-cpu-opp.dtsi index b2657201957ebba4ab5011e0c17312f0a4ae3bd7..1afad8b437d7233c5e3ee5d9de4089a4e0700972 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-cpu-opp.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-cpu-opp.dtsi @@ -2,7 +2,7 @@ // Copyright (C) 2020 Chen-Yu Tsai / { - cpu_opp_table: cpu-opp-table { + cpu_opp_table: opp-table-cpu { compatible = "operating-points-v2"; opp-shared; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts index b059e20813bdff18070601e661efcaa1029ebe1f..e8ab8c2df51a081f670e76de3acbf19adb9f6370 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-nanopi-neo2.dts @@ -75,7 +75,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts index ef5ca6444220336398588735482ca1b2360fadc6..de448ca51e216c3aa37900094626d62b20fd8cce 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5-orangepi-zero-plus.dts @@ -69,7 +69,7 @@ pinctrl-0 = <&emac_rgmii_pins>; phy-supply = <®_gmac_3v3>; phy-handle = <&ext_rgmii_phy>; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi index 10489e5086956b816af8ad2b76ebfef02341b7a1..0ee8a5adf02b0f8b93b460b5457a59f29ea9296b 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h5.dtsi @@ -204,7 +204,7 @@ }; }; - gpu_thermal { + gpu-thermal { polling-delay-passive = <0>; polling-delay = <0>; thermal-sensors = <&ths 1>; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-cpu-opp.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6-cpu-opp.dtsi index 1a5eddc5a40f3c5133f4f1e652946aacea95bb64..653452926d8578b566c7131cc018f062a1b4ad71 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6-cpu-opp.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-cpu-opp.dtsi @@ -3,7 +3,7 @@ // Copyright (C) 2020 Clément Péron / { - cpu_opp_table: cpu-opp-table { + cpu_opp_table: opp-table-cpu { compatible = "allwinner,sun50i-h6-operating-points"; nvmem-cells = <&cpu_speed_grade>; opp-shared; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi index 959b299344e54c7346b596fd990e66a68d69faf0..075153a4d49fc597a616467f3decf8135125c7db 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12-common.dtsi @@ -52,7 +52,7 @@ secure-monitor = <&sm>; }; - gpu_opp_table: gpu-opp-table { + gpu_opp_table: opp-table-gpu { compatible = "operating-points-v2"; opp-124999998 { @@ -101,6 +101,12 @@ no-map; }; + /* 32 MiB reserved for ARM Trusted Firmware (BL32) */ + secmon_reserved_bl32: secmon@5300000 { + reg = <0x0 0x05300000 0x0 0x2000000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts index b00d0468c7534ab49ad346d303b43e182176a3c4..71f91e31c181887b6aa6abbd69365f8917d8439c 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-sei510.dts @@ -139,7 +139,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&dc_in>; + pwm-supply = <&dc_in>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; @@ -157,14 +157,6 @@ regulator-always-on; }; - reserved-memory { - /* TEE Reserved Memory */ - bl32_reserved: bl32@5000000 { - reg = <0x0 0x05300000 0x0 0x2000000>; - no-map; - }; - }; - sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts index a26bfe72550fe47cb94dab419adc15531a8d2b0e..4b5d11e56364df8a9632f0aa4cf775867577d757 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-u200.dts @@ -139,7 +139,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&main_12v>; + pwm-supply = <&main_12v>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts index 463a72d6bb7c7beb9547120a7f30c20f4c982db7..26b5d9327324aeb7d8532a2c53d66af15a7d323d 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts +++ b/arch/arm64/boot/dts/amlogic/meson-g12a-x96-max.dts @@ -139,7 +139,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&dc_in>; + pwm-supply = <&dc_in>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi index f42cf4b8af2d4915a721318f1ae87c762897f6e3..16dd409051b400c91819b71cbd2f66bfd642191d 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-khadas-vim3.dtsi @@ -18,7 +18,7 @@ regulator-min-microvolt = <690000>; regulator-max-microvolt = <1050000>; - vin-supply = <&dc_in>; + pwm-supply = <&dc_in>; pwms = <&pwm_ab 0 1250 0>; pwm-dutycycle-range = <100 0>; @@ -37,7 +37,7 @@ regulator-min-microvolt = <690000>; regulator-max-microvolt = <1050000>; - vin-supply = <&vsys_3v3>; + pwm-supply = <&vsys_3v3>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi index 39a09661c5f62f18f1f1312d57da01528b8e57ac..87e8e64ad5caee06df5c2a6a50338c74d8ae48ff 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-odroid-n2.dtsi @@ -15,7 +15,7 @@ ethernet0 = ðmac; }; - dioo2133: audio-amplifier-0 { + dio2133: audio-amplifier-0 { compatible = "simple-audio-amplifier"; enable-gpios = <&gpio_ao GPIOAO_2 GPIO_ACTIVE_HIGH>; VCC-supply = <&vcc_5v>; @@ -128,7 +128,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&main_12v>; + pwm-supply = <&main_12v>; pwms = <&pwm_ab 0 1250 0>; pwm-dutycycle-range = <100 0>; @@ -147,7 +147,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&main_12v>; + pwm-supply = <&main_12v>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; @@ -215,7 +215,7 @@ audio-widgets = "Line", "Lineout"; audio-aux-devs = <&tdmout_b>, <&tdmout_c>, <&tdmin_a>, <&tdmin_b>, <&tdmin_c>, <&tdmin_lb>, - <&dioo2133>; + <&dio2133>; audio-routing = "TDMOUT_B IN 0", "FRDDR_A OUT 1", "TDMOUT_B IN 1", "FRDDR_B OUT 1", "TDMOUT_B IN 2", "FRDDR_C OUT 1", @@ -543,7 +543,7 @@ pinctrl-0 = <&nor_pins>; pinctrl-names = "default"; - mx25u64: spi-flash@0 { + mx25u64: flash@0 { #address-cells = <1>; #size-cells = <1>; compatible = "mxicy,mx25u6435f", "jedec,spi-nor"; diff --git a/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi b/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi index feb088504740002490658a94bdea8744e45091c5..b40d2c1002c921836b640033fe51afed3022f5b0 100644 --- a/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-g12b-w400.dtsi @@ -96,7 +96,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&main_12v>; + pwm-supply = <&main_12v>; pwms = <&pwm_ab 0 1250 0>; pwm-dutycycle-range = <100 0>; @@ -115,7 +115,7 @@ regulator-min-microvolt = <721000>; regulator-max-microvolt = <1022000>; - vin-supply = <&main_12v>; + pwm-supply = <&main_12v>; pwms = <&pwm_AO_cd 1 1250 0>; pwm-dutycycle-range = <100 0>; diff --git a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi index 0edd137151f89eb6ba63695d28abdfaf18cf6434..47cbb0a1eb183ca0c7993f9f69337a47f7306a81 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gx.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gx.dtsi @@ -43,6 +43,12 @@ no-map; }; + /* 32 MiB reserved for ARM Trusted Firmware (BL32) */ + secmon_reserved_bl32: secmon@5300000 { + reg = <0x0 0x05300000 0x0 0x2000000>; + no-map; + }; + linux,cma { compatible = "shared-dma-pool"; reusable; diff --git a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi index a350fee1264d7b1dfe2cf83d3a8a41bd16efebc3..a4d34398da358c054749e6599bb445dee01ab3e9 100644 --- a/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi +++ b/arch/arm64/boot/dts/amlogic/meson-gxbb-wetek.dtsi @@ -6,6 +6,7 @@ */ #include "meson-gxbb.dtsi" +#include / { aliases { @@ -64,6 +65,7 @@ regulator-name = "VDDIO_AO18"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; + regulator-always-on; }; vcc_3v3: regulator-vcc_3v3 { @@ -161,6 +163,7 @@ status = "okay"; pinctrl-0 = <&hdmi_hpd_pins>, <&hdmi_i2c_pins>; pinctrl-names = "default"; + hdmi-supply = <&vddio_ao18>; }; &hdmi_tx_tmds_port { diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts index 5ab139a34c0186dd522b063000ef7f90e315a9c5..c21178e9c6064fc49a67219878ccfbac8004ee7e 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-sei610.dts @@ -203,14 +203,6 @@ regulator-always-on; }; - reserved-memory { - /* TEE Reserved Memory */ - bl32_reserved: bl32@5000000 { - reg = <0x0 0x05300000 0x0 0x2000000>; - no-map; - }; - }; - sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio GPIOX_6 GPIO_ACTIVE_LOW>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts index 13cdc958ba3ea2182a60cfad0e29be68bb0fb567..71858c9376c25d87330e236b089c85a4c294e408 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a-qds.dts @@ -261,11 +261,6 @@ vcc-supply = <&sb_3v3>; }; - rtc@51 { - compatible = "nxp,pcf2129"; - reg = <0x51>; - }; - eeprom@56 { compatible = "atmel,24c512"; reg = <0x56>; @@ -307,6 +302,15 @@ }; +&i2c1 { + status = "okay"; + + rtc@51 { + compatible = "nxp,pcf2129"; + reg = <0x51>; + }; +}; + &enetc_port1 { phy-handle = <&qds_phy1>; phy-connection-type = "rgmii-id"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 5f42904d53ab6d420d474270d54d5367c5ee6a59..580690057601c546b030cdb33a447f30b464edcc 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@ -386,6 +386,24 @@ status = "disabled"; }; + can0: can@2180000 { + compatible = "fsl,ls1028ar1-flexcan", "fsl,lx2160ar1-flexcan"; + reg = <0x0 0x2180000 0x0 0x10000>; + interrupts = ; + clocks = <&sysclk>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + + can1: can@2190000 { + compatible = "fsl,ls1028ar1-flexcan", "fsl,lx2160ar1-flexcan"; + reg = <0x0 0x2190000 0x0 0x10000>; + interrupts = ; + clocks = <&sysclk>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + status = "disabled"; + }; + duart0: serial@21c0500 { compatible = "fsl,ns16550", "ns16550a"; reg = <0x00 0x21c0500 0x0 0x100>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi index 692d8f4a206da914323292b51dc9c1f94ed99486..334af263d7b5d954b334b2208e1a859b4ccef60c 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1088a.dtsi @@ -673,56 +673,56 @@ }; cluster1_core0_watchdog: wdt@c000000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc000000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core1_watchdog: wdt@c010000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc010000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core2_watchdog: wdt@c020000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc020000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core3_watchdog: wdt@c030000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc030000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core0_watchdog: wdt@c100000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc100000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core1_watchdog: wdt@c110000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc110000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core2_watchdog: wdt@c120000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc120000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core3_watchdog: wdt@c130000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc130000 0x0 0x1000>; clocks = <&clockgen 4 15>, <&clockgen 4 15>; clock-names = "wdog_clk", "apb_pclk"; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi index 4d34d82b898a42ff24224f2b4b1353ca143574cd..eb6641a3566e15ddbd34c4638334fad6bd5d6f40 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls208xa.dtsi @@ -351,56 +351,56 @@ }; cluster1_core0_watchdog: wdt@c000000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc000000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; clock-names = "wdog_clk", "apb_pclk"; }; cluster1_core1_watchdog: wdt@c010000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc010000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core0_watchdog: wdt@c100000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc100000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; clock-names = "wdog_clk", "apb_pclk"; }; cluster2_core1_watchdog: wdt@c110000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc110000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; clock-names = "wdog_clk", "apb_pclk"; }; cluster3_core0_watchdog: wdt@c200000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc200000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; clock-names = "wdog_clk", "apb_pclk"; }; cluster3_core1_watchdog: wdt@c210000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc210000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; clock-names = "wdog_clk", "apb_pclk"; }; cluster4_core0_watchdog: wdt@c300000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc300000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; clock-names = "wdog_clk", "apb_pclk"; }; cluster4_core1_watchdog: wdt@c310000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xc310000 0x0 0x1000>; clocks = <&clockgen 4 3>, <&clockgen 4 3>; clock-names = "wdog_clk", "apb_pclk"; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index 05ee062548e4fc2aca21e654aedf1ad7a89d9fce..f4d7bb75707df286168bf0a1c5a004f7d15bf961 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -866,11 +866,12 @@ assigned-clocks = <&clk IMX8MM_CLK_ENET_AXI>, <&clk IMX8MM_CLK_ENET_TIMER>, <&clk IMX8MM_CLK_ENET_REF>, - <&clk IMX8MM_CLK_ENET_TIMER>; + <&clk IMX8MM_CLK_ENET_PHY_REF>; assigned-clock-parents = <&clk IMX8MM_SYS_PLL1_266M>, <&clk IMX8MM_SYS_PLL2_100M>, - <&clk IMX8MM_SYS_PLL2_125M>; - assigned-clock-rates = <0>, <0>, <125000000>, <100000000>; + <&clk IMX8MM_SYS_PLL2_125M>, + <&clk IMX8MM_SYS_PLL2_50M>; + assigned-clock-rates = <0>, <100000000>, <125000000>, <0>; fsl,num-tx-queues = <3>; fsl,num-rx-queues = <3>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 16c7202885d7094bdc03f0a07188a3a7e2d17c54..aea723eb2ba3f42b72b0f49f22aa545d30f6b3ac 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -753,11 +753,12 @@ assigned-clocks = <&clk IMX8MN_CLK_ENET_AXI>, <&clk IMX8MN_CLK_ENET_TIMER>, <&clk IMX8MN_CLK_ENET_REF>, - <&clk IMX8MN_CLK_ENET_TIMER>; + <&clk IMX8MN_CLK_ENET_PHY_REF>; assigned-clock-parents = <&clk IMX8MN_SYS_PLL1_266M>, <&clk IMX8MN_SYS_PLL2_100M>, - <&clk IMX8MN_SYS_PLL2_125M>; - assigned-clock-rates = <0>, <0>, <125000000>, <100000000>; + <&clk IMX8MN_SYS_PLL2_125M>, + <&clk IMX8MN_SYS_PLL2_50M>; + assigned-clock-rates = <0>, <100000000>, <125000000>, <0>; fsl,num-tx-queues = <3>; fsl,num-rx-queues = <3>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts index ad66f1286d95c0d5f67f644ef3ca82d32714d455..c13b4a02d12f855c744482f26b13393b687255b6 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-evk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-evk.dts @@ -62,6 +62,8 @@ reg = <1>; eee-broken-1000t; reset-gpios = <&gpio4 2 GPIO_ACTIVE_LOW>; + reset-assert-us = <10000>; + reset-deassert-us = <80000>; }; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index 03ef0e5f909e478649f70638168b05bc29555821..acee71ca32d83ebdbbe9685f689661db368f6e52 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -725,11 +725,12 @@ assigned-clocks = <&clk IMX8MP_CLK_ENET_AXI>, <&clk IMX8MP_CLK_ENET_TIMER>, <&clk IMX8MP_CLK_ENET_REF>, - <&clk IMX8MP_CLK_ENET_TIMER>; + <&clk IMX8MP_CLK_ENET_PHY_REF>; assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_266M>, <&clk IMX8MP_SYS_PLL2_100M>, - <&clk IMX8MP_SYS_PLL2_125M>; - assigned-clock-rates = <0>, <0>, <125000000>, <100000000>; + <&clk IMX8MP_SYS_PLL2_125M>, + <&clk IMX8MP_SYS_PLL2_50M>; + assigned-clock-rates = <0>, <100000000>, <125000000>, <0>; fsl,num-tx-queues = <3>; fsl,num-rx-queues = <3>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index c86cf786f4061f0cc4df9b3660f79cb4b2851b4c..8d0d41973ff54288b91d0f8841129787e0acbc6c 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -524,7 +524,7 @@ assigned-clock-rates = <0>, <0>, <0>, <594000000>; status = "disabled"; - port@0 { + port { lcdif_mipi_dsi: endpoint { remote-endpoint = <&mipi_dsi_lcdif_in>; }; diff --git a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi index 710bff64c4759af32c362e7b1e68cfee2ad660d9..edbcf3c46fc8ae315baeb251f5b3b77ec88a0681 100644 --- a/arch/arm64/boot/dts/hisilicon/hi3660.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi3660.dtsi @@ -1086,7 +1086,7 @@ }; watchdog0: watchdog@e8a06000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xe8a06000 0x0 0x1000>; interrupts = ; clocks = <&crg_ctrl HI3660_OSC32K>, @@ -1095,7 +1095,7 @@ }; watchdog1: watchdog@e8a07000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xe8a07000 0x0 0x1000>; interrupts = ; clocks = <&crg_ctrl HI3660_OSC32K>, diff --git a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi index 014735a9bc7312d35b4d8303ea70d369ae4140a9..fbce014bdc2709fbbdbb508a73a4a78bb23759c8 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220.dtsi +++ b/arch/arm64/boot/dts/hisilicon/hi6220.dtsi @@ -840,7 +840,7 @@ }; watchdog0: watchdog@f8005000 { - compatible = "arm,sp805-wdt", "arm,primecell"; + compatible = "arm,sp805", "arm,primecell"; reg = <0x0 0xf8005000 0x0 0x1000>; interrupts = ; clocks = <&ao_ctrl HI6220_WDT0_PCLK>, diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index 07c099b4ed5b565f3832c24d4240eef8a9414bf0..1e0c9415bfcd044507d9ced5e0ff4eeea2fea6f0 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -476,7 +476,7 @@ }; usb0: usb@ffb00000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb00000 0x40000>; interrupts = <0 93 4>; phys = <&usbphy0>; @@ -489,7 +489,7 @@ }; usb1: usb@ffb40000 { - compatible = "snps,dwc2"; + compatible = "intel,socfpga-agilex-hsotg", "snps,dwc2"; reg = <0xffb40000 0x40000>; interrupts = <0 94 4>; phys = <&usbphy0>; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index 2e437f20da39b72d638e9da6d3a7eff9e6540dc3..00e5dbf4b82363094e30a387f082ae6b739c440f 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -18,6 +18,7 @@ aliases { spi0 = &spi0; + ethernet0 = ð0; ethernet1 = ð1; mmc0 = &sdhci0; mmc1 = &sdhci1; @@ -137,7 +138,9 @@ /* * U-Boot port for Turris Mox has a bug which always expects that "ranges" DT property * contains exactly 2 ranges with 3 (child) address cells, 2 (parent) address cells and - * 2 size cells and also expects that the second range starts at 16 MB offset. If these + * 2 size cells and also expects that the second range starts at 16 MB offset. Also it + * expects that first range uses same address for PCI (child) and CPU (parent) cells (so + * no remapping) and that this address is the lowest from all specified ranges. If these * conditions are not met then U-Boot crashes during loading kernel DTB file. PCIe address * space is 128 MB long, so the best split between MEM and IO is to use fixed 16 MB window * for IO and the rest 112 MB (64+32+16) for MEM, despite that maximal IO size is just 64 kB. @@ -146,6 +149,9 @@ * https://source.denx.de/u-boot/u-boot/-/commit/cb2ddb291ee6fcbddd6d8f4ff49089dfe580f5d7 * https://source.denx.de/u-boot/u-boot/-/commit/c64ac3b3185aeb3846297ad7391fc6df8ecd73bf * https://source.denx.de/u-boot/u-boot/-/commit/4a82fca8e330157081fc132a591ebd99ba02ee33 + * Bug related to requirement of same child and parent addresses for first range is fixed + * in U-Boot version 2022.04 by following commit: + * https://source.denx.de/u-boot/u-boot/-/commit/1fd54253bca7d43d046bba4853fe5fafd034bc17 */ #address-cells = <3>; #size-cells = <2>; diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi index 2a2015a153627c7e05b976597204b4e694a2fd77..0f4bcd15d8580a2ad0d42f0ca1d05c0a83cbf393 100644 --- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi @@ -495,7 +495,7 @@ * (totaling 127 MiB) for MEM. */ ranges = <0x82000000 0 0xe8000000 0 0xe8000000 0 0x07f00000 /* Port 0 MEM */ - 0x81000000 0 0xefff0000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */ + 0x81000000 0 0x00000000 0 0xefff0000 0 0x00010000>; /* Port 0 IO */ interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &pcie_intc 0>, <0 0 0 2 &pcie_intc 1>, diff --git a/arch/arm64/boot/dts/marvell/cn9130.dtsi b/arch/arm64/boot/dts/marvell/cn9130.dtsi index a2b7e5ec979d325d935a42bdda4caa13dc565f09..327b04134134ff1b0dd19ce3cdff2b42f96beddb 100644 --- a/arch/arm64/boot/dts/marvell/cn9130.dtsi +++ b/arch/arm64/boot/dts/marvell/cn9130.dtsi @@ -11,6 +11,13 @@ model = "Marvell Armada CN9130 SoC"; compatible = "marvell,cn9130", "marvell,armada-ap807-quad", "marvell,armada-ap807"; + + aliases { + gpio1 = &cp0_gpio1; + gpio2 = &cp0_gpio2; + spi1 = &cp0_spi0; + spi2 = &cp0_spi1; + }; }; /* @@ -35,3 +42,11 @@ #undef CP11X_PCIE0_BASE #undef CP11X_PCIE1_BASE #undef CP11X_PCIE2_BASE + +&cp0_gpio1 { + status = "okay"; +}; + +&cp0_gpio2 { + status = "okay"; +}; diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi index 0c46ab7bbbf376eecadb9f25b1b74abdd7ed2097..eec6418ecdb1a3cb6fecaec1cae7e4a88dbd9d54 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi @@ -985,7 +985,7 @@ ccplex@e000000 { compatible = "nvidia,tegra186-ccplex-cluster"; - reg = <0x0 0x0e000000 0x0 0x3fffff>; + reg = <0x0 0x0e000000 0x0 0x400000>; nvidia,bpmp = <&bpmp>; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index 9b5007e5f790fd802eefc6e15c7643cc423552a6..05cf606b85c9fd8907e851a718cc6e0e635cc78d 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -782,13 +782,12 @@ reg = <0x3510000 0x10000>; interrupts = ; clocks = <&bpmp TEGRA194_CLK_HDA>, - <&bpmp TEGRA194_CLK_HDA2CODEC_2X>, - <&bpmp TEGRA194_CLK_HDA2HDMICODEC>; - clock-names = "hda", "hda2codec_2x", "hda2hdmi"; + <&bpmp TEGRA194_CLK_HDA2HDMICODEC>, + <&bpmp TEGRA194_CLK_HDA2CODEC_2X>; + clock-names = "hda", "hda2hdmi", "hda2codec_2x"; resets = <&bpmp TEGRA194_RESET_HDA>, - <&bpmp TEGRA194_RESET_HDA2CODEC_2X>, <&bpmp TEGRA194_RESET_HDA2HDMICODEC>; - reset-names = "hda", "hda2codec_2x", "hda2hdmi"; + reset-names = "hda", "hda2hdmi"; power-domains = <&bpmp TEGRA194_POWER_DOMAIN_DISP>; interconnects = <&mc TEGRA194_MEMORY_CLIENT_HDAR &emc>, <&mc TEGRA194_MEMORY_CLIENT_HDAW &emc>; diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index 3ceb36cac512ff46a9bfeee118eef8fae3918752..2a1f03cdb52c77bb1bb9d9aa9850dde1c27b0227 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -200,7 +200,7 @@ clock-names = "bam_clk"; #dma-cells = <1>; qcom,ee = <1>; - qcom,controlled-remotely = <1>; + qcom,controlled-remotely; qcom,config-pipe-trust-reg = <0>; }; @@ -221,7 +221,7 @@ interrupts = ; gpio-controller; #gpio-cells = <2>; - gpio-ranges = <&tlmm 0 80>; + gpio-ranges = <&tlmm 0 0 80>; interrupt-controller; #interrupt-cells = <2>; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 776a6b0f61a622ba9a30797a702299696c3d5208..89e5ee6b78cdf737feb94c1a0d6b28c75d6fd137 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -433,7 +433,6 @@ interrupts = ; phys = <&qusb_phy_0>, <&usb0_ssphy>; phy-names = "usb2-phy", "usb3-phy"; - tx-fifo-resize; snps,is-utmi-l1-suspend; snps,hird-threshold = /bits/ 8 <0x0>; snps,dis_u2_susphy_quirk; @@ -474,7 +473,6 @@ interrupts = ; phys = <&qusb_phy_1>, <&usb1_ssphy>; phy-names = "usb2-phy", "usb3-phy"; - tx-fifo-resize; snps,is-utmi-l1-suspend; snps,hird-threshold = /bits/ 8 <0x0>; snps,dis_u2_susphy_quirk; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 0e34ed48b9fae48c9ff476a51bd65311788a83d5..291276a38d7cd77df5609b89c6134af5ff9a3beb 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -18,8 +18,8 @@ #size-cells = <2>; aliases { - sdhc1 = &sdhc_1; /* SDC1 eMMC slot */ - sdhc2 = &sdhc_2; /* SDC2 SD card slot */ + mmc0 = &sdhc_1; /* SDC1 eMMC slot */ + mmc1 = &sdhc_2; /* SDC2 SD card slot */ }; chosen { }; @@ -1322,11 +1322,17 @@ lpass: audio-controller@7708000 { status = "disabled"; compatible = "qcom,lpass-cpu-apq8016"; + + /* + * Note: Unlike the name would suggest, the SEC_I2S_CLK + * is actually only used by Tertiary MI2S while + * Primary/Secondary MI2S both use the PRI_I2S_CLK. + */ clocks = <&gcc GCC_ULTAUDIO_AHBFABRIC_IXFABRIC_CLK>, <&gcc GCC_ULTAUDIO_PCNOC_MPORT_CLK>, <&gcc GCC_ULTAUDIO_PCNOC_SWAY_CLK>, <&gcc GCC_ULTAUDIO_LPAIF_PRI_I2S_CLK>, - <&gcc GCC_ULTAUDIO_LPAIF_SEC_I2S_CLK>, + <&gcc GCC_ULTAUDIO_LPAIF_PRI_I2S_CLK>, <&gcc GCC_ULTAUDIO_LPAIF_SEC_I2S_CLK>, <&gcc GCC_ULTAUDIO_LPAIF_AUX_I2S_CLK>; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index eef17434d12ae624e42fb020807a22590bd1be31..ef5d03a1506930262c2d6308b255f56a10e2e834 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -645,9 +645,6 @@ nvmem-cells = <&gpu_speed_bin>; nvmem-cell-names = "speed_bin"; - qcom,gpu-quirk-two-pass-use-wfi; - qcom,gpu-quirk-fault-detect-mask; - operating-points-v2 = <&gpu_opp_table>; gpu_opp_table: opp-table { diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi index c45870600909f3cd676740a409aa0f4015e69a0e..9e04ac3f596d01b97a0e773694e8fcbebb9c98f0 100644 --- a/arch/arm64/boot/dts/qcom/msm8998.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi @@ -300,38 +300,42 @@ LITTLE_CPU_SLEEP_0: cpu-sleep-0-0 { compatible = "arm,idle-state"; idle-state-name = "little-retention"; + /* CPU Retention (C2D), L2 Active */ arm,psci-suspend-param = <0x00000002>; entry-latency-us = <81>; exit-latency-us = <86>; - min-residency-us = <200>; + min-residency-us = <504>; }; LITTLE_CPU_SLEEP_1: cpu-sleep-0-1 { compatible = "arm,idle-state"; idle-state-name = "little-power-collapse"; + /* CPU + L2 Power Collapse (C3, D4) */ arm,psci-suspend-param = <0x40000003>; - entry-latency-us = <273>; - exit-latency-us = <612>; - min-residency-us = <1000>; + entry-latency-us = <814>; + exit-latency-us = <4562>; + min-residency-us = <9183>; local-timer-stop; }; BIG_CPU_SLEEP_0: cpu-sleep-1-0 { compatible = "arm,idle-state"; idle-state-name = "big-retention"; + /* CPU Retention (C2D), L2 Active */ arm,psci-suspend-param = <0x00000002>; entry-latency-us = <79>; exit-latency-us = <82>; - min-residency-us = <200>; + min-residency-us = <1302>; }; BIG_CPU_SLEEP_1: cpu-sleep-1-1 { compatible = "arm,idle-state"; idle-state-name = "big-power-collapse"; + /* CPU + L2 Power Collapse (C3, D4) */ arm,psci-suspend-param = <0x40000003>; - entry-latency-us = <336>; - exit-latency-us = <525>; - min-residency-us = <1000>; + entry-latency-us = <724>; + exit-latency-us = <2027>; + min-residency-us = <9419>; local-timer-stop; }; }; diff --git a/arch/arm64/boot/dts/qcom/pm8150.dtsi b/arch/arm64/boot/dts/qcom/pm8150.dtsi index 1b6406927509f398168e9c923f3c7dd49c0d9bcb..82edcd74ce983619b0e3f84491c4a21c0c53b945 100644 --- a/arch/arm64/boot/dts/qcom/pm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/pm8150.dtsi @@ -48,7 +48,7 @@ #size-cells = <0>; pon: power-on@800 { - compatible = "qcom,pm8916-pon"; + compatible = "qcom,pm8998-pon"; reg = <0x0800>; pwrkey { compatible = "qcom,pm8941-pwrkey"; diff --git a/arch/arm64/boot/dts/qcom/pm8916.dtsi b/arch/arm64/boot/dts/qcom/pm8916.dtsi index f931cb0de231f932659e007d40054ff30295d574..42180f1b5dbbb7b9f64d7d19ee14dcb3a3c7041b 100644 --- a/arch/arm64/boot/dts/qcom/pm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/pm8916.dtsi @@ -86,7 +86,6 @@ rtc@6000 { compatible = "qcom,pm8941-rtc"; reg = <0x6000>; - reg-names = "rtc", "alarm"; interrupts = <0x0 0x61 0x1 IRQ_TYPE_EDGE_RISING>; }; diff --git a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts index ad6561843ba28489effeb34e75aecabbc4f91d9e..e080c317b5e3dd52090daaae94f2e9ae866ec6ca 100644 --- a/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts +++ b/arch/arm64/boot/dts/qcom/sdm850-lenovo-yoga-c630.dts @@ -365,6 +365,10 @@ dai@1 { reg = <1>; }; + + dai@2 { + reg = <2>; + }; }; &sound { @@ -377,6 +381,7 @@ "SpkrLeft IN", "SPK1 OUT", "SpkrRight IN", "SPK2 OUT", "MM_DL1", "MultiMedia1 Playback", + "MM_DL3", "MultiMedia3 Playback", "MultiMedia2 Capture", "MM_UL2"; mm1-dai-link { @@ -393,6 +398,13 @@ }; }; + mm3-dai-link { + link-name = "MultiMedia3"; + cpu { + sound-dai = <&q6asmdai MSM_FRONTEND_DAI_MULTIMEDIA3>; + }; + }; + slim-dai-link { link-name = "SLIM Playback"; cpu { @@ -422,6 +434,21 @@ sound-dai = <&wcd9340 1>; }; }; + + slim-wcd-dai-link { + link-name = "SLIM WCD Playback"; + cpu { + sound-dai = <&q6afedai SLIMBUS_1_RX>; + }; + + platform { + sound-dai = <&q6routing>; + }; + + codec { + sound-dai = <&wcd9340 2>; + }; + }; }; &tlmm { diff --git a/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi b/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi index 3c73dfc430afccab0aa79fa9a9236e19cb3c7f69..929c7910c68df9d3c1d027cb283aabcddeb7d8a6 100644 --- a/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi +++ b/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi @@ -54,6 +54,7 @@ &avb { pinctrl-0 = <&avb_pins>; pinctrl-names = "default"; + phy-mode = "rgmii-rxid"; phy-handle = <&phy0>; rx-internal-delay-ps = <1800>; tx-internal-delay-ps = <2000>; diff --git a/arch/arm64/boot/dts/renesas/cat875.dtsi b/arch/arm64/boot/dts/renesas/cat875.dtsi index 801ea54b027c43d96a78ea29c3e7589a0ff15419..20f8adc635e72aa0ea0e20ed0a009120b9660003 100644 --- a/arch/arm64/boot/dts/renesas/cat875.dtsi +++ b/arch/arm64/boot/dts/renesas/cat875.dtsi @@ -18,6 +18,7 @@ pinctrl-names = "default"; renesas,no-ether-link; phy-handle = <&phy0>; + phy-mode = "rgmii-id"; status = "okay"; phy0: ethernet-phy@0 { diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index bce6f8b7db43629fcb5306c1584ada9c541562c4..fbcb9531cc70d551cf2355c376193004bec6e3c6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -91,7 +91,7 @@ regulator-max-microvolt = <3300000>; regulator-always-on; regulator-boot-on; - vim-supply = <&vcc_io>; + vin-supply = <&vcc_io>; }; vdd_core: vdd-core { diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index e546c9d1d64637573250fe7d54eba3c486f02284..72112fe05a5c40bb3a405aa1f254fadeea6e3a74 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -603,7 +603,7 @@ gpu: gpu@ff300000 { compatible = "rockchip,rk3328-mali", "arm,mali-450"; - reg = <0x0 0xff300000 0x0 0x40000>; + reg = <0x0 0xff300000 0x0 0x30000>; interrupts = , , , diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 765b24a2bcbf06d217a9400cca4e7df51c732088..fb0a13cad6c93d8d8dd1033e1efe5d6d0491bdc6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -281,7 +281,7 @@ sound: sound { compatible = "rockchip,rk3399-gru-sound"; - rockchip,cpu = <&i2s0 &i2s2>; + rockchip,cpu = <&i2s0 &spdif>; }; }; @@ -432,10 +432,6 @@ ap_i2c_audio: &i2c8 { status = "okay"; }; -&i2s2 { - status = "okay"; -}; - &io_domains { status = "okay"; @@ -532,6 +528,17 @@ ap_i2c_audio: &i2c8 { vqmmc-supply = <&ppvar_sd_card_io>; }; +&spdif { + status = "okay"; + + /* + * SPDIF is routed internally to DP; we either don't use these pins, or + * mux them to something else. + */ + /delete-property/ pinctrl-0; + /delete-property/ pinctrl-names; +}; + &spi1 { status = "okay"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi index 635afdd99122f6e8883980fbad2045ebc015b793..2c644ac1f84b9996b733e02a783b20d651e73062 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-khadas-edge.dtsi @@ -699,7 +699,6 @@ &sdhci { bus-width = <8>; mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; non-removable; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts index 1fa80ac15464bd30a90b930b4a8c604d48d54cad..88984b5e67b6ed85d8c4d4e24adee69c80e57384 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-leez-p710.dts @@ -49,7 +49,7 @@ regulator-boot-on; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - vim-supply = <&vcc3v3_sys>; + vin-supply = <&vcc3v3_sys>; }; vcc3v3_sys: vcc3v3-sys { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 219b7507a10fb7732f1a1cbc56cc52c07ef0893b..4297c1db5a413eaabc2578794a21c91247a3b5a6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -379,10 +379,6 @@ }; }; -&cdn_dp { - status = "okay"; -}; - &cpu_b0 { cpu-supply = <&vdd_cpu_b>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi index 4660416c8f382aa97b485023e8682fb8725b00a3..544110aaffc569b183a61820a7fcd895f4a29a46 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma.dtsi @@ -472,6 +472,12 @@ }; &sdhci { + /* + * Signal integrity isn't great at 200MHz but 100MHz has proven stable + * enough. + */ + max-frequency = <100000000>; + bus-width = <8>; mmc-hs400-1_8v; mmc-hs400-enhanced-strobe; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 678a336010bf8e0b47d813ed596a3ddf1849ea54..f121203081b97c9a59b52644107ddaabe52a030d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -459,7 +459,7 @@ status = "okay"; bt656-supply = <&vcc_3v0>; - audio-supply = <&vcc_3v0>; + audio-supply = <&vcc1v8_codec>; sdmmc-supply = <&vcc_sdio>; gpio1830-supply = <&vcc_3v0>; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index 4b6065dbba55eacb162b730895db83abdf8cbab0..52ba4d07e77123bb5fbcd257e32ed81ee10e0dbf 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1770,10 +1770,10 @@ interrupts = ; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_SFR>, - <&cru PLL_VPLL>, + <&cru SCLK_HDMI_CEC>, <&cru PCLK_VIO_GRF>, - <&cru SCLK_HDMI_CEC>; - clock-names = "iahb", "isfr", "vpll", "grf", "cec"; + <&cru PLL_VPLL>; + clock-names = "iahb", "isfr", "cec", "grf", "vpll"; power-domains = <&power RK3399_PD_HDCP>; reg-io-width = <4>; rockchip,grf = <&grf>; diff --git a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi index 5832ad830ed149c06c371bc3f39734e2a76e5905..1ab9f9604af6c569788745ccd829df182407f2d5 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200-main.dtsi @@ -25,7 +25,7 @@ #size-cells = <1>; ranges = <0x00 0x00 0x00100000 0x1c000>; - serdes_ln_ctrl: serdes-ln-ctrl@4080 { + serdes_ln_ctrl: mux-controller@4080 { compatible = "mmio-mux"; #mux-control-cells = <1>; mux-reg-masks = <0x4080 0x3>, <0x4084 0x3>, /* SERDES0 lane0/1 select */ diff --git a/arch/arm64/boot/dts/ti/k3-j7200.dtsi b/arch/arm64/boot/dts/ti/k3-j7200.dtsi index 66169bcf7c9a408e827dd21ffd51fe1617bd77d7..03a9623f0f9562e902184cc68490a27b175aac81 100644 --- a/arch/arm64/boot/dts/ti/k3-j7200.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j7200.dtsi @@ -60,7 +60,7 @@ i-cache-sets = <256>; d-cache-size = <0x8000>; d-cache-line-size = <64>; - d-cache-sets = <128>; + d-cache-sets = <256>; next-level-cache = <&L2_0>; }; @@ -74,7 +74,7 @@ i-cache-sets = <256>; d-cache-size = <0x8000>; d-cache-line-size = <64>; - d-cache-sets = <128>; + d-cache-sets = <256>; next-level-cache = <&L2_0>; }; }; @@ -84,7 +84,7 @@ cache-level = <2>; cache-size = <0x100000>; cache-line-size = <64>; - cache-sets = <2048>; + cache-sets = <1024>; next-level-cache = <&msmc_l3>; }; diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi index 6ffdebd6012231e58050118bd3bac7852e52096e..85526f72b4616c888fc953bc21d4b7f25dc0c48d 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi @@ -629,7 +629,7 @@ clock-names = "fck"; #address-cells = <3>; #size-cells = <2>; - bus-range = <0x0 0xf>; + bus-range = <0x0 0xff>; vendor-id = <0x104c>; device-id = <0xb00d>; msi-map = <0x0 &gic_its 0x0 0x10000>; @@ -656,7 +656,7 @@ clock-names = "fck"; cdns,max-outbound-regions = <16>; max-functions = /bits/ 8 <6>; - max-virtual-functions = /bits/ 16 <4 4 4 4 0 0>; + max-virtual-functions = /bits/ 8 <4 4 4 4 0 0>; dma-coherent; }; @@ -678,7 +678,7 @@ clock-names = "fck"; #address-cells = <3>; #size-cells = <2>; - bus-range = <0x0 0xf>; + bus-range = <0x0 0xff>; vendor-id = <0x104c>; device-id = <0xb00d>; msi-map = <0x0 &gic_its 0x10000 0x10000>; @@ -705,7 +705,7 @@ clock-names = "fck"; cdns,max-outbound-regions = <16>; max-functions = /bits/ 8 <6>; - max-virtual-functions = /bits/ 16 <4 4 4 4 0 0>; + max-virtual-functions = /bits/ 8 <4 4 4 4 0 0>; dma-coherent; }; @@ -727,7 +727,7 @@ clock-names = "fck"; #address-cells = <3>; #size-cells = <2>; - bus-range = <0x0 0xf>; + bus-range = <0x0 0xff>; vendor-id = <0x104c>; device-id = <0xb00d>; msi-map = <0x0 &gic_its 0x20000 0x10000>; @@ -754,7 +754,7 @@ clock-names = "fck"; cdns,max-outbound-regions = <16>; max-functions = /bits/ 8 <6>; - max-virtual-functions = /bits/ 16 <4 4 4 4 0 0>; + max-virtual-functions = /bits/ 8 <4 4 4 4 0 0>; dma-coherent; }; @@ -776,7 +776,7 @@ clock-names = "fck"; #address-cells = <3>; #size-cells = <2>; - bus-range = <0x0 0xf>; + bus-range = <0x0 0xff>; vendor-id = <0x104c>; device-id = <0xb00d>; msi-map = <0x0 &gic_its 0x30000 0x10000>; @@ -803,7 +803,7 @@ clock-names = "fck"; cdns,max-outbound-regions = <16>; max-functions = /bits/ 8 <6>; - max-virtual-functions = /bits/ 16 <4 4 4 4 0 0>; + max-virtual-functions = /bits/ 8 <4 4 4 4 0 0>; dma-coherent; #address-cells = <2>; #size-cells = <2>; diff --git a/arch/arm64/boot/dts/ti/k3-j721e.dtsi b/arch/arm64/boot/dts/ti/k3-j721e.dtsi index cc483f7344af3132aaa0059eb3e5b6c38ea19a5a..a199227327ed299907f84fb925d9ca52832d7724 100644 --- a/arch/arm64/boot/dts/ti/k3-j721e.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j721e.dtsi @@ -61,7 +61,7 @@ i-cache-sets = <256>; d-cache-size = <0x8000>; d-cache-line-size = <64>; - d-cache-sets = <128>; + d-cache-sets = <256>; next-level-cache = <&L2_0>; }; @@ -75,7 +75,7 @@ i-cache-sets = <256>; d-cache-size = <0x8000>; d-cache-line-size = <64>; - d-cache-sets = <128>; + d-cache-sets = <256>; next-level-cache = <&L2_0>; }; }; @@ -85,7 +85,7 @@ cache-level = <2>; cache-size = <0x100000>; cache-line-size = <64>; - cache-sets = <2048>; + cache-sets = <1024>; next-level-cache = <&msmc_l3>; }; diff --git a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts index 4a86efa32d687f186f6a7936457623b18614bcd9..f7124e15f0ff6deedf4d7fd0123354d421d504e5 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts +++ b/arch/arm64/boot/dts/xilinx/zynqmp-zc1751-xm016-dc2.dts @@ -131,7 +131,7 @@ reg = <0>; partition@0 { - label = "data"; + label = "spi0-data"; reg = <0x0 0x100000>; }; }; @@ -149,7 +149,7 @@ reg = <0>; partition@0 { - label = "data"; + label = "spi1-data"; reg = <0x0 0x84000>; }; }; diff --git a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi index 771f60e0346d0941f8d2d4f9515ccd5b0a9fa3a5..9e198cacc37ddbb4e7b6fa8bd04c51d87068ea85 100644 --- a/arch/arm64/boot/dts/xilinx/zynqmp.dtsi +++ b/arch/arm64/boot/dts/xilinx/zynqmp.dtsi @@ -688,7 +688,7 @@ }; uart0: serial@ff000000 { - compatible = "cdns,uart-r1p12", "xlnx,xuartps"; + compatible = "xlnx,zynqmp-uart", "cdns,uart-r1p12"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 21 4>; @@ -698,7 +698,7 @@ }; uart1: serial@ff010000 { - compatible = "cdns,uart-r1p12", "xlnx,xuartps"; + compatible = "xlnx,zynqmp-uart", "cdns,uart-r1p12"; status = "disabled"; interrupt-parent = <&gic>; interrupts = <0 22 4>; diff --git a/arch/arm64/configs/db845c_gki.fragment b/arch/arm64/configs/db845c_gki.fragment index 5ba41f2d8ddcb64264156794a66b3dbafb38ec91..7260af26f4a36ada76372142c87d33de0d0f04b0 100644 --- a/arch/arm64/configs/db845c_gki.fragment +++ b/arch/arm64/configs/db845c_gki.fragment @@ -1,7 +1,6 @@ CONFIG_QRTR=m CONFIG_QRTR_TUN=m CONFIG_SCSI_UFS_QCOM=m -CONFIG_USB_NET_AX88179_178A=m CONFIG_INPUT_PM8941_PWRKEY=m CONFIG_SERIAL_MSM=m CONFIG_SERIAL_QCOM_GENI=m diff --git a/arch/arm64/configs/gki_defconfig b/arch/arm64/configs/gki_defconfig index 2163631885fe72f56f467aa7a66c1954ce1065e0..86f63c51de4d7ba8209f104c95f13f9914943dda 100644 --- a/arch/arm64/configs/gki_defconfig +++ b/arch/arm64/configs/gki_defconfig @@ -8,6 +8,7 @@ CONFIG_TASK_IO_ACCOUNTING=y CONFIG_PSI=y CONFIG_RCU_EXPERT=y CONFIG_RCU_FAST_NO_HZ=y +CONFIG_RCU_BOOST=y CONFIG_RCU_NOCB_CPU=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -87,6 +88,7 @@ CONFIG_ARM_SCPI_PROTOCOL=y # CONFIG_EFI_ARMSTUB_DTB_LOADER is not set CONFIG_VIRTUALIZATION=y CONFIG_KVM=y +CONFIG_KVM_S2MPU=y CONFIG_CRYPTO_SHA2_ARM64_CE=y CONFIG_CRYPTO_SHA512_ARM64_CE=y CONFIG_CRYPTO_AES_ARM64_CE_BLK=y @@ -99,7 +101,10 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SCMVERSION=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SIG_PROTECT=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BLK_INLINE_ENCRYPTION=y CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_IOSCHED_BFQ=y @@ -118,6 +123,11 @@ CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=16 CONFIG_READ_ONLY_THP_FOR_FS=y +CONFIG_ANON_VMA_NAME=y +CONFIG_LRU_GEN=y +CONFIG_DAMON=y +CONFIG_DAMON_PADDR=y +CONFIG_DAMON_RECLAIM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -225,6 +235,7 @@ CONFIG_IP6_NF_RAW=y CONFIG_TIPC=y CONFIG_L2TP=y CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=y CONFIG_6LOWPAN=y CONFIG_IEEE802154=y CONFIG_IEEE802154_6LOWPAN=y @@ -259,6 +270,7 @@ CONFIG_NET_ACT_POLICE=y CONFIG_NET_ACT_GACT=y CONFIG_NET_ACT_MIRRED=y CONFIG_NET_ACT_SKBEDIT=y +CONFIG_NET_ACT_BPF=y CONFIG_VSOCKETS=y CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y @@ -323,6 +335,7 @@ CONFIG_NETDEVICES=y CONFIG_DUMMY=y CONFIG_WIREGUARD=y CONFIG_IFB=y +CONFIG_MACSEC=y CONFIG_TUN=y CONFIG_VETH=y CONFIG_PPP=y @@ -334,7 +347,6 @@ CONFIG_PPPOL2TP=y CONFIG_USB_RTL8150=y CONFIG_USB_RTL8152=y CONFIG_USB_USBNET=y -# CONFIG_USB_NET_AX88179_178A is not set CONFIG_USB_NET_CDC_EEM=y # CONFIG_USB_NET_NET1080 is not set # CONFIG_USB_NET_CDC_SUBSET is not set @@ -372,6 +384,7 @@ CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y # CONFIG_SERIAL_8250_EXAR is not set +CONFIG_SERIAL_8250_RUNTIME_UARTS=0 CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_AMBA_PL011=y @@ -388,6 +401,7 @@ CONFIG_HW_RANDOM=y # CONFIG_HW_RANDOM_CAVIUM is not set # CONFIG_DEVMEM is not set # CONFIG_DEVPORT is not set +CONFIG_RANDOM_TRUST_CPU=y # CONFIG_I2C_COMPAT is not set # CONFIG_I2C_HELPER_AUTO is not set CONFIG_I3C=y @@ -491,7 +505,6 @@ CONFIG_USB_CONFIGFS_SERIAL=y CONFIG_USB_CONFIGFS_ACM=y CONFIG_USB_CONFIGFS_NCM=y CONFIG_USB_CONFIGFS_ECM=y -CONFIG_USB_CONFIGFS_RNDIS=y CONFIG_USB_CONFIGFS_EEM=y CONFIG_USB_CONFIGFS_MASS_STORAGE=y CONFIG_USB_CONFIGFS_F_FS=y @@ -566,6 +579,7 @@ CONFIG_EXT4_FS_SECURITY=y CONFIG_F2FS_FS=y CONFIG_F2FS_FS_SECURITY=y CONFIG_F2FS_FS_COMPRESSION=y +CONFIG_F2FS_UNFAIR_RWSEM=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y CONFIG_FS_VERITY=y @@ -575,6 +589,7 @@ CONFIG_QUOTA=y CONFIG_QFMT_V2=y CONFIG_FUSE_FS=y CONFIG_VIRTIO_FS=y +CONFIG_FUSE_BPF=y CONFIG_OVERLAY_FS=y CONFIG_INCREMENTAL_FS=y CONFIG_MSDOS_FS=y @@ -642,7 +657,6 @@ CONFIG_SECURITYFS=y CONFIG_SECURITY_NETWORK=y CONFIG_HARDENED_USERCOPY=y # CONFIG_HARDENED_USERCOPY_FALLBACK is not set -CONFIG_FORTIFY_SOURCE=y CONFIG_STATIC_USERMODEHELPER=y CONFIG_STATIC_USERMODEHELPER_PATH="" CONFIG_SECURITY_SELINUX=y @@ -666,6 +680,7 @@ CONFIG_PRINTK_CALLER=y CONFIG_DYNAMIC_DEBUG_CORE=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_BTF=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_HEADERS_INSTALL=y # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set @@ -684,7 +699,7 @@ CONFIG_KFENCE_SAMPLE_INTERVAL=500 CONFIG_KFENCE_NUM_OBJECTS=63 CONFIG_PANIC_ON_OOPS=y CONFIG_PANIC_TIMEOUT=-1 -CONFIG_DETECT_HUNG_TASK=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_WQ_WATCHDOG=y CONFIG_SCHEDSTATS=y # CONFIG_DEBUG_PREEMPT is not set @@ -692,4 +707,5 @@ CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_TRACE_MMIO_ACCESS=y CONFIG_TRACEFS_DISABLE_AUTOMOUNT=y CONFIG_HIST_TRIGGERS=y +CONFIG_PID_IN_CONTEXTIDR=y # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/arm64/include/asm/asm-prototypes.h b/arch/arm64/include/asm/asm-prototypes.h index 1c9a3a0c5fa5fedee46d1c983832a4505098326f..ec1d9655f885019b3f553b155dd2ff4090b926e2 100644 --- a/arch/arm64/include/asm/asm-prototypes.h +++ b/arch/arm64/include/asm/asm-prototypes.h @@ -23,4 +23,10 @@ long long __ashlti3(long long a, int b); long long __ashrti3(long long a, int b); long long __lshrti3(long long a, int b); +/* + * This function uses a custom calling convention and cannot be called from C so + * this prototype is not entirely accurate. + */ +void __hwasan_tag_mismatch(unsigned long addr, unsigned long access_info); + #endif /* __ASM_PROTOTYPES_H */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 19150fc1c794294476ce5ca46461d1c6186a67dc..c51ce0dd57d0a1d984e8cab1e5f3f1f53e38a110 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -107,6 +107,13 @@ hint #20 .endm +/* + * Clear Branch History instruction + */ + .macro clearbhb + hint #22 + .endm + /* * Speculation barrier */ @@ -817,4 +824,50 @@ alternative_endif #endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */ + .macro __mitigate_spectre_bhb_loop tmp +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_loop_iter + mov \tmp, #32 // Patched to correct the immediate +alternative_cb_end +.Lspectre_bhb_loop\@: + b . + 4 + subs \tmp, \tmp, #1 + b.ne .Lspectre_bhb_loop\@ + sb +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + .macro mitigate_spectre_bhb_loop tmp +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_loop_mitigation_enable + b .L_spectre_bhb_loop_done\@ // Patched to NOP +alternative_cb_end + __mitigate_spectre_bhb_loop \tmp +.L_spectre_bhb_loop_done\@: +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + /* Save/restores x0-x3 to the stack */ + .macro __mitigate_spectre_bhb_fw +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 +alternative_cb smccc_patch_fw_mitigation_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + .macro mitigate_spectre_bhb_clear_insn +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +alternative_cb spectre_bhb_patch_clearbhb + /* Patched to NOP when not supported */ + clearbhb + isb +alternative_cb_end +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index bffa8860af1c13af3d824523215031982e737a99..e9eeeb14485b715fd9d76653b92d77034f7b8ce3 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -23,7 +23,7 @@ #define dsb(opt) asm volatile("dsb " #opt : : : "memory") #define psb_csync() asm volatile("hint #17" : : : "memory") -#define tsb_csync() asm volatile("hint #18" : : : "memory") +#define __tsb_csync() asm volatile("hint #18" : : : "memory") #define csdb() asm volatile("hint #20" : : : "memory") #define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \ @@ -50,6 +50,20 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) + +#define tsb_csync() \ + do { \ + /* \ + * CPUs affected by Arm Erratum 2054223 or 2067961 needs \ + * another TSB to ensure the trace is flushed. The barriers \ + * don't have to be strictly back to back, as long as the \ + * CPU is in trace prohibited state. \ + */ \ + if (cpus_have_final_cap(ARM64_WORKAROUND_TSB_FLUSH_FAILURE)) \ + __tsb_csync(); \ + __tsb_csync(); \ + } while (0) + /* * Generate a mask for array_index__nospec() that is ~0UL when 0 <= idx < sz * and 0 otherwise. diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index f4e01aa0f44244cba4376a8c4f2a949f082fde38..70dd1d6c873608d1655e0b865ce20d3911b064b9 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -50,6 +50,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64dfr1; u64 reg_id_aa64isar0; u64 reg_id_aa64isar1; + u64 reg_id_aa64isar2; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 10a2d914d4caa06b0f15f9e9b6ebe71dde47498c..c9cc0fb6fc0def6d9b58e2515a8262b97f760389 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -69,8 +69,11 @@ #define ARM64_WORKAROUND_1508412 58 #define ARM64_HAS_LDAPR 59 #define ARM64_KVM_PROTECTED_MODE 60 +#define ARM64_WORKAROUND_TSB_FLUSH_FAILURE 61 +#define ARM64_MTE_ASYMM 62 +#define ARM64_SPECTRE_BHB 63 -/* kabi: reserve 62 - 76 for future cpu capabilities */ +/* kabi: reserve 64 - 76 for future cpu capabilities */ #define ARM64_NCAPS 76 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b54bbd219f09668daf47378a82428bec51ba12cc..a36174096572a87220cb51b0d969a26ed0d41af9 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -630,6 +630,35 @@ static inline bool cpu_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); } + +static inline bool supports_csv2p3(int scope) +{ + u64 pfr0; + u8 csv2_val; + + if (scope == SCOPE_LOCAL_CPU) + pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); + else + pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + csv2_val = cpuid_feature_extract_unsigned_field(pfr0, + ID_AA64PFR0_CSV2_SHIFT); + return csv2_val == 3; +} + +static inline bool supports_clearbhb(int scope) +{ + u64 isar2; + + if (scope == SCOPE_LOCAL_CPU) + isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1); + else + isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); + + return cpuid_feature_extract_unsigned_field(isar2, + ID_AA64ISAR2_CLEARBHB_SHIFT); +} + const struct cpumask *system_32bit_el0_cpumask(void); DECLARE_STATIC_KEY_FALSE(arm64_mismatched_32bit_el0); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ef5b040dee44dff5bd9fd0011e56fa792833f4d9..bfbf0c4c7c5e5e6f43c60a8ebd2030ec12b7d233 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -59,6 +59,7 @@ #define ARM_CPU_IMP_NVIDIA 0x4E #define ARM_CPU_IMP_FUJITSU 0x46 #define ARM_CPU_IMP_HISI 0x48 +#define ARM_CPU_IMP_APPLE 0x61 #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -72,6 +73,14 @@ #define ARM_CPU_PART_CORTEX_A76 0xD0B #define ARM_CPU_PART_NEOVERSE_N1 0xD0C #define ARM_CPU_PART_CORTEX_A77 0xD0D +#define ARM_CPU_PART_NEOVERSE_V1 0xD40 +#define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_X1 0xD44 +#define ARM_CPU_PART_CORTEX_A510 0xD46 +#define ARM_CPU_PART_CORTEX_A710 0xD47 +#define ARM_CPU_PART_CORTEX_X2 0xD48 +#define ARM_CPU_PART_NEOVERSE_N2 0xD49 +#define ARM_CPU_PART_CORTEX_A78C 0xD4B #define APM_CPU_PART_POTENZA 0x000 @@ -99,6 +108,9 @@ #define HISI_CPU_PART_TSV110 0xD01 +#define APPLE_CPU_PART_M1_ICESTORM 0x022 +#define APPLE_CPU_PART_M1_FIRESTORM 0x023 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -109,6 +121,14 @@ #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) #define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1) #define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77) +#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) +#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) +#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510) +#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710) +#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) +#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) +#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -127,6 +147,8 @@ #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) #define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) +#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM) +#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index bda918948471df9a842dbfc0c36b58bd52ab1e6b..1bc198a5faadeb606d9ede01eb36120b26e1331a 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -149,6 +149,20 @@ msr spsr_el2, x0 .endm +.macro __init_el2_mpam + /* Memory Partioning And Monitoring: disable EL2 traps */ + mrs x1, id_aa64pfr0_el1 + ubfx x0, x1, #ID_AA64PFR0_MPAM_SHIFT, #4 + cbz x0, .Lskip_mpam_\@ // skip if no MPAM + msr_s SYS_MPAM0_EL1, xzr // use the default partition.. + msr_s SYS_MPAM2_EL2, xzr // ..and disable lower traps + msr_s SYS_MPAM1_EL1, xzr + mrs_s x0, SYS_MPAMIDR_EL1 + tbz x0, #17, .Lskip_mpam_\@ // skip if no MPAMHCR reg + msr_s SYS_MPAMHCR_EL2, xzr // clear TRAP_MPAMIDR_EL1 -> EL2 +.Lskip_mpam_\@: +.endm + /** * Initialize EL2 registers to sane values. This should be called early on all * cores that were booted in EL2. Note that everything gets initialised as @@ -165,6 +179,7 @@ __init_el2_stage2 __init_el2_gicv3 __init_el2_hstr + __init_el2_mpam __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 29f97eb3dad41529033b2125721567056731fe36..8f59bbeba7a7ebba25d4faf1589e4465e4360def 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -68,6 +68,7 @@ #define ESR_ELx_EC_MAX (0x3F) #define ESR_ELx_EC_SHIFT (26) +#define ESR_ELx_EC_WIDTH (6) #define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) #define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index 4335800201c97ad1d22bc61ec2a3b12f498f2377..f4b7a1c1e52240e109569537e67760538349671f 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -62,9 +62,11 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 + FIX_ENTRY_TRAMP_TEXT3, + FIX_ENTRY_TRAMP_TEXT2, + FIX_ENTRY_TRAMP_TEXT1, FIX_ENTRY_TRAMP_DATA, - FIX_ENTRY_TRAMP_TEXT, -#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, @@ -105,6 +107,8 @@ void __init early_fixmap_init(void); extern void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); +extern pte_t *__get_fixmap_pte(enum fixed_addresses idx); + #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 9a5498c2c8eeac5786883af84acc1c73d73d0b99..0bd73aeaf14a72309ab60812098ddd91023eb4c8 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -105,6 +105,10 @@ #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) #define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) #define KERNEL_HWCAP_MTE __khwcap2_feature(MTE) +#define KERNEL_HWCAP_ECV __khwcap2_feature(ECV) +#define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) +#define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) +#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/hypervisor.h b/arch/arm64/include/asm/hypervisor.h index 0ae427f352c8cab9440ae723735c396794cbe208..ee45ae14be27d406ec2772cb25878fad42dbe6ad 100644 --- a/arch/arm64/include/asm/hypervisor.h +++ b/arch/arm64/include/asm/hypervisor.h @@ -6,5 +6,8 @@ void kvm_init_hyp_services(void); bool kvm_arm_hyp_service_available(u32 func_id); +void kvm_arm_init_hyp_services(void); +void kvm_init_memshare_services(void); +void kvm_init_ioremap_services(void); #endif diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 4ebb9c054cccd7fa9c70a47d3065dad6de0842ca..c0973345e6e1d582ff3261f3e2d61a6e6da4706a 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -64,6 +64,7 @@ enum aarch64_insn_hint_cr_op { AARCH64_INSN_HINT_PSB = 0x11 << 5, AARCH64_INSN_HINT_TSB = 0x12 << 5, AARCH64_INSN_HINT_CSDB = 0x14 << 5, + AARCH64_INSN_HINT_CLEARBHB = 0x16 << 5, AARCH64_INSN_HINT_BTI = 0x20 << 5, AARCH64_INSN_HINT_BTIC = 0x22 << 5, diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 327120c0089fe37684719aa23e3ee6ea2e6aa433..7463d2cc435a453ac6d13308103ce1e0fd8ffa7b 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -91,7 +91,7 @@ #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) /* TCR_EL2 Registers bits */ -#define TCR_EL2_RES1 ((1 << 31) | (1 << 23)) +#define TCR_EL2_RES1 ((1U << 31) | (1 << 23)) #define TCR_EL2_TBI (1 << 20) #define TCR_EL2_PS_SHIFT 16 #define TCR_EL2_PS_MASK (7 << TCR_EL2_PS_SHIFT) @@ -134,7 +134,7 @@ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are * not known to exist and will break with this configuration. * - * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2(). + * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu. * * Note that when using 4K pages, we concatenate two first level page tables * together. With 16K pages, we concatenate 16 first level page tables. @@ -276,7 +276,7 @@ #define CPTR_EL2_TFP_SHIFT 10 /* Hyp Coprocessor Trap Register */ -#define CPTR_EL2_TCPAC (1 << 31) +#define CPTR_EL2_TCPAC (1U << 31) #define CPTR_EL2_TAM (1 << 30) #define CPTR_EL2_TTA (1 << 20) #define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT) @@ -295,6 +295,7 @@ #define MDCR_EL2_HPMFZO (UL(1) << 29) #define MDCR_EL2_MTPME (UL(1) << 28) #define MDCR_EL2_TDCC (UL(1) << 27) +#define MDCR_EL2_HLP (UL(1) << 26) #define MDCR_EL2_HCCD (UL(1) << 23) #define MDCR_EL2_TTRF (UL(1) << 19) #define MDCR_EL2_HPMD (UL(1) << 17) @@ -341,6 +342,8 @@ #define PAR_TO_HPFAR(par) \ (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8) +#define FAR_MASK GENMASK_ULL(11, 0) + #define ECN(x) { ESR_ELx_EC_##x, #x } #define kvm_arm_exception_class \ @@ -358,4 +361,13 @@ #define CPACR_EL1_TTA (1 << 28) #define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN) +/* + * ARMv8 Reset Values + */ +#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \ + PSR_F_BIT | PSR_D_BIT) + +#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ + PSR_AA32_I_BIT | PSR_AA32_F_BIT) + #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 899f7291083fd163494c4240ccd82b6732a00fa4..ef1cf41ea394fae3fc20fc1e4248f12f62994777 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -43,31 +43,48 @@ #define KVM_HOST_SMCCC_FUNC(name) KVM_HOST_SMCCC_ID(__KVM_HOST_SMCCC_FUNC_##name) #define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0 -#define __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run 1 -#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2 -#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3 -#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4 -#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context 5 -#define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 -#define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config 8 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11 -#define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14 -#define __KVM_HOST_SMCCC_FUNC___pkvm_init 15 -#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp 16 -#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 -#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 -#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 -#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 20 #ifndef __ASSEMBLY__ #include +enum __kvm_host_smccc_func { + /* Hypercalls available only prior to pKVM finalisation */ + /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */ + __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, + __KVM_HOST_SMCCC_FUNC___pkvm_init, + __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping, + __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector, + __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config, + __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, + + /* Hypercalls available after pKVM finalisation */ + __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, + __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp, + __KVM_HOST_SMCCC_FUNC___pkvm_host_reclaim_page, + __KVM_HOST_SMCCC_FUNC___pkvm_host_donate_guest, + __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, + __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run, + __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context, + __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa, + __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid, + __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, + __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, + __KVM_HOST_SMCCC_FUNC___vgic_v3_save_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_vmcr_aprs, + __KVM_HOST_SMCCC_FUNC___pkvm_init_shadow, + __KVM_HOST_SMCCC_FUNC___pkvm_teardown_shadow, + __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load, + __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put, + __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_sync_state, + __KVM_HOST_SMCCC_FUNC___pkvm_iommu_driver_init, + __KVM_HOST_SMCCC_FUNC___pkvm_iommu_register, + __KVM_HOST_SMCCC_FUNC___pkvm_iommu_pm_notify, + __KVM_HOST_SMCCC_FUNC___pkvm_iommu_finalize, +}; + #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] #define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[] @@ -96,7 +113,7 @@ #define per_cpu_ptr_nvhe_sym(sym, cpu) \ ({ \ unsigned long base, off; \ - base = kvm_arm_hyp_percpu_base[cpu]; \ + base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \ off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \ (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \ base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \ @@ -184,7 +201,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[]; DECLARE_KVM_NVHE_SYM(__per_cpu_start); DECLARE_KVM_NVHE_SYM(__per_cpu_end); @@ -204,8 +221,6 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __kvm_adjust_pc(struct kvm_vcpu *vcpu); extern u64 __vgic_v3_get_gic_config(void); -extern u64 __vgic_v3_read_vmcr(void); -extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_init_lrs(void); extern u64 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fd418955e31e68c2d2d7ec2d477d724f3dd34bcc..f583373ff8abed8778c98b777642c70313797592 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -41,6 +41,27 @@ void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +unsigned long get_except64_offset(unsigned long psr, unsigned long target_mode, + enum exception_type type); +unsigned long get_except64_cpsr(unsigned long old, bool has_mte, + unsigned long sctlr, unsigned long mode); + +static inline int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) +{ + /* + * For now make sure that both address/generic pointer authentication + * features are requested by the userspace together and the system + * supports these capabilities. + */ + if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || + !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) || + !system_has_full_ptr_auth()) + return -EINVAL; + + vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH; + return 0; +} + static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu) { return !(vcpu->arch.hcr_el2 & HCR_RW); @@ -386,7 +407,7 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) *vcpu_cpsr(vcpu) |= PSR_AA32_E_BIT; } else { u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - sctlr |= (1 << 25); + sctlr |= SCTLR_ELx_EE; vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1); } } @@ -396,7 +417,10 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT); - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); + if (vcpu_mode_priv(vcpu)) + return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE); + else + return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E); } static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, @@ -471,4 +495,34 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature) return test_bit(feature, vcpu->arch.features); } +/* Narrow the PSCI register arguments (r1 to r3) to 32 bits. */ +static inline void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) +{ + int i; + + /* + * Zero the input registers' upper 32 bits. They will be fully + * zeroed on exit, so we're fine changing them in place. + */ + for (i = 1; i < 4; i++) + vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i))); +} + +static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu, + unsigned long affinity) +{ + return !(affinity & ~MPIDR_HWID_BITMASK); +} + + +#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) + +static inline unsigned long psci_affinity_mask(unsigned long affinity_level) +{ + if (affinity_level <= 3) + return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); + + return 0; +} + #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index d276bc897ad92f9a056e3110f785fdaadd225d27..f73ed0c867be0febb3c8fb19512d814e60264dca 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -26,7 +26,6 @@ #include #include #include -#include #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -72,6 +71,63 @@ u32 __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); +struct kvm_hyp_memcache { + phys_addr_t head; + unsigned long nr_pages; +}; + +static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, + phys_addr_t *p, + phys_addr_t (*to_pa)(void *virt)) +{ + *p = mc->head; + mc->head = to_pa(p); + mc->nr_pages++; +} + +static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc, + void *(*to_va)(phys_addr_t phys)) +{ + phys_addr_t *p = to_va(mc->head); + + if (!mc->nr_pages) + return NULL; + + mc->head = *p; + mc->nr_pages--; + + return p; +} + +static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc, + unsigned long min_pages, + void *(*alloc_fn)(void *arg), + phys_addr_t (*to_pa)(void *virt), + void *arg) +{ + while (mc->nr_pages < min_pages) { + phys_addr_t *p = alloc_fn(arg); + + if (!p) + return -ENOMEM; + push_hyp_memcache(mc, p, to_pa); + } + + return 0; +} + +static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc, + void (*free_fn)(void *virt, void *arg), + void *(*to_va)(phys_addr_t phys), + void *arg) +{ + while (mc->nr_pages) + free_fn(pop_hyp_memcache(mc, to_va), arg); +} + +void free_hyp_memcache(struct kvm_hyp_memcache *mc); +int topup_hyp_memcache(struct kvm_vcpu *vcpu); + struct kvm_vmid { /* The VMID generation used for the virt. memory system */ u64 vmid_gen; @@ -103,6 +159,20 @@ struct kvm_s2_mmu { struct kvm_arch_memory_slot { }; +struct kvm_pinned_page { + struct list_head link; + struct page *page; +}; + +struct kvm_protected_vm { + bool enabled; + int shadow_handle; + struct mutex shadow_lock; + struct kvm_hyp_memcache teardown_mc; + struct list_head pinned_pages; + gpa_t pvmfw_load_addr; +}; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -124,7 +194,12 @@ struct kvm_arch { * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is * supported. */ - bool return_nisv_io_abort_to_user; +#define KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER 0 + /* Memory Tagging Extension enabled for the guest */ +#define KVM_ARCH_FLAG_MTE_ENABLED 1 + /* Guest has bought into the MMIO guard extension */ +#define KVM_ARCH_FLAG_MMIO_GUARD 2 + unsigned long flags; /* * VM-wide PMU filter, implemented as a bitmap and big enough for @@ -136,8 +211,33 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; - /* Memory Tagging Extension enabled for the guest */ - bool mte_enabled; + struct kvm_protected_vm pkvm; +}; + +struct kvm_protected_vcpu { + /* A unique id to the shadow structs in the hyp shadow area. */ + int shadow_handle; + + /* A pointer to the host's vcpu. */ + struct kvm_vcpu *host_vcpu; + + /* A pointer to the shadow vm. */ + struct kvm_shadow_vm *shadow_vm; + + /* Tracks exit code for the protected guest. */ + int exit_code; + + /* + * Track the power state transition of a protected vcpu. + * Can be in one of three states: + * PSCI_0_2_AFFINITY_LEVEL_ON + * PSCI_0_2_AFFINITY_LEVEL_OFF + * PSCI_0_2_AFFINITY_LEVEL_PENDING + */ + int power_state; + + /* True if this vcpu is currently loaded on a cpu. */ + bool loaded_on_cpu; }; struct kvm_vcpu_fault_info { @@ -254,6 +354,7 @@ struct kvm_host_data { struct kvm_host_psci_config { /* PSCI version used by host. */ u32 version; + u32 smccc_version; /* Function IDs used by host if version is v0.1. */ struct psci_0_1_function_ids function_ids_0_1; @@ -273,6 +374,29 @@ extern s64 kvm_nvhe_sym(hyp_physvirt_offset); extern u64 kvm_nvhe_sym(hyp_cpu_logical_map)[NR_CPUS]; #define hyp_cpu_logical_map CHOOSE_NVHE_SYM(hyp_cpu_logical_map) +enum pkvm_iommu_driver_id { + PKVM_IOMMU_DRIVER_S2MPU, + PKVM_IOMMU_DRIVER_SYSMMU_SYNC, + PKVM_IOMMU_NR_DRIVERS, +}; + +enum pkvm_iommu_pm_event { + PKVM_IOMMU_PM_SUSPEND, + PKVM_IOMMU_PM_RESUME, +}; + +int pkvm_iommu_driver_init(enum pkvm_iommu_driver_id drv_id, void *data, size_t size); +int pkvm_iommu_register(struct device *dev, enum pkvm_iommu_driver_id drv_id, + phys_addr_t pa, size_t size, struct device *parent); +int pkvm_iommu_suspend(struct device *dev); +int pkvm_iommu_resume(struct device *dev); + +int pkvm_iommu_s2mpu_register(struct device *dev, phys_addr_t pa); +int pkvm_iommu_sysmmu_sync_register(struct device *dev, phys_addr_t pa, + struct device *parent); +/* Reject future calls to pkvm_iommu_driver_init() and pkvm_iommu_register(). */ +int pkvm_iommu_finalize(void); + struct vcpu_reset_state { unsigned long pc; unsigned long r0; @@ -299,9 +423,6 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; - /* State of various workarounds, see kvm_asm.h for bit assignment */ - u64 workaround_flags; - /* Miscellaneous vcpu state flags */ u64 flags; @@ -322,8 +443,8 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch vcpu_debug_state; struct kvm_guest_debug_arch external_debug_state; - struct thread_info *host_thread_info; /* hyp VA */ struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ + struct task_struct *parent_task; struct { /* {Break,watch}point registers */ @@ -361,16 +482,17 @@ struct kvm_vcpu_arch { /* Don't run the guest (internal implementation need) */ bool pause; - /* Cache some mmu pages needed inside spinlock regions */ - struct kvm_mmu_memory_cache mmu_page_cache; + union { + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; + /* Pages to be donated to pkvm/EL2e if it runs out */ + struct kvm_hyp_memcache pkvm_memcache; + }; /* Target CPU and feature flags */ int target; DECLARE_BITMAP(features, KVM_VCPU_MAX_FEATURES); - /* Detect first run of a vcpu */ - bool has_run_once; - /* Virtual SError ESR to restore when HCR_EL2.VSE is set */ u64 vsesr_el2; @@ -386,6 +508,8 @@ struct kvm_vcpu_arch { u64 last_steal; gpa_t base; } steal; + + struct kvm_protected_vcpu pkvm; }; /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */ @@ -412,15 +536,17 @@ struct kvm_vcpu_arch { #define KVM_ARM64_DEBUG_DIRTY (1 << 0) #define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */ #define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */ -#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */ #define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */ #define KVM_ARM64_GUEST_HAS_SVE (1 << 5) /* SVE exposed to guest */ #define KVM_ARM64_VCPU_SVE_FINALIZED (1 << 6) /* SVE config completed */ #define KVM_ARM64_GUEST_HAS_PTRAUTH (1 << 7) /* PTRAUTH exposed to guest */ #define KVM_ARM64_PENDING_EXCEPTION (1 << 8) /* Exception pending */ +/* + * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be + * set together with an exception... + */ +#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */ #define KVM_ARM64_EXCEPT_MASK (7 << 9) /* Target EL/MODE */ -#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */ -#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ /* * When KVM_ARM64_PENDING_EXCEPTION is set, KVM_ARM64_EXCEPT_MASK can @@ -439,11 +565,15 @@ struct kvm_vcpu_arch { #define KVM_ARM64_EXCEPT_AA64_EL1 (0 << 11) #define KVM_ARM64_EXCEPT_AA64_EL2 (1 << 11) -/* - * Overlaps with KVM_ARM64_EXCEPT_MASK on purpose so that it can't be - * set together with an exception... - */ -#define KVM_ARM64_INCREMENT_PC (1 << 9) /* Increment PC */ +#define KVM_ARM64_DEBUG_STATE_SAVE_SPE (1 << 12) /* Save SPE context if active */ +#define KVM_ARM64_DEBUG_STATE_SAVE_TRBE (1 << 13) /* Save TRBE context if active */ +#define KVM_ARM64_FP_FOREIGN_FPSTATE (1 << 14) +#define KVM_ARM64_PKVM_STATE_DIRTY (1 << 15) + +#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ + KVM_GUESTDBG_USE_SW_BP | \ + KVM_GUESTDBG_USE_HW | \ + KVM_GUESTDBG_SINGLESTEP) #define vcpu_has_sve(vcpu) (system_supports_sve() && \ ((vcpu)->arch.flags & KVM_ARM64_GUEST_HAS_SVE)) @@ -472,9 +602,6 @@ struct kvm_vcpu_arch { #define __vcpu_sys_reg(v,r) (ctxt_sys_reg(&(v)->arch.ctxt, (r))) -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg); -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg); - static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) { /* @@ -566,6 +693,31 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) return true; } +static inline u64 vcpu_arch_read_sys_reg(const struct kvm_vcpu_arch *vcpu_arch, int reg) +{ + u64 val = 0x8badf00d8badf00d; + + /* sysregs_loaded_on_cpu is only used in VHE */ + if (!is_nvhe_hyp_code() && vcpu_arch->sysregs_loaded_on_cpu && + __vcpu_read_sys_reg_from_cpu(reg, &val)) + return val; + + return ctxt_sys_reg(&vcpu_arch->ctxt, reg); +} + +static inline void vcpu_arch_write_sys_reg(struct kvm_vcpu_arch *vcpu_arch, u64 val, int reg) +{ + /* sysregs_loaded_on_cpu is only used in VHE */ + if (!is_nvhe_hyp_code() && vcpu_arch->sysregs_loaded_on_cpu && + __vcpu_write_sys_reg_to_cpu(val, reg)) + return; + + ctxt_sys_reg(&vcpu_arch->ctxt, reg) = val; +} + +#define vcpu_read_sys_reg(vcpu, reg) vcpu_arch_read_sys_reg(&((vcpu)->arch), reg) +#define vcpu_write_sys_reg(vcpu, val, reg) vcpu_arch_write_sys_reg(&((vcpu)->arch), val, reg) + struct kvm_vm_stat { ulong remote_tlb_flush; }; @@ -586,7 +738,7 @@ struct kvm_vcpu_stat { u64 exits; }; -int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); +void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); @@ -613,6 +765,8 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); +#define vcpu_has_run_once(vcpu) !!rcu_access_pointer((vcpu)->pid) + #ifndef __KVM_NVHE_HYPERVISOR__ #define kvm_call_hyp_nvhe(f, ...) \ ({ \ @@ -719,6 +873,11 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) ctxt_sys_reg(cpu_ctxt, MPIDR_EL1) = read_cpuid_mpidr(); } +static inline bool kvm_system_needs_idmapped_vectors(void) +{ + return cpus_have_const_cap(ARM64_SPECTRE_V3A); +} + void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu); static inline void kvm_arch_hardware_unsetup(void) {} @@ -744,8 +903,10 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu); +void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu); static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) { @@ -756,12 +917,7 @@ static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) void kvm_arch_vcpu_load_debug_state_flags(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_put_debug_state_flags(struct kvm_vcpu *vcpu); -#ifdef CONFIG_KVM /* Avoid conflicts with core headers if CONFIG_KVM=n */ -static inline int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) -{ - return kvm_arch_vcpu_run_map_fp(vcpu); -} - +#ifdef CONFIG_KVM void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr); void kvm_clr_pmu_events(u32 clr); @@ -781,12 +937,9 @@ int kvm_set_ipa_limit(void); struct kvm *kvm_arch_alloc_vm(void); void kvm_arch_free_vm(struct kvm *kvm); -int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); +#define kvm_vm_is_protected(kvm) ((kvm)->arch.pkvm.enabled) -static inline bool kvm_vm_is_protected(struct kvm *kvm) -{ - return false; -} +void kvm_init_protected_traps(struct kvm_vcpu *vcpu); int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); @@ -794,7 +947,9 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); #define kvm_arm_vcpu_sve_finalized(vcpu) \ ((vcpu)->arch.flags & KVM_ARM64_VCPU_SVE_FINALIZED) -#define kvm_has_mte(kvm) (system_supports_mte() && (kvm)->arch.mte_enabled) +#define kvm_has_mte(kvm) \ + (system_supports_mte() && \ + test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags)) #define kvm_vcpu_has_pmu(vcpu) \ (test_bit(KVM_ARM_VCPU_PMU_V3, (vcpu)->arch.features)) diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 657d0c94cf828964cf7e9a8c9b0940f2f629f76a..dc9c66f3c94c0230a204a1cb63845262fd31b770 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -61,8 +61,8 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); -void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); #ifdef __KVM_NVHE_HYPERVISOR__ @@ -90,7 +90,6 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); -void __sve_save_state(void *sve_pffr, u32 *fpsr); void __sve_restore_state(void *sve_pffr, u32 *fpsr); #ifndef __KVM_NVHE_HYPERVISOR__ @@ -115,7 +114,14 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); #endif +extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); +extern unsigned long kvm_nvhe_sym(__icache_flags); +extern bool kvm_nvhe_sym(smccc_trng_available); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 02d37888774383f241ecaa641992408c1325a05c..b3b03f1549ebf1f534df79fd95c31a2f1f9d48ac 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -118,6 +118,10 @@ alternative_cb_end void kvm_update_va_mask(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_get__text(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); +void kvm_get__etext(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst); void kvm_compute_layout(void); void kvm_apply_hyp_relocations(void); @@ -150,6 +154,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #include #include +int kvm_share_hyp(void *from, void *to); +void kvm_unshare_hyp(void *from, void *to); int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, @@ -159,7 +165,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); @@ -180,8 +186,13 @@ static inline void *__kvm_vector_slot2addr(void *base, struct kvm; -#define kvm_flush_dcache_to_poc(a,l) \ - dcache_clean_inval_poc((unsigned long)(a), (unsigned long)(a)+(l)) +#define kvm_flush_dcache_to_poc(a, l) do { \ + unsigned long __a = (unsigned long)(a); \ + unsigned long __l = (unsigned long)(l); \ + \ + if (__l) \ + dcache_clean_inval_poc(__a, __a + __l); \ +} while (0) static inline bool vcpu_has_cache_enabled(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 0277838295840c6ee2c4d4b79040034f2abc37fb..190df60f3aaa417b1599b62939007a9bbce8c778 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -251,6 +251,27 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt); int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot); +/** + * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init(). + * @addr: Virtual address from which to remove the mapping. + * @size: Size of the mapping. + * + * The offset of @addr within a page is ignored, @size is rounded-up to + * the next page boundary and @phys is rounded-down to the previous page + * boundary. + * + * TLB invalidation is performed for each page-table entry cleared during the + * unmapping operation and the reference count for the page-table page + * containing the cleared entry is decremented, with unreferenced pages being + * freed. The unmapping operation will stop early if it encounters either an + * invalid page-table entry or a valid block mapping which maps beyond the range + * being unmapped. + * + * Return: Number of bytes unmapped, which may be 0. + */ +u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); + /** * kvm_get_vtcr() - Helper to construct VTCR_EL2 * @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register. @@ -267,11 +288,18 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, */ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); +/* + * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD + * @vtcr: Content of the VTCR register. + * + * Return: the size (in bytes) of the stage-2 PGD + */ +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr); + /** * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. - * @arch: Arch-specific KVM structure representing the guest virtual - * machine. + * @mmu: S2 MMU context for this S2 translation * @mm_ops: Memory management callbacks. * @flags: Stage-2 configuration flags. * @force_pte_cb: Function that returns true if page level mappings must @@ -279,13 +307,13 @@ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); * * Return: 0 on success, negative error code on failure. */ -int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, +int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops, enum kvm_pgtable_stage2_flags flags, kvm_pgtable_force_pte_cb_t force_pte_cb); -#define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \ - __kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL) +#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \ + __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL) /** * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table. @@ -329,14 +357,16 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, void *mc); /** - * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to - * track ownership. + * kvm_pgtable_stage2_annotate() - Unmap and annotate pages in the IPA space + * to track ownership (and more). * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). * @addr: Base intermediate physical address to annotate. * @size: Size of the annotated range. * @mc: Cache of pre-allocated and zeroed memory from which to allocate * page-table pages. - * @owner_id: Unique identifier for the owner of the page. + * @annotation: A 63 bit value that will be stored in the page tables. + * @annotation[0] must be 0, and @annotation[63:1] is stored + * in the page tables. * * By default, all page-tables are owned by identifier 0. This function can be * used to mark portions of the IPA space as owned by other entities. When a @@ -345,8 +375,8 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, * * Return: 0 on success, negative error code on failure. */ -int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, - void *mc, u8 owner_id); +int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size, + void *mc, kvm_pte_t annotation); /** * kvm_pgtable_stage2_unmap() - Remove a mapping from a guest stage-2 page-table. diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h new file mode 100644 index 0000000000000000000000000000000000000000..89895ed9c5911a510dfa08de8db3feac6da3f500 --- /dev/null +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -0,0 +1,347 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 - Google LLC + * Author: Quentin Perret + * Author: Fuad Tabba + */ +#ifndef __ARM64_KVM_PKVM_H__ +#define __ARM64_KVM_PKVM_H__ + +#include +#include +#include +#include +#include + +/* Maximum number of protected VMs that can be created. */ +#define KVM_MAX_PVMS 255 + +#define HYP_MEMBLOCK_REGIONS 128 +#define PVMFW_INVALID_LOAD_ADDR (-1) + +int kvm_arm_vm_ioctl_pkvm(struct kvm *kvm, struct kvm_enable_cap *cap); +int kvm_init_pvm(struct kvm *kvm, unsigned long type); +int create_el2_shadow(struct kvm *kvm); +void kvm_shadow_destroy(struct kvm *kvm); + +/* + * Definitions for features to be allowed or restricted for guest virtual + * machines, depending on the mode KVM is running in and on the type of guest + * that is running. + * + * The ALLOW masks represent a bitmask of feature fields that are allowed + * without any restrictions as long as they are supported by the system. + * + * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for + * features that are restricted to support at most the specified feature. + * + * If a feature field is not present in either, than it is not supported. + * + * The approach taken for protected VMs is to allow features that are: + * - Needed by common Linux distributions (e.g., floating point) + * - Trivial to support, e.g., supporting the feature does not introduce or + * require tracking of additional state in KVM + * - Cannot be trapped or prevent the guest from using anyway + */ + +/* + * Allow for protected VMs: + * - Floating-point and Advanced SIMD + * - Data Independent Timing + */ +#define PVM_ID_AA64PFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - AArch64 guests only (no support for AArch32 guests): + * AArch32 adds complexity in trap handling, emulation, condition codes, + * etc... + * - RAS (v1) + * Supported by KVM + */ +#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \ + ) + +/* + * Allow for protected VMs: + * - Branch Target Identification + * - Speculative Store Bypassing + */ +#define PVM_ID_AA64PFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \ + ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \ + ) + +/* + * Allow for protected VMs: + * - Mixed-endian + * - Distinction between Secure and Non-secure Memory + * - Mixed-endian at EL0 only + * - Non-context synchronizing exception entry and exit + */ +#define PVM_ID_AA64MMFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - 40-bit IPA + * - 16-bit ASID + */ +#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \ + ) + +/* + * Allow for protected VMs: + * - Hardware translation table updates to Access flag and Dirty state + * - Number of VMID bits from CPU + * - Hierarchical Permission Disables + * - Privileged Access Never + * - SError interrupt exceptions from speculative reads + * - Enhanced Translation Synchronization + */ +#define PVM_ID_AA64MMFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \ + ) + +/* + * Allow for protected VMs: + * - Common not Private translations + * - User Access Override + * - IESB bit in the SCTLR_ELx registers + * - Unaligned single-copy atomicity and atomic functions + * - ESR_ELx.EC value on an exception by read access to feature ID space + * - TTL field in address operations. + * - Break-before-make sequences when changing translation block size + * - E0PDx mechanism + */ +#define PVM_ID_AA64MMFR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \ + ) + +/* + * No support for Scalable Vectors for protected VMs: + * Requires additional support from KVM, e.g., context-switching and + * trapping at EL2 + */ +#define PVM_ID_AA64ZFR0_ALLOW (0ULL) + +/* + * No support for debug, including breakpoints, and watchpoints for protected + * VMs: + * The Arm architecture mandates support for at least the Armv8 debug + * architecture, which would include at least 2 hardware breakpoints and + * watchpoints. Providing that support to protected guests adds + * considerable state and complexity. Therefore, the reserved value of 0 is + * used for debug-related fields. + */ +#define PVM_ID_AA64DFR0_ALLOW (0ULL) +#define PVM_ID_AA64DFR1_ALLOW (0ULL) + +/* + * No support for implementation defined features. + */ +#define PVM_ID_AA64AFR0_ALLOW (0ULL) +#define PVM_ID_AA64AFR1_ALLOW (0ULL) + +/* + * No restrictions on instructions implemented in AArch64. + */ +#define PVM_ID_AA64ISAR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \ + ) + +#define PVM_ID_AA64ISAR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ + ) + +/* + * Returns the maximum number of breakpoints supported for protected VMs. + */ +static inline int pkvm_get_max_brps(void) +{ + int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), + PVM_ID_AA64DFR0_ALLOW); + + /* + * If breakpoints are supported, the maximum number is 1 + the field. + * Otherwise, return 0, which is not compliant with the architecture, + * but is reserved and is used here to indicate no debug support. + */ + return num ? num + 1 : 0; +} + +/* + * Returns the maximum number of watchpoints supported for protected VMs. + */ +static inline int pkvm_get_max_wrps(void) +{ + int num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), + PVM_ID_AA64DFR0_ALLOW); + + return num ? num + 1 : 0; +} + +extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; +extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); + +extern phys_addr_t kvm_nvhe_sym(pvmfw_base); +extern phys_addr_t kvm_nvhe_sym(pvmfw_size); + +static inline unsigned long +hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size) +{ + unsigned long nr_pages = reg->size >> PAGE_SHIFT; + unsigned long start, end; + + start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size; + end = start + nr_pages * vmemmap_entry_size; + start = ALIGN_DOWN(start, PAGE_SIZE); + end = ALIGN(end, PAGE_SIZE); + + return end - start; +} + +static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size) +{ + unsigned long res = 0, i; + + for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { + res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i], + vmemmap_entry_size); + } + + return res >> PAGE_SHIFT; +} + +static inline unsigned long hyp_shadow_table_pages(size_t shadow_entry_size) +{ + return PAGE_ALIGN(KVM_MAX_PVMS * shadow_entry_size) >> PAGE_SHIFT; +} + +static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) +{ + unsigned long total = 0, i; + + /* Provision the worst case scenario */ + for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) { + nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE); + total += nr_pages; + } + + return total; +} + +static inline unsigned long __hyp_pgtable_total_pages(void) +{ + unsigned long res = 0, i; + + /* Cover all of memory with page-granularity */ + for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { + struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i]; + res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT); + } + + return res; +} + +static inline unsigned long hyp_s1_pgtable_pages(void) +{ + unsigned long res; + + res = __hyp_pgtable_total_pages(); + + /* Allow 1 GiB for private mappings */ + res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + + return res; +} + +static inline unsigned long host_s2_pgtable_pages(void) +{ + unsigned long res; + + /* + * Include an extra 16 pages to safely upper-bound the worst case of + * concatenated pgds. + */ + res = __hyp_pgtable_total_pages() + 16; + + /* Allow 1 GiB for MMIO mappings */ + res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + + return res; +} + +#define KVM_FFA_MBOX_NR_PAGES 1 + +static inline unsigned long hyp_ffa_proxy_pages(void) +{ + size_t desc_max; + + /* + * The hypervisor FFA proxy needs enough memory to buffer a fragmented + * descriptor returned from EL3 in response to a RETRIEVE_REQ call. + */ + desc_max = sizeof(struct ffa_mem_region) + + sizeof(struct ffa_mem_region_attributes) + + sizeof(struct ffa_composite_mem_region) + + SG_MAX_SEGMENTS * sizeof(struct ffa_mem_region_addr_range); + + /* Plus a page each for the hypervisor's RX and TX mailboxes. */ + return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE); +} + +#endif /* __ARM64_KVM_PKVM_H__ */ diff --git a/arch/arm64/include/asm/kvm_s2mpu.h b/arch/arm64/include/asm/kvm_s2mpu.h new file mode 100644 index 0000000000000000000000000000000000000000..b1075abd604caef6a6a12b26623a48fd23a2dbce --- /dev/null +++ b/arch/arm64/include/asm/kvm_s2mpu.h @@ -0,0 +1,357 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 - Google LLC + * Author: David Brazdil + */ + +#ifndef __ARM64_KVM_S2MPU_H__ +#define __ARM64_KVM_S2MPU_H__ + +#include + +#include + +#define S2MPU_MMIO_SIZE SZ_64K +#define SYSMMU_SYNC_MMIO_SIZE SZ_64K +#define SYSMMU_SYNC_S2_OFFSET SZ_32K +#define SYSMMU_SYNC_S2_MMIO_SIZE (SYSMMU_SYNC_MMIO_SIZE - \ + SYSMMU_SYNC_S2_OFFSET) + +#define NR_VIDS 8 +#define NR_CTX_IDS 8 + +#define ALL_VIDS_BITMAP GENMASK(NR_VIDS - 1, 0) + +#define REG_NS_CTRL0 0x0 +#define REG_NS_CTRL1 0x4 +#define REG_NS_CFG 0x10 +#define REG_NS_INTERRUPT_ENABLE_PER_VID_SET 0x20 +#define REG_NS_INTERRUPT_CLEAR 0x2c +#define REG_NS_VERSION 0x60 +#define REG_NS_INFO 0x64 +#define REG_NS_STATUS 0x68 +#define REG_NS_NUM_CONTEXT 0x100 +#define REG_NS_CONTEXT_CFG_VALID_VID 0x104 +#define REG_NS_ALL_INVALIDATION 0x1000 +#define REG_NS_RANGE_INVALIDATION 0x1020 +#define REG_NS_RANGE_INVALIDATION_START_PPN 0x1024 +#define REG_NS_RANGE_INVALIDATION_END_PPN 0x1028 +#define REG_NS_FAULT_STATUS 0x2000 +#define REG_NS_FAULT_PA_LOW(vid) (0x2004 + ((vid) * 0x20)) +#define REG_NS_FAULT_PA_HIGH(vid) (0x2008 + ((vid) * 0x20)) +#define REG_NS_FAULT_INFO(vid) (0x2010 + ((vid) * 0x20)) +#define REG_NS_READ_MPTC 0x3000 +#define REG_NS_READ_MPTC_TAG_PPN 0x3004 +#define REG_NS_READ_MPTC_TAG_OTHERS 0x3008 +#define REG_NS_READ_MPTC_DATA 0x3010 +#define REG_NS_L1ENTRY_L2TABLE_ADDR(vid, gb) (0x4000 + ((vid) * 0x200) + ((gb) * 0x8)) +#define REG_NS_L1ENTRY_ATTR(vid, gb) (0x4004 + ((vid) * 0x200) + ((gb) * 0x8)) + +#define CTRL0_ENABLE BIT(0) +#define CTRL0_INTERRUPT_ENABLE BIT(1) +#define CTRL0_FAULT_RESP_TYPE_SLVERR BIT(2) /* for v8 */ +#define CTRL0_FAULT_RESP_TYPE_DECERR BIT(2) /* for v9 */ +#define CTRL0_MASK (CTRL0_ENABLE | \ + CTRL0_INTERRUPT_ENABLE | \ + CTRL0_FAULT_RESP_TYPE_SLVERR | \ + CTRL0_FAULT_RESP_TYPE_DECERR) + +#define CTRL1_DISABLE_CHK_S1L1PTW BIT(0) +#define CTRL1_DISABLE_CHK_S1L2PTW BIT(1) +#define CTRL1_ENABLE_PAGE_SIZE_AWARENESS BIT(2) +#define CTRL1_DISABLE_CHK_USER_MATCHED_REQ BIT(3) +#define CTRL1_MASK (CTRL1_DISABLE_CHK_S1L1PTW | \ + CTRL1_DISABLE_CHK_S1L2PTW | \ + CTRL1_ENABLE_PAGE_SIZE_AWARENESS | \ + CTRL1_DISABLE_CHK_USER_MATCHED_REQ) + +#define CFG_MPTW_CACHE_OVERRIDE BIT(0) +#define CFG_MPTW_CACHE_VALUE GENMASK(7, 4) +#define CFG_MPTW_QOS_OVERRIDE BIT(8) +#define CFG_MPTW_QOS_VALUE GENMASK(15, 12) +#define CFG_MPTW_SHAREABLE BIT(16) +#define CFG_MASK (CFG_MPTW_CACHE_OVERRIDE | \ + CFG_MPTW_CACHE_VALUE | \ + CFG_MPTW_QOS_OVERRIDE | \ + CFG_MPTW_QOS_VALUE | \ + CFG_MPTW_SHAREABLE) + +/* For use with hi_lo_readq_relaxed(). */ +#define REG_NS_FAULT_PA_HIGH_LOW(vid) REG_NS_FAULT_PA_LOW(vid) + +/* Mask used for extracting VID from FAULT_* register offset. */ +#define REG_NS_FAULT_VID_MASK GENMASK(7, 5) + +#define VERSION_MAJOR_ARCH_VER_MASK GENMASK(31, 28) +#define VERSION_MINOR_ARCH_VER_MASK GENMASK(27, 24) +#define VERSION_REV_ARCH_VER_MASK GENMASK(23, 16) +#define VERSION_RTL_VER_MASK GENMASK(7, 0) + +/* Ignore RTL version in driver version check. */ +#define VERSION_CHECK_MASK (VERSION_MAJOR_ARCH_VER_MASK | \ + VERSION_MINOR_ARCH_VER_MASK | \ + VERSION_REV_ARCH_VER_MASK) + +#define INFO_NUM_SET_MASK GENMASK(15, 0) + +#define STATUS_BUSY BIT(0) +#define STATUS_ON_INVALIDATING BIT(1) + +#define NUM_CONTEXT_MASK GENMASK(3, 0) + +#define CONTEXT_CFG_VALID_VID_CTX_VALID(ctx) BIT((4 * (ctx)) + 3) +#define CONTEXT_CFG_VALID_VID_CTX_VID(ctx, vid) \ + FIELD_PREP(GENMASK((4 * (ctx) + 2), 4 * (ctx)), (vid)) + +#define INVALIDATION_INVALIDATE BIT(0) +#define RANGE_INVALIDATION_PPN_SHIFT 12 + +#define NR_FAULT_INFO_REGS 8 +#define FAULT_INFO_VID_MASK GENMASK(26, 24) +#define FAULT_INFO_TYPE_MASK GENMASK(23, 21) +#define FAULT_INFO_TYPE_CONTEXT 0x4 /* v9 only */ +#define FAULT_INFO_TYPE_AP 0x2 +#define FAULT_INFO_TYPE_MPTW 0x1 +#define FAULT_INFO_RW_BIT BIT(20) +#define FAULT_INFO_LEN_MASK GENMASK(19, 16) +#define FAULT_INFO_ID_MASK GENMASK(15, 0) + +#define L1ENTRY_L2TABLE_ADDR_SHIFT 4 +#define L1ENTRY_L2TABLE_ADDR(pa) ((pa) >> L1ENTRY_L2TABLE_ADDR_SHIFT) + +#define READ_MPTC_WAY_MASK GENMASK(18, 16) +#define READ_MPTC_SET_MASK GENMASK(15, 0) +#define READ_MPTC_MASK (READ_MPTC_WAY_MASK | READ_MPTC_SET_MASK) +#define READ_MPTC_WAY(way) FIELD_PREP(READ_MPTC_WAY_MASK, (way)) +#define READ_MPTC_SET(set) FIELD_PREP(READ_MPTC_SET_MASK, (set)) +#define READ_MPTC(set, way) (READ_MPTC_SET(set) | READ_MPTC_WAY(way)) +#define READ_MPTC_TAG_PPN_MASK GENMASK(23, 0) +#define READ_MPTC_TAG_OTHERS_VID_MASK GENMASK(10, 8) +#define READ_MPTC_TAG_OTHERS_GRAN_MASK GENMASK(5, 4) +#define READ_MPTC_TAG_OTHERS_VALID_BIT BIT(0) +#define READ_MPTC_TAG_OTHERS_MASK (READ_MPTC_TAG_OTHERS_VID_MASK | \ + READ_MPTC_TAG_OTHERS_GRAN_MASK | \ + READ_MPTC_TAG_OTHERS_VALID_BIT) + +#define L1ENTRY_ATTR_L2TABLE_EN BIT(0) +#define L1ENTRY_ATTR_GRAN_4K 0x0 +#define L1ENTRY_ATTR_GRAN_64K 0x1 +#define L1ENTRY_ATTR_GRAN_2M 0x2 +#define L1ENTRY_ATTR_PROT_MASK GENMASK(2, 1) +#define L1ENTRY_ATTR_GRAN_MASK GENMASK(5, 4) +#define L1ENTRY_ATTR_PROT(prot) FIELD_PREP(L1ENTRY_ATTR_PROT_MASK, prot) +#define L1ENTRY_ATTR_GRAN(gran) FIELD_PREP(L1ENTRY_ATTR_GRAN_MASK, gran) +#define L1ENTRY_ATTR_1G(prot) L1ENTRY_ATTR_PROT(prot) +#define L1ENTRY_ATTR_L2(gran) (L1ENTRY_ATTR_GRAN(gran) | \ + L1ENTRY_ATTR_L2TABLE_EN) + +#define NR_GIGABYTES 64 +#define RO_GIGABYTES_FIRST 4 +#define RO_GIGABYTES_LAST 33 +#define NR_RO_GIGABYTES (RO_GIGABYTES_LAST - RO_GIGABYTES_FIRST + 1) +#define NR_RW_GIGABYTES (NR_GIGABYTES - NR_RO_GIGABYTES) + +#ifdef CONFIG_ARM64_64K_PAGES +#define SMPT_GRAN SZ_64K +#define SMPT_GRAN_ATTR L1ENTRY_ATTR_GRAN_64K +#else +#define SMPT_GRAN SZ_4K +#define SMPT_GRAN_ATTR L1ENTRY_ATTR_GRAN_4K +#endif +static_assert(SMPT_GRAN <= PAGE_SIZE); + +#define MPT_PROT_BITS 2 +#define SMPT_WORD_SIZE sizeof(u32) +#define SMPT_ELEMS_PER_BYTE (BITS_PER_BYTE / MPT_PROT_BITS) +#define SMPT_ELEMS_PER_WORD (SMPT_WORD_SIZE * SMPT_ELEMS_PER_BYTE) +#define SMPT_WORD_BYTE_RANGE (SMPT_GRAN * SMPT_ELEMS_PER_WORD) +#define SMPT_NUM_ELEMS (SZ_1G / SMPT_GRAN) +#define SMPT_SIZE (SMPT_NUM_ELEMS / SMPT_ELEMS_PER_BYTE) +#define SMPT_NUM_WORDS (SMPT_SIZE / SMPT_WORD_SIZE) +#define SMPT_NUM_PAGES (SMPT_SIZE / PAGE_SIZE) +#define SMPT_ORDER get_order(SMPT_SIZE) + +/* SysMMU_SYNC registers, relative to SYSMMU_SYNC_S2_OFFSET. */ +#define REG_NS_SYNC_CMD 0x0 +#define REG_NS_SYNC_COMP 0x4 + +#define SYNC_CMD_SYNC BIT(0) +#define SYNC_COMP_COMPLETE BIT(0) + +/* + * Iterate over S2MPU gigabyte regions. Skip those that cannot be modified + * (the MMIO registers are read only, with reset value MPT_PROT_NONE). + */ +#define for_each_gb_in_range(i, first, last) \ + for ((i) = (first); (i) <= (last) && (i) < NR_GIGABYTES; \ + (i) = (((i) + 1 == RO_GIGABYTES_FIRST) ? RO_GIGABYTES_LAST : (i)) + 1) + +#define for_each_gb(i) for_each_gb_in_range(i, 0, NR_GIGABYTES - 1) +#define for_each_vid(i) for ((i) = 0; (i) < NR_VIDS; (i)++) +#define for_each_gb_and_vid(gb, vid) for_each_vid((vid)) for_each_gb((gb)) + +enum s2mpu_version { + S2MPU_VERSION_8 = 0x11000000, + S2MPU_VERSION_9 = 0x20000000, +}; + +enum mpt_prot { + MPT_PROT_NONE = 0, + MPT_PROT_R = BIT(0), + MPT_PROT_W = BIT(1), + MPT_PROT_RW = MPT_PROT_R | MPT_PROT_W, + MPT_PROT_MASK = MPT_PROT_RW, +}; + +static const u64 mpt_prot_doubleword[] = { + [MPT_PROT_NONE] = 0x0000000000000000, + [MPT_PROT_R] = 0x5555555555555555, + [MPT_PROT_W] = 0xaaaaaaaaaaaaaaaa, + [MPT_PROT_RW] = 0xffffffffffffffff, +}; + +enum mpt_update_flags { + MPT_UPDATE_L1 = BIT(0), + MPT_UPDATE_L2 = BIT(1), +}; + +struct fmpt { + u32 *smpt; + bool gran_1g; + enum mpt_prot prot; + enum mpt_update_flags flags; +}; + +struct mpt { + struct fmpt fmpt[NR_GIGABYTES]; +}; + +/* Set protection bits of SMPT in a given range without using memset. */ +static inline void __set_smpt_range_slow(u32 *smpt, size_t start_gb_byte, + size_t end_gb_byte, enum mpt_prot prot) +{ + size_t i, start_word_byte, end_word_byte, word_idx, first_elem, last_elem; + u32 val; + + /* Iterate over u32 words. */ + start_word_byte = start_gb_byte; + while (start_word_byte < end_gb_byte) { + /* Determine the range of bytes covered by this word. */ + word_idx = start_word_byte / SMPT_WORD_BYTE_RANGE; + end_word_byte = min( + ALIGN(start_word_byte + 1, SMPT_WORD_BYTE_RANGE), + end_gb_byte); + + /* Identify protection bit offsets within the word. */ + first_elem = (start_word_byte / SMPT_GRAN) % SMPT_ELEMS_PER_WORD; + last_elem = ((end_word_byte - 1) / SMPT_GRAN) % SMPT_ELEMS_PER_WORD; + + /* Modify the corresponding word. */ + val = READ_ONCE(smpt[word_idx]); + for (i = first_elem; i <= last_elem; i++) { + val &= ~(MPT_PROT_MASK << (i * MPT_PROT_BITS)); + val |= prot << (i * MPT_PROT_BITS); + } + WRITE_ONCE(smpt[word_idx], val); + + start_word_byte = end_word_byte; + } +} + +/* Set protection bits of SMPT in a given range. */ +static inline void __set_smpt_range(u32 *smpt, size_t start_gb_byte, + size_t end_gb_byte, enum mpt_prot prot) +{ + size_t interlude_start, interlude_end, interlude_bytes, word_idx; + char prot_byte = (char)mpt_prot_doubleword[prot]; + + if (start_gb_byte >= end_gb_byte) + return; + + /* Check if range spans at least one full u32 word. */ + interlude_start = ALIGN(start_gb_byte, SMPT_WORD_BYTE_RANGE); + interlude_end = ALIGN_DOWN(end_gb_byte, SMPT_WORD_BYTE_RANGE); + + /* If not, fall back to editing bits in the given range. */ + if (interlude_start >= interlude_end) { + __set_smpt_range_slow(smpt, start_gb_byte, end_gb_byte, prot); + return; + } + + /* Use bit-editing for prologue/epilogue, memset for interlude. */ + word_idx = interlude_start / SMPT_WORD_BYTE_RANGE; + interlude_bytes = (interlude_end - interlude_start) / SMPT_GRAN / SMPT_ELEMS_PER_BYTE; + + __set_smpt_range_slow(smpt, start_gb_byte, interlude_start, prot); + memset(&smpt[word_idx], prot_byte, interlude_bytes); + __set_smpt_range_slow(smpt, interlude_end, end_gb_byte, prot); +} + +/* Returns true if all SMPT protection bits match 'prot'. */ +static inline bool __is_smpt_uniform(u32 *smpt, enum mpt_prot prot) +{ + size_t i; + u64 *doublewords = (u64 *)smpt; + + for (i = 0; i < SMPT_NUM_WORDS / 2; i++) { + if (doublewords[i] != mpt_prot_doubleword[prot]) + return false; + } + return true; +} + +/* + * Set protection bits of FMPT/SMPT in a given range. + * Returns flags specifying whether L1/L2 changes need to be made visible + * to the device. + */ +static inline void __set_fmpt_range(struct fmpt *fmpt, size_t start_gb_byte, + size_t end_gb_byte, enum mpt_prot prot) +{ + if (start_gb_byte == 0 && end_gb_byte >= SZ_1G) { + /* Update covers the entire GB region. */ + if (fmpt->gran_1g && fmpt->prot == prot) { + fmpt->flags = 0; + return; + } + + fmpt->gran_1g = true; + fmpt->prot = prot; + fmpt->flags = MPT_UPDATE_L1; + return; + } + + if (fmpt->gran_1g) { + /* GB region currently uses 1G mapping. */ + if (fmpt->prot == prot) { + fmpt->flags = 0; + return; + } + + /* + * Range has different mapping than the rest of the GB. + * Convert to PAGE_SIZE mapping. + */ + fmpt->gran_1g = false; + __set_smpt_range(fmpt->smpt, 0, start_gb_byte, fmpt->prot); + __set_smpt_range(fmpt->smpt, start_gb_byte, end_gb_byte, prot); + __set_smpt_range(fmpt->smpt, end_gb_byte, SZ_1G, fmpt->prot); + fmpt->flags = MPT_UPDATE_L1 | MPT_UPDATE_L2; + return; + } + + /* GB region currently uses PAGE_SIZE mapping. */ + __set_smpt_range(fmpt->smpt, start_gb_byte, end_gb_byte, prot); + + /* Check if the entire GB region has the same prot bits. */ + if (!__is_smpt_uniform(fmpt->smpt, prot)) { + fmpt->flags = MPT_UPDATE_L2; + return; + } + + fmpt->gran_1g = true; + fmpt->prot = prot; + fmpt->flags = MPT_UPDATE_L1; +} + +#endif /* __ARM64_KVM_S2MPU_H__ */ diff --git a/arch/arm64/include/asm/mem_encrypt.h b/arch/arm64/include/asm/mem_encrypt.h new file mode 100644 index 0000000000000000000000000000000000000000..300c8b8cbebe10a8d3d33bcbddf031db9d386162 --- /dev/null +++ b/arch/arm64/include/asm/mem_encrypt.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_MEM_ENCRYPT_H +#define __ASM_MEM_ENCRYPT_H + +bool mem_encrypt_active(void); +int set_memory_encrypted(unsigned long addr, int numpages); +int set_memory_decrypted(unsigned long addr, int numpages); + +#endif /* __ASM_MEM_ENCRYPT_H */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f50e752d31f9e98e8b7bf7759fabe19885900c4b..0fc40a286033361d92aff2ac27cf7652782af67d 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -234,9 +234,8 @@ static inline const void *__tag_set(const void *addr, u8 tag) #ifdef CONFIG_KASAN_HW_TAGS #define arch_enable_tagging_sync() mte_enable_kernel_sync() #define arch_enable_tagging_async() mte_enable_kernel_async() -#define arch_set_tagging_report_once(state) mte_set_report_once(state) +#define arch_enable_tagging_asymm() mte_enable_kernel_asymm() #define arch_force_async_tag_fault() mte_check_tfsr_exit() -#define arch_init_tags(max_tag) mte_init_tags(max_tag) #define arch_get_random_tag() mte_get_random_tag() #define arch_get_mem_tag(addr) mte_get_mem_tag(addr) #define arch_set_mem_tag_range(addr, size, tag, init) \ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index e9c30859f80cdc90edd3d3a8102d5754e424fc53..48f8466a4be92ac376957dc4118d851f1186d701 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -15,6 +15,7 @@ #ifndef __ASSEMBLY__ #include +#include typedef struct { atomic64_t id; diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 65fc16f2c2850a3989030b3a24e6d92fe0900be5..4e7fa2623896b008afb775c2cd4d41f7a72491c5 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -20,12 +20,6 @@ struct mod_arch_specific { /* for CONFIG_DYNAMIC_FTRACE */ struct plt_entry *ftrace_trampolines; - - /* for FIPS 140 certified kernel module */ - const Elf64_Rela *text_relocations; - const Elf64_Rela *rodata_relocations; - int num_text_relocations; - int num_rodata_relocations; }; #endif diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index 0b736bff0bdf132f6e5178c8ff942fedb2f9cbf2..bbbd234845dae30e714803123c6a6a5bfef6b968 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,5 +1,5 @@ -#ifdef CONFIG_ARM64_MODULE_PLTS SECTIONS { +#ifdef CONFIG_ARM64_MODULE_PLTS .plt 0 (NOLOAD) : { BYTE(0) } .init.plt 0 (NOLOAD) : { BYTE(0) } .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } @@ -32,5 +32,18 @@ SECTIONS { *(.initcalls._end) } #endif -} #endif + +#ifdef CONFIG_KASAN_SW_TAGS + /* + * Outlined checks go into comdat-deduplicated sections named .text.hot. + * Because they are in comdats they are not combined by the linker and + * we otherwise end up with multiple sections with the same .text.hot + * name in the .ko file. The kernel module loader warns if it sees + * multiple sections with the same name so we use this sections + * directive to force them into a single section and silence the + * warning. + */ + .text.hot : { *(.text.hot) } +#endif +} diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h index d952352bd0088d5ce8f602a3b39724d308f4b4ad..e4704a403237e2f0d6b253e09964caf47b4101cd 100644 --- a/arch/arm64/include/asm/mte-kasan.h +++ b/arch/arm64/include/asm/mte-kasan.h @@ -84,10 +84,12 @@ static inline void __dc_gzva(u64 p) static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag, bool init) { - u64 curr, mask, dczid_bs, end1, end2, end3; + u64 curr, mask, dczid, dczid_bs, dczid_dzp, end1, end2, end3; /* Read DC G(Z)VA block size from the system register. */ - dczid_bs = 4ul << (read_cpuid(DCZID_EL0) & 0xf); + dczid = read_cpuid(DCZID_EL0); + dczid_bs = 4ul << (dczid & 0xf); + dczid_dzp = (dczid >> 4) & 1; curr = (u64)__tag_set(addr, tag); mask = dczid_bs - 1; @@ -106,7 +108,7 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag, */ #define SET_MEMTAG_RANGE(stg_post, dc_gva) \ do { \ - if (size >= 2 * dczid_bs) { \ + if (!dczid_dzp && size >= 2 * dczid_bs) {\ do { \ curr = stg_post(curr); \ } while (curr < end1); \ @@ -130,10 +132,7 @@ static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag, void mte_enable_kernel_sync(void); void mte_enable_kernel_async(void); -void mte_init_tags(u64 max_tag); - -void mte_set_report_once(bool state); -bool mte_report_once(void); +void mte_enable_kernel_asymm(void); #else /* CONFIG_ARM64_MTE */ @@ -165,17 +164,8 @@ static inline void mte_enable_kernel_async(void) { } -static inline void mte_init_tags(u64 max_tag) -{ -} - -static inline void mte_set_report_once(bool state) -{ -} - -static inline bool mte_report_once(void) +static inline void mte_enable_kernel_asymm(void) { - return false; } #endif /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 58c7f80f55961f598b0a52ec03a37a4e88ec578f..075539f5f1c88f42298ae4c6b824cb34a9728a7e 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -16,8 +16,6 @@ #include -extern u64 gcr_kernel_excl; - void mte_clear_page_tags(void *addr); unsigned long mte_copy_tags_from_user(void *to, const void __user *from, unsigned long n); @@ -43,7 +41,6 @@ void mte_copy_page_tags(void *kto, const void *kfrom); void mte_thread_init_user(void); void mte_thread_switch(struct task_struct *next); void mte_suspend_enter(void); -void mte_suspend_exit(void); long set_mte_ctrl(struct task_struct *task, unsigned long arg); long get_mte_ctrl(struct task_struct *task); int mte_ptrace_copy_tags(struct task_struct *child, long request, @@ -72,9 +69,6 @@ static inline void mte_thread_switch(struct task_struct *next) static inline void mte_suspend_enter(void) { } -static inline void mte_suspend_exit(void) -{ -} static inline long set_mte_ctrl(struct task_struct *task, unsigned long arg) { return 0; @@ -94,22 +88,28 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. */ -DECLARE_STATIC_KEY_FALSE(mte_async_mode); +DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); -static inline bool system_uses_mte_async_mode(void) +static inline bool system_uses_mte_async_or_asymm_mode(void) { - return static_branch_unlikely(&mte_async_mode); + return static_branch_unlikely(&mte_async_or_asymm_mode); } void mte_check_tfsr_el1(void); static inline void mte_check_tfsr_entry(void) { + if (!system_supports_mte()) + return; + mte_check_tfsr_el1(); } static inline void mte_check_tfsr_exit(void) { + if (!system_supports_mte()) + return; + /* * The asynchronous faults are sync'ed automatically with * TFSR_EL1 on kernel entry but for exit an explicit dsb() @@ -121,7 +121,7 @@ static inline void mte_check_tfsr_exit(void) mte_check_tfsr_el1(); } #else -static inline bool system_uses_mte_async_mode(void) +static inline bool system_uses_mte_async_or_asymm_mode(void) { return false; } diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 3c6a7f5988b127e54b14d422489feda06d03f847..27cc643d05095d657e9855afe7e4d564f74a8b3b 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -27,7 +27,7 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) static inline void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmdp) { - __pud_populate(pudp, __pa(pmdp), PMD_TYPE_TABLE); + __pud_populate(pudp, __pa(pmdp), PUD_TYPE_TABLE); } #else static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) @@ -45,7 +45,7 @@ static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4dp, pud_t *pudp) { - __p4d_populate(p4dp, __pa(pudp), PUD_TYPE_TABLE); + __p4d_populate(p4dp, __pa(pudp), P4D_TYPE_TABLE); } #else static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 42442a0ae2ab9b3d37afd3f2cc3ae920c37f76db..e64e77a345b27b6678218f919bdffd3f76fc4677 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -94,6 +94,15 @@ /* * Hardware page table definitions. * + * Level 0 descriptor (P4D). + */ +#define P4D_TYPE_TABLE (_AT(p4dval_t, 3) << 0) +#define P4D_TABLE_BIT (_AT(p4dval_t, 1) << 1) +#define P4D_TYPE_MASK (_AT(p4dval_t, 3) << 0) +#define P4D_TYPE_SECT (_AT(p4dval_t, 1) << 0) +#define P4D_SECT_RDONLY (_AT(p4dval_t, 1) << 7) /* AP[2] */ + +/* * Level 1 descriptor (PUD). */ #define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 5c1d05891ed8c1ca7f1a69c5cd452b60c4ac571a..e4796c3ad19acf04efecdd07ff1b15758b780cba 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -69,9 +69,15 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; * page table entry, taking care of 52-bit addresses. */ #ifdef CONFIG_ARM64_PA_BITS_52 -#define __pte_to_phys(pte) \ - ((pte_val(pte) & PTE_ADDR_LOW) | ((pte_val(pte) & PTE_ADDR_HIGH) << 36)) -#define __phys_to_pte_val(phys) (((phys) | ((phys) >> 36)) & PTE_ADDR_MASK) +static inline phys_addr_t __pte_to_phys(pte_t pte) +{ + return (pte_val(pte) & PTE_ADDR_LOW) | + ((pte_val(pte) & PTE_ADDR_HIGH) << 36); +} +static inline pteval_t __phys_to_pte_val(phys_addr_t phys) +{ + return (phys | (phys >> 36)) & PTE_ADDR_MASK; +} #else #define __pte_to_phys(pte) (pte_val(pte) & PTE_ADDR_MASK) #define __phys_to_pte_val(phys) (phys) @@ -1005,23 +1011,13 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * page after fork() + CoW for pfn mappings. We don't always have a * hardware-managed access flag on arm64. */ -static inline bool arch_faults_on_old_pte(void) -{ - WARN_ON(preemptible()); - - return !cpu_has_hw_af(); -} -#define arch_faults_on_old_pte arch_faults_on_old_pte +#define arch_has_hw_pte_young cpu_has_hw_af /* * Experimentally, it's cheap to set the access flag in hardware and we * benefit from prefaulting mappings as 'old' to start with. */ -static inline bool arch_wants_old_prefaulted_pte(void) -{ - return !arch_faults_on_old_pte(); -} -#define arch_wants_old_prefaulted_pte arch_wants_old_prefaulted_pte +#define arch_wants_old_prefaulted_pte cpu_has_hw_af #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h index 80e946b2abee276479fde238d3f01e5d7eecacb3..e83f0982b99c1a6a773401e276b90a1f32fefb0a 100644 --- a/arch/arm64/include/asm/preempt.h +++ b/arch/arm64/include/asm/preempt.h @@ -23,7 +23,7 @@ static inline void preempt_count_set(u64 pc) } while (0) #define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ } while (0) static inline void set_preempt_need_resched(void) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index e85a712f1e3d4696185d962835563c306d8992b4..abeebb43b3fe51706b77fba8049dd7d2598faa8d 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -24,6 +24,7 @@ #define MTE_CTRL_TCF_SYNC (1UL << 16) #define MTE_CTRL_TCF_ASYNC (1UL << 17) +#define MTE_CTRL_TCF_ASYMM (1UL << 18) #ifndef __ASSEMBLY__ @@ -338,13 +339,13 @@ long get_tagged_addr_ctrl(struct task_struct *task); * of header definitions for the use of task_stack_page. */ -#define current_top_of_stack() \ -({ \ - struct stack_info _info; \ - BUG_ON(!on_accessible_stack(current, current_stack_pointer, &_info)); \ - _info.high; \ +#define current_top_of_stack() \ +({ \ + struct stack_info _info; \ + BUG_ON(!on_accessible_stack(current, current_stack_pointer, 1, &_info)); \ + _info.high; \ }) -#define on_thread_stack() (on_task_stack(current, current_stack_pointer, NULL)) +#define on_thread_stack() (on_task_stack(current, current_stack_pointer, 1, NULL)) #endif /* __ASSEMBLY__ */ #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h index 1bce62fa908a37f4f204524635e9422d79ee8255..56f7b1d4d54b9a2d2784926de576fa6caa28306b 100644 --- a/arch/arm64/include/asm/rwonce.h +++ b/arch/arm64/include/asm/rwonce.h @@ -5,7 +5,7 @@ #ifndef __ASM_RWONCE_H #define __ASM_RWONCE_H -#ifdef CONFIG_LTO +#if defined(CONFIG_LTO) && !defined(__ASSEMBLY__) #include #include @@ -66,7 +66,7 @@ }) #endif /* !BUILD_VDSO */ -#endif /* CONFIG_LTO */ +#endif /* CONFIG_LTO && !__ASSEMBLY__ */ #include diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index 63e0b92a5fbb069bc232a93e32e17a39d6df9bd1..8bc30a5c456939e28cba34e07eb8c4a670473f5c 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -42,8 +42,9 @@ unsigned long sdei_arch_get_entry_point(int conduit); struct stack_info; -bool _on_sdei_stack(unsigned long sp, struct stack_info *info); -static inline bool on_sdei_stack(unsigned long sp, +bool _on_sdei_stack(unsigned long sp, unsigned long size, + struct stack_info *info); +static inline bool on_sdei_stack(unsigned long sp, unsigned long size, struct stack_info *info) { if (!IS_ENABLED(CONFIG_VMAP_STACK)) @@ -51,7 +52,7 @@ static inline bool on_sdei_stack(unsigned long sp, if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE)) return false; if (in_nmi()) - return _on_sdei_stack(sp, info); + return _on_sdei_stack(sp, size, info); return false; } diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index e4ad9db53af1d7ad4a32c999bcbfcfdbf70a3b7b..6b2ce1ed1d4c3ae84d326e0204f5ea129e0e7717 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -11,6 +11,7 @@ extern char __alt_instructions[], __alt_instructions_end[]; extern char __hibernate_exit_text_start[], __hibernate_exit_text_end[]; extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; extern char __hyp_text_start[], __hyp_text_end[]; +extern char __hyp_data_start[], __hyp_data_end[]; extern char __hyp_rodata_start[], __hyp_rodata_end[]; extern char __hyp_reloc_begin[], __hyp_reloc_end[]; extern char __hyp_bss_start[], __hyp_bss_end[]; @@ -22,4 +23,9 @@ extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; +static inline size_t entry_tramp_text_size(void) +{ + return __entry_tramp_text_end - __entry_tramp_text_start; +} + #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index f62ca39da6c5a70fef556c1b62fc72e0a10b0739..c04d01dd4457ed498d06d0fc0d81f4d67eef1fd8 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -92,6 +92,9 @@ void spectre_v4_enable_mitigation(const struct arm64_cpu_capabilities *__unused) void spectre_v4_enable_task_mitigation(struct task_struct *tsk); enum mitigation_state arm64_get_meltdown_state(void); - +enum mitigation_state arm64_get_spectre_bhb_state(void); +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +u8 spectre_bhb_loop_affected(int scope); +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); #endif /* __ASSEMBLY__ */ #endif /* __ASM_SPECTRE_H */ diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index eb29b1fe8255eb23d0cf2e9e588dd19ddee5061b..77192df232bfa2fc7bed5d57d3ec48ea32f2886b 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -69,14 +69,14 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); -static inline bool on_stack(unsigned long sp, unsigned long low, - unsigned long high, enum stack_type type, - struct stack_info *info) +static inline bool on_stack(unsigned long sp, unsigned long size, + unsigned long low, unsigned long high, + enum stack_type type, struct stack_info *info) { if (!low) return false; - if (sp < low || sp >= high) + if (sp < low || sp + size < sp || sp + size > high) return false; if (info) { @@ -87,38 +87,38 @@ static inline bool on_stack(unsigned long sp, unsigned long low, return true; } -static inline bool on_irq_stack(unsigned long sp, +static inline bool on_irq_stack(unsigned long sp, unsigned long size, struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); unsigned long high = low + IRQ_STACK_SIZE; - return on_stack(sp, low, high, STACK_TYPE_IRQ, info); + return on_stack(sp, size, low, high, STACK_TYPE_IRQ, info); } static inline bool on_task_stack(const struct task_struct *tsk, - unsigned long sp, + unsigned long sp, unsigned long size, struct stack_info *info) { unsigned long low = (unsigned long)task_stack_page(tsk); unsigned long high = low + THREAD_SIZE; - return on_stack(sp, low, high, STACK_TYPE_TASK, info); + return on_stack(sp, size, low, high, STACK_TYPE_TASK, info); } #ifdef CONFIG_VMAP_STACK DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); -static inline bool on_overflow_stack(unsigned long sp, +static inline bool on_overflow_stack(unsigned long sp, unsigned long size, struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); unsigned long high = low + OVERFLOW_STACK_SIZE; - return on_stack(sp, low, high, STACK_TYPE_OVERFLOW, info); + return on_stack(sp, size, low, high, STACK_TYPE_OVERFLOW, info); } #else -static inline bool on_overflow_stack(unsigned long sp, +static inline bool on_overflow_stack(unsigned long sp, unsigned long size, struct stack_info *info) { return false; } #endif @@ -128,21 +128,21 @@ static inline bool on_overflow_stack(unsigned long sp, * context. */ static inline bool on_accessible_stack(const struct task_struct *tsk, - unsigned long sp, + unsigned long sp, unsigned long size, struct stack_info *info) { if (info) info->type = STACK_TYPE_UNKNOWN; - if (on_task_stack(tsk, sp, info)) + if (on_task_stack(tsk, sp, size, info)) return true; if (tsk != current || preemptible()) return false; - if (on_irq_stack(sp, info)) + if (on_irq_stack(sp, size, info)) return true; - if (on_overflow_stack(sp, info)) + if (on_overflow_stack(sp, size, info)) return true; - if (on_sdei_stack(sp, info)) + if (on_sdei_stack(sp, size, info)) return true; return false; diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6a7b689e6bbe19b6062e931b717ec7cae1a6970f..e706e55c6494d173932c590891887e55402c24e3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -11,6 +11,7 @@ #include #include +#include /* * ARMv8 ARM reserves the following encoding for system registers: @@ -179,6 +180,7 @@ #define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) #define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) +#define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2) #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) @@ -392,7 +394,10 @@ #define SYS_LOREA_EL1 sys_reg(3, 0, 10, 4, 1) #define SYS_LORN_EL1 sys_reg(3, 0, 10, 4, 2) #define SYS_LORC_EL1 sys_reg(3, 0, 10, 4, 3) +#define SYS_MPAMIDR_EL1 sys_reg(3, 0, 10, 4, 4) #define SYS_LORID_EL1 sys_reg(3, 0, 10, 4, 7) +#define SYS_MPAM1_EL1 sys_reg(3, 0, 10, 5, 0) +#define SYS_MPAM0_EL1 sys_reg(3, 0, 10, 5, 1) #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) #define SYS_DISR_EL1 sys_reg(3, 0, 12, 1, 1) @@ -534,6 +539,10 @@ #define SYS_TFSR_EL2 sys_reg(3, 4, 5, 6, 0) #define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0) +#define SYS_MPAMHCR_EL2 sys_reg(3, 4, 10, 4, 0) +#define SYS_MPAMVPMV_EL2 sys_reg(3, 4, 10, 4, 1) +#define SYS_MPAM2_EL2 sys_reg(3, 4, 10, 5, 0) + #define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1) #define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) #define SYS_ICH_AP0R0_EL2 __SYS__AP0Rx_EL2(0) @@ -610,6 +619,7 @@ #define SCTLR_ELx_TCF_NONE (UL(0x0) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_SYNC (UL(0x1) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_ASYNC (UL(0x2) << SCTLR_ELx_TCF_SHIFT) +#define SCTLR_ELx_TCF_ASYMM (UL(0x3) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_TCF_MASK (UL(0x3) << SCTLR_ELx_TCF_SHIFT) #define SCTLR_ELx_ENIA_SHIFT 31 @@ -654,6 +664,7 @@ #define SCTLR_EL1_TCF0_NONE (UL(0x0) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_SYNC (UL(0x1) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_ASYNC (UL(0x2) << SCTLR_EL1_TCF0_SHIFT) +#define SCTLR_EL1_TCF0_ASYMM (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_TCF0_MASK (UL(0x3) << SCTLR_EL1_TCF0_SHIFT) #define SCTLR_EL1_BT1 (BIT(36)) @@ -756,6 +767,21 @@ #define ID_AA64ISAR1_GPI_NI 0x0 #define ID_AA64ISAR1_GPI_IMP_DEF 0x1 +/* id_aa64isar2 */ +#define ID_AA64ISAR2_CLEARBHB_SHIFT 28 +#define ID_AA64ISAR2_RPRES_SHIFT 4 +#define ID_AA64ISAR2_WFXT_SHIFT 0 + +#define ID_AA64ISAR2_RPRES_8BIT 0x0 +#define ID_AA64ISAR2_RPRES_12BIT 0x1 +/* + * Value 0x1 has been removed from the architecture, and is + * reserved, but has not yet been removed from the ARM ARM + * as of ARM DDI 0487G.b. + */ +#define ID_AA64ISAR2_WFXT_NI 0x0 +#define ID_AA64ISAR2_WFXT_SUPPORTED 0x2 + /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 @@ -799,6 +825,7 @@ #define ID_AA64PFR1_MTE_NI 0x0 #define ID_AA64PFR1_MTE_EL0 0x1 #define ID_AA64PFR1_MTE 0x2 +#define ID_AA64PFR1_MTE_ASYMM 0x3 /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 @@ -873,6 +900,8 @@ #endif /* id_aa64mmfr1 */ +#define ID_AA64MMFR1_ECBHB_SHIFT 60 +#define ID_AA64MMFR1_AFP_SHIFT 44 #define ID_AA64MMFR1_ETS_SHIFT 36 #define ID_AA64MMFR1_TWED_SHIFT 32 #define ID_AA64MMFR1_XNX_SHIFT 28 @@ -921,6 +950,7 @@ #define ID_AA64DFR0_PMUVER_8_1 0x4 #define ID_AA64DFR0_PMUVER_8_4 0x5 #define ID_AA64DFR0_PMUVER_8_5 0x6 +#define ID_AA64DFR0_PMUVER_8_7 0x7 #define ID_AA64DFR0_PMUVER_IMP_DEF 0xf #define ID_DFR0_PERFMON_SHIFT 24 @@ -1073,6 +1103,21 @@ #define SYS_GCR_EL1_RRND (BIT(16)) #define SYS_GCR_EL1_EXCL_MASK 0xffffUL +#ifdef CONFIG_KASAN_HW_TAGS +/* + * KASAN always uses a whole byte for its tags. With CONFIG_KASAN_HW_TAGS it + * only uses tags in the range 0xF0-0xFF, which we map to MTE tags 0x0-0xF. + */ +#define __MTE_TAG_MIN (KASAN_TAG_MIN & 0xf) +#define __MTE_TAG_MAX (KASAN_TAG_MAX & 0xf) +#define __MTE_TAG_INCL GENMASK(__MTE_TAG_MAX, __MTE_TAG_MIN) +#define KERNEL_GCR_EL1_EXCL (SYS_GCR_EL1_EXCL_MASK & ~__MTE_TAG_INCL) +#else +#define KERNEL_GCR_EL1_EXCL SYS_GCR_EL1_EXCL_MASK +#endif + +#define KERNEL_GCR_EL1 (SYS_GCR_EL1_RRND | KERNEL_GCR_EL1_EXCL) + /* RGSR_EL1 Definitions */ #define SYS_RGSR_EL1_TAG_MASK 0xfUL #define SYS_RGSR_EL1_SEED_SHIFT 8 @@ -1099,6 +1144,68 @@ #define TRFCR_ELx_ExTRE BIT(1) #define TRFCR_ELx_E0TRE BIT(0) +/* GIC Hypervisor interface registers */ +/* ICH_MISR_EL2 bit definitions */ +#define ICH_MISR_EOI (1 << 0) +#define ICH_MISR_U (1 << 1) + +/* ICH_LR*_EL2 bit definitions */ +#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) + +#define ICH_LR_EOI (1ULL << 41) +#define ICH_LR_GROUP (1ULL << 60) +#define ICH_LR_HW (1ULL << 61) +#define ICH_LR_STATE (3ULL << 62) +#define ICH_LR_PENDING_BIT (1ULL << 62) +#define ICH_LR_ACTIVE_BIT (1ULL << 63) +#define ICH_LR_PHYS_ID_SHIFT 32 +#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) +#define ICH_LR_PRIORITY_SHIFT 48 +#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) + +/* ICH_HCR_EL2 bit definitions */ +#define ICH_HCR_EN (1 << 0) +#define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_NPIE (1 << 3) +#define ICH_HCR_TC (1 << 10) +#define ICH_HCR_TALL0 (1 << 11) +#define ICH_HCR_TALL1 (1 << 12) +#define ICH_HCR_TDIR (1 << 14) +#define ICH_HCR_EOIcount_SHIFT 27 +#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) + +/* ICH_VMCR_EL2 bit definitions */ +#define ICH_VMCR_ACK_CTL_SHIFT 2 +#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) +#define ICH_VMCR_FIQ_EN_SHIFT 3 +#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) +#define ICH_VMCR_CBPR_SHIFT 4 +#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) +#define ICH_VMCR_EOIM_SHIFT 9 +#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) +#define ICH_VMCR_BPR1_SHIFT 18 +#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) +#define ICH_VMCR_BPR0_SHIFT 21 +#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) +#define ICH_VMCR_PMR_SHIFT 24 +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) +#define ICH_VMCR_ENG0_SHIFT 0 +#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) +#define ICH_VMCR_ENG1_SHIFT 1 +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) + +/* ICH_VTR_EL2 bit definitions */ +#define ICH_VTR_PRI_BITS_SHIFT 29 +#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) +#define ICH_VTR_ID_BITS_SHIFT 23 +#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) +#define ICH_VTR_SEIS_SHIFT 22 +#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) +#define ICH_VTR_A3V_SHIFT 21 +#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) +#define ICH_VTR_TDS_SHIFT 19 +#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) + #define ARM64_FEATURE_FIELD_BITS 4 /* Create a mask for the feature bits of the specified feature. */ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 7450d252b8689ff54c5e0f5d851b6237c791c32d..e4a69f5708e8369a2b447c7436806f4e740ae617 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -239,13 +239,13 @@ static inline void __uaccess_enable_tco(void) */ static inline void __uaccess_disable_tco_async(void) { - if (system_uses_mte_async_mode()) + if (system_uses_mte_async_or_asymm_mode()) __uaccess_disable_tco(); } static inline void __uaccess_enable_tco_async(void) { - if (system_uses_mte_async_mode()) + if (system_uses_mte_async_or_asymm_mode()) __uaccess_enable_tco(); } @@ -342,12 +342,22 @@ do { \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ } while (0) +/* + * We must not call into the scheduler between uaccess_enable_not_uao() and + * uaccess_disable_not_uao(). As `x` and `ptr` could contain blocking functions, + * we must evaluate these outside of the critical section. + */ #define __raw_get_user(x, ptr, err) \ do { \ + __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \ + __typeof__(x) __rgu_val; \ __chk_user_ptr(ptr); \ + \ uaccess_enable_not_uao(); \ - __raw_get_mem("ldtr", x, ptr, err); \ + __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err); \ uaccess_disable_not_uao(); \ + \ + (x) = __rgu_val; \ } while (0) #define __get_user_error(x, ptr, err) \ @@ -371,14 +381,22 @@ do { \ #define get_user __get_user +/* + * We must not call into the scheduler between __uaccess_enable_tco_async() and + * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking + * functions, we must evaluate these outside of the critical section. + */ #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ + __typeof__(dst) __gkn_dst = (dst); \ + __typeof__(src) __gkn_src = (src); \ int __gkn_err = 0; \ \ __uaccess_enable_tco_async(); \ - __raw_get_mem("ldr", *((type *)(dst)), \ - (__force type *)(src), __gkn_err); \ + __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ + (__force type *)(__gkn_src), __gkn_err); \ __uaccess_disable_tco_async(); \ + \ if (unlikely(__gkn_err)) \ goto err_label; \ } while (0) @@ -417,11 +435,19 @@ do { \ } \ } while (0) +/* + * We must not call into the scheduler between uaccess_enable_not_uao() and + * uaccess_disable_not_uao(). As `x` and `ptr` could contain blocking functions, + * we must evaluate these outside of the critical section. + */ #define __raw_put_user(x, ptr, err) \ do { \ - __chk_user_ptr(ptr); \ + __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \ + __typeof__(*(ptr)) __rpu_val = (x); \ + __chk_user_ptr(__rpu_ptr); \ + \ uaccess_enable_not_uao(); \ - __raw_put_mem("sttr", x, ptr, err); \ + __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err); \ uaccess_disable_not_uao(); \ } while (0) @@ -446,14 +472,22 @@ do { \ #define put_user __put_user +/* + * We must not call into the scheduler between __uaccess_enable_tco_async() and + * __uaccess_disable_tco_async(). As `dst` and `src` may contain blocking + * functions, we must evaluate these outside of the critical section. + */ #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ + __typeof__(dst) __pkn_dst = (dst); \ + __typeof__(src) __pkn_src = (src); \ int __pkn_err = 0; \ \ __uaccess_enable_tco_async(); \ - __raw_put_mem("str", *((type *)(src)), \ - (__force type *)(dst), __pkn_err); \ + __raw_put_mem("str", *((type *)(__pkn_src)), \ + (__force type *)(__pkn_dst), __pkn_err); \ __uaccess_disable_tco_async(); \ + \ if (unlikely(__pkn_err)) \ goto err_label; \ } while(0) diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index b3b2019f8d16bbeef51948c249dcfb6dbd97a0fb..3cb206aea3db108cd7c41228af95b4fa26f43691 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 441 +#define __NR_compat_syscalls 449 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 107f08e03b9fdc4d54b41c7fca707727e1524c92..7ae50cc6cd1cd6a0a5eb9a1a5694c9e42865306d 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -889,6 +889,8 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) __SYSCALL(__NR_faccessat2, sys_faccessat2) #define __NR_process_madvise 440 __SYSCALL(__NR_process_madvise, sys_process_madvise) +#define __NR_process_mrelease 448 +__SYSCALL(__NR_process_mrelease, sys_process_mrelease) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h new file mode 100644 index 0000000000000000000000000000000000000000..f64613a96d5300de67dba128d13e091298967512 --- /dev/null +++ b/arch/arm64/include/asm/vectors.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 ARM Ltd. + */ +#ifndef __ASM_VECTORS_H +#define __ASM_VECTORS_H + +#include +#include + +#include + +extern char vectors[]; +extern char tramp_vectors[]; +extern char __bp_harden_el1_vectors[]; + +/* + * Note: the order of this enum corresponds to two arrays in entry.S: + * tramp_vecs and __bp_harden_el1_vectors. By default the canonical + * 'full fat' vectors are used directly. + */ +enum arm64_bp_harden_el1_vectors { +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + /* + * Perform the BHB loop mitigation, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_LOOP, + + /* + * Make the SMC call for firmware mitigation, before branching to the + * canonical vectors. + */ + EL1_VECTOR_BHB_FW, + + /* + * Use the ClearBHB instruction, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_CLEAR_INSN, +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + + /* + * Remap the kernel before branching to the canonical vectors. + */ + EL1_VECTOR_KPTI, +}; + +#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +#define EL1_VECTOR_BHB_LOOP -1 +#define EL1_VECTOR_BHB_FW -1 +#define EL1_VECTOR_BHB_CLEAR_INSN -1 +#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + +/* The vectors to use on return from EL0. e.g. to remap the kernel */ +DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector); + +#ifndef CONFIG_UNMAP_KERNEL_AT_EL0 +#define TRAMP_VALIAS 0 +#endif + +static inline const char * +arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot) +{ + if (arm64_kernel_unmapped_at_el0()) + return (char *)TRAMP_VALIAS + SZ_2K * slot; + + WARN_ON_ONCE(slot == EL1_VECTOR_KPTI); + + return __bp_harden_el1_vectors + SZ_2K * slot; +} + +#endif /* __ASM_VECTORS_H */ diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 7379f35ae2c654c27e78588af298871eca4450ef..8bc48dec69528e0a6256e3ab83da690452ebae67 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -72,6 +72,12 @@ void __hyp_reset_vectors(void); DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); +static inline bool is_pkvm_initialized(void) +{ + return IS_ENABLED(CONFIG_KVM) && + static_branch_likely(&kvm_protected_mode_initialized); +} + /* Reports the availability of HYP mode */ static inline bool is_hyp_mode_available(void) { @@ -79,8 +85,7 @@ static inline bool is_hyp_mode_available(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return true; return (__boot_cpu_mode[0] == BOOT_CPU_MODE_EL2 && @@ -94,8 +99,7 @@ static inline bool is_hyp_mode_mismatched(void) * If KVM protected mode is initialized, all CPUs must have been booted * in EL2. Avoid checking __boot_cpu_mode as CPUs now come up in EL1. */ - if (IS_ENABLED(CONFIG_KVM) && - static_branch_likely(&kvm_protected_mode_initialized)) + if (is_pkvm_initialized()) return false; return __boot_cpu_mode[0] != __boot_cpu_mode[1]; @@ -111,6 +115,9 @@ static __always_inline bool has_vhe(void) /* * Code only run in VHE/NVHE hyp context can assume VHE is present or * absent. Otherwise fall back to caps. + * This allows the compiler to discard VHE-specific code from the + * nVHE object, reducing the number of external symbol references + * needed to link. */ if (is_vhe_hyp_code()) return true; diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 2ca708ab9b20b0c647c6839d682f5de93cf1541c..ac64f379e0ef1e07d881b9ecad2bcdffe75004a9 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -1,4 +1,13 @@ #ifndef _ASM_ARM64_VMALLOC_H #define _ASM_ARM64_VMALLOC_H +#include +#include + +#define arch_vmap_pgprot_tagged arch_vmap_pgprot_tagged +static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) +{ + return pgprot_tagged(prot); +} + #endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h index 894e031b28d2852df9836875fe5c6103eb05f05f..20873099c035c8bcaf6071e0344fde2f248a08d2 100644 --- a/arch/arm64/include/asm/vmap_stack.h +++ b/arch/arm64/include/asm/vmap_stack.h @@ -17,10 +17,13 @@ */ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node) { + void *p; + BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); - return __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node, + p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node, __builtin_return_address(0)); + return kasan_reset_tag(p); } #endif /* __ASM_VMAP_STACK_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index b8f41aa234ee1678ea38c9188ecafd0d47520506..99cb5d383048dd8dedb08f51be0d3951dc6b0f4f 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -75,5 +75,9 @@ #define HWCAP2_RNG (1 << 16) #define HWCAP2_BTI (1 << 17) #define HWCAP2_MTE (1 << 18) +#define HWCAP2_ECV (1 << 19) +#define HWCAP2_AFP (1 << 20) +#define HWCAP2_RPRES (1 << 21) +#define HWCAP2_MTE3 (1 << 22) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index b3edde68bc3e013c66d3272e8848a090800362d3..0c93ab42164718a7eb9dc1d52b5f97dacfce783e 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3) +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1 +#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2 + /* SVE registers */ #define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT) @@ -413,6 +418,22 @@ struct kvm_arm_copy_mte_tags { #define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS #define KVM_PSCI_RET_DENIED PSCI_RET_DENIED +/* arm64-specific kvm_run::system_event flags */ +/* + * Reset caused by a PSCI v1.1 SYSTEM_RESET2 call. + * Valid only when the system event has a type of KVM_SYSTEM_EVENT_RESET. + */ +#define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0) + +/* Protected KVM */ +#define KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA 0 +#define KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO 1 + +struct kvm_protected_vm_info { + __u64 firmware_size; + __u64 __reserved[7]; +}; + #endif #endif /* __ARM_KVM_H__ */ diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 7eef8bead7ad13c5fe7100ee0c84bfeb5e71f7a8..05e3851a5d0d70fac2ca9bf383b5e1662a5a7f6d 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -109,7 +109,6 @@ int main(void) #ifdef CONFIG_KVM DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); - DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_cpu_context, regs)); DEFINE(CPU_RGSR_EL1, offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1])); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a63428301f423b2b53819de4c56bbb29bdca1c96..e458bf3e9e8d288833fee7b04a2d66f27dccebc6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -342,6 +342,18 @@ static const struct midr_range erratum_1463225[] = { }; #endif +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE +static const struct midr_range tsb_flush_fail_cpus[] = { +#ifdef CONFIG_ARM64_ERRATUM_2067961 + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2054223 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), +#endif + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -466,6 +478,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_spectre_v4, .cpu_enable = spectre_v4_enable_mitigation, }, + { + .desc = "Spectre-BHB", + .capability = ARM64_SPECTRE_BHB, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = is_spectre_bhb_affected, + .cpu_enable = spectre_bhb_enable_mitigation, + }, #ifdef CONFIG_ARM64_ERRATUM_1418040 { .desc = "ARM erratum 1418040", @@ -527,6 +546,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { 0, 0, 1, 0), }, +#endif +#ifdef CONFIG_ARM64_WORKAROUND_TSB_FLUSH_FAILURE + { + .desc = "ARM erratum 2067961 or 2054223", + .capability = ARM64_WORKAROUND_TSB_FLUSH_FAILURE, + ERRATA_MIDR_RANGE_LIST(tsb_flush_fail_cpus), + }, #endif { } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index db4aac06d7054e0b163f5c4d5c420f14261d0af1..e2cbe76c86623511a0df1fbdd5ce2436c8ab81e8 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -65,6 +65,7 @@ #include #include #include +#include #include #include #include @@ -72,6 +73,7 @@ #include #include #include + #include #include #include @@ -82,6 +84,7 @@ #include #include #include +#include #include /* Kernel representation of AT_HWCAP and AT_HWCAP2 */ @@ -107,6 +110,8 @@ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); bool arm64_use_ng_mappings = false; EXPORT_SYMBOL(arm64_use_ng_mappings); +DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; + /* * Permit PER_LINUX32 and execve() of 32-bit binaries even if not all CPUs * support it? @@ -226,6 +231,12 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), @@ -280,7 +291,7 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_ECV_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_FGT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_EXS_SHIFT, 4, 0), /* @@ -326,6 +337,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64mmfr1[] = { + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_AFP_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_ETS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_TWED_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR1_XNX_SHIFT, 4, 0), @@ -569,15 +581,19 @@ static const struct arm64_ftr_bits ftr_raz[] = { ARM64_FTR_END, }; -#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) { \ +#define __ARM64_FTR_REG_OVERRIDE(id_str, id, table, ovr) { \ .sys_id = id, \ .reg = &(struct arm64_ftr_reg){ \ - .name = #id, \ + .name = id_str, \ .override = (ovr), \ .ftr_bits = &((table)[0]), \ }} -#define ARM64_FTR_REG(id, table) ARM64_FTR_REG_OVERRIDE(id, table, &no_override) +#define ARM64_FTR_REG_OVERRIDE(id, table, ovr) \ + __ARM64_FTR_REG_OVERRIDE(#id, id, table, ovr) + +#define ARM64_FTR_REG(id, table) \ + __ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override) struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; @@ -629,6 +645,7 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, &id_aa64isar1_override), + ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -922,6 +939,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); + init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2); init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); @@ -1137,6 +1155,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64isar0, boot->reg_id_aa64isar0); taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu, info->reg_id_aa64isar1, boot->reg_id_aa64isar1); + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu, + info->reg_id_aa64isar2, boot->reg_id_aa64isar2); /* * Differing PARange support is fine as long as all peripherals and @@ -1247,6 +1267,7 @@ u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_AA64MMFR2_EL1); read_sysreg_case(SYS_ID_AA64ISAR0_EL1); read_sysreg_case(SYS_ID_AA64ISAR1_EL1); + read_sysreg_case(SYS_ID_AA64ISAR2_EL1); read_sysreg_case(SYS_CNTFRQ_EL0); read_sysreg_case(SYS_CTR_EL0); @@ -1548,6 +1569,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) int cpu = smp_processor_id(); + if (__this_cpu_read(this_cpu_vector) == vectors) { + const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); + + __this_cpu_write(this_cpu_vector, v); + } + /* * We don't need to rewrite the page-tables if either we've done * it already or we have KASLR enabled and therefore have not @@ -1853,15 +1880,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) #ifdef CONFIG_KVM static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) { - if (kvm_get_mode() != KVM_MODE_PROTECTED) - return false; - - if (is_kernel_in_hyp_mode()) { - pr_warn("Protected KVM not available with VHE\n"); - return false; - } - - return true; + return kvm_get_mode() == KVM_MODE_PROTECTED; } #endif /* CONFIG_KVM */ @@ -2296,6 +2315,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, }, + { + .desc = "Asymmetric MTE Tag Check Fault", + .capability = ARM64_MTE_ASYMM, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_MTE_SHIFT, + .min_field_value = ID_AA64PFR1_MTE_ASYMM, + .sign = FTR_UNSIGNED, + }, #endif /* CONFIG_ARM64_MTE */ { .desc = "RCpc load-acquire (LDAPR)", @@ -2425,7 +2454,11 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { #endif #ifdef CONFIG_ARM64_MTE HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3), #endif /* CONFIG_ARM64_MTE */ + HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), + HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), {}, }; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 8ce33742ad6a42918fd8665cbd752a7078c59458..2a72a34d7852a5773540aaf50f354d0547ac7d39 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -94,6 +94,10 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_RNG] = "rng", [KERNEL_HWCAP_BTI] = "bti", [KERNEL_HWCAP_MTE] = "mte", + [KERNEL_HWCAP_ECV] = "ecv", + [KERNEL_HWCAP_AFP] = "afp", + [KERNEL_HWCAP_RPRES] = "rpres", + [KERNEL_HWCAP_MTE3] = "mte3", }; #ifdef CONFIG_COMPAT @@ -390,6 +394,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); + info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1); info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index a338f40e64d393b2f78d5b01b4bda24930e0db87..67f68c9ef94c43fb918cd493360a802c7134ad90 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -77,11 +77,17 @@ .endm SYM_CODE_START(ftrace_regs_caller) +#ifdef BTI_C + BTI_C +#endif ftrace_regs_entry 1 b ftrace_common SYM_CODE_END(ftrace_regs_caller) SYM_CODE_START(ftrace_caller) +#ifdef BTI_C + BTI_C +#endif ftrace_regs_entry 0 b ftrace_common SYM_CODE_END(ftrace_caller) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5369db4b53a40df7906214976167c974e3d11bb3..9f19e6bb48df01427302704dd8a9e492d06acb08 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -62,18 +62,21 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +.Lventry_start\@: .if \el == 0 -alternative_if ARM64_UNMAP_KERNEL_AT_EL0 + /* + * This must be the first instruction of the EL0 vector entries. It is + * skipped by the trampoline vectors, to trigger the cleanup. + */ + b .Lskip_tramp_vectors_cleanup\@ .if \regsize == 64 mrs x30, tpidrro_el0 msr tpidrro_el0, xzr .else mov x30, xzr .endif -alternative_else_nop_endif +.Lskip_tramp_vectors_cleanup\@: .endif -#endif sub sp, sp, #S_FRAME_SIZE #ifdef CONFIG_VMAP_STACK @@ -120,11 +123,15 @@ alternative_else_nop_endif mrs x0, tpidrro_el0 #endif b el\()\el\()_\label +.org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm - .macro tramp_alias, dst, sym + .macro tramp_alias, dst, sym, tmp mov_q \dst, TRAMP_VALIAS - add \dst, \dst, #(\sym - .entry.tramp.text) + adr_l \tmp, \sym + add \dst, \dst, \tmp + adr_l \tmp, .entry.tramp.text + sub \dst, \dst, \tmp .endm /* @@ -141,7 +148,7 @@ alternative_cb_end tbnz \tmp2, #TIF_SSBD, .L__asm_ssbd_skip\@ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state -alternative_cb spectre_v4_patch_fw_mitigation_conduit +alternative_cb smccc_patch_fw_mitigation_conduit nop // Patched to SMC/HVC #0 alternative_cb_end .L__asm_ssbd_skip\@: @@ -193,21 +200,20 @@ alternative_else_nop_endif .macro mte_set_kernel_gcr, tmp, tmp2 #ifdef CONFIG_KASAN_HW_TAGS -alternative_if_not ARM64_MTE +alternative_cb kasan_hw_tags_enable b 1f -alternative_else_nop_endif - ldr_l \tmp, gcr_kernel_excl - - mte_set_gcr \tmp, \tmp2 +alternative_cb_end + mov \tmp, KERNEL_GCR_EL1 + msr_s SYS_GCR_EL1, \tmp 1: #endif .endm .macro mte_set_user_gcr, tsk, tmp, tmp2 -#ifdef CONFIG_ARM64_MTE -alternative_if_not ARM64_MTE +#ifdef CONFIG_KASAN_HW_TAGS +alternative_cb kasan_hw_tags_enable b 1f -alternative_else_nop_endif +alternative_cb_end ldr \tmp, [\tsk, #THREAD_MTE_CTRL] mte_set_gcr \tmp, \tmp2 @@ -451,21 +457,26 @@ alternative_else_nop_endif ldp x24, x25, [sp, #16 * 12] ldp x26, x27, [sp, #16 * 13] ldp x28, x29, [sp, #16 * 14] - ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp .if \el == 0 -alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 +alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp + eret +alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 bne 4f - msr far_el1, x30 - tramp_alias x30, tramp_exit_native + msr far_el1, x29 + tramp_alias x30, tramp_exit_native, x29 br x30 4: - tramp_alias x30, tramp_exit_compat + tramp_alias x30, tramp_exit_compat, x29 br x30 #endif .else + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp + /* Ensure any device/NC reads complete */ alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 @@ -862,12 +873,6 @@ SYM_CODE_END(ret_to_user) .popsection // .entry.text -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -/* - * Exception vectors trampoline. - */ - .pushsection ".entry.tramp.text", "ax" - // Move from tramp_pg_dir to swapper_pg_dir .macro tramp_map_kernel, tmp mrs \tmp, ttbr1_el1 @@ -901,12 +906,47 @@ alternative_else_nop_endif */ .endm - .macro tramp_ventry, regsize = 64 + .macro tramp_data_page dst + adr_l \dst, .entry.tramp.text + sub \dst, \dst, PAGE_SIZE + .endm + + .macro tramp_data_read_var dst, var +#ifdef CONFIG_RANDOMIZE_BASE + tramp_data_page \dst + add \dst, \dst, #:lo12:__entry_tramp_data_\var + ldr \dst, [\dst] +#else + ldr \dst, =\var +#endif + .endm + +#define BHB_MITIGATION_NONE 0 +#define BHB_MITIGATION_LOOP 1 +#define BHB_MITIGATION_FW 2 +#define BHB_MITIGATION_INSN 3 + + .macro tramp_ventry, vector_start, regsize, kpti, bhb .align 7 1: .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif + + .if \bhb == BHB_MITIGATION_LOOP + /* + * This sequence must appear before the first indirect branch. i.e. the + * ret out of tramp_ventry. It appears here because x30 is free. + */ + __mitigate_spectre_bhb_loop x30 + .endif // \bhb == BHB_MITIGATION_LOOP + + .if \bhb == BHB_MITIGATION_INSN + clearbhb + isb + .endif // \bhb == BHB_MITIGATION_INSN + + .if \kpti == 1 /* * Defend against branch aliasing attacks by pushing a dummy * entry onto the return stack and using a RET instruction to @@ -916,46 +956,75 @@ alternative_else_nop_endif b . 2: tramp_map_kernel x30 -#ifdef CONFIG_RANDOMIZE_BASE - adr x30, tramp_vectors + PAGE_SIZE alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 - ldr x30, [x30] -#else - ldr x30, =vectors -#endif + tramp_data_read_var x30, vectors alternative_if_not ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM - prfm plil1strm, [x30, #(1b - tramp_vectors)] + prfm plil1strm, [x30, #(1b - \vector_start)] alternative_else_nop_endif + msr vbar_el1, x30 - add x30, x30, #(1b - tramp_vectors) isb + .else + ldr x30, =vectors + .endif // \kpti == 1 + + .if \bhb == BHB_MITIGATION_FW + /* + * The firmware sequence must appear before the first indirect branch. + * i.e. the ret out of tramp_ventry. But it also needs the stack to be + * mapped to save/restore the registers the SMC clobbers. + */ + __mitigate_spectre_bhb_fw + .endif // \bhb == BHB_MITIGATION_FW + + add x30, x30, #(1b - \vector_start + 4) ret +.org 1b + 128 // Did we overflow the ventry slot? .endm .macro tramp_exit, regsize = 64 - adr x30, tramp_vectors + tramp_data_read_var x30, this_cpu_vector + this_cpu_offset x29 + ldr x30, [x30, x29] + msr vbar_el1, x30 - tramp_unmap_kernel x30 + ldr lr, [sp, #S_LR] + tramp_unmap_kernel x29 .if \regsize == 64 - mrs x30, far_el1 + mrs x29, far_el1 .endif + add sp, sp, #S_FRAME_SIZE // restore sp eret sb .endm - .align 11 -SYM_CODE_START_NOALIGN(tramp_vectors) + .macro generate_tramp_vector, kpti, bhb +.Lvector_start\@: .space 0x400 - tramp_ventry - tramp_ventry - tramp_ventry - tramp_ventry + .rept 4 + tramp_ventry .Lvector_start\@, 64, \kpti, \bhb + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, \kpti, \bhb + .endr + .endm - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * Exception vectors trampoline. + * The order must match __bp_harden_el1_vectors and the + * arm64_bp_harden_el1_vectors enum. + */ + .pushsection ".entry.tramp.text", "ax" + .align 11 +SYM_CODE_START_NOALIGN(tramp_vectors) +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_INSN +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE SYM_CODE_END(tramp_vectors) SYM_CODE_START(tramp_exit_native) @@ -972,12 +1041,56 @@ SYM_CODE_END(tramp_exit_compat) .pushsection ".rodata", "a" .align PAGE_SHIFT SYM_DATA_START(__entry_tramp_data_start) +__entry_tramp_data_vectors: .quad vectors +#ifdef CONFIG_ARM_SDE_INTERFACE +__entry_tramp_data___sdei_asm_handler: + .quad __sdei_asm_handler +#endif /* CONFIG_ARM_SDE_INTERFACE */ +__entry_tramp_data_this_cpu_vector: + .quad this_cpu_vector SYM_DATA_END(__entry_tramp_data_start) .popsection // .rodata #endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +/* + * Exception vectors for spectre mitigations on entry from EL1 when + * kpti is not in use. + */ + .macro generate_el1_vector, bhb +.Lvector_start\@: + kernel_ventry 1, sync_invalid // Synchronous EL1t + kernel_ventry 1, irq_invalid // IRQ EL1t + kernel_ventry 1, fiq_invalid // FIQ EL1t + kernel_ventry 1, error_invalid // Error EL1t + + kernel_ventry 1, sync // Synchronous EL1h + kernel_ventry 1, irq // IRQ EL1h + kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, error // Error EL1h + + .rept 4 + tramp_ventry .Lvector_start\@, 64, 0, \bhb + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, 0, \bhb + .endr + .endm + +/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */ + .pushsection ".entry.text", "ax" + .align 11 +SYM_CODE_START(__bp_harden_el1_vectors) +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_el1_vector bhb=BHB_MITIGATION_LOOP + generate_el1_vector bhb=BHB_MITIGATION_FW + generate_el1_vector bhb=BHB_MITIGATION_INSN +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ +SYM_CODE_END(__bp_harden_el1_vectors) + .popsection + + /* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. On entry: @@ -1067,13 +1180,7 @@ SYM_CODE_START(__sdei_asm_entry_trampoline) */ 1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] -#ifdef CONFIG_RANDOMIZE_BASE - adr x4, tramp_vectors + PAGE_SIZE - add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler - ldr x4, [x4] -#else - ldr x4, =__sdei_asm_handler -#endif + tramp_data_read_var x4, __sdei_asm_handler br x4 SYM_CODE_END(__sdei_asm_entry_trampoline) NOKPROBE(__sdei_asm_entry_trampoline) @@ -1096,13 +1203,6 @@ SYM_CODE_END(__sdei_asm_exit_trampoline) NOKPROBE(__sdei_asm_exit_trampoline) .ltorg .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE -.pushsection ".rodata", "a" -SYM_DATA_START(__sdei_asm_trampoline_next_handler) - .quad __sdei_asm_handler -SYM_DATA_END(__sdei_asm_trampoline_next_handler) -.popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* @@ -1210,7 +1310,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline + tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3 br x5 #endif SYM_CODE_END(__sdei_asm_handler) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 5335a6bd1a0d4685cc3d2da397046c6942f9df61..81183a38d6767b79619d5eda385097997fc9679d 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -78,7 +78,11 @@ * indicate whether or not the userland FPSIMD state of the current task is * present in the registers. The flag is set unless the FPSIMD registers of this * CPU currently contain the most recent userland FPSIMD state of the current - * task. + * task. If the task is behaving as a VMM, then this is will be managed by + * KVM which will clear it to indicate that the vcpu FPSIMD state is currently + * loaded on the CPU, allowing the state to be saved if a FPSIMD-aware + * softirq kicks in. Upon vcpu_put(), KVM will save the vcpu FP state and + * flag the register state as invalid. * * In order to allow softirq handlers to use FPSIMD, kernel_neon_begin() may * save the task's FPSIMD context back to task_struct from softirq context. diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 3ef853a792f3f656694e2dcd039c9f8089b53604..f2f6f4150bdc624c0d7dd73646889eebde9a8c3a 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -94,7 +94,6 @@ static const struct { char alias[FTR_ALIAS_NAME_LEN]; char feature[FTR_ALIAS_OPTION_LEN]; } aliases[] __initconst = { - { "kvm-arm.mode=none", "id_aa64mmfr1.vh=0" }, { "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" }, { "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" }, { "arm64.nobti", "id_aa64pfr1.bt=0" }, diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index c96a9a0043bf4a6d73d90f3d435f2c80b5bbce85..8f674fbab573dd3c089313719282026e95ff2592 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -65,7 +65,13 @@ __efistub__ctype = _ctype; KVM_NVHE_ALIAS(kvm_patch_vector_branch); KVM_NVHE_ALIAS(kvm_update_va_mask); KVM_NVHE_ALIAS(kvm_get_kimage_voffset); +KVM_NVHE_ALIAS(kvm_get__text); +KVM_NVHE_ALIAS(kvm_get__etext); KVM_NVHE_ALIAS(kvm_compute_final_ctr_el0); +KVM_NVHE_ALIAS(spectre_bhb_patch_loop_iter); +KVM_NVHE_ALIAS(spectre_bhb_patch_loop_mitigation_enable); +KVM_NVHE_ALIAS(spectre_bhb_patch_wa3); +KVM_NVHE_ALIAS(spectre_bhb_patch_clearbhb); /* Global kernel state accessed by nVHE hyp code. */ KVM_NVHE_ALIAS(kvm_vgic_global_state); @@ -76,9 +82,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler); /* Vectors installed by hyp-init on reset HVC. */ KVM_NVHE_ALIAS(__hyp_stub_vectors); -/* Kernel symbol used by icache_is_vpipt(). */ -KVM_NVHE_ALIAS(__icache_flags); - /* Kernel symbols needed for cpus_have_final/const_caps checks. */ KVM_NVHE_ALIAS(arm64_const_caps_ready); KVM_NVHE_ALIAS(cpu_hwcap_keys); @@ -98,9 +101,6 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities); KVM_NVHE_ALIAS(__start___kvm_ex_table); KVM_NVHE_ALIAS(__stop___kvm_ex_table); -/* Array containing bases of nVHE per-CPU memory regions. */ -KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); - /* PMU available static key */ KVM_NVHE_ALIAS(kvm_arm_pmu_available); @@ -115,12 +115,6 @@ KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy); KVM_NVHE_ALIAS_HYP(__memset, __pi_memset); #endif -/* Kernel memory sections */ -KVM_NVHE_ALIAS(__start_rodata); -KVM_NVHE_ALIAS(__end_rodata); -KVM_NVHE_ALIAS(__bss_start); -KVM_NVHE_ALIAS(__bss_stop); - /* Hyp memory sections */ KVM_NVHE_ALIAS(__hyp_idmap_text_start); KVM_NVHE_ALIAS(__hyp_idmap_text_end); @@ -128,6 +122,8 @@ KVM_NVHE_ALIAS(__hyp_text_start); KVM_NVHE_ALIAS(__hyp_text_end); KVM_NVHE_ALIAS(__hyp_bss_start); KVM_NVHE_ALIAS(__hyp_bss_end); +KVM_NVHE_ALIAS(__hyp_data_start); +KVM_NVHE_ALIAS(__hyp_data_end); KVM_NVHE_ALIAS(__hyp_rodata_start); KVM_NVHE_ALIAS(__hyp_rodata_end); diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 8997d6445f21841b958b54ed45927ddfd9345c34..cfa2cfde3019d678e32241acd21416891dd74c0e 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -130,15 +130,17 @@ u64 __init kaslr_early_init(void) /* use the top 16 bits to randomize the linear region */ memstart_offset_seed = seed >> 48; - if (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) && + (IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS))) /* - * KASAN does not expect the module region to intersect the - * vmalloc region, since shadow memory is allocated for each - * module at load time, whereas the vmalloc region is shadowed - * by KASAN zero pages. So keep modules out of the vmalloc - * region if KASAN is enabled, and put the kernel well within - * 4 GB of the module region. + * KASAN without KASAN_VMALLOC does not expect the module region + * to intersect the vmalloc region, since shadow memory is + * allocated for each module at load time, whereas the vmalloc + * region is shadowed by KASAN zero pages. So keep modules + * out of the vmalloc region if KASAN is enabled without + * KASAN_VMALLOC, and put the kernel well within 4 GB of the + * module region. */ return offset % SZ_2G; diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index cee0781112d40f9dd52b1fa95ad810c395c9bf3c..2e224435c0249ac28c5eb63f8577250a3ff89f40 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -7,7 +7,6 @@ #include #include #include -#include #include static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc, @@ -291,7 +290,6 @@ static int partition_branch_plt_relas(Elf64_Sym *syms, Elf64_Rela *rela, int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, struct module *mod) { - bool copy_rela_for_fips140 = false; unsigned long core_plts = 0; unsigned long init_plts = 0; Elf64_Sym *syms = NULL; @@ -323,10 +321,6 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, return -ENOEXEC; } - if (IS_ENABLED(CONFIG_CRYPTO_FIPS140) && - !strcmp(mod->name, "fips140")) - copy_rela_for_fips140 = true; - for (i = 0; i < ehdr->e_shnum; i++) { Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset; int nents, numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela); @@ -335,38 +329,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, if (sechdrs[i].sh_type != SHT_RELA) continue; -#ifdef CONFIG_CRYPTO_FIPS140 - if (copy_rela_for_fips140 && - !strcmp(secstrings + dstsec->sh_name, ".rodata")) { - void *p = kmemdup(rels, numrels * sizeof(Elf64_Rela), - GFP_KERNEL); - if (!p) { - pr_err("fips140: failed to allocate .rodata RELA buffer\n"); - return -ENOMEM; - } - mod->arch.rodata_relocations = p; - mod->arch.num_rodata_relocations = numrels; - } -#endif - /* ignore relocations that operate on non-exec sections */ if (!(dstsec->sh_flags & SHF_EXECINSTR)) continue; -#ifdef CONFIG_CRYPTO_FIPS140 - if (copy_rela_for_fips140 && - !strcmp(secstrings + dstsec->sh_name, ".text")) { - void *p = kmemdup(rels, numrels * sizeof(Elf64_Rela), - GFP_KERNEL); - if (!p) { - pr_err("fips140: failed to allocate .text RELA buffer\n"); - return -ENOMEM; - } - mod->arch.text_relocations = p; - mod->arch.num_text_relocations = numrels; - } -#endif - /* * sort branch relocations requiring a PLT by type, symbol index * and addend diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index fe21e0f06492ee5e4328288b191e25d08ef28d08..f2d4bb14bfabe28e7fa79333c6c290bc9a3043c4 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -36,18 +36,20 @@ void *module_alloc(unsigned long size) module_alloc_end = MODULES_END; p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_end, gfp_mask, PAGE_KERNEL, 0, + module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, __builtin_return_address(0)); if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - !IS_ENABLED(CONFIG_KASAN_GENERIC) && - !IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + (IS_ENABLED(CONFIG_KASAN_VMALLOC) || + (!IS_ENABLED(CONFIG_KASAN_GENERIC) && + !IS_ENABLED(CONFIG_KASAN_SW_TAGS)))) /* - * KASAN can only deal with module allocations being served - * from the reserved module region, since the remainder of - * the vmalloc region is already backed by zero shadow pages, - * and punching holes into it is non-trivial. Since the module - * region is not randomized when KASAN is enabled, it is even + * KASAN without KASAN_VMALLOC can only deal with module + * allocations being served from the reserved module region, + * since the remainder of the vmalloc region is already + * backed by zero shadow pages, and punching holes into it + * is non-trivial. Since the module region is not randomized + * when KASAN is enabled without KASAN_VMALLOC, it is even * less likely that the module region gets exhausted, so we * can simply omit this fallback in that case. */ @@ -56,12 +58,13 @@ void *module_alloc(unsigned long size) PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_module_alloc(p, size) < 0)) { + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; } - return p; + /* Memory is intended to be executable, reset the pointer tag. */ + return kasan_reset_tag(p); } enum aarch64_reloc_op { diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 20a40e5a14fa60ff794c34902ac4eeee1066edf2..2b35b706a7baadcfe2ad596e9b759302eed3fd09 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -23,16 +23,15 @@ #include #include -u64 gcr_kernel_excl __ro_after_init; - -static bool report_fault_once = true; - static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred); #ifdef CONFIG_KASAN_HW_TAGS -/* Whether the MTE asynchronous mode is enabled. */ -DEFINE_STATIC_KEY_FALSE(mte_async_mode); -EXPORT_SYMBOL_GPL(mte_async_mode); +/* + * The asynchronous and asymmetric MTE modes have the same behavior for + * store operations. This flag is set when either of these modes is enabled. + */ +DEFINE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); +EXPORT_SYMBOL_GPL(mte_async_or_asymm_mode); #endif static void mte_sync_page_tags(struct page *page, pte_t old_pte, @@ -104,26 +103,6 @@ int memcmp_pages(struct page *page1, struct page *page2) return ret; } -void mte_init_tags(u64 max_tag) -{ - static bool gcr_kernel_excl_initialized; - - if (!gcr_kernel_excl_initialized) { - /* - * The format of the tags in KASAN is 0xFF and in MTE is 0xF. - * This conversion extracts an MTE tag from a KASAN tag. - */ - u64 incl = GENMASK(FIELD_GET(MTE_TAG_MASK >> MTE_TAG_SHIFT, - max_tag), 0); - - gcr_kernel_excl = ~incl & SYS_GCR_EL1_EXCL_MASK; - gcr_kernel_excl_initialized = true; - } - - /* Enable the kernel exclude mask for random tags generation. */ - write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1); -} - static inline void __mte_enable_kernel(const char *mode, unsigned long tcf) { /* Enable MTE Sync Mode for EL1. */ @@ -140,7 +119,7 @@ void mte_enable_kernel_sync(void) * Make sure we enter this function when no PE has set * async mode previously. */ - WARN_ONCE(system_uses_mte_async_mode(), + WARN_ONCE(system_uses_mte_async_or_asymm_mode(), "MTE async mode enabled system wide!"); __mte_enable_kernel("synchronous", SCTLR_ELx_TCF_SYNC); @@ -158,30 +137,41 @@ void mte_enable_kernel_async(void) * mode in between sync and async, this strategy needs * to be reviewed. */ - if (!system_uses_mte_async_mode()) - static_branch_enable(&mte_async_mode); + if (!system_uses_mte_async_or_asymm_mode()) + static_branch_enable(&mte_async_or_asymm_mode); } -#endif -void mte_set_report_once(bool state) +void mte_enable_kernel_asymm(void) { - WRITE_ONCE(report_fault_once, state); -} + if (cpus_have_cap(ARM64_MTE_ASYMM)) { + __mte_enable_kernel("asymmetric", SCTLR_ELx_TCF_ASYMM); -bool mte_report_once(void) -{ - return READ_ONCE(report_fault_once); + /* + * MTE asymm mode behaves as async mode for store + * operations. The mode is set system wide by the + * first PE that executes this function. + * + * Note: If in future KASAN acquires a runtime switching + * mode in between sync and async, this strategy needs + * to be reviewed. + */ + if (!system_uses_mte_async_or_asymm_mode()) + static_branch_enable(&mte_async_or_asymm_mode); + } else { + /* + * If the CPU does not support MTE asymmetric mode the + * kernel falls back on synchronous mode which is the + * default for kasan=on. + */ + mte_enable_kernel_sync(); + } } +#endif #ifdef CONFIG_KASAN_HW_TAGS void mte_check_tfsr_el1(void) { - u64 tfsr_el1; - - if (!system_supports_mte()) - return; - - tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1); + u64 tfsr_el1 = read_sysreg_s(SYS_TFSR_EL1); if (unlikely(tfsr_el1 & SYS_TFSR_EL1_TF1)) { /* @@ -196,6 +186,11 @@ void mte_check_tfsr_el1(void) } #endif +/* + * This is where we actually resolve the system and process MTE mode + * configuration into an actual value in SCTLR_EL1 that affects + * userspace. + */ static void mte_update_sctlr_user(struct task_struct *task) { /* @@ -209,15 +204,50 @@ static void mte_update_sctlr_user(struct task_struct *task) unsigned long pref, resolved_mte_tcf; pref = __this_cpu_read(mte_tcf_preferred); + /* + * If there is no overlap between the system preferred and + * program requested values go with what was requested. + */ resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl; sctlr &= ~SCTLR_EL1_TCF0_MASK; - if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) + /* + * Pick an actual setting. The order in which we check for + * set bits and map into register values determines our + * default order. + */ + if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM) + sctlr |= SCTLR_EL1_TCF0_ASYMM; + else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) sctlr |= SCTLR_EL1_TCF0_ASYNC; else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC) sctlr |= SCTLR_EL1_TCF0_SYNC; task->thread.sctlr_user = sctlr; } +static void mte_update_gcr_excl(struct task_struct *task) +{ + /* + * SYS_GCR_EL1 will be set to current->thread.mte_ctrl value by + * mte_set_user_gcr() in kernel_exit, but only if KASAN is enabled. + */ + if (kasan_hw_tags_enabled()) + return; + + write_sysreg_s( + ((task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) & + SYS_GCR_EL1_EXCL_MASK) | SYS_GCR_EL1_RRND, + SYS_GCR_EL1); +} + +void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); /* Branch -> NOP */ + + if (kasan_hw_tags_enabled()) + *updptr = cpu_to_le32(aarch64_insn_gen_nop()); +} + void mte_thread_init_user(void) { if (!system_supports_mte()) @@ -233,7 +263,11 @@ void mte_thread_init_user(void) void mte_thread_switch(struct task_struct *next) { + if (!system_supports_mte()) + return; + mte_update_sctlr_user(next); + mte_update_gcr_excl(next); /* * Check if an async tag exception occurred at EL1. @@ -262,15 +296,6 @@ void mte_suspend_enter(void) mte_check_tfsr_el1(); } -void mte_suspend_exit(void) -{ - if (!system_supports_mte()) - return; - - sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, gcr_kernel_excl); - isb(); -} - long set_mte_ctrl(struct task_struct *task, unsigned long arg) { u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) & @@ -284,10 +309,22 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) if (arg & PR_MTE_TCF_SYNC) mte_ctrl |= MTE_CTRL_TCF_SYNC; + /* + * If the system supports it and both sync and async modes are + * specified then implicitly enable asymmetric mode. + * Userspace could see a mix of both sync and async anyway due + * to differing or changing defaults on CPUs. + */ + if (cpus_have_cap(ARM64_MTE_ASYMM) && + (arg & PR_MTE_TCF_ASYNC) && + (arg & PR_MTE_TCF_SYNC)) + mte_ctrl |= MTE_CTRL_TCF_ASYMM; + task->thread.mte_ctrl = mte_ctrl; if (task == current) { preempt_disable(); mte_update_sctlr_user(task); + mte_update_gcr_excl(task); update_sctlr_el1(task->thread.sctlr_user); preempt_enable(); } @@ -457,6 +494,8 @@ static ssize_t mte_tcf_preferred_show(struct device *dev, return sysfs_emit(buf, "async\n"); case MTE_CTRL_TCF_SYNC: return sysfs_emit(buf, "sync\n"); + case MTE_CTRL_TCF_ASYMM: + return sysfs_emit(buf, "asymm\n"); default: return sysfs_emit(buf, "???\n"); } @@ -472,6 +511,8 @@ static ssize_t mte_tcf_preferred_store(struct device *dev, tcf = MTE_CTRL_TCF_ASYNC; else if (sysfs_streq(buf, "sync")) tcf = MTE_CTRL_TCF_SYNC; + else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm")) + tcf = MTE_CTRL_TCF_ASYMM; else return -EINVAL; diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c index 88ff471b0bce5f2c49cb49cd38a3603fc952b2aa..86d9f20131723c2cd5fd100d5130c6708d7e1b81 100644 --- a/arch/arm64/kernel/perf_callchain.c +++ b/arch/arm64/kernel/perf_callchain.c @@ -102,7 +102,9 @@ compat_user_backtrace(struct compat_frame_tail __user *tail, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -116,7 +118,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, tail = (struct frame_tail __user *)regs->regs[29]; while (entry->nr < entry->max_stack && - tail && !((unsigned long)tail & 0xf)) + tail && !((unsigned long)tail & 0x7)) tail = user_backtrace(tail, entry); } else { #ifdef CONFIG_COMPAT @@ -147,9 +149,10 @@ static bool callchain_trace(void *data, unsigned long pc) void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe frame; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -160,18 +163,21 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return instruction_pointer(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f4f14a9978ec80a108e6477d4290f70cec8b9563..00bff856d496c3fac5b754d910c7fcfd40fcf118 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -507,34 +507,26 @@ static void entry_task_switch(struct task_struct *next) /* * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT. - * Assuming the virtual counter is enabled at the beginning of times: - * - * - disable access when switching from a 64bit task to a 32bit task - * - enable access when switching from a 32bit task to a 64bit task + * Ensure access is disabled when switching to a 32bit task, ensure + * access is enabled when switching to a 64bit task. */ -static void erratum_1418040_thread_switch(struct task_struct *prev, - struct task_struct *next) +static void erratum_1418040_thread_switch(struct task_struct *next) { - bool prev32, next32; - u64 val; - - if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040)) - return; - - prev32 = is_compat_thread(task_thread_info(prev)); - next32 = is_compat_thread(task_thread_info(next)); - - if (prev32 == next32 || !this_cpu_has_cap(ARM64_WORKAROUND_1418040)) + if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) || + !this_cpu_has_cap(ARM64_WORKAROUND_1418040)) return; - val = read_sysreg(cntkctl_el1); - - if (!next32) - val |= ARCH_TIMER_USR_VCT_ACCESS_EN; + if (is_compat_thread(task_thread_info(next))) + sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0); else - val &= ~ARCH_TIMER_USR_VCT_ACCESS_EN; + sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN); +} - write_sysreg(val, cntkctl_el1); +static void erratum_1418040_new_exec(void) +{ + preempt_disable(); + erratum_1418040_thread_switch(current); + preempt_enable(); } /* @@ -570,7 +562,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev, entry_task_switch(next); uao_thread_switch(next); ssbs_thread_switch(next); - erratum_1418040_thread_switch(prev, next); + erratum_1418040_thread_switch(next); ptrauth_thread_switch_user(next); /* * vendor hook is needed before the dsb(), @@ -664,6 +656,7 @@ void arch_setup_new_exec(void) current->mm->context.flags = mmflags; ptrauth_thread_init_user(); + erratum_1418040_new_exec(); mte_thread_init_user(); if (task_spec_ssb_noexec(current)) { @@ -687,7 +680,8 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) return -EINVAL; if (system_supports_mte()) - valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK; + valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \ + | PR_MTE_TAG_MASK; if (arg & ~valid_mask) return -EINVAL; diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 902e4084c4775251fa77a4aecdb6617969b31784..6d45c63c6454884ac7c13c8dd962d0018e7a7e6a 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -18,15 +18,18 @@ */ #include +#include #include #include #include #include #include +#include #include #include #include +#include #include /* @@ -96,14 +99,51 @@ static bool spectre_v2_mitigations_off(void) return ret; } +static const char *get_bhb_affected_string(enum mitigation_state bhb_state) +{ + switch (bhb_state) { + case SPECTRE_UNAFFECTED: + return ""; + default: + case SPECTRE_VULNERABLE: + return ", but not BHB"; + case SPECTRE_MITIGATED: + return ", BHB"; + } +} + +static bool _unprivileged_ebpf_enabled(void) +{ +#ifdef CONFIG_BPF_SYSCALL + return !sysctl_unprivileged_bpf_disabled; +#else + return false; +#endif +} + ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { + enum mitigation_state bhb_state = arm64_get_spectre_bhb_state(); + const char *bhb_str = get_bhb_affected_string(bhb_state); + const char *v2_str = "Branch predictor hardening"; + switch (spectre_v2_state) { case SPECTRE_UNAFFECTED: - return sprintf(buf, "Not affected\n"); + if (bhb_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "Not affected\n"); + + /* + * Platforms affected by Spectre-BHB can't report + * "Not affected" for Spectre-v2. + */ + v2_str = "CSV2"; + fallthrough; case SPECTRE_MITIGATED: - return sprintf(buf, "Mitigation: Branch predictor hardening\n"); + if (bhb_state == SPECTRE_MITIGATED && _unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str); case SPECTRE_VULNERABLE: fallthrough; default: @@ -554,9 +594,9 @@ void __init spectre_v4_patch_fw_mitigation_enable(struct alt_instr *alt, * Patch a NOP in the Spectre-v4 mitigation code with an SMC/HVC instruction * to call into firmware to adjust the mitigation state. */ -void __init spectre_v4_patch_fw_mitigation_conduit(struct alt_instr *alt, - __le32 *origptr, - __le32 *updptr, int nr_inst) +void __init smccc_patch_fw_mitigation_conduit(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) { u32 insn; @@ -770,3 +810,344 @@ int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) return -ENODEV; } } + +/* + * Spectre BHB. + * + * A CPU is either: + * - Mitigated by a branchy loop a CPU specific number of times, and listed + * in our "loop mitigated list". + * - Mitigated in software by the firmware Spectre v2 call. + * - Has the ClearBHB instruction to perform the mitigation. + * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no + * software mitigation in the vectors is needed. + * - Has CSV2.3, so is unaffected. + */ +static enum mitigation_state spectre_bhb_state; + +enum mitigation_state arm64_get_spectre_bhb_state(void) +{ + return spectre_bhb_state; +} + +enum bhb_mitigation_bits { + BHB_LOOP, + BHB_FW, + BHB_HW, + BHB_INSN, +}; +static unsigned long system_bhb_mitigations; + +/* + * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any + * SCOPE_SYSTEM call will give the right answer. + */ +u8 spectre_bhb_loop_affected(int scope) +{ + u8 k = 0; + static u8 max_bhb_k; + + if (scope == SCOPE_LOCAL_CPU) { + static const struct midr_range spectre_bhb_k32_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, + }; + static const struct midr_range spectre_bhb_k24_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), + {}, + }; + static const struct midr_range spectre_bhb_k8_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + {}, + }; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) + k = 32; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) + k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) + k = 8; + + max_bhb_k = max(max_bhb_k, k); + } else { + k = max_bhb_k; + } + + return k; +} + +static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void) +{ + int ret; + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_3, &res); + + ret = res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + default: + fallthrough; + case SMCCC_RET_NOT_SUPPORTED: + return SPECTRE_VULNERABLE; + } +} + +static bool is_spectre_bhb_fw_affected(int scope) +{ + static bool system_affected; + enum mitigation_state fw_state; + bool has_smccc = arm_smccc_1_1_get_conduit() != SMCCC_CONDUIT_NONE; + static const struct midr_range spectre_bhb_firmware_mitigated_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + {}, + }; + bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(), + spectre_bhb_firmware_mitigated_list); + + if (scope != SCOPE_LOCAL_CPU) + return system_affected; + + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) { + system_affected = true; + return true; + } + + return false; +} + +static bool supports_ecbhb(int scope) +{ + u64 mmfr1; + + if (scope == SCOPE_LOCAL_CPU) + mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + else + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_ECBHB_SHIFT); +} + +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (supports_csv2p3(scope)) + return false; + + if (supports_clearbhb(scope)) + return true; + + if (spectre_bhb_loop_affected(scope)) + return true; + + if (is_spectre_bhb_fw_affected(scope)) + return true; + + return false; +} + +static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) +{ + const char *v = arm64_get_bp_hardening_vector(slot); + + if (slot < 0) + return; + + __this_cpu_write(this_cpu_vector, v); + + /* + * When KPTI is in use, the vectors are switched when exiting to + * user-space. + */ + if (arm64_kernel_unmapped_at_el0()) + return; + + write_sysreg(v, vbar_el1); + isb(); +} + +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) +{ + bp_hardening_cb_t cpu_cb; + enum mitigation_state fw_state, state = SPECTRE_VULNERABLE; + struct bp_hardening_data *data = this_cpu_ptr(&bp_hardening_data); + + if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU)) + return; + + if (arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE) { + /* No point mitigating Spectre-BHB alone. */ + } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { + pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); + } else if (cpu_mitigations_off()) { + pr_info_once("spectre-bhb mitigation disabled by command line option\n"); + } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { + state = SPECTRE_MITIGATED; + set_bit(BHB_HW, &system_bhb_mitigations); + } else if (supports_clearbhb(SCOPE_LOCAL_CPU)) { + /* + * Ensure KVM uses the indirect vector which will have ClearBHB + * added. + */ + if (!data->slot) + data->slot = HYP_VECTOR_INDIRECT; + + this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN); + state = SPECTRE_MITIGATED; + set_bit(BHB_INSN, &system_bhb_mitigations); + } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { + /* + * Ensure KVM uses the indirect vector which will have the + * branchy-loop added. A57/A72-r0 will already have selected + * the spectre-indirect vector, which is sufficient for BHB + * too. + */ + if (!data->slot) + data->slot = HYP_VECTOR_INDIRECT; + + this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP); + state = SPECTRE_MITIGATED; + set_bit(BHB_LOOP, &system_bhb_mitigations); + } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) { + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (fw_state == SPECTRE_MITIGATED) { + /* + * Ensure KVM uses one of the spectre bp_hardening + * vectors. The indirect vector doesn't include the EL3 + * call, so needs upgrading to + * HYP_VECTOR_SPECTRE_INDIRECT. + */ + if (!data->slot || data->slot == HYP_VECTOR_INDIRECT) + data->slot += 1; + + this_cpu_set_vectors(EL1_VECTOR_BHB_FW); + + /* + * The WA3 call in the vectors supersedes the WA1 call + * made during context-switch. Uninstall any firmware + * bp_hardening callback. + */ + cpu_cb = spectre_v2_get_sw_mitigation_cb(); + if (__this_cpu_read(bp_hardening_data.fn) != cpu_cb) + __this_cpu_write(bp_hardening_data.fn, NULL); + + state = SPECTRE_MITIGATED; + set_bit(BHB_FW, &system_bhb_mitigations); + } + } + + update_mitigation_state(&spectre_bhb_state, state); +} + +/* Patched to NOP when enabled */ +void noinstr spectre_bhb_patch_loop_mitigation_enable(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); + + if (test_bit(BHB_LOOP, &system_bhb_mitigations)) + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); +} + +/* Patched to NOP when enabled */ +void noinstr spectre_bhb_patch_fw_mitigation_enabled(struct alt_instr *alt, + __le32 *origptr, + __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 1); + + if (test_bit(BHB_FW, &system_bhb_mitigations)) + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); +} + +/* Patched to correct the immediate */ +void noinstr spectre_bhb_patch_loop_iter(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u8 rd; + u32 insn; + u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM); + + BUG_ON(nr_inst != 1); /* MOV -> MOV */ + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) + return; + + insn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); + insn = aarch64_insn_gen_movewide(rd, loop_count, 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); +} + +/* Patched to mov WA3 when supported */ +void noinstr spectre_bhb_patch_wa3(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u8 rd; + u32 insn; + + BUG_ON(nr_inst != 1); /* MOV -> MOV */ + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) || + !test_bit(BHB_FW, &system_bhb_mitigations)) + return; + + insn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); + + insn = aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_ORR, + AARCH64_INSN_VARIANT_32BIT, + AARCH64_INSN_REG_ZR, rd, + ARM_SMCCC_ARCH_WORKAROUND_3); + if (WARN_ON_ONCE(insn == AARCH64_BREAK_FAULT)) + return; + + *updptr++ = cpu_to_le32(insn); +} + +/* Patched to NOP when not supported */ +void __init spectre_bhb_patch_clearbhb(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + BUG_ON(nr_inst != 2); + + if (test_bit(BHB_INSN, &system_bhb_mitigations)) + return; + + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); + *updptr++ = cpu_to_le32(aarch64_insn_gen_nop()); +} + +#ifdef CONFIG_BPF_SYSCALL +#define EBPF_WARN "Unprivileged eBPF is enabled, data leaks possible via Spectre v2 BHB attacks!\n" +void unpriv_ebpf_notify(int new_state) +{ + if (spectre_v2_state == SPECTRE_VULNERABLE || + spectre_bhb_state != SPECTRE_MITIGATED) + return; + + if (!new_state) + pr_err("WARNING: %s", EBPF_WARN); +} +#endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index bbb94747bfeeacb7683d4672fad1d7a57477c190..d59d8aa44863e796270d750ab2620a1243ffb6c3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -122,7 +122,7 @@ static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) { return ((addr & ~(THREAD_SIZE - 1)) == (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1))) || - on_irq_stack(addr, NULL); + on_irq_stack(addr, sizeof(unsigned long), NULL); } /** diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 3f129b40d0db919bff7d6c41aad24e249481aa39..618a480a90d076611d3d0f8d1073e944af8029f2 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -162,31 +162,33 @@ static int init_sdei_scs(void) return err; } -static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) +static bool on_sdei_normal_stack(unsigned long sp, unsigned long size, + struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); unsigned long high = low + SDEI_STACK_SIZE; - return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info); + return on_stack(sp, size, low, high, STACK_TYPE_SDEI_NORMAL, info); } -static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) +static bool on_sdei_critical_stack(unsigned long sp, unsigned long size, + struct stack_info *info) { unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); unsigned long high = low + SDEI_STACK_SIZE; - return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info); + return on_stack(sp, size, low, high, STACK_TYPE_SDEI_CRITICAL, info); } -bool _on_sdei_stack(unsigned long sp, struct stack_info *info) +bool _on_sdei_stack(unsigned long sp, unsigned long size, struct stack_info *info) { if (!IS_ENABLED(CONFIG_VMAP_STACK)) return false; - if (on_sdei_critical_stack(sp, info)) + if (on_sdei_critical_stack(sp, size, info)) return true; - if (on_sdei_normal_stack(sp, info)) + if (on_sdei_normal_stack(sp, size, info)) return true; return false; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a6ec917a12407612f1382557196b51239a9c279f..549071115e1b98174705e51c816a88b97bee4836 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ #include #include #include +#include #include #include @@ -453,3 +455,9 @@ static int __init register_arm64_panic_block(void) return 0; } device_initcall(register_arm64_panic_block); + +void kvm_arm_init_hyp_services(void) +{ + kvm_init_ioremap_services(); + kvm_init_memshare_services(); +} diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 1bcf403a8e67370117a877aa724828600ea7d7b4..fd9c0ce1b64576eb309b4b75adc6e03a63d96e34 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -232,7 +232,6 @@ asmlinkage notrace void secondary_start_kernel(void) init_gic_priority_masking(); rcu_cpu_starting(cpu); - preempt_disable(); trace_hardirqs_off(); /* @@ -1146,7 +1145,8 @@ bool cpus_are_stuck_in_kernel(void) { bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die()); - return !!cpus_stuck_in_kernel || smp_spin_tables; + return !!cpus_stuck_in_kernel || smp_spin_tables || + is_protected_kvm_enabled(); } int nr_ipi_get(void) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index a3367829b154f1e317f2ebc4f69e35bb587e0f1b..954cd44be81c56901b14b59ea44f8eb78b52db99 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -44,13 +44,13 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) unsigned long fp = frame->fp; struct stack_info info; - if (fp & 0xf) + if (fp & 0x7) return -EINVAL; if (!tsk) tsk = current; - if (!on_accessible_stack(tsk, fp, &info)) + if (!on_accessible_stack(tsk, fp, 16, &info)) return -EINVAL; if (test_bit(info.type, frame->stacks_done)) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 1e8dc6f7d178e29f0c1f73c79c5f01561a5526a6..ea019a852adc803dfb8dfd51760a4d53682adf46 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -76,7 +76,6 @@ void notrace __cpu_suspend_exit(void) spectre_v4_enable_mitigation(NULL); /* Restore additional feature-specific configuration */ - mte_suspend_exit(); ptrauth_suspend_exit(); } diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 90f13cbad88418952359f5dc91c1f5bef58cfc05..57b28f1e5d97c7dcff1a8b8ee21189e66fb36b8d 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -10,18 +10,15 @@ include $(srctree)/lib/vdso/Makefile # Same as cc-*option, but using CC_COMPAT instead of CC ifeq ($(CONFIG_CC_IS_CLANG), y) -CC_COMPAT_CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) - CC_COMPAT ?= $(CC) -CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS) - -ifneq ($(LLVM),) -LD_COMPAT ?= $(LD) +CC_COMPAT += --target=arm-linux-gnueabi else -LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld +CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc endif + +ifeq ($(CONFIG_LD_IS_LLD), y) +LD_COMPAT ?= $(LD) else -CC_COMPAT ?= $(CROSS_COMPILE_COMPAT)gcc LD_COMPAT ?= $(CROSS_COMPILE_COMPAT)ld endif @@ -40,16 +37,13 @@ cc32-as-instr = $(call try-run,\ # As a result we set our own flags here. # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile -VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc -isystem $(shell $(CC_COMPAT) -print-file-name=include) +VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc +VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null) VDSO_CPPFLAGS += $(LINUXINCLUDE) # Common C and assembly flags # From top-level Makefile VDSO_CAFLAGS := $(VDSO_CPPFLAGS) -ifneq ($(shell $(CC_COMPAT) --version 2>&1 | head -n 1 | grep clang),) -VDSO_CAFLAGS += --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%)) -endif - VDSO_CAFLAGS += $(call cc32-option,-fno-PIE) ifdef CONFIG_DEBUG_INFO VDSO_CAFLAGS += -g diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index b00961be177d0f8bdb23688c5d33f91bc2fdaf0e..ec52e85131f3765942f8b963bd0c1e359f5a84f3 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -13,7 +13,7 @@ *(__kvm_ex_table) \ __stop___kvm_ex_table = .; -#define HYPERVISOR_DATA_SECTIONS \ +#define HYPERVISOR_RODATA_SECTIONS \ HYP_SECTION_NAME(.rodata) : { \ . = ALIGN(PAGE_SIZE); \ __hyp_rodata_start = .; \ @@ -23,6 +23,15 @@ __hyp_rodata_end = .; \ } +#define HYPERVISOR_DATA_SECTION \ + HYP_SECTION_NAME(.data) : { \ + . = ALIGN(PAGE_SIZE); \ + __hyp_data_start = .; \ + *(HYP_SECTION_NAME(.data)) \ + . = ALIGN(PAGE_SIZE); \ + __hyp_data_end = .; \ + } + #define HYPERVISOR_PERCPU_SECTION \ . = ALIGN(PAGE_SIZE); \ HYP_SECTION_NAME(.data..percpu) : { \ @@ -51,7 +60,8 @@ #define SBSS_ALIGN PAGE_SIZE #else /* CONFIG_KVM */ #define HYPERVISOR_EXTABLE -#define HYPERVISOR_DATA_SECTIONS +#define HYPERVISOR_RODATA_SECTIONS +#define HYPERVISOR_DATA_SECTION #define HYPERVISOR_PERCPU_SECTION #define HYPERVISOR_RELOC_SECTION #define SBSS_ALIGN 0 @@ -181,7 +191,7 @@ SECTIONS /* everything from this point to __init_begin will be marked RO NX */ RO_DATA(PAGE_SIZE) - HYPERVISOR_DATA_SECTIONS + HYPERVISOR_RODATA_SECTIONS idmap_pg_dir = .; . += IDMAP_DIR_SIZE; @@ -262,6 +272,8 @@ SECTIONS _sdata = .; RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) + HYPERVISOR_DATA_SECTION + /* * Data written with the MMU off but read with the MMU on requires * cache lines to be invalidated, discarding up to a Cache Writeback @@ -330,7 +342,7 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) <= SZ_4K, "Hibernate exit text too big or misaligned") #endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE, "Entry trampoline text too big") #endif #ifdef CONFIG_KVM diff --git a/arch/arm64/kvm/.gitignore b/arch/arm64/kvm/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..6182aefb830241da0cdae7354f938b650e7535af --- /dev/null +++ b/arch/arm64/kvm/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +hyp_constants.h diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 7634c1ecc0ed6f10577d6d02c8eab099d54f08fd..408dc0ff661e0626a98b40ce5a065f068b573520 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -4,6 +4,7 @@ # source "virt/lib/Kconfig" +source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" @@ -19,7 +20,7 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" - depends on OF + depends on HAVE_KVM # for TASKSTATS/TASK_DELAY_ACCT: depends on NET && MULTIUSER select MMU_NOTIFIER @@ -45,12 +46,9 @@ menuconfig KVM If unsure, say N. -if KVM - -source "virt/kvm/Kconfig" - config NVHE_EL2_DEBUG bool "Debug mode for non-VHE EL2 object" + depends on KVM help Say Y here to enable the debug mode for the non-VHE KVM EL2 object. Failure reports will BUG() in the hypervisor. This is intended for @@ -58,6 +56,19 @@ config NVHE_EL2_DEBUG If unsure, say N. -endif # KVM +config KVM_S2MPU + bool "Stage-2 Memory Protection Unit support" + depends on KVM + help + Support for the Stage-2 Memory Protection Unit (S2MPU) and Stream + Security Mapping Table (SSMT) devices in KVM. This allows the + hypervisor to restrict DMA access to its memory and the memory of + protected guests. + +config TEST_KVM_S2MPU + tristate "Test kernel module for S2MPU" + depends on KVM_S2MPU + help + Kernel module for kselftests that exercises the KVM S2MPU driver. endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 589921392cb12b5bca9631f85984c0f135879f84..fd09abbe25a79411943787f9822e7e0fa8d4b60a 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -8,14 +8,14 @@ ccflags-y += -I $(srctree)/$(src) KVM=../../../virt/kvm obj-$(CONFIG_KVM) += kvm.o -obj-$(CONFIG_KVM) += hyp/ +obj-$(CONFIG_KVM) += hyp/ iommu/ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ - $(KVM)/vfio.o $(KVM)/irqchip.o \ + $(KVM)/vfio.o $(KVM)/irqchip.o iommu.o \ arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o \ - vgic-sys-reg-v3.o fpsimd.o pmu.o \ + vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \ arch_timer.o trng.o\ vgic/vgic.o vgic/vgic-init.o \ vgic/vgic-irqfd.o vgic/vgic-v2.o \ @@ -25,3 +25,19 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ vgic/vgic-its.o vgic/vgic-debug.o kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o + +always-y := hyp_constants.h hyp-constants.s + +define rule_gen_hyp_constants + $(call filechk,offsets,__HYP_CONSTANTS_H__) +endef + +CFLAGS_hyp-constants.o = -I $(srctree)/$(src)/hyp/include -D__KVM_NVHE_HYPERVISOR__=1 +$(obj)/hyp-constants.s: $(src)/hyp/hyp-constants.c FORCE + $(call if_changed_dep,cc_s_c) + +$(obj)/hyp_constants.h: $(obj)/hyp-constants.s FORCE + $(call if_changed_rule,gen_hyp_constants) + +obj-kvm := $(addprefix $(obj)/, $(kvm-y)) +$(obj-kvm): $(obj)/hyp_constants.h diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c index 3df67c127489813b8c2e5cb7e1c5285fec5388fb..b81944f25fc8b0fc9cd797094409c8ec087736fd 100644 --- a/arch/arm64/kvm/arch_timer.c +++ b/arch/arm64/kvm/arch_timer.c @@ -88,7 +88,9 @@ static u64 timer_get_offset(struct arch_timer_context *ctxt) switch(arch_timer_ctx_index(ctxt)) { case TIMER_VTIMER: - return __vcpu_sys_reg(vcpu, CNTVOFF_EL2); + if (likely(!kvm_vm_is_protected(vcpu->kvm))) + return __vcpu_sys_reg(vcpu, CNTVOFF_EL2); + fallthrough; default: return 0; } @@ -754,6 +756,9 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tmp; + if (unlikely(kvm_vm_is_protected(vcpu->kvm))) + cntvoff = 0; + mutex_lock(&kvm->lock); kvm_for_each_vcpu(i, tmp, kvm) timer_set_offset(vcpu_vtimer(tmp), cntvoff); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index d0688ceaccf06dc86a97ad1b3dd849a3be59ccb8..f0e282244752754afc63944589d92f057bbc5ebd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,6 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); /* The VMID used in the VTTBR */ @@ -82,13 +82,25 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, { int r; - if (cap->flags) - return -EINVAL; + /* Capabilities with flags */ + switch (cap->cap) { + case KVM_CAP_ARM_PROTECTED_VM: + return kvm_arm_vm_ioctl_pkvm(kvm, cap); + default: + if (cap->flags) + return -EINVAL; + } + /* Capabilities without flags */ switch (cap->cap) { case KVM_CAP_ARM_NISV_TO_USER: - r = 0; - kvm->arch.return_nisv_io_abort_to_user = true; + if (kvm_vm_is_protected(kvm)) { + r = -EINVAL; + } else { + r = 0; + set_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, + &kvm->arch.flags); + } break; case KVM_CAP_ARM_MTE: mutex_lock(&kvm->lock); @@ -96,7 +108,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, r = -EINVAL; } else { r = 0; - kvm->arch.mte_enabled = true; + set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags); } mutex_unlock(&kvm->lock); break; @@ -137,17 +149,20 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret; - ret = kvm_arm_setup_stage2(kvm, type); + if (type & ~KVM_VM_TYPE_MASK) + return -EINVAL; + + ret = kvm_share_hyp(kvm, kvm + 1); if (ret) return ret; - ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu); + ret = kvm_init_pvm(kvm, type); if (ret) return ret; - ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); + ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type); if (ret) - goto out_free_stage2_pgd; + return ret; kvm_vgic_early_init(kvm); @@ -156,9 +171,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) set_default_spectre(kvm); - return ret; -out_free_stage2_pgd: - kvm_free_stage2_pgd(&kvm->arch.mmu); return ret; } @@ -167,7 +179,6 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } - /** * kvm_arch_destroy_vm - destroy the VM data structure * @kvm: pointer to the KVM struct @@ -180,6 +191,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_vgic_destroy(kvm); + if (is_protected_kvm_enabled()) + kvm_shadow_destroy(kvm); + for (i = 0; i < KVM_MAX_VCPUS; ++i) { if (kvm->vcpus[i]) { kvm_vcpu_destroy(kvm->vcpus[i]); @@ -187,11 +201,14 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } atomic_set(&kvm->online_vcpus, 0); + + kvm_unshare_hyp(kvm, kvm + 1); } -int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) +static int kvm_check_extension(struct kvm *kvm, long ext) { int r; + switch (ext) { case KVM_CAP_IRQCHIP: r = vgic_present; @@ -209,18 +226,27 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_VCPU_EVENTS: case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: - case KVM_CAP_ARM_NISV_TO_USER: case KVM_CAP_ARM_INJECT_EXT_DABT: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_VCPU_ATTRIBUTES: case KVM_CAP_PTP_KVM: r = 1; break; + case KVM_CAP_ARM_NISV_TO_USER: + r = !kvm || !kvm_vm_is_protected(kvm); + break; case KVM_CAP_ARM_SET_DEVICE_ADDR: r = 1; break; case KVM_CAP_NR_VCPUS: - r = num_online_cpus(); + /* + * ARM64 treats KVM_CAP_NR_CPUS differently from all other + * architectures, as it does not always bound it to + * KVM_CAP_MAX_VCPUS. It should not matter much because + * this is just an advisory value. + */ + r = min_t(unsigned int, num_online_cpus(), + kvm_arm_default_max_vcpus()); break; case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: @@ -280,6 +306,74 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) return r; } +/* + * Checks whether the exctension specified in ext is supported for protected + * vms. The capabilities supported by kvm in general are passed in kvm_cap. + */ +static int pkvm_check_extension(struct kvm *kvm, long ext, int kvm_cap) +{ + int r; + + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_NR_VCPUS: + case KVM_CAP_MAX_VCPUS: + case KVM_CAP_MAX_VCPU_ID: + case KVM_CAP_MSI_DEVID: + case KVM_CAP_ARM_VM_IPA_SIZE: + r = kvm_cap; + break; + case KVM_CAP_GUEST_DEBUG_HW_BPS: + r = min(kvm_cap, pkvm_get_max_brps()); + break; + case KVM_CAP_GUEST_DEBUG_HW_WPS: + r = min(kvm_cap, pkvm_get_max_wrps()); + break; + case KVM_CAP_ARM_PMU_V3: + r = kvm_cap && FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), + PVM_ID_AA64DFR0_ALLOW); + break; + case KVM_CAP_ARM_SVE: + r = kvm_cap && FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED); + break; + case KVM_CAP_ARM_PTRAUTH_ADDRESS: + r = kvm_cap && + FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_API), + PVM_ID_AA64ISAR1_ALLOW) && + FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA), + PVM_ID_AA64ISAR1_ALLOW); + break; + case KVM_CAP_ARM_PTRAUTH_GENERIC: + r = kvm_cap && + FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI), + PVM_ID_AA64ISAR1_ALLOW) && + FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA), + PVM_ID_AA64ISAR1_ALLOW); + break; + case KVM_CAP_ARM_PROTECTED_VM: + r = 1; + break; + default: + r = 0; + break; + } + + return r; +} + +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) +{ + int r = kvm_check_extension(kvm, ext); + + if (unlikely(kvm && kvm_vm_is_protected(kvm))) + r = pkvm_check_extension(kvm, ext, r); + + return r; +} + long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -288,10 +382,12 @@ long kvm_arch_dev_ioctl(struct file *filp, struct kvm *kvm_arch_alloc_vm(void) { + size_t sz = sizeof(struct kvm); + if (!has_vhe()) - return kzalloc(sizeof(struct kvm), GFP_KERNEL); + return kzalloc(sz, GFP_KERNEL_ACCOUNT); - return vzalloc(sizeof(struct kvm)); + return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO); } void kvm_arch_free_vm(struct kvm *kvm) @@ -338,7 +434,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) if (err) return err; - return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); + return kvm_share_hyp(vcpu, vcpu + 1); } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) @@ -347,10 +443,13 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) { - if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm))) + if (vcpu_has_run_once(vcpu) && unlikely(!irqchip_in_kernel(vcpu->kvm))) static_branch_dec(&userspace_irqchip_in_use); - kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); + if (is_protected_kvm_enabled()) + free_hyp_memcache(&vcpu->arch.pkvm_memcache); + else + kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); kvm_timer_vcpu_terminate(vcpu); kvm_pmu_vcpu_destroy(vcpu); @@ -375,15 +474,14 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) * doorbells to be signalled, should an interrupt become pending. */ preempt_disable(); - kvm_vgic_vmcr_sync(vcpu); - vgic_v4_put(vcpu, true); + kvm_vgic_put(vcpu, true); preempt_enable(); } void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) { preempt_disable(); - vgic_v4_load(vcpu); + kvm_vgic_load(vcpu); preempt_enable(); } @@ -392,6 +490,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) struct kvm_s2_mmu *mmu; int *last_ran; + if (is_protected_kvm_enabled()) + goto nommu; + mmu = vcpu->arch.hw_mmu; last_ran = this_cpu_ptr(mmu->last_vcpu_ran); @@ -409,6 +510,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) *last_ran = vcpu->vcpu_id; } +nommu: vcpu->cpu = cpu; kvm_vgic_load(vcpu); @@ -428,16 +530,34 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (vcpu_has_ptrauth(vcpu)) vcpu_ptrauth_disable(vcpu); kvm_arch_vcpu_load_debug_state_flags(vcpu); + + if (is_protected_kvm_enabled()) { + kvm_call_hyp_nvhe(__pkvm_vcpu_load, + vcpu->kvm->arch.pkvm.shadow_handle, + vcpu->vcpu_idx, vcpu->arch.hcr_el2); + kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, + &vcpu->arch.vgic_cpu.vgic_v3); + } } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + if (is_protected_kvm_enabled()) { + kvm_call_hyp(__vgic_v3_save_vmcr_aprs, + &vcpu->arch.vgic_cpu.vgic_v3); + kvm_call_hyp_nvhe(__pkvm_vcpu_put); + + /* __pkvm_vcpu_put implies a sync of the state */ + if (!kvm_vm_is_protected(vcpu->kvm)) + vcpu->arch.flags |= KVM_ARM64_PKVM_STATE_DIRTY; + } + kvm_arch_vcpu_put_debug_state_flags(vcpu); kvm_arch_vcpu_put_fp(vcpu); if (has_vhe()) kvm_vcpu_put_sysregs_vhe(vcpu); kvm_timer_vcpu_put(vcpu); - kvm_vgic_put(vcpu); + kvm_vgic_put(vcpu, false); kvm_vcpu_pmu_restore_host(vcpu); vcpu->cpu = -1; @@ -580,18 +700,33 @@ static void update_vmid(struct kvm_vmid *vmid) spin_unlock(&kvm_vmid_lock); } -static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) +static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.target >= 0; +} + +/* + * Handle both the initialisation that is being done when the vcpu is + * run for the first time, as well as the updates that must be + * performed each time we get a new thread dealing with this vcpu. + */ +int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - int ret = 0; + int ret; - if (likely(vcpu->arch.has_run_once)) - return 0; + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; if (!kvm_arm_vcpu_is_finalized(vcpu)) return -EPERM; - vcpu->arch.has_run_once = true; + ret = kvm_arch_vcpu_run_map_fp(vcpu); + if (ret) + return ret; + + if (likely(vcpu_has_run_once(vcpu))) + return 0; kvm_arm_vcpu_init_debug(vcpu); @@ -603,12 +738,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) ret = kvm_vgic_map_resources(kvm); if (ret) return ret; - } else { - /* - * Tell the rest of the code that there are userspace irqchip - * VMs in the wild. - */ - static_branch_inc(&userspace_irqchip_in_use); } ret = kvm_timer_enable(vcpu); @@ -616,6 +745,23 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return ret; ret = kvm_arm_pmu_v3_enable(vcpu); + if (ret) + return ret; + + if (!irqchip_in_kernel(kvm)) { + /* + * Tell the rest of the code that there are userspace irqchip + * VMs in the wild. + */ + static_branch_inc(&userspace_irqchip_in_use); + } + + if (is_protected_kvm_enabled()) { + /* Start with the vcpu in a dirty state */ + if (!kvm_vm_is_protected(vcpu->kvm)) + vcpu->arch.flags |= KVM_ARM64_PKVM_STATE_DIRTY; + ret = create_el2_shadow(kvm); + } return ret; } @@ -667,11 +813,6 @@ static void vcpu_req_sleep(struct kvm_vcpu *vcpu) smp_rmb(); } -static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.target >= 0; -} - static void check_vcpu_requests(struct kvm_vcpu *vcpu) { if (kvm_request_pending(vcpu)) { @@ -728,13 +869,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; int ret; - if (unlikely(!kvm_vcpu_initialized(vcpu))) - return -ENOEXEC; - - ret = kvm_vcpu_first_run_init(vcpu); - if (ret) - return ret; - if (run->exit_reason == KVM_EXIT_MMIO) { ret = kvm_handle_mmio_return(vcpu); if (ret) @@ -822,6 +956,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) } kvm_arm_setup_debug(vcpu); + kvm_arch_vcpu_ctxflush_fp(vcpu); /************************************************************** * Enter the guest @@ -1096,7 +1231,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, * need to invalidate the I-cache though, as FWB does *not* * imply CTR_EL0.DIC. */ - if (vcpu->arch.has_run_once) { + if (vcpu_has_run_once(vcpu)) { if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) stage2_unmap_vm(vcpu->kvm); else @@ -1369,12 +1504,9 @@ long kvm_arch_vm_ioctl(struct file *filp, return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); } case KVM_ARM_PREFERRED_TARGET: { - int err; struct kvm_vcpu_init init; - err = kvm_vcpu_preferred_target(&init); - if (err) - return err; + kvm_vcpu_preferred_target(&init); if (copy_to_user(argp, &init, sizeof(init))) return -EFAULT; @@ -1425,10 +1557,7 @@ static int kvm_init_vector_slots(void) base = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs)); kvm_init_vector_slot(base, HYP_VECTOR_SPECTRE_DIRECT); - if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) - return 0; - - if (!has_vhe()) { + if (kvm_system_needs_idmapped_vectors() && !has_vhe()) { err = create_hyp_exec_mappings(__pa_symbol(__bp_harden_hyp_vecs), __BP_HARDEN_HYP_VECS_SZ, &base); if (err) @@ -1561,25 +1690,33 @@ static void cpu_set_hyp_vector(void) kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot); } -static void cpu_hyp_reinit(void) +static void cpu_hyp_init_context(void) { kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); - cpu_hyp_reset(); - - if (is_kernel_in_hyp_mode()) - kvm_timer_init_vhe(); - else + if (!is_kernel_in_hyp_mode()) cpu_init_hyp_mode(); +} +static void cpu_hyp_init_features(void) +{ cpu_set_hyp_vector(); - kvm_arm_init_debug(); + if (is_kernel_in_hyp_mode()) + kvm_timer_init_vhe(); + if (vgic_present) kvm_vgic_init_cpu_hardware(); } +static void cpu_hyp_reinit(void) +{ + cpu_hyp_reset(); + cpu_hyp_init_context(); + cpu_hyp_init_features(); +} + static void _kvm_arch_hardware_enable(void *discard) { if (!__this_cpu_read(kvm_arm_hardware_enabled)) { @@ -1694,6 +1831,7 @@ static bool init_psci_relay(void) } kvm_host_psci_config.version = psci_ops.get_version(); + kvm_host_psci_config.smccc_version = arm_smccc_get_version(); if (kvm_host_psci_config.version == PSCI_VERSION(0, 1)) { kvm_host_psci_config.function_ids_0_1 = get_psci_0_1_function_ids(); @@ -1760,20 +1898,27 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); - free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order()); + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); } } static int do_pkvm_init(u32 hyp_va_bits) { - void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base); + void *per_cpu_base = kvm_ksym_ref(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)); int ret; preempt_disable(); - hyp_install_host_vector(); + cpu_hyp_init_context(); ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size, num_possible_cpus(), kern_hyp_va(per_cpu_base), hyp_va_bits); + cpu_hyp_init_features(); + + /* + * The stub hypercalls are now disabled, so set our local flag to + * prevent a later re-init attempt in kvm_arch_hardware_enable(). + */ + __this_cpu_write(kvm_arm_hardware_enabled, 1); preempt_enable(); return ret; @@ -1784,8 +1929,15 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) void *addr = phys_to_virt(hyp_mem_base); int ret; + kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); + kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); + kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); + kvm_nvhe_sym(__icache_flags) = __icache_flags; + kvm_nvhe_sym(smccc_trng_available) = smccc_trng_available; ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); if (ret) @@ -1853,7 +2005,7 @@ static int init_hyp_mode(void) page_addr = page_address(page); memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size()); - kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr; + kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr; } /* @@ -1866,6 +2018,13 @@ static int init_hyp_mode(void) goto out_err; } + err = create_hyp_mappings(kvm_ksym_ref(__hyp_data_start), + kvm_ksym_ref(__hyp_data_end), PAGE_HYP); + if (err) { + kvm_err("Cannot map .hyp.data section\n"); + goto out_err; + } + err = create_hyp_mappings(kvm_ksym_ref(__hyp_rodata_start), kvm_ksym_ref(__hyp_rodata_end), PAGE_HYP_RO); if (err) { @@ -1914,7 +2073,7 @@ static int init_hyp_mode(void) } for_each_possible_cpu(cpu) { - char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu]; + char *percpu_begin = (char *)kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; char *percpu_end = percpu_begin + nvhe_percpu_size(); /* Map Hyp percpu pages */ @@ -1953,9 +2112,25 @@ out_err: return err; } -static void _kvm_host_prot_finalize(void *discard) +static void _kvm_host_prot_finalize(void *arg) { - WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); + int *err = arg; + + if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize))) + WRITE_ONCE(*err, -EINVAL); +} + +static int pkvm_drop_host_privileges(void) +{ + int ret = 0; + + /* + * Flip the static key upfront as that may no longer be possible + * once the host stage 2 is installed. + */ + static_branch_enable(&kvm_protected_mode_initialized); + on_each_cpu(_kvm_host_prot_finalize, &ret, 1); + return ret; } static int finalize_hyp_mode(void) @@ -1969,15 +2144,7 @@ static int finalize_hyp_mode(void) * None of other sections should ever be introspected. */ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); - - /* - * Flip the static key upfront as that may no longer be possible - * once the host stage 2 is installed. - */ - static_branch_enable(&kvm_protected_mode_initialized); - on_each_cpu(_kvm_host_prot_finalize, NULL, 1); - - return 0; + return pkvm_drop_host_privileges(); } struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) @@ -2120,7 +2287,11 @@ static int __init early_kvm_mode_cfg(char *arg) return -EINVAL; if (strcmp(arg, "protected") == 0) { - kvm_mode = KVM_MODE_PROTECTED; + if (!is_kernel_in_hyp_mode()) + kvm_mode = KVM_MODE_PROTECTED; + else + pr_warn_once("Protected KVM not available with VHE\n"); + return 0; } @@ -2129,7 +2300,7 @@ static int __init early_kvm_mode_cfg(char *arg) return 0; } - if (strcmp(arg, "none") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) { + if (strcmp(arg, "none") == 0) { kvm_mode = KVM_MODE_NONE; return 0; } diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 5621020b28de9b19d495a766b2ac79d44ae123f5..2f48fd362a8c50df505031cd6059ec335944f0ee 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -7,7 +7,6 @@ */ #include #include -#include #include #include #include @@ -15,6 +14,19 @@ #include #include +void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) +{ + struct task_struct *p = vcpu->arch.parent_task; + struct user_fpsimd_state *fpsimd; + + if (!is_protected_kvm_enabled() || !p) + return; + + fpsimd = &p->thread.uw.fpsimd_state; + kvm_unshare_hyp(fpsimd, fpsimd + 1); + put_task_struct(p); +} + /* * Called on entry to KVM_RUN unless this vcpu previously ran at least * once and the most recent prior KVM_RUN for this vcpu was called from @@ -28,36 +40,29 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) { int ret; - struct thread_info *ti = ¤t->thread_info; struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; - /* - * Make sure the host task thread flags and fpsimd state are - * visible to hyp: - */ - ret = create_hyp_mappings(ti, ti + 1, PAGE_HYP); - if (ret) - goto error; + kvm_vcpu_unshare_task_fp(vcpu); - ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP); + /* Make sure the host task fpsimd state is visible to hyp: */ + ret = kvm_share_hyp(fpsimd, fpsimd + 1); if (ret) - goto error; - - if (vcpu->arch.sve_state) { - void *sve_end; + return ret; - sve_end = vcpu->arch.sve_state + vcpu_sve_state_size(vcpu); + vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); - ret = create_hyp_mappings(vcpu->arch.sve_state, sve_end, - PAGE_HYP); - if (ret) - goto error; + /* + * We need to keep current's task_struct pinned until its data has been + * unshared with the hypervisor to make sure it is not re-used by the + * kernel and donated to someone else while already shared -- see + * kvm_vcpu_unshare_task_fp() for the matching put_task_struct(). + */ + if (is_protected_kvm_enabled()) { + get_task_struct(current); + vcpu->arch.parent_task = current; } - vcpu->arch.host_thread_info = kern_hyp_va(ti); - vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); -error: - return ret; + return 0; } /* @@ -66,26 +71,27 @@ error: * * Here, we just set the correct metadata to indicate that the FPSIMD * state in the cpu regs (if any) belongs to current on the host. - * - * TIF_SVE is backed up here, since it may get clobbered with guest state. - * This flag is restored by kvm_arch_vcpu_put_fp(vcpu). */ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu) { BUG_ON(!current->mm); + BUG_ON(test_thread_flag(TIF_SVE)); - vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | - KVM_ARM64_HOST_SVE_IN_USE | - KVM_ARM64_HOST_SVE_ENABLED); + vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED; vcpu->arch.flags |= KVM_ARM64_FP_HOST; - if (test_thread_flag(TIF_SVE)) - vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE; - if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN) vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED; } +void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu) +{ + if (test_thread_flag(TIF_FOREIGN_FPSTATE)) + vcpu->arch.flags |= KVM_ARM64_FP_FOREIGN_FPSTATE; + else + vcpu->arch.flags &= ~KVM_ARM64_FP_FOREIGN_FPSTATE; +} + /* * If the guest FPSIMD state was loaded, update the host's context * tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu @@ -115,13 +121,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) { unsigned long flags; - bool host_has_sve = system_supports_sve(); - bool guest_has_sve = vcpu_has_sve(vcpu); local_irq_save(flags); if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { - if (guest_has_sve) { + if (vcpu_has_sve(vcpu)) { __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); /* Restore the VL that was saved when bound to the CPU */ @@ -131,7 +135,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) } fpsimd_save_and_flush_cpu_state(); - } else if (has_vhe() && host_has_sve) { + } else if (has_vhe() && system_supports_sve()) { /* * The FPSIMD/SVE state in the CPU has not been touched, and we * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been @@ -145,8 +149,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu) sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0); } - update_thread_flag(TIF_SVE, - vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE); + update_thread_flag(TIF_SVE, 0); local_irq_restore(flags); } diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index ffbe664ff838c4386ca1e0b1af2ddf501ab2afbf..d709f47cc046b899352b657ea189edd6bf566665 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -853,13 +853,10 @@ u32 __attribute_const__ kvm_target_cpu(void) return KVM_ARM_TARGET_GENERIC_V8; } -int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) +void kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) { u32 target = kvm_target_cpu(); - if (target < 0) - return -ENODEV; - memset(init, 0, sizeof(*init)); /* @@ -869,8 +866,6 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init) * target type. */ init->target = (__u32)target; - - return 0; } int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) @@ -889,11 +884,6 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, return -EINVAL; } -#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | \ - KVM_GUESTDBG_USE_SW_BP | \ - KVM_GUESTDBG_USE_HW | \ - KVM_GUESTDBG_SINGLESTEP) - /** * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging * @kvm: pointer to the KVM struct diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 275a27368a04c21264406cd68351592836c5800a..2b988ba29f408fb0277246343e331072d2bec9e4 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -201,6 +201,21 @@ static int handle_trap_exceptions(struct kvm_vcpu *vcpu) { int handled; + /* + * If we run a non-protected VM when protection is enabled + * system-wide, resync the state from the hypervisor and mark + * it as dirty on the host side if it wasn't dirty already + * (which could happen if preemption has taken place). + */ + if (is_protected_kvm_enabled() && !kvm_vm_is_protected(vcpu->kvm)) { + preempt_disable(); + if (!(vcpu->arch.flags & KVM_ARM64_PKVM_STATE_DIRTY)) { + kvm_call_hyp_nvhe(__pkvm_vcpu_sync_state); + vcpu->arch.flags |= KVM_ARM64_PKVM_STATE_DIRTY; + } + preempt_enable(); + } + /* * See ARM ARM B1.14.1: "Hyp traps on instructions * that fail their condition code check" @@ -260,6 +275,13 @@ int handle_exit(struct kvm_vcpu *vcpu, int exception_index) /* For exit types that need handling before we can be preempted */ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index) { + /* + * We just exited, so the state is clean from a hypervisor + * perspective. + */ + if (is_protected_kvm_enabled()) + vcpu->arch.flags &= ~KVM_ARM64_PKVM_STATE_DIRTY; + if (ARM_SERROR_PENDING(exception_index)) { if (this_cpu_has_cap(ARM64_HAS_RAS_EXTN)) { u64 disr = kvm_vcpu_get_disr(vcpu); diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index b726332eec49eb2a254a68c82b713d5945f7b43d..687598e41b21f68c76479aac298507af58dd7221 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,4 +10,4 @@ subdir-ccflags-y := -I$(incdir) \ -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o reserved_mem.o +obj-$(CONFIG_KVM) += vhe/ nvhe/ pgtable.o diff --git a/arch/arm64/kvm/hyp/exception.c b/arch/arm64/kvm/hyp/exception.c index 0418399e0a2016738c632ef286ec985febc24cdb..14a80b0e2f91de9e1e3b9c97d5bef47100817602 100644 --- a/arch/arm64/kvm/hyp/exception.c +++ b/arch/arm64/kvm/hyp/exception.c @@ -38,7 +38,10 @@ static inline void __vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) static void __vcpu_write_spsr(struct kvm_vcpu *vcpu, u64 val) { - write_sysreg_el1(val, SYS_SPSR); + if (has_vhe()) + write_sysreg_el1(val, SYS_SPSR); + else + __vcpu_sys_reg(vcpu, SPSR_EL1) = val; } static void __vcpu_write_spsr_abt(struct kvm_vcpu *vcpu, u64 val) @@ -57,31 +60,12 @@ static void __vcpu_write_spsr_und(struct kvm_vcpu *vcpu, u64 val) vcpu->arch.ctxt.spsr_und = val; } -/* - * This performs the exception entry at a given EL (@target_mode), stashing PC - * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. - * The EL passed to this function *must* be a non-secure, privileged mode with - * bit 0 being set (PSTATE.SP == 1). - * - * When an exception is taken, most PSTATE fields are left unchanged in the - * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all - * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx - * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. - * - * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. - * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. - * - * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from - * MSB to LSB. - */ -static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, - enum exception_type type) +unsigned long get_except64_offset(unsigned long psr, unsigned long target_mode, + enum exception_type type) { - unsigned long sctlr, vbar, old, new, mode; + u64 mode = psr & (PSR_MODE_MASK | PSR_MODE32_BIT); u64 exc_offset; - mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); - if (mode == target_mode) exc_offset = CURRENT_EL_SP_ELx_VECTOR; else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) @@ -91,28 +75,32 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, else exc_offset = LOWER_EL_AArch32_VECTOR; - switch (target_mode) { - case PSR_MODE_EL1h: - vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL1); - sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); - __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); - break; - default: - /* Don't do that */ - BUG(); - } - - *vcpu_pc(vcpu) = vbar + exc_offset + type; + return exc_offset + type; +} - old = *vcpu_cpsr(vcpu); - new = 0; +/* + * When an exception is taken, most PSTATE fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all + * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx + * layouts, so we don't need to shuffle these for exceptions from AArch32 EL0. + * + * For the SPSR_ELx layout for AArch64, see ARM DDI 0487E.a page C5-429. + * For the SPSR_ELx layout for AArch32, see ARM DDI 0487E.a page C5-426. + * + * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from + * MSB to LSB. + */ +unsigned long get_except64_cpsr(unsigned long old, bool has_mte, + unsigned long sctlr, unsigned long target_mode) +{ + u64 new = 0; new |= (old & PSR_N_BIT); new |= (old & PSR_Z_BIT); new |= (old & PSR_C_BIT); new |= (old & PSR_V_BIT); - if (kvm_has_mte(vcpu->kvm)) + if (has_mte) new |= PSR_TCO_BIT; new |= (old & PSR_DIT_BIT); @@ -148,6 +136,36 @@ static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, new |= target_mode; + return new; +} + +/* + * This performs the exception entry at a given EL (@target_mode), stashing PC + * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. + * The EL passed to this function *must* be a non-secure, privileged mode with + * bit 0 being set (PSTATE.SP == 1). + */ +static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, + enum exception_type type) +{ + u64 offset = get_except64_offset(*vcpu_cpsr(vcpu), target_mode, type); + unsigned long sctlr, vbar, old, new; + + switch (target_mode) { + case PSR_MODE_EL1h: + vbar = __vcpu_read_sys_reg(vcpu, VBAR_EL1); + sctlr = __vcpu_read_sys_reg(vcpu, SCTLR_EL1); + __vcpu_write_sys_reg(vcpu, *vcpu_pc(vcpu), ELR_EL1); + break; + default: + /* Don't do that */ + BUG(); + } + + *vcpu_pc(vcpu) = vbar + offset; + + old = *vcpu_cpsr(vcpu); + new = get_except64_cpsr(old, kvm_has_mte(vcpu->kvm), sctlr, target_mode); *vcpu_cpsr(vcpu) = new; __vcpu_write_spsr(vcpu, old); } diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S index 3c635929771a82d08397c0e16eea457dcd186928..7b789ac4731517f5314b5881e8662ef12972a361 100644 --- a/arch/arm64/kvm/hyp/fpsimd.S +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -24,8 +24,3 @@ SYM_FUNC_START(__sve_restore_state) __sve_load 0, x1, 2 ret SYM_FUNC_END(__sve_restore_state) - -SYM_FUNC_START(__sve_save_state) - sve_save 0, x1, 2 - ret -SYM_FUNC_END(__sve_save_state) diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c new file mode 100644 index 0000000000000000000000000000000000000000..ef55191cd05d0bb39ae10e91bc2e665039c5c4df --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp-constants.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include + +int main(void) +{ + DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page)); + DEFINE(KVM_SHADOW_VM_SIZE, sizeof(struct kvm_shadow_vm)); + DEFINE(SHADOW_VCPU_STATE_SIZE, sizeof(struct shadow_vcpu_state)); + return 0; +} diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 9aa9b73475c95d59b9c1d99a7ec9426625c7bf76..7839d075729b1601f28fad70443f405e98ea9498 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -44,7 +44,7 @@ el1_sync: // Guest trapped into EL2 mrs x0, esr_el2 - lsr x0, x0, #ESR_ELx_EC_SHIFT + ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH cmp x0, #ESR_ELx_EC_HVC64 ccmp x0, #ESR_ELx_EC_HVC32, #4, ne b.ne el1_trap @@ -62,6 +62,10 @@ el1_sync: // Guest trapped into EL2 /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ ARM_SMCCC_ARCH_WORKAROUND_2) + cbz w1, wa_epilogue + + eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \ + ARM_SMCCC_ARCH_WORKAROUND_3) cbnz w1, el1_trap wa_epilogue: @@ -192,7 +196,10 @@ SYM_CODE_END(__kvm_hyp_vector) sub sp, sp, #(8 * 4) stp x2, x3, [sp, #(8 * 0)] stp x0, x1, [sp, #(8 * 2)] + alternative_cb spectre_bhb_patch_wa3 + /* Patched to mov WA3 when supported */ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + alternative_cb_end smc #0 ldp x2, x3, [sp, #(8 * 0)] add sp, sp, #(8 * 2) @@ -205,6 +212,8 @@ SYM_CODE_END(__kvm_hyp_vector) spectrev2_smccc_wa1_smc .else stp x0, x1, [sp, #-16]! + mitigate_spectre_bhb_loop x0 + mitigate_spectre_bhb_clear_insn .endif .if \indirect != 0 alternative_cb kvm_patch_vector_branch diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h new file mode 100644 index 0000000000000000000000000000000000000000..1b8a2dcd712f321c94b3304012770a62ef6bc61a --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#ifndef __ARM64_KVM_HYP_FAULT_H__ +#define __ARM64_KVM_HYP_FAULT_H__ + +#include +#include +#include +#include + +static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) +{ + u64 par, tmp; + + /* + * Resolve the IPA the hard way using the guest VA. + * + * Stage-1 translation already validated the memory access + * rights. As such, we can use the EL1 translation regime, and + * don't have to distinguish between EL0 and EL1 access. + * + * We do need to save/restore PAR_EL1 though, as we haven't + * saved the guest context yet, and we may return early... + */ + par = read_sysreg_par(); + if (!__kvm_at("s1e1r", far)) + tmp = read_sysreg_par(); + else + tmp = SYS_PAR_EL1_F; /* back to the guest */ + write_sysreg(par, par_el1); + + if (unlikely(tmp & SYS_PAR_EL1_F)) + return false; /* Translation failed, back to guest */ + + /* Convert PAR to HPFAR format */ + *hpfar = PAR_TO_HPFAR(tmp); + return true; +} + +static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) +{ + u64 hpfar, far; + + far = read_sysreg_el2(SYS_FAR); + + /* + * The HPFAR can be invalid if the stage 2 fault did not + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW + * bit is clear) and one of the two following cases are true: + * 1. The fault was due to a permission fault + * 2. The processor carries errata 834220 + * + * Therefore, for all non S1PTW faults where we either have a + * permission fault or the errata workaround is enabled, we + * resolve the IPA using the AT instruction. + */ + if (!(esr & ESR_ELx_S1PTW) && + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + if (!__translate_far_to_hpfar(far, &hpfar)) + return false; + } else { + hpfar = read_sysreg(hpfar_el2); + } + + fault->far_el2 = far; + fault->hpfar_el2 = hpfar; + return true; +} + +#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index a0e78a6027be0d1f5a7a1cd4f74fa1d38feeece5..c3463dda749c61883bb1ca76a9d0da99419a84fe 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -8,6 +8,7 @@ #define __ARM64_KVM_HYP_SWITCH_H__ #include +#include #include #include @@ -28,7 +29,6 @@ #include #include #include -#include extern struct exception_table_entry __start___kvm_ex_table; extern struct exception_table_entry __stop___kvm_ex_table; @@ -44,7 +44,7 @@ static inline bool update_fp_enabled(struct kvm_vcpu *vcpu) * trap the accesses. */ if (!system_supports_fpsimd() || - vcpu->arch.host_thread_info->flags & _TIF_FOREIGN_FPSTATE) + vcpu->arch.flags & KVM_ARM64_FP_FOREIGN_FPSTATE) vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_FP_HOST); @@ -133,88 +133,9 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) } } -static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) -{ - u64 par, tmp; - - /* - * Resolve the IPA the hard way using the guest VA. - * - * Stage-1 translation already validated the memory access - * rights. As such, we can use the EL1 translation regime, and - * don't have to distinguish between EL0 and EL1 access. - * - * We do need to save/restore PAR_EL1 though, as we haven't - * saved the guest context yet, and we may return early... - */ - par = read_sysreg_par(); - if (!__kvm_at("s1e1r", far)) - tmp = read_sysreg_par(); - else - tmp = SYS_PAR_EL1_F; /* back to the guest */ - write_sysreg(par, par_el1); - - if (unlikely(tmp & SYS_PAR_EL1_F)) - return false; /* Translation failed, back to guest */ - - /* Convert PAR to HPFAR format */ - *hpfar = PAR_TO_HPFAR(tmp); - return true; -} - -static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) -{ - u64 hpfar, far; - - far = read_sysreg_el2(SYS_FAR); - - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 - * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. - */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { - hpfar = read_sysreg(hpfar_el2); - } - - fault->far_el2 = far; - fault->hpfar_el2 = hpfar; - return true; -} - static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) { - u8 ec; - u64 esr; - - esr = vcpu->arch.fault.esr_el2; - ec = ESR_ELx_EC(esr); - - if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) - return true; - - return __get_fault_info(esr, &vcpu->arch.fault); -} - -static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu) -{ - struct thread_struct *thread; - - thread = container_of(vcpu->arch.host_fpsimd_state, struct thread_struct, - uw.fpsimd_state); - - __sve_save_state(sve_pffr(thread), &vcpu->arch.host_fpsimd_state->fpsr); + return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault); } static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) @@ -225,28 +146,23 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); } -/* Check for an FPSIMD/SVE trap and handle as appropriate */ -static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) +/* + * We trap the first access to the FP/SIMD to save the host context and + * restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an undefined + * instruction exception to the guest. Similarly for trapped SVE accesses. + */ +static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) { - bool sve_guest, sve_host; + bool sve_guest; u8 esr_ec; u64 reg; if (!system_supports_fpsimd()) return false; - if (system_supports_sve()) { - sve_guest = vcpu_has_sve(vcpu); - sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE; - } else { - sve_guest = false; - sve_host = false; - } - + sve_guest = vcpu_has_sve(vcpu); esr_ec = kvm_vcpu_trap_get_class(vcpu); - if (esr_ec != ESR_ELx_EC_FP_ASIMD && - esr_ec != ESR_ELx_EC_SVE) - return false; /* Don't handle SVE traps for non-SVE vcpus here: */ if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD) @@ -269,11 +185,7 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) isb(); if (vcpu->arch.flags & KVM_ARM64_FP_HOST) { - if (sve_host) - __hyp_sve_save_host(vcpu); - else - __fpsimd_save_state(vcpu->arch.host_fpsimd_state); - + __fpsimd_save_state(vcpu->arch.host_fpsimd_state); vcpu->arch.flags &= ~KVM_ARM64_FP_HOST; } @@ -348,14 +260,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) static inline bool esr_is_ptrauth_trap(u32 esr) { - u32 ec = ESR_ELx_EC(esr); - - if (ec == ESR_ELx_EC_PAC) - return true; - - if (ec != ESR_ELx_EC_SYS64) - return false; - switch (esr_sys64_to_sysreg(esr)) { case SYS_APIAKEYLO_EL1: case SYS_APIAKEYHI_EL1: @@ -384,13 +288,12 @@ static inline bool esr_is_ptrauth_trap(u32 esr) DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); -static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) +static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) { struct kvm_cpu_context *ctxt; u64 val; - if (!vcpu_has_ptrauth(vcpu) || - !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + if (!vcpu_has_ptrauth(vcpu)) return false; ctxt = this_cpu_ptr(&kvm_hyp_ctxt); @@ -409,6 +312,92 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) return true; } +static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + handle_tx2_tvm(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + __vgic_v3_perform_cpuif_access(vcpu) == 1) + return true; + + if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + return kvm_hyp_handle_ptrauth(vcpu, exit_code); + + return false; +} + +static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + __vgic_v3_perform_cpuif_access(vcpu) == 1) + return true; + + return false; +} + +static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (!__populate_fault_info(vcpu)) + return true; + + return false; +} + +static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (!__populate_fault_info(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { + bool valid; + + valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + kvm_vcpu_dabt_isvalid(vcpu) && + !kvm_vcpu_abt_issea(vcpu) && + !kvm_vcpu_abt_iss1tw(vcpu); + + if (valid) { + int ret = __vgic_v2_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + + /* Promote an illegal access to an SError.*/ + if (ret == -1) + *exit_code = ARM_EXCEPTION_EL1_SERROR; + } + } + + return false; +} + +typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); + +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); + +static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code); + +/* + * Allow the hypervisor to handle the exit with an exit handler if it has one. + * + * Returns true if the hypervisor handled the exit, and control should go back + * to the guest, or false if it hasn't. + */ +static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); + exit_handler_fn fn; + + fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; + + if (fn) + return fn(vcpu, exit_code); + + return false; +} + /* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the @@ -416,6 +405,18 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) */ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { + /* + * Save PSTATE early so that we can evaluate the vcpu mode + * early on. + */ + vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR); + + /* + * Check whether we want to repaint the state one way or + * another. + */ + early_exit_filter(vcpu, exit_code); + if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); @@ -443,59 +444,9 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (*exit_code != ARM_EXCEPTION_TRAP) goto exit; - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && - handle_tx2_tvm(vcpu)) - goto guest; - - /* - * We trap the first access to the FP/SIMD to save the host context - * and restore the guest context lazily. - * If FP/SIMD is not implemented, handle the trap and inject an - * undefined instruction exception to the guest. - * Similarly for trapped SVE accesses. - */ - if (__hyp_handle_fpsimd(vcpu)) - goto guest; - - if (__hyp_handle_ptrauth(vcpu)) + /* Check if there's an exit handler and allow it to handle the exit. */ + if (kvm_hyp_handle_exit(vcpu, exit_code)) goto guest; - - if (!__populate_fault_info(vcpu)) - goto guest; - - if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { - bool valid; - - valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && - kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && - kvm_vcpu_dabt_isvalid(vcpu) && - !kvm_vcpu_abt_issea(vcpu) && - !kvm_vcpu_abt_iss1tw(vcpu); - - if (valid) { - int ret = __vgic_v2_perform_cpuif_access(vcpu); - - if (ret == 1) - goto guest; - - /* Promote an illegal access to an SError.*/ - if (ret == -1) - *exit_code = ARM_EXCEPTION_EL1_SERROR; - - goto exit; - } - } - - if (static_branch_unlikely(&vgic_v3_cpuif_trap) && - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { - int ret = __vgic_v3_perform_cpuif_access(vcpu); - - if (ret == 1) - goto guest; - } - exit: /* Return to the host kernel and handle the exit */ return false; diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h index de7e14c862e6c9b5415df6a7daf815142a302a16..7ecca8b078519fd315c92cbc7cc059c0d2269bed 100644 --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -70,7 +70,12 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) { ctxt->regs.pc = read_sysreg_el2(SYS_ELR); - ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); + /* + * Guest PSTATE gets saved at guest fixup time in all + * cases. We still need to handle the nVHE host side here. + */ + if (!has_vhe() && ctxt->__hyp_running_vcpu) + ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); diff --git a/arch/arm64/kvm/hyp/include/nvhe/ffa.h b/arch/arm64/kvm/hyp/include/nvhe/ffa.h new file mode 100644 index 0000000000000000000000000000000000000000..39666259820f2872654f01a1347bcff681eb182e --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/ffa.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 - Google LLC + * Author: Andrew Walbran + */ +#ifndef __KVM_HYP_FFA_H +#define __KVM_HYP_FFA_H + +#include +#include + +#define FFA_MIN_FUNC_NUM 0x60 +#define FFA_MAX_FUNC_NUM 0x7F + +struct kvm_ffa_buffers { + hyp_spinlock_t lock; + void *tx; + void *rx; +}; + +int hyp_ffa_init(void *pages); +bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt); + +#endif /* __KVM_HYP_FFA_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h index fb0f523d14921c6c79b246e63111f4c1cbbf0767..9330b13075f8a0e30cf4bb2af68f7d66cea1ab39 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/gfp.h +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -7,7 +7,7 @@ #include #include -#define HYP_NO_ORDER USHRT_MAX +#define HYP_NO_ORDER 0xff struct hyp_pool { /* @@ -19,11 +19,12 @@ struct hyp_pool { struct list_head free_area[MAX_ORDER]; phys_addr_t range_start; phys_addr_t range_end; - unsigned short max_order; + u8 max_order; }; /* Allocation */ -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); +void *hyp_alloc_pages(struct hyp_pool *pool, u8 order); +void hyp_split_page(struct hyp_page *page); void hyp_get_page(struct hyp_pool *pool, void *addr); void hyp_put_page(struct hyp_pool *pool, void *addr); diff --git a/arch/arm64/kvm/hyp/include/nvhe/iommu.h b/arch/arm64/kvm/hyp/include/nvhe/iommu.h new file mode 100644 index 0000000000000000000000000000000000000000..69bce1f017174dd49d9e7986155dd555c4e78daf --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/iommu.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ARM64_KVM_NVHE_IOMMU_H__ +#define __ARM64_KVM_NVHE_IOMMU_H__ + +#include +#include + +#include + +struct pkvm_iommu; + +struct pkvm_iommu_ops { + /* + * Global driver initialization called before devices are registered. + * Driver-specific arguments are passed in a buffer shared by the host. + * The buffer memory has been pinned in EL2 but host retains R/W access. + * Extra care must be taken when reading from it to avoid TOCTOU bugs. + * If the driver maintains its own page tables, it is expected to + * initialize them to all memory owned by the host. + * Driver initialization lock held during callback. + */ + int (*init)(void *data, size_t size); + + /* + * Driver-specific validation of a device that is being registered. + * All fields of the device struct have been populated. + * Called with the host lock held. + */ + int (*validate)(struct pkvm_iommu *dev); + + /* + * Validation of a new child device that is being register by + * the parent device the child selected. Called with the host lock held. + */ + int (*validate_child)(struct pkvm_iommu *dev, struct pkvm_iommu *child); + + /* + * Callback to apply a host stage-2 mapping change at driver level. + * Called before 'host_stage2_idmap_apply' with host lock held. + */ + void (*host_stage2_idmap_prepare)(phys_addr_t start, phys_addr_t end, + enum kvm_pgtable_prot prot); + + /* + * Callback to apply a host stage-2 mapping change at device level. + * Called after 'host_stage2_idmap_prepare' with host lock held. + */ + void (*host_stage2_idmap_apply)(struct pkvm_iommu *dev, + phys_addr_t start, phys_addr_t end); + + /* Power management callbacks. Called with host lock held. */ + int (*suspend)(struct pkvm_iommu *dev); + int (*resume)(struct pkvm_iommu *dev); + + /* + * Host data abort handler callback. Called with host lock held. + * Returns true if the data abort has been handled. + */ + bool (*host_dabt_handler)(struct pkvm_iommu *dev, + struct kvm_cpu_context *host_ctxt, + u32 esr, size_t off); + + /* Amount of memory allocated per-device for use by the driver. */ + size_t data_size; +}; + +struct pkvm_iommu { + struct pkvm_iommu *parent; + struct list_head list; + struct list_head siblings; + struct list_head children; + unsigned long id; + const struct pkvm_iommu_ops *ops; + phys_addr_t pa; + void *va; + size_t size; + bool powered; + char data[]; +}; + +int __pkvm_iommu_driver_init(enum pkvm_iommu_driver_id id, void *data, size_t size); +int __pkvm_iommu_register(unsigned long dev_id, + enum pkvm_iommu_driver_id drv_id, + phys_addr_t dev_pa, size_t dev_size, + unsigned long parent_id, + void *kern_mem_va, size_t mem_size); +int __pkvm_iommu_pm_notify(unsigned long dev_id, + enum pkvm_iommu_pm_event event); +int __pkvm_iommu_finalize(void); +int pkvm_iommu_host_stage2_adjust_range(phys_addr_t addr, phys_addr_t *start, + phys_addr_t *end); +bool pkvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u32 esr, + phys_addr_t fault_pa); +void pkvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end, + enum kvm_pgtable_prot prot); + +extern const struct pkvm_iommu_ops pkvm_s2mpu_ops; +extern const struct pkvm_iommu_ops pkvm_sysmmu_sync_ops; + +#endif /* __ARM64_KVM_NVHE_IOMMU_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index b58c910babafdf9a2e778b9d55c210b436c8c79a..e796ff5e74fb16fd698e48ab751ddf36b458fb83 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -8,8 +8,10 @@ #define __KVM_NVHE_MEM_PROTECT__ #include #include +#include #include #include +#include #include /* @@ -24,6 +26,11 @@ enum pkvm_page_state { PKVM_PAGE_OWNED = 0ULL, PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0, PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1, + __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 | + KVM_PGTABLE_PROT_SW1, + + /* Meta-states which aren't encoded directly in the PTE's SW bits */ + PKVM_NOPAGE, }; #define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) @@ -42,21 +49,51 @@ struct host_kvm { struct kvm_arch arch; struct kvm_pgtable pgt; struct kvm_pgtable_mm_ops mm_ops; + struct kvm_ffa_buffers ffa; hyp_spinlock_t lock; }; extern struct host_kvm host_kvm; -extern const u8 pkvm_hyp_id; +typedef u32 pkvm_id; +static const pkvm_id pkvm_host_id = 0; +static const pkvm_id pkvm_hyp_id = (1 << 16); +static const pkvm_id pkvm_ffa_id = pkvm_hyp_id + 1; /* Secure world */ + +extern unsigned long hyp_nr_cpus; int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); +int __pkvm_host_unshare_hyp(u64 pfn); +int __pkvm_host_reclaim_page(u64 pfn); +int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages); +int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages); +int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct kvm_vcpu *vcpu); +int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct kvm_vcpu *vcpu); +int __pkvm_guest_share_host(struct kvm_vcpu *vcpu, u64 ipa); +int __pkvm_guest_unshare_host(struct kvm_vcpu *vcpu, u64 ipa); +int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages); +int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages); +int __pkvm_install_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa); +int __pkvm_remove_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa); +bool __pkvm_check_ioguard_page(struct kvm_vcpu *vcpu); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); -int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); +int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, pkvm_id owner_id); +int host_stage2_unmap_dev_locked(phys_addr_t start, u64 size); int kvm_host_prepare_stage2(void *pgt_pool_base); +int kvm_guest_prepare_stage2(struct kvm_shadow_vm *vm, void *pgd); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); +int hyp_pin_shared_mem(void *from, void *to); +void hyp_unpin_shared_mem(void *from, void *to); +int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, + struct kvm_hyp_memcache *host_mc); +void reclaim_guest_pages(struct kvm_shadow_vm *vm, struct kvm_hyp_memcache *mc); + +void psci_mem_protect_inc(void); +void psci_mem_protect_dec(void); + static __always_inline void __load_host_stage2(void) { if (static_branch_likely(&kvm_protected_mode_initialized)) diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 083b401e5840e2d95bd8a0b8de8c33a65896946c..8fd0418e04ff39954a03e34dc1dbe02d4c429494 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -7,9 +7,17 @@ #include +/* + * Accesses to struct hyp_page flags must be serialized by the host stage-2 + * page-table lock due to the lack of atomics at EL2. + */ +#define HOST_PAGE_NEED_POISONING BIT(0) +#define HOST_PAGE_PENDING_RECLAIM BIT(1) + struct hyp_page { unsigned short refcount; - unsigned short order; + u8 order; + u8 flags; }; extern s64 hyp_physvirt_offset; @@ -46,4 +54,27 @@ static inline int hyp_page_count(void *addr) return p->refcount; } +static inline void hyp_page_ref_inc(struct hyp_page *p) +{ + BUG_ON(p->refcount == USHRT_MAX); + p->refcount++; +} + +static inline void hyp_page_ref_dec(struct hyp_page *p) +{ + BUG_ON(!p->refcount); + p->refcount--; +} + +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + hyp_page_ref_dec(p); + return (p->refcount == 0); +} + +static inline void hyp_set_page_refcounted(struct hyp_page *p) +{ + BUG_ON(p->refcount); + p->refcount = 1; +} #endif /* __KVM_HYP_MEMORY_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index c9a8f535212ea277a70d6875124a4241099d726f..45b04bfee171e68d2920fd2f71ac031e4cb2df2e 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -10,87 +10,20 @@ #include #include -#define HYP_MEMBLOCK_REGIONS 128 -extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; -extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); extern struct kvm_pgtable pkvm_pgtable; extern hyp_spinlock_t pkvm_pgd_lock; -extern struct hyp_pool hpool; -extern u64 __io_map_base; + +int hyp_create_pcpu_fixmap(void); +void *hyp_fixmap_map(phys_addr_t phys); +int hyp_fixmap_unmap(void); int hyp_create_idmap(u32 hyp_va_bits); int hyp_map_vectors(void); -int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); +int hyp_back_vmemmap(phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot); -static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size, - unsigned long *start, unsigned long *end) -{ - unsigned long nr_pages = size >> PAGE_SHIFT; - struct hyp_page *p = hyp_phys_to_page(phys); - - *start = (unsigned long)p; - *end = *start + nr_pages * sizeof(struct hyp_page); - *start = ALIGN_DOWN(*start, PAGE_SIZE); - *end = ALIGN(*end, PAGE_SIZE); -} - -static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) -{ - unsigned long total = 0, i; - - /* Provision the worst case scenario */ - for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) { - nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE); - total += nr_pages; - } - - return total; -} - -static inline unsigned long __hyp_pgtable_total_pages(void) -{ - unsigned long res = 0, i; - - /* Cover all of memory with page-granularity */ - for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { - struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i]; - res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT); - } - - return res; -} - -static inline unsigned long hyp_s1_pgtable_pages(void) -{ - unsigned long res; - - res = __hyp_pgtable_total_pages(); - - /* Allow 1 GiB for private mappings */ - res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); - - return res; -} - -static inline unsigned long host_s2_pgtable_pages(void) -{ - unsigned long res; - - /* - * Include an extra 16 pages to safely upper-bound the worst case of - * concatenated pgds. - */ - res = __hyp_pgtable_total_pages() + 16; - - /* Allow 1 GiB for MMIO mappings */ - res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); - - return res; -} - #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h new file mode 100644 index 0000000000000000000000000000000000000000..80ecc831fc313f720657f83baf9887622e283b67 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#ifndef __ARM64_KVM_NVHE_PKVM_H__ +#define __ARM64_KVM_NVHE_PKVM_H__ + +#include + +#include +#include + +/* + * A container for the vcpu state that hyp needs to maintain for protected VMs. + */ +struct shadow_vcpu_state { + struct kvm_shadow_vm *vm; + struct kvm_vcpu vcpu; +}; + +/* + * Holds the relevant data for running a protected vm. + */ +struct kvm_shadow_vm { + /* A unique id to the shadow structs in the hyp shadow area. */ + int shadow_handle; + + /* Number of vcpus for the vm. */ + int created_vcpus; + + /* Pointers to the shadow vcpus of the shadow vm. */ + struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; + + /* Primary vCPU pending entry to the pvmfw */ + struct kvm_vcpu *pvmfw_entry_vcpu; + + /* The host's kvm structure. */ + struct kvm *host_kvm; + + /* The total size of the donated shadow area. */ + size_t shadow_area_size; + + struct kvm_arch arch; + struct kvm_pgtable pgt; + struct kvm_pgtable_mm_ops mm_ops; + struct hyp_pool pool; + hyp_spinlock_t lock; + + /* Array of the shadow state per vcpu. */ + struct shadow_vcpu_state shadow_vcpus[0]; +}; + +static inline bool vcpu_is_protected(struct kvm_vcpu *vcpu) +{ + if (!is_protected_kvm_enabled()) + return false; + + return vcpu->arch.pkvm.shadow_vm->arch.pkvm.enabled; +} + +extern phys_addr_t pvmfw_base; +extern phys_addr_t pvmfw_size; + +void hyp_shadow_table_init(void *tbl); +int __pkvm_init_shadow(struct kvm *kvm, void *shadow_va, size_t size, void *pgd); +int __pkvm_teardown_shadow(int shadow_handle); +struct kvm_vcpu *get_shadow_vcpu(int shadow_handle, unsigned int vcpu_idx); +void put_shadow_vcpu(struct kvm_vcpu *vcpu); + +u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); +bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); +bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); +void kvm_reset_pvm_sys_regs(struct kvm_vcpu *vcpu); +int kvm_check_pvm_sysreg_table(void); + +void pkvm_reset_vcpu(struct kvm_vcpu *vcpu); + +bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code); + +struct kvm_vcpu *pvm_mpidr_to_vcpu(struct kvm_shadow_vm *vm, unsigned long mpidr); + +static inline bool pvm_has_pvmfw(struct kvm_shadow_vm *vm) +{ + return vm->arch.pkvm.pvmfw_load_addr != PVMFW_INVALID_LOAD_ADDR; +} + +static inline bool ipa_in_pvmfw_region(struct kvm_shadow_vm *vm, u64 ipa) +{ + struct kvm_protected_vm *pkvm = &vm->arch.pkvm; + + if (!pvm_has_pvmfw(vm)) + return false; + + return ipa - pkvm->pvmfw_load_addr < pvmfw_size; +} + +int pkvm_load_pvmfw_pages(struct kvm_shadow_vm *vm, u64 ipa, phys_addr_t phys, + u64 size); +void pkvm_clear_pvmfw_pages(void); + +#endif /* __ARM64_KVM_NVHE_PKVM_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h index 4652fd04bdbee5b94f4922f010585bc4fa6a65e6..7c7ea8c55405f589f741a414f71d486120710092 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h +++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h @@ -28,9 +28,17 @@ typedef union hyp_spinlock { }; } hyp_spinlock_t; +#define __HYP_SPIN_LOCK_INITIALIZER \ + { .__val = 0 } + +#define __HYP_SPIN_LOCK_UNLOCKED \ + ((hyp_spinlock_t) __HYP_SPIN_LOCK_INITIALIZER) + +#define DEFINE_HYP_SPINLOCK(x) hyp_spinlock_t x = __HYP_SPIN_LOCK_UNLOCKED + #define hyp_spin_lock_init(l) \ do { \ - *(l) = (hyp_spinlock_t){ .__val = 0 }; \ + *(l) = __HYP_SPIN_LOCK_UNLOCKED; \ } while (0) static inline void hyp_spin_lock(hyp_spinlock_t *lock) diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 73bd835d6aad1ab3a40e6c708f830329219cdfcf..5145c238ff4d66747bd5bb0135b3c4edde97c2f4 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -14,11 +14,13 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \ - cache.o setup.o mm.o mem_protect.o + cache.o ffa.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o iommu.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o obj-y += $(lib-objs) +obj-$(CONFIG_KVM_S2MPU) += iommu/s2mpu.o + ## ## Build rules for compiling nVHE hyp code ## Output of this folder is `kvm_nvhe.o`, a partially linked object @@ -54,7 +56,7 @@ $(obj)/kvm_nvhe.tmp.o: $(obj)/hyp.lds $(addprefix $(obj)/,$(hyp-obj)) FORCE # runtime. Because the hypervisor is part of the kernel binary, relocations # produce a kernel VA. We enumerate relocations targeting hyp at build time # and convert the kernel VAs at those positions to hyp VAs. -$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel +$(obj)/hyp-reloc.S: $(obj)/kvm_nvhe.tmp.o $(obj)/gen-hyprel FORCE $(call if_changed,hyprel) # 5) Compile hyp-reloc.S and link it into the existing partially linked object. diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 958734f4d6b0ed820ee7fc006d2fd38d2cf0a105..4c447f2a31e16af9844d7d37eebc881a797e0157 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -11,3 +11,13 @@ SYM_FUNC_START_PI(dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret SYM_FUNC_END_PI(dcache_clean_inval_poc) + +SYM_FUNC_START_PI(icache_inval_pou) +alternative_if ARM64_HAS_CACHE_DIC + isb + ret +alternative_else_nop_endif + + invalidate_icache_by_line x0, x1, x2, x3 + ret +SYM_FUNC_END_PI(icache_inval_pou) diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c index 1306c430ab876c7b7fee82898233acfb5f2f2442..00de04153cc648de32c23f6e0c26fcbf23f02553 100644 --- a/arch/arm64/kvm/hyp/nvhe/early_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c @@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg) return hyp_early_alloc_contig(1); } +static void hyp_early_alloc_get_page(void *addr) { } +static void hyp_early_alloc_put_page(void *addr) { } + void hyp_early_alloc_init(void *virt, unsigned long size) { base = cur = (unsigned long)virt; @@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size) hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page; hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt; hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys; + hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page; + hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page; } diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c new file mode 100644 index 0000000000000000000000000000000000000000..1fd6c6e267babca8bb2479ed7221667ddd6e1f65 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -0,0 +1,734 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * FF-A v1.0 proxy to filter out invalid memory-sharing SMC calls issued by + * the host. FF-A is a slightly more palatable abbreviation of "Arm Firmware + * Framework for Arm A-profile", which is specified by Arm in document + * number DEN0077. + * + * Copyright (C) 2022 - Google LLC + * Author: Andrew Walbran + * + * This driver hooks into the SMC trapping logic for the host and intercepts + * all calls falling within the FF-A range. Each call is either: + * + * - Forwarded on unmodified to the SPMD at EL3 + * - Rejected as "unsupported" + * - Accompanied by a host stage-2 page-table check/update and reissued + * + * Consequently, any attempts by the host to make guest memory pages + * accessible to the secure world using FF-A will be detected either here + * (in the case that the memory is already owned by the guest) or during + * donation to the guest (in the case that the memory was previously shared + * with the secure world). + * + * To allow the rolling-back of page-table updates and FF-A calls in the + * event of failure, operations involving the RXTX buffers are locked for + * the duration and are therefore serialised. + */ + +#include +#include +#include + +#include +#include +#include +#include + +/* + * "ID value 0 must be returned at the Non-secure physical FF-A instance" + * We share this ID with the host. + */ +#define HOST_FFA_ID 0 + +/* + * A buffer to hold the maximum descriptor size we can see from the host, + * which is required when the SPMD returns a fragmented FFA_MEM_RETRIEVE_RESP + * when resolving the handle on the reclaim path. + */ +struct kvm_ffa_descriptor_buffer { + void *buf; + size_t len; +}; + +static struct kvm_ffa_descriptor_buffer ffa_desc_buf; + +/* + * Note that we don't currently lock these buffers explicitly, instead + * relying on the locking of the host FFA buffers as we only have one + * client. + */ +static struct kvm_ffa_buffers ffa_buffers; + +static void ffa_to_smccc_error(struct arm_smccc_res *res, u64 ffa_errno) +{ + *res = (struct arm_smccc_res) { + .a0 = FFA_ERROR, + .a2 = ffa_errno, + }; +} + +static void ffa_to_smccc_res_prop(struct arm_smccc_res *res, int ret, u64 prop) +{ + if (ret == FFA_RET_SUCCESS) { + *res = (struct arm_smccc_res) { .a0 = FFA_SUCCESS, + .a2 = prop }; + } else { + ffa_to_smccc_error(res, ret); + } +} + +static void ffa_to_smccc_res(struct arm_smccc_res *res, int ret) +{ + ffa_to_smccc_res_prop(res, ret, 0); +} + +static void ffa_set_retval(struct kvm_cpu_context *ctxt, + struct arm_smccc_res *res) +{ + cpu_reg(ctxt, 0) = res->a0; + cpu_reg(ctxt, 1) = res->a1; + cpu_reg(ctxt, 2) = res->a2; + cpu_reg(ctxt, 3) = res->a3; +} + +static bool is_ffa_call(u64 func_id) +{ + return ARM_SMCCC_IS_FAST_CALL(func_id) && + ARM_SMCCC_OWNER_NUM(func_id) == ARM_SMCCC_OWNER_STANDARD && + ARM_SMCCC_FUNC_NUM(func_id) >= FFA_MIN_FUNC_NUM && + ARM_SMCCC_FUNC_NUM(func_id) <= FFA_MAX_FUNC_NUM; +} + +static int spmd_map_ffa_buffers(u64 ffa_page_count) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(FFA_FN64_RXTX_MAP, + hyp_virt_to_phys(ffa_buffers.tx), + hyp_virt_to_phys(ffa_buffers.rx), + ffa_page_count, + 0, 0, 0, 0, + &res); + + return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2; +} + +static int spmd_unmap_ffa_buffers(void) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_smc(FFA_RXTX_UNMAP, + HOST_FFA_ID, + 0, 0, 0, 0, 0, 0, + &res); + + return res.a0 == FFA_SUCCESS ? FFA_RET_SUCCESS : res.a2; +} + +static void spmd_mem_frag_tx(struct arm_smccc_res *res, u32 handle_lo, + u32 handle_hi, u32 fraglen, u32 endpoint_id) +{ + arm_smccc_1_1_smc(FFA_MEM_FRAG_TX, + handle_lo, handle_hi, fraglen, endpoint_id, + 0, 0, 0, + res); +} + +static void spmd_mem_frag_rx(struct arm_smccc_res *res, u32 handle_lo, + u32 handle_hi, u32 fragoff) +{ + arm_smccc_1_1_smc(FFA_MEM_FRAG_RX, + handle_lo, handle_hi, fragoff, HOST_FFA_ID, + 0, 0, 0, + res); +} + +static void spmd_mem_xfer(struct arm_smccc_res *res, u64 func_id, u32 len, + u32 fraglen) +{ + arm_smccc_1_1_smc(func_id, len, fraglen, + 0, 0, 0, 0, 0, + res); +} + +static void spmd_mem_reclaim(struct arm_smccc_res *res, u32 handle_lo, + u32 handle_hi, u32 flags) +{ + arm_smccc_1_1_smc(FFA_MEM_RECLAIM, + handle_lo, handle_hi, flags, + 0, 0, 0, 0, + res); +} + +static void spmd_retrieve_req(struct arm_smccc_res *res, u32 len) +{ + arm_smccc_1_1_smc(FFA_FN64_MEM_RETRIEVE_REQ, + len, len, + 0, 0, 0, 0, 0, + res); +} + +static void do_ffa_rxtx_map(struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(phys_addr_t, tx, ctxt, 1); + DECLARE_REG(phys_addr_t, rx, ctxt, 2); + DECLARE_REG(u32, npages, ctxt, 3); + int ret = 0; + void *rx_virt, *tx_virt; + + if (npages != (KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) / FFA_PAGE_SIZE) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out; + } + + if (!PAGE_ALIGNED(tx) || !PAGE_ALIGNED(rx)) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out; + } + + hyp_spin_lock(&host_kvm.ffa.lock); + if (host_kvm.ffa.tx) { + ret = FFA_RET_DENIED; + goto out_unlock; + } + + ret = spmd_map_ffa_buffers(npages); + if (ret) + goto out_unlock; + + ret = __pkvm_host_share_hyp(hyp_phys_to_pfn(tx)); + if (ret) { + ret = FFA_RET_INVALID_PARAMETERS; + goto err_unmap; + } + + ret = __pkvm_host_share_hyp(hyp_phys_to_pfn(rx)); + if (ret) { + ret = FFA_RET_INVALID_PARAMETERS; + goto err_unshare_tx; + } + + tx_virt = hyp_phys_to_virt(tx); + ret = hyp_pin_shared_mem(tx_virt, tx_virt + 1); + if (ret) { + ret = FFA_RET_INVALID_PARAMETERS; + goto err_unshare_rx; + } + + rx_virt = hyp_phys_to_virt(rx); + ret = hyp_pin_shared_mem(rx_virt, rx_virt + 1); + if (ret) { + ret = FFA_RET_INVALID_PARAMETERS; + goto err_unpin_tx; + } + + host_kvm.ffa.tx = tx_virt; + host_kvm.ffa.rx = rx_virt; + +out_unlock: + hyp_spin_unlock(&host_kvm.ffa.lock); +out: + ffa_to_smccc_res(res, ret); + return; + +err_unpin_tx: + hyp_unpin_shared_mem(tx_virt, tx_virt + 1); +err_unshare_rx: + __pkvm_host_unshare_hyp(hyp_phys_to_pfn(rx)); +err_unshare_tx: + __pkvm_host_unshare_hyp(hyp_phys_to_pfn(tx)); +err_unmap: + spmd_unmap_ffa_buffers(); + goto out_unlock; +} + +static void do_ffa_rxtx_unmap(struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(u32, id, ctxt, 1); + int ret = 0; + + if (id != HOST_FFA_ID) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out; + } + + hyp_spin_lock(&host_kvm.ffa.lock); + if (!host_kvm.ffa.tx) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out_unlock; + } + + hyp_unpin_shared_mem(host_kvm.ffa.tx, host_kvm.ffa.tx + 1); + WARN_ON(__pkvm_host_unshare_hyp(hyp_virt_to_pfn(host_kvm.ffa.tx))); + host_kvm.ffa.tx = NULL; + + hyp_unpin_shared_mem(host_kvm.ffa.rx, host_kvm.ffa.rx + 1); + WARN_ON(__pkvm_host_unshare_hyp(hyp_virt_to_pfn(host_kvm.ffa.rx))); + host_kvm.ffa.rx = NULL; + + spmd_unmap_ffa_buffers(); + +out_unlock: + hyp_spin_unlock(&host_kvm.ffa.lock); +out: + ffa_to_smccc_res(res, ret); +} + +static u32 __ffa_host_share_ranges(struct ffa_mem_region_addr_range *ranges, + u32 nranges) +{ + u32 i; + + for (i = 0; i < nranges; ++i) { + struct ffa_mem_region_addr_range *range = &ranges[i]; + u64 sz = (u64)range->pg_cnt * FFA_PAGE_SIZE; + u64 pfn = hyp_phys_to_pfn(range->address); + + if (!PAGE_ALIGNED(sz)) + break; + + if (__pkvm_host_share_ffa(pfn, sz / PAGE_SIZE)) + break; + } + + return i; +} + +static u32 __ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges, + u32 nranges) +{ + u32 i; + + for (i = 0; i < nranges; ++i) { + struct ffa_mem_region_addr_range *range = &ranges[i]; + u64 sz = (u64)range->pg_cnt * FFA_PAGE_SIZE; + u64 pfn = hyp_phys_to_pfn(range->address); + + if (!PAGE_ALIGNED(sz)) + break; + + if (__pkvm_host_unshare_ffa(pfn, sz / PAGE_SIZE)) + break; + } + + return i; +} + +static int ffa_host_share_ranges(struct ffa_mem_region_addr_range *ranges, + u32 nranges) +{ + u32 nshared = __ffa_host_share_ranges(ranges, nranges); + int ret = 0; + + if (nshared != nranges) { + WARN_ON(__ffa_host_unshare_ranges(ranges, nshared)); + ret = FFA_RET_DENIED; + } + + return ret; +} + +static int ffa_host_unshare_ranges(struct ffa_mem_region_addr_range *ranges, + u32 nranges) +{ + u32 nunshared = __ffa_host_unshare_ranges(ranges, nranges); + int ret = 0; + + if (nunshared != nranges) { + WARN_ON(__ffa_host_share_ranges(ranges, nunshared)); + ret = FFA_RET_DENIED; + } + + return ret; +} + +static void do_ffa_mem_frag_tx(struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(u32, handle_lo, ctxt, 1); + DECLARE_REG(u32, handle_hi, ctxt, 2); + DECLARE_REG(u32, fraglen, ctxt, 3); + DECLARE_REG(u32, endpoint_id, ctxt, 4); + struct ffa_mem_region_addr_range *buf; + int ret = FFA_RET_INVALID_PARAMETERS; + u32 nr_ranges; + + if (fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) + goto out; + + if (fraglen % sizeof(*buf)) + goto out; + + hyp_spin_lock(&host_kvm.ffa.lock); + if (!host_kvm.ffa.tx) + goto out_unlock; + + buf = ffa_buffers.tx; + memcpy(buf, host_kvm.ffa.tx, fraglen); + nr_ranges = fraglen / sizeof(*buf); + + ret = ffa_host_share_ranges(buf, nr_ranges); + if (ret) { + /* + * We're effectively aborting the transaction, so we need + * to restore the global state back to what it was prior to + * transmission of the first fragment. + */ + spmd_mem_reclaim(res, handle_lo, handle_hi, 0); + WARN_ON(res->a0 != FFA_SUCCESS); + goto out_unlock; + } + + spmd_mem_frag_tx(res, handle_lo, handle_hi, fraglen, endpoint_id); + if (res->a0 != FFA_SUCCESS && res->a0 != FFA_MEM_FRAG_RX) + WARN_ON(ffa_host_unshare_ranges(buf, nr_ranges)); + +out_unlock: + hyp_spin_unlock(&host_kvm.ffa.lock); +out: + if (ret) + ffa_to_smccc_res(res, ret); + + /* + * If for any reason this did not succeed, we're in trouble as we have + * now lost the content of the previous fragments and we can't rollback + * the host stage-2 changes. The pages previously marked as shared will + * remain stuck in that state forever, hence preventing the host from + * sharing/donating them again and may possibly lead to subsequent + * failures, but this will not compromise confidentiality. + */ + return; +} + +static __always_inline void do_ffa_mem_xfer(const u64 func_id, + struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(u32, len, ctxt, 1); + DECLARE_REG(u32, fraglen, ctxt, 2); + DECLARE_REG(u64, addr_mbz, ctxt, 3); + DECLARE_REG(u32, npages_mbz, ctxt, 4); + struct ffa_composite_mem_region *reg; + struct ffa_mem_region *buf; + u32 offset, nr_ranges; + int ret = 0; + + BUILD_BUG_ON(func_id != FFA_FN64_MEM_SHARE && + func_id != FFA_FN64_MEM_LEND); + + if (addr_mbz || npages_mbz || fraglen > len || + fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out; + } + + if (fraglen < sizeof(struct ffa_mem_region) + + sizeof(struct ffa_mem_region_attributes)) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out; + } + + hyp_spin_lock(&host_kvm.ffa.lock); + if (!host_kvm.ffa.tx) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out_unlock; + } + + buf = ffa_buffers.tx; + memcpy(buf, host_kvm.ffa.tx, fraglen); + + offset = buf->ep_mem_access[0].composite_off; + if (!offset || buf->ep_count != 1 || buf->sender_id != HOST_FFA_ID) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out_unlock; + } + + if (fraglen < offset + sizeof(struct ffa_composite_mem_region)) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out_unlock; + } + + reg = (void *)buf + offset; + nr_ranges = ((void *)buf + fraglen) - (void *)reg->constituents; + if (nr_ranges % sizeof(reg->constituents[0])) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out_unlock; + } + + nr_ranges /= sizeof(reg->constituents[0]); + ret = ffa_host_share_ranges(reg->constituents, nr_ranges); + if (ret) + goto out_unlock; + + spmd_mem_xfer(res, func_id, len, fraglen); + if (fraglen != len) { + if (res->a0 != FFA_MEM_FRAG_RX) + goto err_unshare; + + if (res->a3 != fraglen) + goto err_unshare; + } else if (res->a0 != FFA_SUCCESS) { + goto err_unshare; + } + +out_unlock: + hyp_spin_unlock(&host_kvm.ffa.lock); +out: + if (ret) + ffa_to_smccc_res(res, ret); + return; + +err_unshare: + WARN_ON(ffa_host_unshare_ranges(reg->constituents, nr_ranges)); + goto out_unlock; +} + +static void do_ffa_mem_reclaim(struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(u32, handle_lo, ctxt, 1); + DECLARE_REG(u32, handle_hi, ctxt, 2); + DECLARE_REG(u32, flags, ctxt, 3); + struct ffa_composite_mem_region *reg; + u32 offset, len, fraglen, fragoff; + struct ffa_mem_region *buf; + int ret = 0; + u64 handle; + + handle = PACK_HANDLE(handle_lo, handle_hi); + + hyp_spin_lock(&host_kvm.ffa.lock); + + buf = ffa_buffers.tx; + *buf = (struct ffa_mem_region) { + .sender_id = HOST_FFA_ID, + .handle = handle, + }; + + spmd_retrieve_req(res, sizeof(*buf)); + buf = ffa_buffers.rx; + if (res->a0 != FFA_MEM_RETRIEVE_RESP) + goto out_unlock; + + len = res->a1; + fraglen = res->a2; + + offset = buf->ep_mem_access[0].composite_off; + /* + * We can trust the SPMD to get this right, but let's at least + * check that we end up with something that doesn't look _completely_ + * bogus. + */ + if (WARN_ON(offset > len || + fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) { + ret = FFA_RET_ABORTED; + goto out_unlock; + } + + if (len > ffa_desc_buf.len) { + ret = FFA_RET_NO_MEMORY; + goto out_unlock; + } + + buf = ffa_desc_buf.buf; + memcpy(buf, ffa_buffers.rx, fraglen); + + for (fragoff = fraglen; fragoff < len; fragoff += fraglen) { + spmd_mem_frag_rx(res, handle_lo, handle_hi, fragoff); + if (res->a0 != FFA_MEM_FRAG_TX) { + ret = FFA_RET_INVALID_PARAMETERS; + goto out_unlock; + } + + fraglen = res->a3; + memcpy((void *)buf + fragoff, ffa_buffers.rx, fraglen); + } + + spmd_mem_reclaim(res, handle_lo, handle_hi, flags); + if (res->a0 != FFA_SUCCESS) + goto out_unlock; + + reg = (void *)buf + offset; + /* If the SPMD was happy, then we should be too. */ + WARN_ON(ffa_host_unshare_ranges(reg->constituents, + reg->addr_range_cnt)); +out_unlock: + hyp_spin_unlock(&host_kvm.ffa.lock); + + if (ret) + ffa_to_smccc_res(res, ret); + return; +} + +static bool ffa_call_unsupported(u64 func_id) +{ + switch (func_id) { + /* Unsupported memory management calls */ + case FFA_FN64_MEM_RETRIEVE_REQ: + case FFA_MEM_RETRIEVE_RESP: + case FFA_MEM_RELINQUISH: + case FFA_MEM_OP_PAUSE: + case FFA_MEM_OP_RESUME: + case FFA_MEM_FRAG_RX: + case FFA_FN64_MEM_DONATE: + /* Indirect message passing via RX/TX buffers */ + case FFA_MSG_SEND: + case FFA_MSG_POLL: + case FFA_MSG_WAIT: + /* 32-bit variants of 64-bit calls */ + case FFA_MSG_SEND_DIRECT_REQ: + case FFA_MSG_SEND_DIRECT_RESP: + case FFA_RXTX_MAP: + case FFA_MEM_DONATE: + case FFA_MEM_RETRIEVE_REQ: + return true; + } + + return false; +} + +static bool do_ffa_features(struct arm_smccc_res *res, + struct kvm_cpu_context *ctxt) +{ + DECLARE_REG(u32, id, ctxt, 1); + u64 prop = 0; + int ret = 0; + + if (ffa_call_unsupported(id)) { + ret = FFA_RET_NOT_SUPPORTED; + goto out_handled; + } + + switch (id) { + case FFA_MEM_SHARE: + case FFA_FN64_MEM_SHARE: + case FFA_MEM_LEND: + case FFA_FN64_MEM_LEND: + ret = FFA_RET_SUCCESS; + prop = 0; /* No support for dynamic buffers */ + goto out_handled; + default: + return false; + } + +out_handled: + ffa_to_smccc_res_prop(res, ret, prop); + return true; +} + +bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, func_id, host_ctxt, 0); + struct arm_smccc_res res; + + if (!is_ffa_call(func_id)) + return false; + + switch (func_id) { + case FFA_FEATURES: + if (!do_ffa_features(&res, host_ctxt)) + return false; + goto out_handled; + /* Memory management */ + case FFA_FN64_RXTX_MAP: + do_ffa_rxtx_map(&res, host_ctxt); + goto out_handled; + case FFA_RXTX_UNMAP: + do_ffa_rxtx_unmap(&res, host_ctxt); + goto out_handled; + case FFA_MEM_SHARE: + case FFA_FN64_MEM_SHARE: + do_ffa_mem_xfer(FFA_FN64_MEM_SHARE, &res, host_ctxt); + goto out_handled; + case FFA_MEM_RECLAIM: + do_ffa_mem_reclaim(&res, host_ctxt); + goto out_handled; + case FFA_MEM_LEND: + case FFA_FN64_MEM_LEND: + do_ffa_mem_xfer(FFA_FN64_MEM_LEND, &res, host_ctxt); + goto out_handled; + case FFA_MEM_FRAG_TX: + do_ffa_mem_frag_tx(&res, host_ctxt); + goto out_handled; + } + + if (!ffa_call_unsupported(func_id)) + return false; /* Pass through */ + + ffa_to_smccc_error(&res, FFA_RET_NOT_SUPPORTED); +out_handled: + ffa_set_retval(host_ctxt, &res); + return true; +} + +int hyp_ffa_init(void *pages) +{ + struct arm_smccc_res res; + size_t min_rxtx_sz; + void *tx, *rx; + + if (kvm_host_psci_config.smccc_version < ARM_SMCCC_VERSION_1_1) + return 0; + + arm_smccc_1_1_smc(FFA_VERSION, FFA_VERSION_1_0, 0, 0, 0, 0, 0, 0, &res); + if (res.a0 == FFA_RET_NOT_SUPPORTED) + return 0; + + if (res.a0 != FFA_VERSION_1_0) + return -EOPNOTSUPP; + + arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res); + if (res.a0 != FFA_SUCCESS) + return -EOPNOTSUPP; + + if (res.a2 != HOST_FFA_ID) + return -EINVAL; + + arm_smccc_1_1_smc(FFA_FEATURES, FFA_FN64_RXTX_MAP, + 0, 0, 0, 0, 0, 0, &res); + if (res.a0 != FFA_SUCCESS) + return -EOPNOTSUPP; + + switch (res.a2) { + case FFA_FEAT_RXTX_MIN_SZ_4K: + min_rxtx_sz = SZ_4K; + break; + case FFA_FEAT_RXTX_MIN_SZ_16K: + min_rxtx_sz = SZ_16K; + break; + case FFA_FEAT_RXTX_MIN_SZ_64K: + min_rxtx_sz = SZ_64K; + break; + default: + return -EINVAL; + } + + if (min_rxtx_sz > PAGE_SIZE) + return -EOPNOTSUPP; + + tx = pages; + pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE; + rx = pages; + pages += KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE; + + ffa_desc_buf = (struct kvm_ffa_descriptor_buffer) { + .buf = pages, + .len = PAGE_SIZE * + (hyp_ffa_proxy_pages() - (2 * KVM_FFA_MBOX_NR_PAGES)), + }; + + ffa_buffers = (struct kvm_ffa_buffers) { + .lock = __HYP_SPIN_LOCK_UNLOCKED, + .tx = tx, + .rx = rx, + }; + + host_kvm.ffa = (struct kvm_ffa_buffers) { + .lock = __HYP_SPIN_LOCK_UNLOCKED, + }; + + return 0; +} diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 7c2bf715963057a9748f40793f1178cce8e7d8f7..85788e511156af9f927a4807498f6b7a71ff30ed 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -110,17 +110,14 @@ SYM_FUNC_START(__hyp_do_panic) b __host_enter_for_panic SYM_FUNC_END(__hyp_do_panic) -.macro host_el1_sync_vect - .align 7 -.L__vect_start\@: - stp x0, x1, [sp, #-16]! - mrs x0, esr_el2 - lsr x0, x0, #ESR_ELx_EC_SHIFT - cmp x0, #ESR_ELx_EC_HVC64 - b.ne __host_exit - +SYM_FUNC_START(__host_hvc) ldp x0, x1, [sp] // Don't fixup the stack yet + /* No stub for you, sonny Jim */ +alternative_if ARM64_KVM_PROTECTED_MODE + b __host_exit +alternative_else_nop_endif + /* Check for a stub HVC call */ cmp x0, #HVC_STUB_HCALL_NR b.hs __host_exit @@ -137,6 +134,17 @@ SYM_FUNC_END(__hyp_do_panic) ldr x5, =__kvm_handle_stub_hvc hyp_pa x5, x6 br x5 +SYM_FUNC_END(__host_hvc) + +.macro host_el1_sync_vect + .align 7 +.L__vect_start\@: + stp x0, x1, [sp, #-16]! + mrs x0, esr_el2 + ubfx x0, x0, #ESR_ELx_EC_SHIFT, #ESR_ELx_EC_WIDTH + cmp x0, #ESR_ELx_EC_HVC64 + b.eq __host_hvc + b __host_exit .L__vect_end\@: .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80) .error "host_el1_sync_vect larger than vector entry" @@ -190,15 +198,15 @@ SYM_CODE_START(__kvm_hyp_host_vector) invalid_host_el2_vect // FIQ EL2h invalid_host_el2_vect // Error EL2h - host_el1_sync_vect // Synchronous 64-bit EL1 - invalid_host_el1_vect // IRQ 64-bit EL1 - invalid_host_el1_vect // FIQ 64-bit EL1 - invalid_host_el1_vect // Error 64-bit EL1 + host_el1_sync_vect // Synchronous 64-bit EL1/EL0 + invalid_host_el1_vect // IRQ 64-bit EL1/EL0 + invalid_host_el1_vect // FIQ 64-bit EL1/EL0 + invalid_host_el1_vect // Error 64-bit EL1/EL0 - invalid_host_el1_vect // Synchronous 32-bit EL1 - invalid_host_el1_vect // IRQ 32-bit EL1 - invalid_host_el1_vect // FIQ 32-bit EL1 - invalid_host_el1_vect // Error 32-bit EL1 + host_el1_sync_vect // Synchronous 32-bit EL1/EL0 + invalid_host_el1_vect // IRQ 32-bit EL1/EL0 + invalid_host_el1_vect // FIQ 32-bit EL1/EL0 + invalid_host_el1_vect // Error 32-bit EL1/EL0 SYM_CODE_END(__kvm_hyp_host_vector) /* diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2da6aa8da8680d1e6c17415e0ee16d13fd26d3bc..14f9da9f87b90659330a3052bd4fd49d8bc03b5f 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -4,7 +4,9 @@ * Author: Andrew Scull */ -#include +#include + +#include #include #include @@ -13,26 +15,808 @@ #include #include +#include +#include #include #include +#include #include +#include +#include + +#include "../../sys_regs.h" + +struct pkvm_loaded_state { + /* loaded vcpu is HYP VA */ + struct kvm_vcpu *vcpu; + bool is_protected; + + /* + * Host FPSIMD state. Written to when the guest accesses its + * own FPSIMD state, and read when the guest state is live and + * that it needs to be switched back to the host. + * + * Only valid when the KVM_ARM64_FP_ENABLED flag is set in the + * shadow structure. + */ + struct user_fpsimd_state host_fpsimd_state; +}; + +static DEFINE_PER_CPU(struct pkvm_loaded_state, loaded_state); + DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); +typedef void (*shadow_entry_exit_handler_fn)(struct kvm_vcpu *, struct kvm_vcpu *); + +static void handle_pvm_entry_wfx(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + shadow_vcpu->arch.flags |= host_vcpu->arch.flags & KVM_ARM64_INCREMENT_PC; +} + +static int pkvm_refill_memcache(struct kvm_vcpu *shadow_vcpu, + struct kvm_vcpu *host_vcpu) +{ + u64 nr_pages; + + nr_pages = VTCR_EL2_LVLS(shadow_vcpu->arch.pkvm.shadow_vm->arch.vtcr) - 1; + return refill_memcache(&shadow_vcpu->arch.pkvm_memcache, nr_pages, + &host_vcpu->arch.pkvm_memcache); +} + +static void handle_pvm_entry_psci(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + u32 psci_fn = smccc_get_function(shadow_vcpu); + u64 ret = vcpu_get_reg(host_vcpu, 0); + + switch (psci_fn) { + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + /* + * Check whether the cpu_on request to the host was successful. + * If not, reset the vcpu state from ON_PENDING to OFF. + * This could happen if this vcpu attempted to turn on the other + * vcpu while the other one is in the process of turning itself + * off. + */ + if (ret != PSCI_RET_SUCCESS) { + struct kvm_shadow_vm *vm = shadow_vcpu->arch.pkvm.shadow_vm; + unsigned long cpu_id = smccc_get_arg1(shadow_vcpu); + struct kvm_vcpu *vcpu = pvm_mpidr_to_vcpu(vm, cpu_id); + + if (vcpu && READ_ONCE(vcpu->arch.pkvm.power_state) == PSCI_0_2_AFFINITY_LEVEL_ON_PENDING) + WRITE_ONCE(vcpu->arch.pkvm.power_state, PSCI_0_2_AFFINITY_LEVEL_OFF); + + ret = PSCI_RET_INTERNAL_FAILURE; + } + break; + default: + break; + } + + vcpu_set_reg(shadow_vcpu, 0, ret); +} + +static void handle_pvm_entry_hvc64(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + u32 fn = smccc_get_function(shadow_vcpu); + + switch (fn) { + case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID: + pkvm_refill_memcache(shadow_vcpu, host_vcpu); + break; + case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID: + fallthrough; + case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID: + vcpu_set_reg(shadow_vcpu, 0, SMCCC_RET_SUCCESS); + break; + default: + handle_pvm_entry_psci(host_vcpu, shadow_vcpu); + break; + } +} + +static void handle_pvm_entry_sys64(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + unsigned long host_flags; + + host_flags = READ_ONCE(host_vcpu->arch.flags); + + /* Exceptions have priority on anything else */ + if (host_flags & KVM_ARM64_PENDING_EXCEPTION) { + /* Exceptions caused by this should be undef exceptions. */ + u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); + + __vcpu_sys_reg(shadow_vcpu, ESR_EL1) = esr; + shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + shadow_vcpu->arch.flags |= (KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_EXCEPT_AA64_EL1); + + return; + } + + + if (host_flags & KVM_ARM64_INCREMENT_PC) { + shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + shadow_vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC; + } + + if (!esr_sys64_to_params(shadow_vcpu->arch.fault.esr_el2).is_write) { + /* r0 as transfer register between the guest and the host. */ + u64 rt_val = vcpu_get_reg(host_vcpu, 0); + int rt = kvm_vcpu_sys_get_rt(shadow_vcpu); + + vcpu_set_reg(shadow_vcpu, rt, rt_val); + } +} + +static void handle_pvm_entry_iabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + unsigned long cpsr = *vcpu_cpsr(shadow_vcpu); + unsigned long host_flags; + u32 esr = ESR_ELx_IL; + + host_flags = READ_ONCE(host_vcpu->arch.flags); + + if (!(host_flags & KVM_ARM64_PENDING_EXCEPTION)) + return; + + /* + * If the host wants to inject an exception, get syndrom and + * fault address. + */ + if ((cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t) + esr |= (ESR_ELx_EC_IABT_LOW << ESR_ELx_EC_SHIFT); + else + esr |= (ESR_ELx_EC_IABT_CUR << ESR_ELx_EC_SHIFT); + + esr |= ESR_ELx_FSC_EXTABT; + + __vcpu_sys_reg(shadow_vcpu, ESR_EL1) = esr; + __vcpu_sys_reg(shadow_vcpu, FAR_EL1) = kvm_vcpu_get_hfar(shadow_vcpu); + + /* Tell the run loop that we want to inject something */ + shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + shadow_vcpu->arch.flags |= (KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_EXCEPT_AA64_EL1); +} + +static void handle_pvm_entry_dabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + unsigned long host_flags; + bool rd_update; + + host_flags = READ_ONCE(host_vcpu->arch.flags); + + /* Exceptions have priority over anything else */ + if (host_flags & KVM_ARM64_PENDING_EXCEPTION) { + unsigned long cpsr = *vcpu_cpsr(shadow_vcpu); + u32 esr = ESR_ELx_IL; + + if ((cpsr & PSR_MODE_MASK) == PSR_MODE_EL0t) + esr |= (ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT); + else + esr |= (ESR_ELx_EC_DABT_CUR << ESR_ELx_EC_SHIFT); + + esr |= ESR_ELx_FSC_EXTABT; + + __vcpu_sys_reg(shadow_vcpu, ESR_EL1) = esr; + __vcpu_sys_reg(shadow_vcpu, FAR_EL1) = kvm_vcpu_get_hfar(shadow_vcpu); + /* Tell the run loop that we want to inject something */ + shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + shadow_vcpu->arch.flags |= (KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_EXCEPT_AA64_EL1); + + /* Cancel potential in-flight MMIO */ + shadow_vcpu->mmio_needed = false; + return; + } + + /* Handle PC increment on MMIO */ + if ((host_flags & KVM_ARM64_INCREMENT_PC) && shadow_vcpu->mmio_needed) { + shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + shadow_vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC; + } + + /* If we were doing an MMIO read access, update the register*/ + rd_update = (shadow_vcpu->mmio_needed && + (host_flags & KVM_ARM64_INCREMENT_PC)); + rd_update &= !kvm_vcpu_dabt_iswrite(shadow_vcpu); + + if (rd_update) { + /* r0 as transfer register between the guest and the host. */ + u64 rd_val = vcpu_get_reg(host_vcpu, 0); + int rd = kvm_vcpu_dabt_get_rd(shadow_vcpu); + + vcpu_set_reg(shadow_vcpu, rd, rd_val); + } + + shadow_vcpu->mmio_needed = false; +} + +static void handle_pvm_exit_wfx(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + host_vcpu->arch.ctxt.regs.pstate = shadow_vcpu->arch.ctxt.regs.pstate & + PSR_MODE_MASK; + host_vcpu->arch.fault.esr_el2 = shadow_vcpu->arch.fault.esr_el2; +} + +static void handle_pvm_exit_sys64(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + u32 esr_el2 = shadow_vcpu->arch.fault.esr_el2; + + /* r0 as transfer register between the guest and the host. */ + WRITE_ONCE(host_vcpu->arch.fault.esr_el2, + esr_el2 & ~ESR_ELx_SYS64_ISS_RT_MASK); + + /* The mode is required for the host to emulate some sysregs */ + WRITE_ONCE(host_vcpu->arch.ctxt.regs.pstate, + shadow_vcpu->arch.ctxt.regs.pstate & PSR_MODE_MASK); + + if (esr_sys64_to_params(esr_el2).is_write) { + int rt = kvm_vcpu_sys_get_rt(shadow_vcpu); + u64 rt_val = vcpu_get_reg(shadow_vcpu, rt); + + vcpu_set_reg(host_vcpu, 0, rt_val); + } +} + +static void handle_pvm_exit_hvc64(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + int n, i; + + switch (smccc_get_function(shadow_vcpu)) { + /* + * CPU_ON takes 3 arguments, however, to wake up the target vcpu the + * host only needs to know the target's cpu_id, which is passed as the + * first argument. The processing of the reset state is done at hyp. + */ + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + n = 2; + break; + + case PSCI_0_2_FN_CPU_OFF: + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + n = 1; + break; + + case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID: + fallthrough; + case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID: + n = 4; + break; + + case PSCI_1_1_FN_SYSTEM_RESET2: + case PSCI_1_1_FN64_SYSTEM_RESET2: + case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID: + n = 3; + break; + + /* + * The rest are either blocked or handled by HYP, so we should + * really never be here. + */ + default: + BUG(); + } + + host_vcpu->arch.fault.esr_el2 = shadow_vcpu->arch.fault.esr_el2; + + /* Pass the hvc function id (r0) as well as any potential arguments. */ + for (i = 0; i < n; i++) + vcpu_set_reg(host_vcpu, i, vcpu_get_reg(shadow_vcpu, i)); +} + +static void handle_pvm_exit_iabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + WRITE_ONCE(host_vcpu->arch.fault.esr_el2, + shadow_vcpu->arch.fault.esr_el2); + WRITE_ONCE(host_vcpu->arch.fault.hpfar_el2, + shadow_vcpu->arch.fault.hpfar_el2); +} + +static void handle_pvm_exit_dabt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + shadow_vcpu->mmio_needed = __pkvm_check_ioguard_page(shadow_vcpu); + + if (shadow_vcpu->mmio_needed) { + /* r0 as transfer register between the guest and the host. */ + WRITE_ONCE(host_vcpu->arch.fault.esr_el2, + shadow_vcpu->arch.fault.esr_el2 & ~ESR_ELx_SRT_MASK); + + if (kvm_vcpu_dabt_iswrite(shadow_vcpu)) { + int rt = kvm_vcpu_dabt_get_rd(shadow_vcpu); + u64 rt_val = vcpu_get_reg(shadow_vcpu, rt); + + vcpu_set_reg(host_vcpu, 0, rt_val); + } + } else { + WRITE_ONCE(host_vcpu->arch.fault.esr_el2, + shadow_vcpu->arch.fault.esr_el2 & ~ESR_ELx_ISV); + } + + WRITE_ONCE(host_vcpu->arch.ctxt.regs.pstate, + shadow_vcpu->arch.ctxt.regs.pstate & PSR_MODE_MASK); + WRITE_ONCE(host_vcpu->arch.fault.far_el2, + shadow_vcpu->arch.fault.far_el2 & FAR_MASK); + WRITE_ONCE(host_vcpu->arch.fault.hpfar_el2, + shadow_vcpu->arch.fault.hpfar_el2); + WRITE_ONCE(__vcpu_sys_reg(host_vcpu, SCTLR_EL1), + __vcpu_sys_reg(shadow_vcpu, SCTLR_EL1) & (SCTLR_ELx_EE | SCTLR_EL1_E0E)); +} + +static void handle_vm_entry_generic(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + unsigned long host_flags = READ_ONCE(host_vcpu->arch.flags); + + shadow_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | + KVM_ARM64_EXCEPT_MASK); + + if (host_flags & KVM_ARM64_PENDING_EXCEPTION) { + shadow_vcpu->arch.flags |= KVM_ARM64_PENDING_EXCEPTION; + shadow_vcpu->arch.flags |= host_flags & KVM_ARM64_EXCEPT_MASK; + } else if (host_flags & KVM_ARM64_INCREMENT_PC) { + shadow_vcpu->arch.flags |= KVM_ARM64_INCREMENT_PC; + } +} + +static void handle_vm_exit_generic(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + host_vcpu->arch.fault.esr_el2 = shadow_vcpu->arch.fault.esr_el2; +} + +static void handle_vm_exit_abt(struct kvm_vcpu *host_vcpu, struct kvm_vcpu *shadow_vcpu) +{ + host_vcpu->arch.fault = shadow_vcpu->arch.fault; +} + +static const shadow_entry_exit_handler_fn entry_pvm_shadow_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_WFx] = handle_pvm_entry_wfx, + [ESR_ELx_EC_HVC64] = handle_pvm_entry_hvc64, + [ESR_ELx_EC_SYS64] = handle_pvm_entry_sys64, + [ESR_ELx_EC_IABT_LOW] = handle_pvm_entry_iabt, + [ESR_ELx_EC_DABT_LOW] = handle_pvm_entry_dabt, +}; + +static const shadow_entry_exit_handler_fn exit_pvm_shadow_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_WFx] = handle_pvm_exit_wfx, + [ESR_ELx_EC_HVC64] = handle_pvm_exit_hvc64, + [ESR_ELx_EC_SYS64] = handle_pvm_exit_sys64, + [ESR_ELx_EC_IABT_LOW] = handle_pvm_exit_iabt, + [ESR_ELx_EC_DABT_LOW] = handle_pvm_exit_dabt, +}; + +static const shadow_entry_exit_handler_fn entry_vm_shadow_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = handle_vm_entry_generic, +}; + +static const shadow_entry_exit_handler_fn exit_vm_shadow_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = handle_vm_exit_generic, + [ESR_ELx_EC_IABT_LOW] = handle_vm_exit_abt, + [ESR_ELx_EC_DABT_LOW] = handle_vm_exit_abt, +}; + +static void flush_vgic_state(struct kvm_vcpu *host_vcpu, + struct kvm_vcpu *shadow_vcpu) +{ + struct vgic_v3_cpu_if *host_cpu_if, *shadow_cpu_if; + unsigned int used_lrs, max_lrs, i; + + host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3; + shadow_cpu_if = &shadow_vcpu->arch.vgic_cpu.vgic_v3; + + max_lrs = (read_gicreg(ICH_VTR_EL2) & 0xf) + 1; + used_lrs = READ_ONCE(host_cpu_if->used_lrs); + used_lrs = min(used_lrs, max_lrs); + + shadow_cpu_if->vgic_hcr = host_cpu_if->vgic_hcr; + /* Should be a one-off */ + shadow_cpu_if->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); + shadow_cpu_if->used_lrs = used_lrs; + + for (i = 0; i < used_lrs; i++) + shadow_cpu_if->vgic_lr[i] = host_cpu_if->vgic_lr[i]; +} + +static void sync_vgic_state(struct kvm_vcpu *host_vcpu, + struct kvm_vcpu *shadow_vcpu) +{ + struct vgic_v3_cpu_if *host_cpu_if, *shadow_cpu_if; + unsigned int i; + + host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3; + shadow_cpu_if = &shadow_vcpu->arch.vgic_cpu.vgic_v3; + + host_cpu_if->vgic_hcr = shadow_cpu_if->vgic_hcr; + + for (i = 0; i < shadow_cpu_if->used_lrs; i++) + host_cpu_if->vgic_lr[i] = shadow_cpu_if->vgic_lr[i]; +} + +static void flush_timer_state(struct pkvm_loaded_state *state) +{ + struct kvm_vcpu *shadow_vcpu = state->vcpu; + + if (!state->is_protected) + return; + + /* + * A shadow vcpu has no offset, and sees vtime == ptime. The + * ptimer is fully emulated by EL1 and cannot be trusted. + */ + write_sysreg(0, cntvoff_el2); + isb(); + write_sysreg_el0(__vcpu_sys_reg(shadow_vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL); + write_sysreg_el0(__vcpu_sys_reg(shadow_vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL); +} + +static void sync_timer_state(struct pkvm_loaded_state *state) +{ + struct kvm_vcpu *shadow_vcpu = state->vcpu; + + if (!state->is_protected) + return; + + /* + * Preserve the vtimer state so that it is always correct, + * even if the host tries to make a mess. + */ + __vcpu_sys_reg(shadow_vcpu, CNTV_CVAL_EL0) = read_sysreg_el0(SYS_CNTV_CVAL); + __vcpu_sys_reg(shadow_vcpu, CNTV_CTL_EL0) = read_sysreg_el0(SYS_CNTV_CTL); +} + +static void __copy_vcpu_state(const struct kvm_vcpu *from_vcpu, + struct kvm_vcpu *to_vcpu) +{ + int i; + + to_vcpu->arch.ctxt.regs = from_vcpu->arch.ctxt.regs; + to_vcpu->arch.ctxt.spsr_abt = from_vcpu->arch.ctxt.spsr_abt; + to_vcpu->arch.ctxt.spsr_und = from_vcpu->arch.ctxt.spsr_und; + to_vcpu->arch.ctxt.spsr_irq = from_vcpu->arch.ctxt.spsr_irq; + to_vcpu->arch.ctxt.spsr_fiq = from_vcpu->arch.ctxt.spsr_fiq; + + /* + * Copy the sysregs, but don't mess with the timer state which + * is directly handled by EL1 and is expected to be preserved. + */ + for (i = 1; i < NR_SYS_REGS; i++) { + if (i >= CNTVOFF_EL2 && i <= CNTP_CTL_EL0) + continue; + to_vcpu->arch.ctxt.sys_regs[i] = from_vcpu->arch.ctxt.sys_regs[i]; + } +} + +static void __sync_vcpu_state(struct kvm_vcpu *shadow_vcpu) +{ + struct kvm_vcpu *host_vcpu = shadow_vcpu->arch.pkvm.host_vcpu; + + __copy_vcpu_state(shadow_vcpu, host_vcpu); +} + +static void __flush_vcpu_state(struct kvm_vcpu *shadow_vcpu) +{ + struct kvm_vcpu *host_vcpu = shadow_vcpu->arch.pkvm.host_vcpu; + + __copy_vcpu_state(host_vcpu, shadow_vcpu); +} + +static void flush_shadow_state(struct pkvm_loaded_state *state) +{ + struct kvm_vcpu *shadow_vcpu = state->vcpu; + struct kvm_vcpu *host_vcpu = shadow_vcpu->arch.pkvm.host_vcpu; + u8 esr_ec; + shadow_entry_exit_handler_fn ec_handler; + + if (READ_ONCE(shadow_vcpu->arch.pkvm.power_state) == PSCI_0_2_AFFINITY_LEVEL_ON_PENDING) + pkvm_reset_vcpu(shadow_vcpu); + + /* + * If we deal with a non-protected guest and that the state is + * dirty (from a host perspective), copy the state back into + * the shadow. + */ + if (!state->is_protected) { + if (READ_ONCE(host_vcpu->arch.flags) & KVM_ARM64_PKVM_STATE_DIRTY) + __flush_vcpu_state(shadow_vcpu); + + state->vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS & ~(HCR_RW | HCR_TWI | HCR_TWE); + state->vcpu->arch.hcr_el2 |= host_vcpu->arch.hcr_el2; + } + + flush_vgic_state(host_vcpu, shadow_vcpu); + flush_timer_state(state); + + switch (ARM_EXCEPTION_CODE(shadow_vcpu->arch.pkvm.exit_code)) { + case ARM_EXCEPTION_IRQ: + case ARM_EXCEPTION_EL1_SERROR: + case ARM_EXCEPTION_IL: + break; + case ARM_EXCEPTION_TRAP: + esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(shadow_vcpu)); + if (state->is_protected) + ec_handler = entry_pvm_shadow_handlers[esr_ec]; + else + ec_handler = entry_vm_shadow_handlers[esr_ec]; + + if (ec_handler) + ec_handler(host_vcpu, shadow_vcpu); + + break; + default: + BUG(); + } + + shadow_vcpu->arch.pkvm.exit_code = 0; +} + +static void sync_shadow_state(struct pkvm_loaded_state *state, u32 exit_reason) +{ + struct kvm_vcpu *shadow_vcpu = state->vcpu; + struct kvm_vcpu *host_vcpu = shadow_vcpu->arch.pkvm.host_vcpu; + u8 esr_ec; + shadow_entry_exit_handler_fn ec_handler; + + /* + * Don't sync the vcpu GPR/sysreg state after a run. Instead, + * leave it in the shadow until someone actually requires it. + */ + sync_vgic_state(host_vcpu, shadow_vcpu); + sync_timer_state(state); + + switch (ARM_EXCEPTION_CODE(exit_reason)) { + case ARM_EXCEPTION_IRQ: + break; + case ARM_EXCEPTION_TRAP: + esr_ec = ESR_ELx_EC(kvm_vcpu_get_esr(shadow_vcpu)); + if (state->is_protected) + ec_handler = exit_pvm_shadow_handlers[esr_ec]; + else + ec_handler = exit_vm_shadow_handlers[esr_ec]; + + if (ec_handler) + ec_handler(host_vcpu, shadow_vcpu); + + break; + case ARM_EXCEPTION_EL1_SERROR: + case ARM_EXCEPTION_IL: + break; + default: + BUG(); + } + + host_vcpu->arch.flags &= ~(KVM_ARM64_PENDING_EXCEPTION | KVM_ARM64_INCREMENT_PC); + shadow_vcpu->arch.pkvm.exit_code = exit_reason; +} + +static void fpsimd_host_restore(void) +{ + sysreg_clear_set(cptr_el2, CPTR_EL2_TZ | CPTR_EL2_TFP, 0); + isb(); + + if (unlikely(is_protected_kvm_enabled())) { + struct pkvm_loaded_state *state = this_cpu_ptr(&loaded_state); + + __fpsimd_save_state(&state->vcpu->arch.ctxt.fp_regs); + __fpsimd_restore_state(&state->host_fpsimd_state); + + state->vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED; + state->vcpu->arch.flags |= KVM_ARM64_FP_HOST; + } + + if (system_supports_sve()) + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); +} + +static void handle___pkvm_vcpu_load(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(int, shadow_handle, host_ctxt, 1); + DECLARE_REG(int, vcpu_idx, host_ctxt, 2); + DECLARE_REG(u64, hcr_el2, host_ctxt, 3); + struct pkvm_loaded_state *state; + + /* Why did you bother? */ + if (!is_protected_kvm_enabled()) + return; + + state = this_cpu_ptr(&loaded_state); + + /* Nice try */ + if (state->vcpu) + return; + + state->vcpu = get_shadow_vcpu(shadow_handle, vcpu_idx); + + if (!state->vcpu) + return; + + state->is_protected = state->vcpu->arch.pkvm.shadow_vm->arch.pkvm.enabled; + + state->vcpu->arch.host_fpsimd_state = &state->host_fpsimd_state; + state->vcpu->arch.flags |= KVM_ARM64_FP_HOST; + + if (state->is_protected) { + /* Propagate WFx trapping flags, trap ptrauth */ + state->vcpu->arch.hcr_el2 &= ~(HCR_TWE | HCR_TWI | + HCR_API | HCR_APK); + state->vcpu->arch.hcr_el2 |= hcr_el2 & (HCR_TWE | HCR_TWI); + } +} + +static void handle___pkvm_vcpu_put(struct kvm_cpu_context *host_ctxt) +{ + if (unlikely(is_protected_kvm_enabled())) { + struct pkvm_loaded_state *state = this_cpu_ptr(&loaded_state); + + if (state->vcpu) { + struct kvm_vcpu *host_vcpu = state->vcpu->arch.pkvm.host_vcpu; + + if (state->vcpu->arch.flags & KVM_ARM64_FP_ENABLED) + fpsimd_host_restore(); + + if (!state->is_protected && + !(READ_ONCE(host_vcpu->arch.flags) & KVM_ARM64_PKVM_STATE_DIRTY)) + __sync_vcpu_state(state->vcpu); + + put_shadow_vcpu(state->vcpu); + + /* "It's over and done with..." */ + state->vcpu = NULL; + } + } +} + +static void handle___pkvm_vcpu_sync_state(struct kvm_cpu_context *host_ctxt) +{ + if (unlikely(is_protected_kvm_enabled())) { + struct pkvm_loaded_state *state = this_cpu_ptr(&loaded_state); + + if (!state->vcpu || state->is_protected) + return; + + __sync_vcpu_state(state->vcpu); + } +} + +static struct kvm_vcpu *__get_current_vcpu(struct kvm_vcpu *vcpu, + struct pkvm_loaded_state **state) +{ + struct pkvm_loaded_state *sstate = NULL; + + vcpu = kern_hyp_va(vcpu); + + if (unlikely(is_protected_kvm_enabled())) { + sstate = this_cpu_ptr(&loaded_state); + + if (!sstate || vcpu != sstate->vcpu->arch.pkvm.host_vcpu) { + sstate = NULL; + vcpu = NULL; + } + } + + *state = sstate; + return vcpu; +} + +#define get_current_vcpu(ctxt, regnr, statepp) \ + ({ \ + DECLARE_REG(struct kvm_vcpu *, __vcpu, ctxt, regnr); \ + __get_current_vcpu(__vcpu, statepp); \ + }) + +#define get_current_vcpu_from_cpu_if(ctxt, regnr, statepp) \ + ({ \ + DECLARE_REG(struct vgic_v3_cpu_if *, cif, ctxt, regnr); \ + struct kvm_vcpu *__vcpu; \ + __vcpu = container_of(cif, \ + struct kvm_vcpu, \ + arch.vgic_cpu.vgic_v3); \ + \ + __get_current_vcpu(__vcpu, statepp); \ + }) + static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { - DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + struct pkvm_loaded_state *shadow_state; + struct kvm_vcpu *vcpu; + int ret; + + vcpu = get_current_vcpu(host_ctxt, 1, &shadow_state); + if (!vcpu) { + cpu_reg(host_ctxt, 1) = -EINVAL; + return; + } + + if (unlikely(shadow_state)) { + flush_shadow_state(shadow_state); + + ret = __kvm_vcpu_run(shadow_state->vcpu); + + sync_shadow_state(shadow_state, ret); + + if (shadow_state->vcpu->arch.flags & KVM_ARM64_FP_ENABLED) { + /* + * The guest has used the FP, trap all accesses + * from the host (both FP and SVE). + */ + u64 reg = CPTR_EL2_TFP; + if (system_supports_sve()) + reg |= CPTR_EL2_TZ; - cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu)); + sysreg_clear_set(cptr_el2, 0, reg); + } + } else { + ret = __kvm_vcpu_run(vcpu); + } + + cpu_reg(host_ctxt, 1) = ret; +} + +static void handle___pkvm_host_donate_guest(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, pfn, host_ctxt, 1); + DECLARE_REG(u64, gfn, host_ctxt, 2); + struct kvm_vcpu *host_vcpu; + struct pkvm_loaded_state *state; + int ret = -EINVAL; + + if (!is_protected_kvm_enabled()) + goto out; + + state = this_cpu_ptr(&loaded_state); + if (!state->vcpu) + goto out; + + host_vcpu = state->vcpu->arch.pkvm.host_vcpu; + + /* Topup shadow memcache with the host's */ + ret = pkvm_refill_memcache(state->vcpu, host_vcpu); + if (!ret) { + if (state->is_protected) + ret = __pkvm_host_donate_guest(pfn, gfn, state->vcpu); + else + ret = __pkvm_host_share_guest(pfn, gfn, state->vcpu); + } +out: + cpu_reg(host_ctxt, 1) = ret; } static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt) { - DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + struct pkvm_loaded_state *shadow_state; + struct kvm_vcpu *vcpu; - __kvm_adjust_pc(kern_hyp_va(vcpu)); + vcpu = get_current_vcpu(host_ctxt, 1, &shadow_state); + if (!vcpu) + return; + + if (shadow_state) { + /* This only applies to non-protected VMs */ + if (shadow_state->is_protected) + return; + + vcpu = shadow_state->vcpu; + } + + __kvm_adjust_pc(vcpu); } static void handle___kvm_flush_vm_context(struct kvm_cpu_context *host_ctxt) @@ -82,16 +866,6 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config(); } -static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) -{ - cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr(); -} - -static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt) -{ - __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1)); -} - static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) { __vgic_v3_init_lrs(); @@ -102,18 +876,64 @@ static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); } -static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { - DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); + struct pkvm_loaded_state *shadow_state; + struct kvm_vcpu *vcpu; + + vcpu = get_current_vcpu_from_cpu_if(host_ctxt, 1, &shadow_state); + if (!vcpu) + return; + + if (shadow_state) { + struct vgic_v3_cpu_if *shadow_cpu_if, *cpu_if; + int i; - __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); + shadow_cpu_if = &shadow_state->vcpu->arch.vgic_cpu.vgic_v3; + __vgic_v3_save_vmcr_aprs(shadow_cpu_if); + + cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + cpu_if->vgic_vmcr = shadow_cpu_if->vgic_vmcr; + for (i = 0; i < ARRAY_SIZE(cpu_if->vgic_ap0r); i++) { + cpu_if->vgic_ap0r[i] = shadow_cpu_if->vgic_ap0r[i]; + cpu_if->vgic_ap1r[i] = shadow_cpu_if->vgic_ap1r[i]; + } + } else { + __vgic_v3_save_vmcr_aprs(&vcpu->arch.vgic_cpu.vgic_v3); + } } -static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { - DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); + struct pkvm_loaded_state *shadow_state; + struct kvm_vcpu *vcpu; + + vcpu = get_current_vcpu_from_cpu_if(host_ctxt, 1, &shadow_state); + if (!vcpu) + return; + + if (shadow_state) { + struct vgic_v3_cpu_if *shadow_cpu_if, *cpu_if; + int i; + + shadow_cpu_if = &shadow_state->vcpu->arch.vgic_cpu.vgic_v3; + cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); + shadow_cpu_if->vgic_vmcr = cpu_if->vgic_vmcr; + /* Should be a one-off */ + shadow_cpu_if->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); + for (i = 0; i < ARRAY_SIZE(cpu_if->vgic_ap0r); i++) { + shadow_cpu_if->vgic_ap0r[i] = cpu_if->vgic_ap0r[i]; + shadow_cpu_if->vgic_ap1r[i] = cpu_if->vgic_ap1r[i]; + } + + __vgic_v3_restore_vmcr_aprs(shadow_cpu_if); + } else { + __vgic_v3_restore_vmcr_aprs(&vcpu->arch.vgic_cpu.vgic_v3); + } } static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt) @@ -147,6 +967,20 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn); } +static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, pfn, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn); +} + +static void handle___pkvm_host_reclaim_page(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, pfn, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __pkvm_host_reclaim_page(pfn); +} + static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(phys_addr_t, phys, host_ctxt, 1); @@ -160,41 +994,155 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) { cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } + +static void handle___pkvm_init_shadow(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); + DECLARE_REG(void *, host_shadow_va, host_ctxt, 2); + DECLARE_REG(size_t, shadow_size, host_ctxt, 3); + DECLARE_REG(void *, pgd, host_ctxt, 4); + + cpu_reg(host_ctxt, 1) = __pkvm_init_shadow(host_kvm, host_shadow_va, + shadow_size, pgd); +} + +static void handle___pkvm_teardown_shadow(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(int, shadow_handle, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __pkvm_teardown_shadow(shadow_handle); +} + +static void handle___pkvm_iommu_driver_init(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(enum pkvm_iommu_driver_id, id, host_ctxt, 1); + DECLARE_REG(void *, data, host_ctxt, 2); + DECLARE_REG(size_t, size, host_ctxt, 3); + + cpu_reg(host_ctxt, 1) = __pkvm_iommu_driver_init(id, data, size); +} + +static void handle___pkvm_iommu_register(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(unsigned long, dev_id, host_ctxt, 1); + DECLARE_REG(enum pkvm_iommu_driver_id, drv_id, host_ctxt, 2); + DECLARE_REG(phys_addr_t, dev_pa, host_ctxt, 3); + DECLARE_REG(size_t, dev_size, host_ctxt, 4); + DECLARE_REG(unsigned long, parent_id, host_ctxt, 5); + DECLARE_REG(void *, mem, host_ctxt, 6); + DECLARE_REG(size_t, mem_size, host_ctxt, 7); + + cpu_reg(host_ctxt, 1) = __pkvm_iommu_register(dev_id, drv_id, dev_pa, + dev_size, parent_id, + mem, mem_size); +} + +static void handle___pkvm_iommu_pm_notify(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(unsigned long, dev_id, host_ctxt, 1); + DECLARE_REG(enum pkvm_iommu_pm_event, event, host_ctxt, 2); + + cpu_reg(host_ctxt, 1) = __pkvm_iommu_pm_notify(dev_id, event); +} + +static void handle___pkvm_iommu_finalize(struct kvm_cpu_context *host_ctxt) +{ + cpu_reg(host_ctxt, 1) = __pkvm_iommu_finalize(); +} + typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x static const hcall_t host_hcall[] = { - HANDLE_FUNC(__kvm_vcpu_run), - HANDLE_FUNC(__kvm_adjust_pc), + /* ___kvm_hyp_init */ + HANDLE_FUNC(__kvm_get_mdcr_el2), + HANDLE_FUNC(__pkvm_init), + HANDLE_FUNC(__pkvm_create_private_mapping), + HANDLE_FUNC(__pkvm_cpu_set_vector), + HANDLE_FUNC(__kvm_enable_ssbs), + HANDLE_FUNC(__vgic_v3_init_lrs), + HANDLE_FUNC(__vgic_v3_get_gic_config), HANDLE_FUNC(__kvm_flush_vm_context), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid), HANDLE_FUNC(__kvm_flush_cpu_context), - HANDLE_FUNC(__kvm_timer_set_cntvoff), - HANDLE_FUNC(__kvm_enable_ssbs), - HANDLE_FUNC(__vgic_v3_get_gic_config), - HANDLE_FUNC(__vgic_v3_read_vmcr), - HANDLE_FUNC(__vgic_v3_write_vmcr), - HANDLE_FUNC(__vgic_v3_init_lrs), - HANDLE_FUNC(__kvm_get_mdcr_el2), - HANDLE_FUNC(__vgic_v3_save_aprs), - HANDLE_FUNC(__vgic_v3_restore_aprs), - HANDLE_FUNC(__pkvm_init), - HANDLE_FUNC(__pkvm_cpu_set_vector), - HANDLE_FUNC(__pkvm_host_share_hyp), - HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_prot_finalize), + + HANDLE_FUNC(__pkvm_host_share_hyp), + HANDLE_FUNC(__pkvm_host_unshare_hyp), + HANDLE_FUNC(__pkvm_host_reclaim_page), + HANDLE_FUNC(__pkvm_host_donate_guest), + HANDLE_FUNC(__kvm_adjust_pc), + HANDLE_FUNC(__kvm_vcpu_run), + HANDLE_FUNC(__kvm_timer_set_cntvoff), + HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), + HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), + HANDLE_FUNC(__pkvm_init_shadow), + HANDLE_FUNC(__pkvm_teardown_shadow), + HANDLE_FUNC(__pkvm_vcpu_load), + HANDLE_FUNC(__pkvm_vcpu_put), + HANDLE_FUNC(__pkvm_vcpu_sync_state), + HANDLE_FUNC(__pkvm_iommu_driver_init), + HANDLE_FUNC(__pkvm_iommu_register), + HANDLE_FUNC(__pkvm_iommu_pm_notify), + HANDLE_FUNC(__pkvm_iommu_finalize), }; +static inline u64 kernel__text_addr(void) +{ + u64 val; + + asm volatile(ALTERNATIVE_CB("movz %0, #0\n" + "movk %0, #0, lsl #16\n" + "movk %0, #0, lsl #32\n" + "movk %0, #0, lsl #48\n", + kvm_get__text) + : "=r" (val)); + + return val; +} + +static inline u64 kernel__etext_addr(void) +{ + u64 val; + + asm volatile(ALTERNATIVE_CB("movz %0, #0\n" + "movk %0, #0, lsl #16\n" + "movk %0, #0, lsl #32\n" + "movk %0, #0, lsl #48\n", + kvm_get__etext) + : "=r" (val)); + + return val; +} + static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(unsigned long, id, host_ctxt, 0); + u64 elr = read_sysreg_el2(SYS_ELR) - 4; + unsigned long hcall_min = 0; hcall_t hfn; + /* Check for the provenance of the HC */ + if (unlikely(elr < kernel__text_addr() || elr >= kernel__etext_addr())) + goto inval; + + /* + * If pKVM has been initialised then reject any calls to the + * early "privileged" hypercalls. Note that we cannot reject + * calls to __pkvm_prot_finalize for two reasons: (1) The static + * key used to determine initialisation must be toggled prior to + * finalisation and (2) finalisation is performed on a per-CPU + * basis. This is all fine, however, since __pkvm_prot_finalize + * returns -EPERM after the first call for a given CPU. + */ + if (static_branch_unlikely(&kvm_protected_mode_initialized)) + hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize; + id -= KVM_HOST_SMCCC_ID(0); - if (unlikely(id >= ARRAY_SIZE(host_hcall))) + if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall))) goto inval; hfn = host_hcall[id]; @@ -219,6 +1167,8 @@ static void handle_host_smc(struct kvm_cpu_context *host_ctxt) bool handled; handled = kvm_host_psci_handler(host_ctxt); + if (!handled) + handled = kvm_host_ffa_handler(host_ctxt); if (!handled) default_host_smc_handler(host_ctxt); @@ -237,10 +1187,9 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) case ESR_ELx_EC_SMC64: handle_host_smc(host_ctxt); break; + case ESR_ELx_EC_FP_ASIMD: case ESR_ELx_EC_SVE: - sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0); - isb(); - sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); + fpsimd_host_restore(); break; case ESR_ELx_EC_IABT_LOW: case ESR_ELx_EC_DABT_LOW: diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index 9f54833af400972670ceddec0a7904d190a005e5..04d194583f1e47a42079726276bc941cdc146641 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -23,6 +23,8 @@ u64 cpu_logical_map(unsigned int cpu) return hyp_cpu_logical_map[cpu]; } +unsigned long __ro_after_init kvm_arm_hyp_percpu_base[NR_CPUS]; + unsigned long __hyp_per_cpu_offset(unsigned int cpu) { unsigned long *cpu_base_array; diff --git a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S index f4562f417d3fc1d54351c7897d22c7df6a35637a..d724f6d69302a0500f7c3d5cde6390fc4bc30b00 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp.lds.S +++ b/arch/arm64/kvm/hyp/nvhe/hyp.lds.S @@ -25,5 +25,7 @@ SECTIONS { BEGIN_HYP_SECTION(.data..percpu) PERCPU_INPUT(L1_CACHE_BYTES) END_HYP_SECTION + HYP_SECTION(.bss) + HYP_SECTION(.data) } diff --git a/arch/arm64/kvm/hyp/nvhe/iommu.c b/arch/arm64/kvm/hyp/nvhe/iommu.c new file mode 100644 index 0000000000000000000000000000000000000000..3fe47e54208834657f58aca7480569614eed6b4f --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/iommu.c @@ -0,0 +1,549 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 Google LLC + * Author: David Brazdil + */ + +#include + +#include +#include +#include +#include + +#include +#include +#include + +enum { + IOMMU_DRIVER_NOT_READY = 0, + IOMMU_DRIVER_INITIALIZING, + IOMMU_DRIVER_READY, +}; + +struct pkvm_iommu_driver { + const struct pkvm_iommu_ops *ops; + atomic_t state; +}; + +static struct pkvm_iommu_driver iommu_drivers[PKVM_IOMMU_NR_DRIVERS]; + +/* IOMMU device list. Must only be accessed with host_kvm.lock held. */ +static LIST_HEAD(iommu_list); + +static bool iommu_finalized; +static DEFINE_HYP_SPINLOCK(iommu_registration_lock); + +static void *iommu_mem_pool; +static size_t iommu_mem_remaining; + +static void assert_host_component_locked(void) +{ + hyp_assert_lock_held(&host_kvm.lock); +} + +static void host_lock_component(void) +{ + hyp_spin_lock(&host_kvm.lock); +} + +static void host_unlock_component(void) +{ + hyp_spin_unlock(&host_kvm.lock); +} + +/* + * Find IOMMU driver by its ID. The input ID is treated as unstrusted + * and is properly validated. + */ +static inline struct pkvm_iommu_driver *get_driver(enum pkvm_iommu_driver_id id) +{ + size_t index = (size_t)id; + + if (index >= ARRAY_SIZE(iommu_drivers)) + return NULL; + + return &iommu_drivers[index]; +} + +static const struct pkvm_iommu_ops *get_driver_ops(enum pkvm_iommu_driver_id id) +{ + switch (id) { + case PKVM_IOMMU_DRIVER_S2MPU: + return IS_ENABLED(CONFIG_KVM_S2MPU) ? &pkvm_s2mpu_ops : NULL; + case PKVM_IOMMU_DRIVER_SYSMMU_SYNC: + return IS_ENABLED(CONFIG_KVM_S2MPU) ? &pkvm_sysmmu_sync_ops : NULL; + default: + return NULL; + } +} + +static inline bool driver_acquire_init(struct pkvm_iommu_driver *drv) +{ + return atomic_cmpxchg_acquire(&drv->state, IOMMU_DRIVER_NOT_READY, + IOMMU_DRIVER_INITIALIZING) + == IOMMU_DRIVER_NOT_READY; +} + +static inline void driver_release_init(struct pkvm_iommu_driver *drv, + bool success) +{ + atomic_set_release(&drv->state, success ? IOMMU_DRIVER_READY + : IOMMU_DRIVER_NOT_READY); +} + +static inline bool is_driver_ready(struct pkvm_iommu_driver *drv) +{ + return atomic_read(&drv->state) == IOMMU_DRIVER_READY; +} + +static size_t __iommu_alloc_size(struct pkvm_iommu_driver *drv) +{ + return ALIGN(sizeof(struct pkvm_iommu) + drv->ops->data_size, + sizeof(unsigned long)); +} + +/* Global memory pool for allocating IOMMU list entry structs. */ +static inline struct pkvm_iommu *alloc_iommu(struct pkvm_iommu_driver *drv, + void *mem, size_t mem_size) +{ + size_t size = __iommu_alloc_size(drv); + void *ptr; + + assert_host_component_locked(); + + /* + * If new memory is being provided, replace the existing pool with it. + * Any remaining memory in the pool is discarded. + */ + if (mem && mem_size) { + iommu_mem_pool = mem; + iommu_mem_remaining = mem_size; + } + + if (size > iommu_mem_remaining) + return NULL; + + ptr = iommu_mem_pool; + iommu_mem_pool += size; + iommu_mem_remaining -= size; + return ptr; +} + +static inline void free_iommu(struct pkvm_iommu_driver *drv, struct pkvm_iommu *ptr) +{ + size_t size = __iommu_alloc_size(drv); + + assert_host_component_locked(); + + if (!ptr) + return; + + /* Only allow freeing the last allocated buffer. */ + if ((void*)ptr + size != iommu_mem_pool) + return; + + iommu_mem_pool -= size; + iommu_mem_remaining += size; +} + +static bool is_overlap(phys_addr_t r1_start, size_t r1_size, + phys_addr_t r2_start, size_t r2_size) +{ + phys_addr_t r1_end = r1_start + r1_size; + phys_addr_t r2_end = r2_start + r2_size; + + return (r1_start < r2_end) && (r2_start < r1_end); +} + +static bool is_mmio_range(phys_addr_t base, size_t size) +{ + struct memblock_region *reg; + phys_addr_t limit = BIT(host_kvm.pgt.ia_bits); + size_t i; + + /* Check against limits of host IPA space. */ + if ((base >= limit) || !size || (size > limit - base)) + return false; + + for (i = 0; i < hyp_memblock_nr; i++) { + reg = &hyp_memory[i]; + if (is_overlap(base, size, reg->base, reg->size)) + return false; + } + return true; +} + +static int __snapshot_host_stage2(u64 start, u64 pa_max, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flags, + void * const arg) +{ + struct pkvm_iommu_driver * const drv = arg; + u64 end = start + kvm_granule_size(level); + kvm_pte_t pte = *ptep; + + /* + * Valid stage-2 entries are created lazily, invalid ones eagerly. + * Note: In the future we may need to check if [start,end) is MMIO. + * Note: Drivers initialize their PTs to all memory owned by the host, + * so we only call the driver on regions where that is not the case. + */ + if (pte && !kvm_pte_valid(pte)) + drv->ops->host_stage2_idmap_prepare(start, end, /*prot*/ 0); + return 0; +} + +static int snapshot_host_stage2(struct pkvm_iommu_driver * const drv) +{ + struct kvm_pgtable_walker walker = { + .cb = __snapshot_host_stage2, + .arg = drv, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + struct kvm_pgtable *pgt = &host_kvm.pgt; + + if (!drv->ops->host_stage2_idmap_prepare) + return 0; + + return kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker); +} + +static bool validate_against_existing_iommus(struct pkvm_iommu *dev) +{ + struct pkvm_iommu *other; + + assert_host_component_locked(); + + list_for_each_entry(other, &iommu_list, list) { + /* Device ID must be unique. */ + if (dev->id == other->id) + return false; + + /* MMIO regions must not overlap. */ + if (is_overlap(dev->pa, dev->size, other->pa, other->size)) + return false; + } + return true; +} + +static struct pkvm_iommu *find_iommu_by_id(unsigned long id) +{ + struct pkvm_iommu *dev; + + assert_host_component_locked(); + + list_for_each_entry(dev, &iommu_list, list) { + if (dev->id == id) + return dev; + } + return NULL; +} + +/* + * Initialize EL2 IOMMU driver. + * + * This is a common hypercall for driver initialization. Driver-specific + * arguments are passed in a shared memory buffer. The driver is expected to + * initialize it's page-table bookkeeping. + */ +int __pkvm_iommu_driver_init(enum pkvm_iommu_driver_id id, void *data, size_t size) +{ + struct pkvm_iommu_driver *drv; + const struct pkvm_iommu_ops *ops; + int ret = 0; + + data = kern_hyp_va(data); + + /* New driver initialization not allowed after __pkvm_iommu_finalize(). */ + hyp_spin_lock(&iommu_registration_lock); + if (iommu_finalized) { + ret = -EPERM; + goto out_unlock; + } + + drv = get_driver(id); + ops = get_driver_ops(id); + if (!drv || !ops) { + ret = -EINVAL; + goto out_unlock; + } + + if (!driver_acquire_init(drv)) { + ret = -EBUSY; + goto out_unlock; + } + + drv->ops = ops; + + /* This can change stage-2 mappings. */ + if (ops->init) { + ret = hyp_pin_shared_mem(data, data + size); + if (!ret) { + ret = ops->init(data, size); + hyp_unpin_shared_mem(data, data + size); + } + if (ret) + goto out_release; + } + + /* + * Walk host stage-2 and pass current mappings to the driver. Start + * accepting host stage-2 updates as soon as the host lock is released. + */ + host_lock_component(); + ret = snapshot_host_stage2(drv); + if (!ret) + driver_release_init(drv, /*success=*/true); + host_unlock_component(); + +out_release: + if (ret) + driver_release_init(drv, /*success=*/false); + +out_unlock: + hyp_spin_unlock(&iommu_registration_lock); + return ret; +} + +int __pkvm_iommu_register(unsigned long dev_id, + enum pkvm_iommu_driver_id drv_id, + phys_addr_t dev_pa, size_t dev_size, + unsigned long parent_id, + void *kern_mem_va, size_t mem_size) +{ + struct pkvm_iommu *dev = NULL; + struct pkvm_iommu_driver *drv; + void *mem_va = NULL; + int ret = 0; + + /* New device registration not allowed after __pkvm_iommu_finalize(). */ + hyp_spin_lock(&iommu_registration_lock); + if (iommu_finalized) { + ret = -EPERM; + goto out_unlock; + } + + drv = get_driver(drv_id); + if (!drv || !is_driver_ready(drv)) { + ret = -ENOENT; + goto out_unlock; + } + + if (!PAGE_ALIGNED(dev_pa) || !PAGE_ALIGNED(dev_size)) { + ret = -EINVAL; + goto out_unlock; + } + + if (!is_mmio_range(dev_pa, dev_size)) { + ret = -EINVAL; + goto out_unlock; + } + + /* + * Accept memory donation if the host is providing new memory. + * Note: We do not return the memory even if there is an error later. + */ + if (kern_mem_va && mem_size) { + mem_va = kern_hyp_va(kern_mem_va); + + if (!PAGE_ALIGNED(mem_va) || !PAGE_ALIGNED(mem_size)) { + ret = -EINVAL; + goto out_unlock; + } + + ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(mem_va), + mem_size >> PAGE_SHIFT); + if (ret) + goto out_unlock; + } + + host_lock_component(); + + /* Allocate memory for the new device entry. */ + dev = alloc_iommu(drv, mem_va, mem_size); + if (!dev) { + ret = -ENOMEM; + goto out_free; + } + + /* Populate the new device entry. */ + *dev = (struct pkvm_iommu){ + .children = LIST_HEAD_INIT(dev->children), + .id = dev_id, + .ops = drv->ops, + .pa = dev_pa, + .size = dev_size, + }; + + if (!validate_against_existing_iommus(dev)) { + ret = -EBUSY; + goto out_free; + } + + if (parent_id) { + dev->parent = find_iommu_by_id(parent_id); + if (!dev->parent) { + ret = -EINVAL; + goto out_free; + } + + if (dev->parent->ops->validate_child) { + ret = dev->parent->ops->validate_child(dev->parent, dev); + if (ret) + goto out_free; + } + } + + if (dev->ops->validate) { + ret = dev->ops->validate(dev); + if (ret) + goto out_free; + } + + /* + * Unmap the device's MMIO range from host stage-2. If registration + * is successful, future attempts to re-map will be blocked by + * pkvm_iommu_host_stage2_adjust_range. + */ + ret = host_stage2_unmap_dev_locked(dev_pa, dev_size); + if (ret) + goto out_free; + + /* Create EL2 mapping for the device. Do it last as it is irreversible. */ + dev->va = (void *)__pkvm_create_private_mapping(dev_pa, dev_size, + PAGE_HYP_DEVICE); + if (IS_ERR(dev->va)) { + ret = PTR_ERR(dev->va); + goto out_free; + } + + /* Register device and prevent host from mapping the MMIO range. */ + list_add_tail(&dev->list, &iommu_list); + if (dev->parent) + list_add_tail(&dev->siblings, &dev->parent->children); + +out_free: + if (ret) + free_iommu(drv, dev); + host_unlock_component(); + +out_unlock: + hyp_spin_unlock(&iommu_registration_lock); + return ret; +} + +int __pkvm_iommu_finalize(void) +{ + int ret = 0; + + hyp_spin_lock(&iommu_registration_lock); + if (!iommu_finalized) + iommu_finalized = true; + else + ret = -EPERM; + hyp_spin_unlock(&iommu_registration_lock); + return ret; +} + +int __pkvm_iommu_pm_notify(unsigned long dev_id, enum pkvm_iommu_pm_event event) +{ + struct pkvm_iommu *dev; + int ret; + + host_lock_component(); + dev = find_iommu_by_id(dev_id); + if (dev) { + if (event == PKVM_IOMMU_PM_SUSPEND) { + ret = dev->ops->suspend ? dev->ops->suspend(dev) : 0; + if (!ret) + dev->powered = false; + } else if (event == PKVM_IOMMU_PM_RESUME) { + ret = dev->ops->resume ? dev->ops->resume(dev) : 0; + if (!ret) + dev->powered = true; + } else { + ret = -EINVAL; + } + } else { + ret = -ENODEV; + } + host_unlock_component(); + return ret; +} + +/* + * Check host memory access against IOMMUs' MMIO regions. + * Returns -EPERM if the address is within the bounds of a registered device. + * Otherwise returns zero and adjusts boundaries of the new mapping to avoid + * MMIO regions of registered IOMMUs. + */ +int pkvm_iommu_host_stage2_adjust_range(phys_addr_t addr, phys_addr_t *start, + phys_addr_t *end) +{ + struct pkvm_iommu *dev; + phys_addr_t new_start = *start; + phys_addr_t new_end = *end; + phys_addr_t dev_start, dev_end; + + assert_host_component_locked(); + + list_for_each_entry(dev, &iommu_list, list) { + dev_start = dev->pa; + dev_end = dev_start + dev->size; + + if (addr < dev_start) + new_end = min(new_end, dev_start); + else if (addr >= dev_end) + new_start = max(new_start, dev_end); + else + return -EPERM; + } + + *start = new_start; + *end = new_end; + return 0; +} + +bool pkvm_iommu_host_dabt_handler(struct kvm_cpu_context *host_ctxt, u32 esr, + phys_addr_t pa) +{ + struct pkvm_iommu *dev; + + assert_host_component_locked(); + + list_for_each_entry(dev, &iommu_list, list) { + if (pa < dev->pa || pa >= dev->pa + dev->size) + continue; + + /* No 'powered' check - the host assumes it is powered. */ + if (!dev->ops->host_dabt_handler || + !dev->ops->host_dabt_handler(dev, host_ctxt, esr, pa - dev->pa)) + return false; + + kvm_skip_host_instr(); + return true; + } + return false; +} + +void pkvm_iommu_host_stage2_idmap(phys_addr_t start, phys_addr_t end, + enum kvm_pgtable_prot prot) +{ + struct pkvm_iommu_driver *drv; + struct pkvm_iommu *dev; + size_t i; + + assert_host_component_locked(); + + for (i = 0; i < ARRAY_SIZE(iommu_drivers); i++) { + drv = get_driver(i); + if (drv && is_driver_ready(drv) && drv->ops->host_stage2_idmap_prepare) + drv->ops->host_stage2_idmap_prepare(start, end, prot); + } + + list_for_each_entry(dev, &iommu_list, list) { + if (dev->powered && dev->ops->host_stage2_idmap_apply) + dev->ops->host_stage2_idmap_apply(dev, start, end); + } +} diff --git a/arch/arm64/kvm/hyp/nvhe/iommu/s2mpu.c b/arch/arm64/kvm/hyp/nvhe/iommu/s2mpu.c new file mode 100644 index 0000000000000000000000000000000000000000..ff5d7d1044e522e4340a39c92badf83141ccabbf --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/iommu/s2mpu.c @@ -0,0 +1,559 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 - Google LLC + * Author: David Brazdil + */ + +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#define SMC_CMD_PREPARE_PD_ONOFF 0x82000410 +#define SMC_MODE_POWER_UP 1 + +#define PA_MAX ((phys_addr_t)SZ_1G * NR_GIGABYTES) + +#define CTX_CFG_ENTRY(ctxid, nr_ctx, vid) \ + (CONTEXT_CFG_VALID_VID_CTX_VID(ctxid, vid) \ + | (((ctxid) < (nr_ctx)) ? CONTEXT_CFG_VALID_VID_CTX_VALID(ctxid) : 0)) + +#define for_each_child(child, dev) \ + list_for_each_entry((child), &(dev)->children, siblings) + +struct s2mpu_drv_data { + u32 version; + u32 context_cfg_valid_vid; +}; + +static struct mpt host_mpt; + +static inline enum mpt_prot prot_to_mpt(enum kvm_pgtable_prot prot) +{ + return ((prot & KVM_PGTABLE_PROT_R) ? MPT_PROT_R : 0) | + ((prot & KVM_PGTABLE_PROT_W) ? MPT_PROT_W : 0); +} + +static bool is_version(struct pkvm_iommu *dev, u32 version) +{ + struct s2mpu_drv_data *data = (struct s2mpu_drv_data *)dev->data; + + return (data->version & VERSION_CHECK_MASK) == version; +} + +static u32 __context_cfg_valid_vid(struct pkvm_iommu *dev, u32 vid_bmap) +{ + struct s2mpu_drv_data *data = (struct s2mpu_drv_data *)dev->data; + u8 ctx_vid[NR_CTX_IDS] = { 0 }; + unsigned int vid, ctx = 0; + unsigned int num_ctx; + u32 res; + + /* Only initialize once. */ + if (data->context_cfg_valid_vid) + return data->context_cfg_valid_vid; + + num_ctx = readl_relaxed(dev->va + REG_NS_NUM_CONTEXT) & NUM_CONTEXT_MASK; + while (vid_bmap) { + /* Break if we cannot allocate more. */ + if (ctx >= num_ctx) + break; + + vid = __ffs(vid_bmap); + vid_bmap &= ~BIT(vid); + ctx_vid[ctx++] = vid; + } + + /* The following loop was unrolled so bitmasks are constant. */ + BUILD_BUG_ON(NR_CTX_IDS != 8); + res = CTX_CFG_ENTRY(0, ctx, ctx_vid[0]) + | CTX_CFG_ENTRY(1, ctx, ctx_vid[1]) + | CTX_CFG_ENTRY(2, ctx, ctx_vid[2]) + | CTX_CFG_ENTRY(3, ctx, ctx_vid[3]) + | CTX_CFG_ENTRY(4, ctx, ctx_vid[4]) + | CTX_CFG_ENTRY(5, ctx, ctx_vid[5]) + | CTX_CFG_ENTRY(6, ctx, ctx_vid[6]) + | CTX_CFG_ENTRY(7, ctx, ctx_vid[7]); + + data->context_cfg_valid_vid = res; + return res; +} + +static int __initialize_v9(struct pkvm_iommu *dev) +{ + u32 ssmt_valid_vid_bmap, ctx_cfg; + + /* Assume all VIDs may be generated by the connected SSMTs for now. */ + ssmt_valid_vid_bmap = ALL_VIDS_BITMAP; + ctx_cfg = __context_cfg_valid_vid(dev, ssmt_valid_vid_bmap); + if (!ctx_cfg) + return -EINVAL; + + /* + * Write CONTEXT_CFG_VALID_VID configuration before touching L1ENTRY* + * registers. Writes to those registers are ignored unless there is + * a context ID allocated to the corresponding VID (v9 only). + */ + writel_relaxed(ctx_cfg, dev->va + REG_NS_CONTEXT_CFG_VALID_VID); + return 0; +} + +static int __initialize(struct pkvm_iommu *dev) +{ + struct s2mpu_drv_data *data = (struct s2mpu_drv_data *)dev->data; + + if (!data->version) + data->version = readl_relaxed(dev->va + REG_NS_VERSION); + + switch (data->version & VERSION_CHECK_MASK) { + case S2MPU_VERSION_8: + return 0; + case S2MPU_VERSION_9: + return __initialize_v9(dev); + default: + return -EINVAL; + } +} + +static void __set_control_regs(struct pkvm_iommu *dev) +{ + u32 ctrl0 = 0, irq_vids; + + /* + * Note: We set the values of CTRL0, CTRL1 and CFG registers here but we + * still rely on the correctness of their reset values. S2MPUs *must* + * reset to a state where all DMA traffic is blocked until the hypervisor + * writes its configuration to the S2MPU. A malicious EL1 could otherwise + * attempt to bypass the permission checks in the window between powering + * on the S2MPU and this function being called. + */ + + /* Enable the S2MPU, otherwise all traffic would be allowed through. */ + ctrl0 |= CTRL0_ENABLE; + + /* + * Enable interrupts on fault for all VIDs. The IRQ must also be + * specified in DT to get unmasked in the GIC. + */ + ctrl0 |= CTRL0_INTERRUPT_ENABLE; + irq_vids = ALL_VIDS_BITMAP; + + /* Return SLVERR/DECERR to device on permission fault. */ + ctrl0 |= is_version(dev, S2MPU_VERSION_9) ? CTRL0_FAULT_RESP_TYPE_DECERR + : CTRL0_FAULT_RESP_TYPE_SLVERR; + + writel_relaxed(irq_vids, dev->va + REG_NS_INTERRUPT_ENABLE_PER_VID_SET); + writel_relaxed(0, dev->va + REG_NS_CFG); + writel_relaxed(0, dev->va + REG_NS_CTRL1); + writel_relaxed(ctrl0, dev->va + REG_NS_CTRL0); +} + +/* Poll the given SFR until its value has all bits of a given mask set. */ +static void __wait_until(void __iomem *addr, u32 mask) +{ + while ((readl_relaxed(addr) & mask) != mask) + continue; +} + +/* Poll the given SFR as long as its value has all bits of a given mask set. */ +static void __wait_while(void __iomem *addr, u32 mask) +{ + while ((readl_relaxed(addr) & mask) == mask) + continue; +} + +static void __wait_for_invalidation_complete(struct pkvm_iommu *dev) +{ + struct pkvm_iommu *sync; + + /* + * Wait for transactions to drain if SysMMU_SYNCs were registered. + * Assumes that they are in the same power domain as the S2MPU. + */ + for_each_child(sync, dev) { + writel_relaxed(SYNC_CMD_SYNC, sync->va + REG_NS_SYNC_CMD); + __wait_until(sync->va + REG_NS_SYNC_COMP, SYNC_COMP_COMPLETE); + } + + /* Must not access SFRs while S2MPU is busy invalidating (v9 only). */ + if (is_version(dev, S2MPU_VERSION_9)) { + __wait_while(dev->va + REG_NS_STATUS, + STATUS_BUSY | STATUS_ON_INVALIDATING); + } +} + +static void __all_invalidation(struct pkvm_iommu *dev) +{ + writel_relaxed(INVALIDATION_INVALIDATE, dev->va + REG_NS_ALL_INVALIDATION); + __wait_for_invalidation_complete(dev); +} + +static void __range_invalidation(struct pkvm_iommu *dev, phys_addr_t first_byte, + phys_addr_t last_byte) +{ + u32 start_ppn = first_byte >> RANGE_INVALIDATION_PPN_SHIFT; + u32 end_ppn = last_byte >> RANGE_INVALIDATION_PPN_SHIFT; + + writel_relaxed(start_ppn, dev->va + REG_NS_RANGE_INVALIDATION_START_PPN); + writel_relaxed(end_ppn, dev->va + REG_NS_RANGE_INVALIDATION_END_PPN); + writel_relaxed(INVALIDATION_INVALIDATE, dev->va + REG_NS_RANGE_INVALIDATION); + __wait_for_invalidation_complete(dev); +} + +static void __set_l1entry_attr_with_prot(struct pkvm_iommu *dev, unsigned int gb, + unsigned int vid, enum mpt_prot prot) +{ + writel_relaxed(L1ENTRY_ATTR_1G(prot), + dev->va + REG_NS_L1ENTRY_ATTR(vid, gb)); +} + +static void __set_l1entry_attr_with_fmpt(struct pkvm_iommu *dev, unsigned int gb, + unsigned int vid, struct fmpt *fmpt) +{ + if (fmpt->gran_1g) { + __set_l1entry_attr_with_prot(dev, gb, vid, fmpt->prot); + } else { + /* Order against writes to the SMPT. */ + writel(L1ENTRY_ATTR_L2(SMPT_GRAN_ATTR), + dev->va + REG_NS_L1ENTRY_ATTR(vid, gb)); + } +} + +static void __set_l1entry_l2table_addr(struct pkvm_iommu *dev, unsigned int gb, + unsigned int vid, phys_addr_t addr) +{ + /* Order against writes to the SMPT. */ + writel(L1ENTRY_L2TABLE_ADDR(addr), + dev->va + REG_NS_L1ENTRY_L2TABLE_ADDR(vid, gb)); +} + +/* + * Initialize S2MPU device and set all GB regions to 1G granularity with + * given protection bits. + */ +static int initialize_with_prot(struct pkvm_iommu *dev, enum mpt_prot prot) +{ + unsigned int gb, vid; + int ret; + + ret = __initialize(dev); + if (ret) + return ret; + + for_each_gb_and_vid(gb, vid) + __set_l1entry_attr_with_prot(dev, gb, vid, prot); + __all_invalidation(dev); + + /* Set control registers, enable the S2MPU. */ + __set_control_regs(dev); + return 0; +} + +/* + * Initialize S2MPU device, set L2 table addresses and configure L1TABLE_ATTR + * registers according to the given MPT struct. + */ +static int initialize_with_mpt(struct pkvm_iommu *dev, struct mpt *mpt) +{ + unsigned int gb, vid; + struct fmpt *fmpt; + int ret; + + ret = __initialize(dev); + if (ret) + return ret; + + for_each_gb_and_vid(gb, vid) { + fmpt = &mpt->fmpt[gb]; + __set_l1entry_l2table_addr(dev, gb, vid, __hyp_pa(fmpt->smpt)); + __set_l1entry_attr_with_fmpt(dev, gb, vid, fmpt); + } + __all_invalidation(dev); + + /* Set control registers, enable the S2MPU. */ + __set_control_regs(dev); + return 0; +} + +static bool to_valid_range(phys_addr_t *start, phys_addr_t *end) +{ + phys_addr_t new_start = *start; + phys_addr_t new_end = *end; + + if (new_end > PA_MAX) + new_end = PA_MAX; + + new_start = ALIGN_DOWN(new_start, SMPT_GRAN); + new_end = ALIGN(new_end, SMPT_GRAN); + + if (new_start >= new_end) + return false; + + *start = new_start; + *end = new_end; + return true; +} + +static void __mpt_idmap_prepare(struct mpt *mpt, phys_addr_t first_byte, + phys_addr_t last_byte, enum mpt_prot prot) +{ + unsigned int first_gb = first_byte / SZ_1G; + unsigned int last_gb = last_byte / SZ_1G; + size_t start_gb_byte, end_gb_byte; + unsigned int gb; + struct fmpt *fmpt; + + for_each_gb_in_range(gb, first_gb, last_gb) { + fmpt = &mpt->fmpt[gb]; + start_gb_byte = (gb == first_gb) ? first_byte % SZ_1G : 0; + end_gb_byte = (gb == last_gb) ? (last_byte % SZ_1G) + 1 : SZ_1G; + + __set_fmpt_range(fmpt, start_gb_byte, end_gb_byte, prot); + + if (fmpt->flags & MPT_UPDATE_L2) + kvm_flush_dcache_to_poc(fmpt->smpt, SMPT_SIZE); + } +} + +static void __mpt_idmap_apply(struct pkvm_iommu *dev, struct mpt *mpt, + phys_addr_t first_byte, phys_addr_t last_byte) +{ + unsigned int first_gb = first_byte / SZ_1G; + unsigned int last_gb = last_byte / SZ_1G; + unsigned int gb, vid; + struct fmpt *fmpt; + + for_each_gb_in_range(gb, first_gb, last_gb) { + fmpt = &mpt->fmpt[gb]; + + if (fmpt->flags & MPT_UPDATE_L1) { + for_each_vid(vid) + __set_l1entry_attr_with_fmpt(dev, gb, vid, fmpt); + } + } + __range_invalidation(dev, first_byte, last_byte); +} + +static void s2mpu_host_stage2_idmap_prepare(phys_addr_t start, phys_addr_t end, + enum kvm_pgtable_prot prot) +{ + if (!to_valid_range(&start, &end)) + return; + + __mpt_idmap_prepare(&host_mpt, start, end - 1, prot_to_mpt(prot)); +} + +static void s2mpu_host_stage2_idmap_apply(struct pkvm_iommu *dev, + phys_addr_t start, phys_addr_t end) +{ + if (!to_valid_range(&start, &end)) + return; + + __mpt_idmap_apply(dev, &host_mpt, start, end - 1); +} + +static int s2mpu_resume(struct pkvm_iommu *dev) +{ + /* + * Initialize the S2MPU with the host stage-2 MPT. It is paramount + * that the S2MPU reset state is enabled and blocking all traffic, + * otherwise the host would not be forced to call the resume HVC + * before issuing DMA traffic. + */ + return initialize_with_mpt(dev, &host_mpt); +} + +static int s2mpu_suspend(struct pkvm_iommu *dev) +{ + /* + * Stop updating the S2MPU when the host informs us about the intention + * to suspend it. Writes to powered-down MMIO registers would trigger + * SErrors in EL1 otherwise. However, hyp must put S2MPU back to + * blocking state first, in case the host does not actually power it + * down and continues issuing DMA traffic. + */ + return initialize_with_prot(dev, MPT_PROT_NONE); +} + +static u32 host_mmio_reg_access_mask(size_t off, bool is_write) +{ + const u32 no_access = 0; + const u32 read_write = (u32)(-1); + const u32 read_only = is_write ? no_access : read_write; + const u32 write_only = is_write ? read_write : no_access; + u32 masked_off; + + switch (off) { + /* Allow reading control registers for debugging. */ + case REG_NS_CTRL0: + return read_only & CTRL0_MASK; + case REG_NS_CTRL1: + return read_only & CTRL1_MASK; + case REG_NS_CFG: + return read_only & CFG_MASK; + /* Allow EL1 IRQ handler to clear interrupts. */ + case REG_NS_INTERRUPT_CLEAR: + return write_only & ALL_VIDS_BITMAP; + /* Allow reading number of sets used by MPTC. */ + case REG_NS_INFO: + return read_only & INFO_NUM_SET_MASK; + /* Allow EL1 IRQ handler to read bitmap of pending interrupts. */ + case REG_NS_FAULT_STATUS: + return read_only & ALL_VIDS_BITMAP; + /* + * Allow reading MPTC entries for debugging. That involves: + * - writing (set,way) to READ_MPTC + * - reading READ_MPTC_* + */ + case REG_NS_READ_MPTC: + return write_only & READ_MPTC_MASK; + case REG_NS_READ_MPTC_TAG_PPN: + return read_only & READ_MPTC_TAG_PPN_MASK; + case REG_NS_READ_MPTC_TAG_OTHERS: + return read_only & READ_MPTC_TAG_OTHERS_MASK; + case REG_NS_READ_MPTC_DATA: + return read_only; + } + + /* Allow reading L1ENTRY registers for debugging. */ + if (off >= REG_NS_L1ENTRY_L2TABLE_ADDR(0, 0) && + off < REG_NS_L1ENTRY_ATTR(NR_VIDS, 0)) + return read_only; + + /* Allow EL1 IRQ handler to read fault information. */ + masked_off = off & ~REG_NS_FAULT_VID_MASK; + if ((masked_off == REG_NS_FAULT_PA_LOW(0)) || + (masked_off == REG_NS_FAULT_PA_HIGH(0)) || + (masked_off == REG_NS_FAULT_INFO(0))) + return read_only; + + return no_access; +} + +static bool s2mpu_host_dabt_handler(struct pkvm_iommu *dev, + struct kvm_cpu_context *host_ctxt, + u32 esr, size_t off) +{ + bool is_write = esr & ESR_ELx_WNR; + unsigned int len = BIT((esr & ESR_ELx_SAS) >> ESR_ELx_SAS_SHIFT); + int rd = (esr & ESR_ELx_SRT_MASK) >> ESR_ELx_SRT_SHIFT; + u32 mask; + + /* Only handle MMIO access with u32 size and alignment. */ + if ((len != sizeof(u32)) || (off & (sizeof(u32) - 1))) + return false; + + mask = host_mmio_reg_access_mask(off, is_write); + if (!mask) + return false; + + if (is_write) + writel_relaxed(cpu_reg(host_ctxt, rd) & mask, dev->va + off); + else + cpu_reg(host_ctxt, rd) = readl_relaxed(dev->va + off) & mask; + return true; +} + +static int s2mpu_init(void *data, size_t size) +{ + struct mpt in_mpt; + u32 *smpt; + phys_addr_t pa; + unsigned int gb; + int ret = 0; + + if (size != sizeof(in_mpt)) + return -EINVAL; + + /* The host can concurrently modify 'data'. Copy it to avoid TOCTOU. */ + memcpy(&in_mpt, data, sizeof(in_mpt)); + + /* Take ownership of all SMPT buffers. This will also map them in. */ + for_each_gb(gb) { + smpt = kern_hyp_va(in_mpt.fmpt[gb].smpt); + pa = __hyp_pa(smpt); + + if (!IS_ALIGNED(pa, SMPT_SIZE)) { + ret = -EINVAL; + break; + } + + ret = __pkvm_host_donate_hyp(pa >> PAGE_SHIFT, SMPT_NUM_PAGES); + if (ret) + break; + + host_mpt.fmpt[gb] = (struct fmpt){ + .smpt = smpt, + .gran_1g = true, + .prot = MPT_PROT_RW, + }; + } + + /* Try to return memory back if there was an error. */ + if (ret) { + for_each_gb(gb) { + smpt = host_mpt.fmpt[gb].smpt; + if (!smpt) + break; + + WARN_ON(__pkvm_hyp_donate_host(__hyp_pa(smpt) >> PAGE_SHIFT, + SMPT_NUM_PAGES)); + } + memset(&host_mpt, 0, sizeof(host_mpt)); + } + + return ret; +} + +static int s2mpu_validate(struct pkvm_iommu *dev) +{ + if (dev->size != S2MPU_MMIO_SIZE) + return -EINVAL; + + return 0; +} + +static int s2mpu_validate_child(struct pkvm_iommu *dev, struct pkvm_iommu *child) +{ + if (child->ops != &pkvm_sysmmu_sync_ops) + return -EINVAL; + + return 0; +} + +static int sysmmu_sync_validate(struct pkvm_iommu *dev) +{ + if (dev->size != SYSMMU_SYNC_S2_MMIO_SIZE) + return -EINVAL; + + if (!dev->parent || dev->parent->ops != &pkvm_s2mpu_ops) + return -EINVAL; + + return 0; +} + +const struct pkvm_iommu_ops pkvm_s2mpu_ops = (struct pkvm_iommu_ops){ + .init = s2mpu_init, + .validate = s2mpu_validate, + .validate_child = s2mpu_validate_child, + .resume = s2mpu_resume, + .suspend = s2mpu_suspend, + .host_stage2_idmap_prepare = s2mpu_host_stage2_idmap_prepare, + .host_stage2_idmap_apply = s2mpu_host_stage2_idmap_apply, + .host_dabt_handler = s2mpu_host_dabt_handler, + .data_size = sizeof(struct s2mpu_drv_data), +}; + +const struct pkvm_iommu_ops pkvm_sysmmu_sync_ops = (struct pkvm_iommu_ops){ + .validate = sysmmu_sync_validate, +}; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index bacd493a4eacdefc1af400ec1a97309f02ac3510..76703b2b6f3c55be815c95db9a2a399f5babe6e6 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -9,14 +9,18 @@ #include #include #include +#include #include -#include +#include +#include #include +#include #include #include #include +#include #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) @@ -25,17 +29,71 @@ struct host_kvm host_kvm; static struct hyp_pool host_s2_pool; -/* - * Copies of the host's CPU features registers holding sanitized values. - */ -u64 id_aa64mmfr0_el1_sys_val; -u64 id_aa64mmfr1_el1_sys_val; +static pkvm_id pkvm_guest_id(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.hw_mmu->vmid.vmid; + +} + +static DEFINE_PER_CPU(struct kvm_shadow_vm *, __current_vm); +#define current_vm (*this_cpu_ptr(&__current_vm)) + +static void __guest_lock(struct kvm_shadow_vm *vm) +{ + hyp_spin_lock(&vm->lock); + current_vm = vm; +} + +static void __guest_unlock(struct kvm_shadow_vm *vm) +{ + current_vm = NULL; + hyp_spin_unlock(&vm->lock); +} + +static void host_lock_component(void) +{ + hyp_spin_lock(&host_kvm.lock); +} -const u8 pkvm_hyp_id = 1; +static void host_unlock_component(void) +{ + hyp_spin_unlock(&host_kvm.lock); +} + +static void hyp_lock_component(void) +{ + hyp_spin_lock(&pkvm_pgd_lock); +} + +static void hyp_unlock_component(void) +{ + hyp_spin_unlock(&pkvm_pgd_lock); +} + +static void guest_lock_component(struct kvm_vcpu *vcpu) +{ + __guest_lock(vcpu->arch.pkvm.shadow_vm); +} + +static void guest_unlock_component(struct kvm_vcpu *vcpu) +{ + __guest_unlock(vcpu->arch.pkvm.shadow_vm); +} static void *host_s2_zalloc_pages_exact(size_t size) { - return hyp_alloc_pages(&host_s2_pool, get_order(size)); + void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size)); + + hyp_split_page(hyp_virt_to_page(addr)); + + /* + * The size of concatenated PGDs is always a power of two of PAGE_SIZE, + * so there should be no need to free any of the tail pages to make the + * allocation exact. + */ + WARN_ON(size != (PAGE_SIZE << get_order(size))); + + return addr; } static void *host_s2_zalloc_page(void *pool) @@ -98,19 +156,19 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) prepare_host_vtcr(); hyp_spin_lock_init(&host_kvm.lock); + mmu->arch = &host_kvm.arch; ret = prepare_s2_pool(pgt_pool_base); if (ret) return ret; - ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, &host_kvm.arch, + ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu, &host_kvm.mm_ops, KVM_HOST_S2_FLAGS, host_stage2_force_pte_cb); if (ret) return ret; mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); - mmu->arch = &host_kvm.arch; mmu->pgt = &host_kvm.pgt; WRITE_ONCE(mmu->vmid.vmid_gen, 0); WRITE_ONCE(mmu->vmid.vmid, 0); @@ -118,11 +176,174 @@ int kvm_host_prepare_stage2(void *pgt_pool_base) return 0; } +static bool guest_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) +{ + return true; +} + +static void *guest_s2_zalloc_pages_exact(size_t size) +{ + void *addr = hyp_alloc_pages(¤t_vm->pool, get_order(size)); + + WARN_ON(size != (PAGE_SIZE << get_order(size))); + hyp_split_page(hyp_virt_to_page(addr)); + + return addr; +} + +static void guest_s2_free_pages_exact(void *addr, unsigned long size) +{ + u8 order = get_order(size); + unsigned int i; + + for (i = 0; i < (1 << order); i++) + hyp_put_page(¤t_vm->pool, addr + (i * PAGE_SIZE)); +} + +static void *guest_s2_zalloc_page(void *mc) +{ + struct hyp_page *p; + void *addr; + + addr = hyp_alloc_pages(¤t_vm->pool, 0); + if (addr) + return addr; + + addr = pop_hyp_memcache(mc, hyp_phys_to_virt); + if (!addr) + return addr; + + memset(addr, 0, PAGE_SIZE); + p = hyp_virt_to_page(addr); + memset(p, 0, sizeof(*p)); + p->refcount = 1; + + return addr; +} + +static void guest_s2_get_page(void *addr) +{ + hyp_get_page(¤t_vm->pool, addr); +} + +static void guest_s2_put_page(void *addr) +{ + hyp_put_page(¤t_vm->pool, addr); +} + +static void clean_dcache_guest_page(void *va, size_t size) +{ + __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); + hyp_fixmap_unmap(); +} + +static void invalidate_icache_guest_page(void *va, size_t size) +{ + __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); + hyp_fixmap_unmap(); +} + +int kvm_guest_prepare_stage2(struct kvm_shadow_vm *vm, void *pgd) +{ + struct kvm_s2_mmu *mmu = &vm->arch.mmu; + unsigned long nr_pages; + int ret; + + nr_pages = kvm_pgtable_stage2_pgd_size(vm->arch.vtcr) >> PAGE_SHIFT; + + ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0); + if (ret) + return ret; + + hyp_spin_lock_init(&vm->lock); + vm->mm_ops = (struct kvm_pgtable_mm_ops) { + .zalloc_pages_exact = guest_s2_zalloc_pages_exact, + .free_pages_exact = guest_s2_free_pages_exact, + .zalloc_page = guest_s2_zalloc_page, + .phys_to_virt = hyp_phys_to_virt, + .virt_to_phys = hyp_virt_to_phys, + .page_count = hyp_page_count, + .get_page = guest_s2_get_page, + .put_page = guest_s2_put_page, + .dcache_clean_inval_poc = clean_dcache_guest_page, + .icache_inval_pou = invalidate_icache_guest_page, + }; + + __guest_lock(vm); + ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, + guest_stage2_force_pte_cb); + __guest_unlock(vm); + if (ret) + return ret; + + vm->arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd); + + return 0; +} + +static int reclaim_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + kvm_pte_t pte = *ptep; + struct hyp_page *page; + + if (!kvm_pte_valid(pte)) + return 0; + + page = hyp_phys_to_page(kvm_pte_to_phys(pte)); + switch (pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte))) { + case PKVM_PAGE_OWNED: + page->flags |= HOST_PAGE_NEED_POISONING; + fallthrough; + case PKVM_PAGE_SHARED_BORROWED: + case PKVM_PAGE_SHARED_OWNED: + page->flags |= HOST_PAGE_PENDING_RECLAIM; + break; + default: + return -EPERM; + } + + return 0; +} + +void reclaim_guest_pages(struct kvm_shadow_vm *vm, struct kvm_hyp_memcache *mc) +{ + struct kvm_pgtable_walker walker = { + .cb = reclaim_walker, + .flags = KVM_PGTABLE_WALK_LEAF + }; + void *addr; + + host_lock_component(); + __guest_lock(vm); + + /* Reclaim all guest pages, and dump all pgtable pages in the hyp_pool */ + BUG_ON(kvm_pgtable_walk(&vm->pgt, 0, BIT(vm->pgt.ia_bits), &walker)); + kvm_pgtable_stage2_destroy(&vm->pgt); + vm->arch.mmu.pgd_phys = 0ULL; + + __guest_unlock(vm); + host_unlock_component(); + + /* Drain the hyp_pool into the memcache */ + addr = hyp_alloc_pages(&vm->pool, 0); + while (addr) { + memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page)); + push_hyp_memcache(mc, addr, hyp_virt_to_phys); + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1)); + addr = hyp_alloc_pages(&vm->pool, 0); + } +} + int __pkvm_prot_finalize(void) { struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); + if (params->hcr_el2 & HCR_VM) + return -EPERM; + params->vttbr = kvm_get_vttbr(mmu); params->vtcr = host_kvm.arch.vtcr; params->hcr_el2 |= HCR_VM; @@ -145,6 +366,20 @@ int __pkvm_prot_finalize(void) return 0; } +int host_stage2_unmap_dev_locked(phys_addr_t start, u64 size) +{ + int ret; + + hyp_assert_lock_held(&host_kvm.lock); + + ret = kvm_pgtable_stage2_unmap(&host_kvm.pgt, start, size); + if (ret) + return ret; + + pkvm_iommu_host_stage2_idmap(start, start + size, 0); + return 0; +} + static int host_stage2_unmap_dev_all(void) { struct kvm_pgtable *pgt = &host_kvm.pgt; @@ -155,11 +390,11 @@ static int host_stage2_unmap_dev_all(void) /* Unmap all non-memory regions to recycle the pages */ for (i = 0; i < hyp_memblock_nr; i++, addr = reg->base + reg->size) { reg = &hyp_memory[i]; - ret = kvm_pgtable_stage2_unmap(pgt, addr, reg->base - addr); + ret = host_stage2_unmap_dev_locked(addr, reg->base - addr); if (ret) return ret; } - return kvm_pgtable_stage2_unmap(pgt, addr, BIT(pgt->ia_bits) - addr); + return host_stage2_unmap_dev_locked(addr, BIT(pgt->ia_bits) - addr); } struct kvm_mem_range { @@ -167,7 +402,7 @@ struct kvm_mem_range { u64 end; }; -static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) +static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) { int cur, left = 0, right = hyp_memblock_nr; struct memblock_region *reg; @@ -190,18 +425,28 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) } else { range->start = reg->base; range->end = end; - return true; + return reg; } } - return false; + return NULL; } bool addr_is_memory(phys_addr_t phys) { struct kvm_mem_range range; - return find_mem_range(phys, &range); + return !!find_mem_range(phys, &range); +} + +static bool addr_is_allowed_memory(phys_addr_t phys) +{ + struct memblock_region *reg; + struct kvm_mem_range range; + + reg = find_mem_range(phys, &range); + + return reg && !(reg->flags & MEMBLOCK_NOMAP); } static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) @@ -222,8 +467,15 @@ static bool range_is_memory(u64 start, u64 end) static inline int __host_stage2_idmap(u64 start, u64 end, enum kvm_pgtable_prot prot) { - return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, - prot, &host_s2_pool); + int ret; + + ret = kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, + prot, &host_s2_pool); + if (ret) + return ret; + + pkvm_iommu_host_stage2_idmap(start, end, prot); + return 0; } /* @@ -286,17 +538,30 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot) { - hyp_assert_lock_held(&host_kvm.lock); - return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot); } -int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) +#define KVM_INVALID_PTE_OWNER_MASK GENMASK(32, 1) +static kvm_pte_t kvm_init_invalid_leaf_owner(pkvm_id owner_id) { - hyp_assert_lock_held(&host_kvm.lock); + return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id); +} + +int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, + pkvm_id owner_id) +{ + kvm_pte_t annotation = kvm_init_invalid_leaf_owner(owner_id); + enum kvm_pgtable_prot prot; + int ret; + + ret = host_stage2_try(kvm_pgtable_stage2_annotate, &host_kvm.pgt, + addr, size, &host_s2_pool, annotation); + if (ret) + return ret; - return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, - addr, size, &host_s2_pool, owner_id); + prot = owner_id == pkvm_host_id ? PKVM_HOST_MEM_PROT : 0; + pkvm_iommu_host_stage2_idmap(addr, addr + size, prot); + return 0; } static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot) @@ -324,122 +589,1477 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr static int host_stage2_idmap(u64 addr) { struct kvm_mem_range range; - bool is_memory = find_mem_range(addr, &range); + bool is_memory = !!find_mem_range(addr, &range); enum kvm_pgtable_prot prot; int ret; + hyp_assert_lock_held(&host_kvm.lock); + prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT; - hyp_spin_lock(&host_kvm.lock); + /* + * Adjust against IOMMU devices first. host_stage2_adjust_range() should + * be called last for proper alignment. + */ + if (!is_memory) { + ret = pkvm_iommu_host_stage2_adjust_range(addr, &range.start, + &range.end); + if (ret) + return ret; + } + ret = host_stage2_adjust_range(addr, &range); if (ret) - goto unlock; + return ret; - ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot); -unlock: - hyp_spin_unlock(&host_kvm.lock); + return host_stage2_idmap_locked(range.start, range.end - range.start, prot); +} - return ret; +static bool is_dabt(u64 esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_DABT_LOW; } -static inline bool check_prot(enum kvm_pgtable_prot prot, - enum kvm_pgtable_prot required, - enum kvm_pgtable_prot denied) +static void host_inject_abort(struct kvm_cpu_context *host_ctxt) { - return (prot & (required | denied)) == required; + u64 spsr = read_sysreg_el2(SYS_SPSR); + u64 esr = read_sysreg_el2(SYS_ESR); + u64 ventry, ec; + + /* Repaint the ESR to report a same-level fault if taken from EL1 */ + if ((spsr & PSR_MODE_MASK) != PSR_MODE_EL0t) { + ec = ESR_ELx_EC(esr); + if (ec == ESR_ELx_EC_DABT_LOW) + ec = ESR_ELx_EC_DABT_CUR; + else if (ec == ESR_ELx_EC_IABT_LOW) + ec = ESR_ELx_EC_IABT_CUR; + else + WARN_ON(1); + esr &= ~ESR_ELx_EC_MASK; + esr |= ec << ESR_ELx_EC_SHIFT; + } + + /* + * Since S1PTW should only ever be set for stage-2 faults, we're pretty + * much guaranteed that it won't be set in ESR_EL1 by the hardware. So, + * let's use that bit to allow the host abort handler to differentiate + * this abort from normal userspace faults. + * + * Note: although S1PTW is RES0 at EL1, it is guaranteed by the + * architecture to be backed by flops, so it should be safe to use. + */ + esr |= ESR_ELx_S1PTW; + + write_sysreg_el1(esr, SYS_ESR); + write_sysreg_el1(spsr, SYS_SPSR); + write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR); + write_sysreg_el1(read_sysreg_el2(SYS_FAR), SYS_FAR); + + ventry = read_sysreg_el1(SYS_VBAR); + ventry += get_except64_offset(spsr, PSR_MODE_EL1h, except_type_sync); + write_sysreg_el2(ventry, SYS_ELR); + + spsr = get_except64_cpsr(spsr, system_supports_mte(), + read_sysreg_el1(SYS_SCTLR), PSR_MODE_EL1h); + write_sysreg_el2(spsr, SYS_SPSR); } -int __pkvm_host_share_hyp(u64 pfn) +void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { - phys_addr_t addr = hyp_pfn_to_phys(pfn); - enum kvm_pgtable_prot prot, cur; - void *virt = __hyp_va(addr); - enum pkvm_page_state state; - kvm_pte_t pte; - int ret; + struct kvm_vcpu_fault_info fault; + u64 esr, addr; + int ret = -EPERM; - if (!addr_is_memory(addr)) - return -EINVAL; + esr = read_sysreg_el2(SYS_ESR); + BUG_ON(!__get_fault_info(esr, &fault)); - hyp_spin_lock(&host_kvm.lock); - hyp_spin_lock(&pkvm_pgd_lock); + addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; + addr |= fault.far_el2 & FAR_MASK; - ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL); - if (ret) - goto unlock; - if (!pte) - goto map_shared; + host_lock_component(); - /* - * Check attributes in the host stage-2 PTE. We need the page to be: - * - mapped RWX as we're sharing memory; - * - not borrowed, as that implies absence of ownership. - * Otherwise, we can't let it got through - */ - cur = kvm_pgtable_stage2_pte_prot(pte); - prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED); - if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) { - ret = -EPERM; - goto unlock; - } + /* Check if an IOMMU device can handle the DABT. */ + if (is_dabt(esr) && !addr_is_memory(addr) && + pkvm_iommu_host_dabt_handler(host_ctxt, esr, addr)) + ret = 0; - state = pkvm_getstate(cur); - if (state == PKVM_PAGE_OWNED) - goto map_shared; + /* If not handled, attempt to map the page. */ + if (ret == -EPERM) + ret = host_stage2_idmap(addr); - /* - * Tolerate double-sharing the same page, but this requires - * cross-checking the hypervisor stage-1. - */ - if (state != PKVM_PAGE_SHARED_OWNED) { - ret = -EPERM; - goto unlock; + host_unlock_component(); + + if (ret == -EPERM) + host_inject_abort(host_ctxt); + else + BUG_ON(ret && ret != -EAGAIN); +} + +/* This corresponds to locking order */ +enum pkvm_component_id { + PKVM_ID_HOST, + PKVM_ID_HYP, + PKVM_ID_GUEST, + PKVM_ID_FFA, +}; + +struct pkvm_mem_transition { + u64 nr_pages; + + struct { + enum pkvm_component_id id; + /* Address in the initiator's address space */ + u64 addr; + + union { + struct { + /* Address in the completer's address space */ + u64 completer_addr; + } host; + struct { + u64 completer_addr; + } hyp; + + struct { + struct kvm_vcpu *vcpu; + } guest; + }; + } initiator; + + struct { + enum pkvm_component_id id; + + union { + struct { + struct kvm_vcpu *vcpu; + phys_addr_t phys; + } guest; + }; + } completer; +}; + +struct pkvm_mem_share { + const struct pkvm_mem_transition tx; + const enum kvm_pgtable_prot completer_prot; +}; + +struct pkvm_mem_donation { + const struct pkvm_mem_transition tx; +}; + +static pkvm_id initiator_owner_id(const struct pkvm_mem_transition *tx) +{ + switch (tx->initiator.id) { + case PKVM_ID_HOST: + return pkvm_host_id; + case PKVM_ID_HYP: + return pkvm_hyp_id; + case PKVM_ID_GUEST: + return pkvm_guest_id(tx->initiator.guest.vcpu); + default: + WARN_ON(1); + return -1; } +} - ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL); - if (ret) - goto unlock; +static pkvm_id completer_owner_id(const struct pkvm_mem_transition *tx) +{ + switch (tx->completer.id) { + case PKVM_ID_HOST: + return pkvm_host_id; + case PKVM_ID_HYP: + return pkvm_hyp_id; + case PKVM_ID_GUEST: + return pkvm_guest_id(tx->completer.guest.vcpu); + default: + WARN_ON(1); + return -1; + } +} - /* - * If the page has been shared with the hypervisor, it must be - * already mapped as SHARED_BORROWED in its stage-1. - */ - cur = kvm_pgtable_hyp_pte_prot(pte); - prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); - if (!check_prot(cur, prot, ~prot)) - ret = -EPERM; - goto unlock; +struct check_walk_data { + enum pkvm_page_state desired; + enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); +}; -map_shared: - /* - * If the page is not yet shared, adjust mappings in both page-tables - * while both locks are held. - */ - prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); - ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot); - BUG_ON(ret); +static int __check_page_state_visitor(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + struct check_walk_data *d = arg; + kvm_pte_t pte = *ptep; - prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); - ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot); - BUG_ON(ret); + if (kvm_pte_valid(pte) && !addr_is_allowed_memory(kvm_pte_to_phys(pte))) + return -EINVAL; -unlock: - hyp_spin_unlock(&pkvm_pgd_lock); - hyp_spin_unlock(&host_kvm.lock); + return d->get_page_state(pte) == d->desired ? 0 : -EPERM; +} - return ret; +static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, + struct check_walk_data *data) +{ + struct kvm_pgtable_walker walker = { + .cb = __check_page_state_visitor, + .arg = data, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + return kvm_pgtable_walk(pgt, addr, size, &walker); } -void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) +static enum pkvm_page_state host_get_page_state(kvm_pte_t pte) { - struct kvm_vcpu_fault_info fault; - u64 esr, addr; - int ret = 0; + if (!kvm_pte_valid(pte) && pte) + return PKVM_NOPAGE; - esr = read_sysreg_el2(SYS_ESR); - BUG_ON(!__get_fault_info(esr, &fault)); + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); +} - addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; - ret = host_stage2_idmap(addr); - BUG_ON(ret && ret != -EAGAIN); +static int __host_check_page_state_range(u64 addr, u64 size, + enum pkvm_page_state state) +{ + struct check_walk_data d = { + .desired = state, + .get_page_state = host_get_page_state, + }; + + hyp_assert_lock_held(&host_kvm.lock); + return check_page_state_range(&host_kvm.pgt, addr, size, &d); +} + +static int __host_set_page_state_range(u64 addr, u64 size, + enum pkvm_page_state state) +{ + enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state); + + return host_stage2_idmap_locked(addr, size, prot); +} + +static int host_request_owned_transition(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED); +} + +static int host_request_unshare(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); +} + +static int host_initiate_share(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); +} + +static int host_initiate_unshare(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED); +} + +static int host_initiate_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + pkvm_id owner_id = completer_owner_id(tx); + u64 size = tx->nr_pages * PAGE_SIZE; + + *completer_addr = tx->initiator.host.completer_addr; + return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id); +} + +static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) +{ + return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || + tx->initiator.id != PKVM_ID_HYP); +} + +static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx, + enum pkvm_page_state state) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (__host_ack_skip_pgtable_check(tx)) + return 0; + + return __host_check_page_state_range(addr, size, state); +} + +static int host_ack_share(u64 addr, const struct pkvm_mem_transition *tx, + enum kvm_pgtable_prot perms) +{ + if (perms != PKVM_HOST_MEM_PROT) + return -EPERM; + + return __host_ack_transition(addr, tx, PKVM_NOPAGE); +} + +static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + return __host_ack_transition(addr, tx, PKVM_NOPAGE); +} + +static int host_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) +{ + return __host_ack_transition(addr, tx, PKVM_PAGE_SHARED_BORROWED); +} + +static int host_complete_share(u64 addr, const struct pkvm_mem_transition *tx, + enum kvm_pgtable_prot perms) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_BORROWED); +} + +static int host_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + pkvm_id owner_id = initiator_owner_id(tx); + + return host_stage2_set_owner_locked(addr, size, owner_id); +} + +static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + pkvm_id host_id = completer_owner_id(tx); + + return host_stage2_set_owner_locked(addr, size, host_id); +} + +static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) +{ + if (!kvm_pte_valid(pte)) + return PKVM_NOPAGE; + + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); +} + +static int __hyp_check_page_state_range(u64 addr, u64 size, + enum pkvm_page_state state) +{ + struct check_walk_data d = { + .desired = state, + .get_page_state = hyp_get_page_state, + }; + + hyp_assert_lock_held(&pkvm_pgd_lock); + return check_page_state_range(&pkvm_pgtable, addr, size, &d); +} + +static int hyp_request_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.hyp.completer_addr; + return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED); +} + +static int hyp_initiate_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + int ret; + + *completer_addr = tx->initiator.hyp.completer_addr; + ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size); + return (ret != size) ? -EFAULT : 0; +} + +static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) +{ + return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || + tx->initiator.id != PKVM_ID_HOST); +} + +static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx, + enum kvm_pgtable_prot perms) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (perms != PAGE_HYP) + return -EPERM; + + if (__hyp_ack_skip_pgtable_check(tx)) + return 0; + + return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); +} + +static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr)) + return -EBUSY; + + if (__hyp_ack_skip_pgtable_check(tx)) + return 0; + + return __hyp_check_page_state_range(addr, size, + PKVM_PAGE_SHARED_BORROWED); +} + +static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (__hyp_ack_skip_pgtable_check(tx)) + return 0; + + return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); +} + +static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx, + enum kvm_pgtable_prot perms) +{ + void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); + enum kvm_pgtable_prot prot; + + prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED); + return pkvm_create_mappings_locked(start, end, prot); +} + +static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size); + + return (ret != size) ? -EFAULT : 0; +} + +static int hyp_complete_donation(u64 addr, + const struct pkvm_mem_transition *tx) +{ + void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); + enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); + + return pkvm_create_mappings_locked(start, end, prot); +} + +static enum pkvm_page_state guest_get_page_state(kvm_pte_t pte) +{ + if (!kvm_pte_valid(pte)) + return PKVM_NOPAGE; + + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); +} + +static int __guest_check_page_state_range(struct kvm_vcpu *vcpu, u64 addr, + u64 size, enum pkvm_page_state state) +{ + struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm; + struct check_walk_data d = { + .desired = state, + .get_page_state = guest_get_page_state, + }; + + hyp_assert_lock_held(&vm->lock); + return check_page_state_range(&vm->pgt, addr, size, &d); +} + +static int guest_ack_share(u64 addr, const struct pkvm_mem_transition *tx, + enum kvm_pgtable_prot perms) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (perms != KVM_PGTABLE_PROT_RWX) + return -EPERM; + + return __guest_check_page_state_range(tx->completer.guest.vcpu, addr, + size, PKVM_NOPAGE); +} + +static int guest_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + return __guest_check_page_state_range(tx->completer.guest.vcpu, addr, + size, PKVM_NOPAGE); +} + +static int guest_complete_share(u64 addr, const struct pkvm_mem_transition *tx, + enum kvm_pgtable_prot perms) +{ + struct kvm_vcpu *vcpu = tx->completer.guest.vcpu; + struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm; + u64 size = tx->nr_pages * PAGE_SIZE; + enum kvm_pgtable_prot prot; + + prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED); + return kvm_pgtable_stage2_map(&vm->pgt, addr, size, tx->completer.guest.phys, + prot, &vcpu->arch.pkvm_memcache); +} + +static int guest_complete_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + enum kvm_pgtable_prot prot = pkvm_mkstate(KVM_PGTABLE_PROT_RWX, PKVM_PAGE_OWNED); + struct kvm_vcpu *vcpu = tx->completer.guest.vcpu; + struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm; + phys_addr_t phys = tx->completer.guest.phys; + u64 size = tx->nr_pages * PAGE_SIZE; + int err; + + if (tx->initiator.id == PKVM_ID_HOST) { + psci_mem_protect_inc(); + + if (ipa_in_pvmfw_region(vm, addr)) { + err = pkvm_load_pvmfw_pages(vm, addr, phys, size); + if (err) + return err; + } + } + + return kvm_pgtable_stage2_map(&vm->pgt, addr, size, phys, prot, + &vcpu->arch.pkvm_memcache); +} + +static int __guest_get_completer_addr(u64 *completer_addr, phys_addr_t phys, + const struct pkvm_mem_transition *tx) +{ + switch (tx->completer.id) { + case PKVM_ID_HOST: + *completer_addr = phys; + break; + case PKVM_ID_HYP: + *completer_addr = (u64)__hyp_va(phys); + break; + default: + return -EINVAL; + } + + return 0; +} + +static int __guest_request_page_transition(u64 *completer_addr, + const struct pkvm_mem_transition *tx, + enum pkvm_page_state desired) +{ + struct kvm_vcpu *vcpu = tx->initiator.guest.vcpu; + struct kvm_protected_vcpu *pkvm = &vcpu->arch.pkvm; + struct kvm_shadow_vm *vm = pkvm->shadow_vm; + enum pkvm_page_state state; + phys_addr_t phys; + kvm_pte_t pte; + u32 level; + int ret; + + if (tx->nr_pages != 1) + return -E2BIG; + + ret = kvm_pgtable_get_leaf(&vm->pgt, tx->initiator.addr, &pte, &level); + if (ret) + return ret; + + state = guest_get_page_state(pte); + if (state == PKVM_NOPAGE) + return -EFAULT; + + if (state != desired) + return -EPERM; + + /* + * We only deal with page granular mappings in the guest for now as + * the pgtable code relies on being able to recreate page mappings + * lazily after zapping a block mapping, which doesn't work once the + * pages have been donated. + */ + if (level != KVM_PGTABLE_MAX_LEVELS - 1) + return -EINVAL; + + phys = kvm_pte_to_phys(pte); + if (!addr_is_allowed_memory(phys)) + return -EINVAL; + + return __guest_get_completer_addr(completer_addr, phys, tx); +} + +static int guest_request_share(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + return __guest_request_page_transition(completer_addr, tx, + PKVM_PAGE_OWNED); +} + +static int guest_request_unshare(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + return __guest_request_page_transition(completer_addr, tx, + PKVM_PAGE_SHARED_OWNED); +} + +static int __guest_initiate_page_transition(u64 *completer_addr, + const struct pkvm_mem_transition *tx, + enum pkvm_page_state state) +{ + struct kvm_vcpu *vcpu = tx->initiator.guest.vcpu; + struct kvm_protected_vcpu *pkvm = &vcpu->arch.pkvm; + struct kvm_shadow_vm *vm = pkvm->shadow_vm; + struct kvm_hyp_memcache *mc = &vcpu->arch.pkvm_memcache; + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + enum kvm_pgtable_prot prot; + phys_addr_t phys; + kvm_pte_t pte; + int ret; + + ret = kvm_pgtable_get_leaf(&vm->pgt, addr, &pte, NULL); + if (ret) + return ret; + + phys = kvm_pte_to_phys(pte); + prot = pkvm_mkstate(kvm_pgtable_stage2_pte_prot(pte), state); + ret = kvm_pgtable_stage2_map(&vm->pgt, addr, size, phys, prot, mc); + if (ret) + return ret; + + return __guest_get_completer_addr(completer_addr, phys, tx); +} + +static int guest_initiate_share(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + return __guest_initiate_page_transition(completer_addr, tx, + PKVM_PAGE_SHARED_OWNED); +} + +static int guest_initiate_unshare(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + return __guest_initiate_page_transition(completer_addr, tx, + PKVM_PAGE_OWNED); +} + +static int check_share(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_request_owned_transition(&completer_addr, tx); + break; + case PKVM_ID_GUEST: + ret = guest_request_share(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HOST: + ret = host_ack_share(completer_addr, tx, share->completer_prot); + break; + case PKVM_ID_HYP: + ret = hyp_ack_share(completer_addr, tx, share->completer_prot); + break; + case PKVM_ID_GUEST: + ret = guest_ack_share(completer_addr, tx, share->completer_prot); + break; + case PKVM_ID_FFA: + /* + * We only check the host; the secure side will check the other + * end when we forward the FFA call. + */ + ret = 0; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int __do_share(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_initiate_share(&completer_addr, tx); + break; + case PKVM_ID_GUEST: + ret = guest_initiate_share(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HOST: + ret = host_complete_share(completer_addr, tx, share->completer_prot); + break; + case PKVM_ID_HYP: + ret = hyp_complete_share(completer_addr, tx, share->completer_prot); + break; + case PKVM_ID_GUEST: + ret = guest_complete_share(completer_addr, tx, share->completer_prot); + break; + case PKVM_ID_FFA: + /* + * We're not responsible for any secure page-tables, so there's + * nothing to do here. + */ + ret = 0; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/* + * do_share(): + * + * The page owner grants access to another component with a given set + * of permissions. + * + * Initiator: OWNED => SHARED_OWNED + * Completer: NOPAGE => SHARED_BORROWED + */ +static int do_share(struct pkvm_mem_share *share) +{ + int ret; + + ret = check_share(share); + if (ret) + return ret; + + return WARN_ON(__do_share(share)); +} + +static int check_unshare(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_request_unshare(&completer_addr, tx); + break; + case PKVM_ID_GUEST: + ret = guest_request_unshare(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HOST: + ret = host_ack_unshare(completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_ack_unshare(completer_addr, tx); + break; + case PKVM_ID_FFA: + /* See check_share() */ + ret = 0; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int __do_unshare(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_initiate_unshare(&completer_addr, tx); + break; + case PKVM_ID_GUEST: + ret = guest_initiate_unshare(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HOST: + ret = host_complete_unshare(completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_complete_unshare(completer_addr, tx); + break; + case PKVM_ID_FFA: + /* See __do_share() */ + ret = 0; + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/* + * do_unshare(): + * + * The page owner revokes access from another component for a range of + * pages which were previously shared using do_share(). + * + * Initiator: SHARED_OWNED => OWNED + * Completer: SHARED_BORROWED => NOPAGE + */ +static int do_unshare(struct pkvm_mem_share *share) +{ + int ret; + + ret = check_unshare(share); + if (ret) + return ret; + + return WARN_ON(__do_unshare(share)); +} + +static int check_donation(struct pkvm_mem_donation *donation) +{ + const struct pkvm_mem_transition *tx = &donation->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_request_owned_transition(&completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_request_donation(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id){ + case PKVM_ID_HOST: + ret = host_ack_donation(completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_ack_donation(completer_addr, tx); + break; + case PKVM_ID_GUEST: + ret = guest_ack_donation(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int __do_donate(struct pkvm_mem_donation *donation) +{ + const struct pkvm_mem_transition *tx = &donation->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_initiate_donation(&completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_initiate_donation(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id){ + case PKVM_ID_HOST: + ret = host_complete_donation(completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_complete_donation(completer_addr, tx); + break; + case PKVM_ID_GUEST: + ret = guest_complete_donation(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/* + * do_donate(): + * + * The page owner transfers ownership to another component, losing access + * as a consequence. + * + * Initiator: OWNED => NOPAGE + * Completer: NOPAGE => OWNED + */ +static int do_donate(struct pkvm_mem_donation *donation) +{ + int ret; + + ret = check_donation(donation); + if (ret) + return ret; + + return WARN_ON(__do_donate(donation)); +} + +int __pkvm_host_share_hyp(u64 pfn) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_share share = { + .tx = { + .nr_pages = 1, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = hyp_addr, + }, + }, + .completer = { + .id = PKVM_ID_HYP, + }, + }, + .completer_prot = PAGE_HYP, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_share(&share); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +int __pkvm_guest_share_host(struct kvm_vcpu *vcpu, u64 ipa) +{ + int ret; + struct pkvm_mem_share share = { + .tx = { + .nr_pages = 1, + .initiator = { + .id = PKVM_ID_GUEST, + .addr = ipa, + .guest = { + .vcpu = vcpu, + }, + }, + .completer = { + .id = PKVM_ID_HOST, + }, + }, + .completer_prot = PKVM_HOST_MEM_PROT, + }; + + host_lock_component(); + guest_lock_component(vcpu); + + ret = do_share(&share); + + guest_unlock_component(vcpu); + host_unlock_component(); + + return ret; +} + +int __pkvm_guest_unshare_host(struct kvm_vcpu *vcpu, u64 ipa) +{ + int ret; + struct pkvm_mem_share share = { + .tx = { + .nr_pages = 1, + .initiator = { + .id = PKVM_ID_GUEST, + .addr = ipa, + .guest = { + .vcpu = vcpu, + }, + }, + .completer = { + .id = PKVM_ID_HOST, + }, + }, + .completer_prot = PKVM_HOST_MEM_PROT, + }; + + host_lock_component(); + guest_lock_component(vcpu); + + ret = do_unshare(&share); + + guest_unlock_component(vcpu); + host_unlock_component(); + + return ret; +} + +int __pkvm_host_unshare_hyp(u64 pfn) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_share share = { + .tx = { + .nr_pages = 1, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = hyp_addr, + }, + }, + .completer = { + .id = PKVM_ID_HYP, + }, + }, + .completer_prot = PAGE_HYP, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_unshare(&share); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_donation donation = { + .tx = { + .nr_pages = nr_pages, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = hyp_addr, + }, + }, + .completer = { + .id = PKVM_ID_HYP, + }, + }, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_donate(&donation); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_donation donation = { + .tx = { + .nr_pages = nr_pages, + .initiator = { + .id = PKVM_ID_HYP, + .addr = hyp_addr, + .hyp = { + .completer_addr = host_addr, + }, + }, + .completer = { + .id = PKVM_ID_HOST, + }, + }, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_donate(&donation); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +int hyp_pin_shared_mem(void *from, void *to) +{ + u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); + u64 end = PAGE_ALIGN((u64)to); + u64 size = end - start; + int ret; + + host_lock_component(); + hyp_lock_component(); + + ret = __host_check_page_state_range(__hyp_pa(start), size, + PKVM_PAGE_SHARED_OWNED); + if (ret) + goto unlock; + + ret = __hyp_check_page_state_range(start, size, + PKVM_PAGE_SHARED_BORROWED); + if (ret) + goto unlock; + + for (cur = start; cur < end; cur += PAGE_SIZE) + hyp_page_ref_inc(hyp_virt_to_page(cur)); + +unlock: + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +void hyp_unpin_shared_mem(void *from, void *to) +{ + u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); + u64 end = PAGE_ALIGN((u64)to); + + host_lock_component(); + hyp_lock_component(); + + for (cur = start; cur < end; cur += PAGE_SIZE) + hyp_page_ref_dec(hyp_virt_to_page(cur)); + + hyp_unlock_component(); + host_unlock_component(); +} + +int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct kvm_vcpu *vcpu) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 guest_addr = hyp_pfn_to_phys(gfn); + struct pkvm_mem_share share = { + .tx = { + .nr_pages = 1, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = guest_addr, + }, + }, + .completer = { + .id = PKVM_ID_GUEST, + .guest = { + .vcpu = vcpu, + .phys = host_addr, + }, + }, + }, + .completer_prot = KVM_PGTABLE_PROT_RWX, + }; + + host_lock_component(); + guest_lock_component(vcpu); + + ret = do_share(&share); + + guest_unlock_component(vcpu); + host_unlock_component(); + + return ret; +} + +int __pkvm_host_donate_guest(u64 pfn, u64 gfn, struct kvm_vcpu *vcpu) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 guest_addr = hyp_pfn_to_phys(gfn); + struct pkvm_mem_donation donation = { + .tx = { + .nr_pages = 1, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = guest_addr, + }, + }, + .completer = { + .id = PKVM_ID_GUEST, + .guest = { + .vcpu = vcpu, + .phys = host_addr, + }, + }, + }, + }; + + host_lock_component(); + guest_lock_component(vcpu); + + ret = do_donate(&donation); + + guest_unlock_component(vcpu); + host_unlock_component(); + + return ret; +} + +int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages) +{ + int ret; + struct pkvm_mem_share share = { + .tx = { + .nr_pages = nr_pages, + .initiator = { + .id = PKVM_ID_HOST, + .addr = hyp_pfn_to_phys(pfn), + }, + .completer = { + .id = PKVM_ID_FFA, + }, + }, + }; + + host_lock_component(); + ret = do_share(&share); + host_unlock_component(); + + return ret; +} + +int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages) +{ + int ret; + struct pkvm_mem_share share = { + .tx = { + .nr_pages = nr_pages, + .initiator = { + .id = PKVM_ID_HOST, + .addr = hyp_pfn_to_phys(pfn), + }, + .completer = { + .id = PKVM_ID_FFA, + }, + }, + }; + + host_lock_component(); + ret = do_unshare(&share); + host_unlock_component(); + + return ret; +} + +static int hyp_zero_page(phys_addr_t phys) +{ + void *addr; + + addr = hyp_fixmap_map(phys); + if (!addr) + return -EINVAL; + memset(addr, 0, PAGE_SIZE); + __clean_dcache_guest_page(addr, PAGE_SIZE); + + return hyp_fixmap_unmap(); +} + +int __pkvm_host_reclaim_page(u64 pfn) +{ + u64 addr = hyp_pfn_to_phys(pfn); + struct hyp_page *page; + kvm_pte_t pte; + int ret; + + host_lock_component(); + + ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL); + if (ret) + goto unlock; + + if (host_get_page_state(pte) == PKVM_PAGE_OWNED) + goto unlock; + + page = hyp_phys_to_page(addr); + if (!(page->flags & HOST_PAGE_PENDING_RECLAIM)) { + ret = -EPERM; + goto unlock; + } + + if (page->flags & HOST_PAGE_NEED_POISONING) { + ret = hyp_zero_page(addr); + if (ret) + goto unlock; + page->flags &= ~HOST_PAGE_NEED_POISONING; + psci_mem_protect_dec(); + } + + ret = host_stage2_set_owner_locked(addr, PAGE_SIZE, pkvm_host_id); + if (ret) + goto unlock; + page->flags &= ~HOST_PAGE_PENDING_RECLAIM; + +unlock: + host_unlock_component(); + + return ret; +} + +/* Replace this with something more structured once day */ +#define MMIO_NOTE (('M' << 24 | 'M' << 16 | 'I' << 8 | 'O') << 1) + +static bool __check_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa) +{ + struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm; + kvm_pte_t pte; + u32 level; + int ret; + + ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level); + if (ret) + return false; + + /* Must be a PAGE_SIZE mapping with our annotation */ + return (BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)) == PAGE_SIZE && + pte == MMIO_NOTE); +} + +int __pkvm_install_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa) +{ + struct kvm_shadow_vm *vm; + kvm_pte_t pte; + u32 level; + int ret; + + vm = vcpu->arch.pkvm.shadow_vm; + + if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->arch.flags)) + return -EINVAL; + + if (ipa & ~PAGE_MASK) + return -EINVAL; + + guest_lock_component(vcpu); + + ret = kvm_pgtable_get_leaf(&vm->pgt, ipa, &pte, &level); + if (ret) + goto unlock; + + if (pte && BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level)) == PAGE_SIZE) { + /* + * Already flagged as MMIO, let's accept it, and fail + * otherwise + */ + if (pte != MMIO_NOTE) + ret = -EBUSY; + + goto unlock; + } + + ret = kvm_pgtable_stage2_annotate(&vm->pgt, ipa, PAGE_SIZE, + &vcpu->arch.pkvm_memcache, + MMIO_NOTE); + +unlock: + guest_unlock_component(vcpu); + return ret; +} + +int __pkvm_remove_ioguard_page(struct kvm_vcpu *vcpu, u64 ipa) +{ + struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm; + + if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->arch.flags)) + return -EINVAL; + + guest_lock_component(vcpu); + + if (__check_ioguard_page(vcpu, ipa)) { + struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm; + + kvm_pgtable_stage2_unmap(&vm->pgt, + ALIGN_DOWN(ipa, PAGE_SIZE), PAGE_SIZE); + } + + guest_unlock_component(vcpu); + return 0; +} + +bool __pkvm_check_ioguard_page(struct kvm_vcpu *vcpu) +{ + struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm; + u64 ipa, end; + bool ret; + + if (!kvm_vcpu_dabt_isvalid(vcpu)) + return false; + + if (!test_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vm->arch.flags)) + return true; + + ipa = kvm_vcpu_get_fault_ipa(vcpu); + ipa |= kvm_vcpu_get_hfar(vcpu) & FAR_MASK; + end = ipa + kvm_vcpu_dabt_get_as(vcpu) - 1; + + guest_lock_component(vcpu); + ret = __check_ioguard_page(vcpu, ipa); + if ((end & PAGE_MASK) != (ipa & PAGE_MASK)) + ret &= __check_ioguard_page(vcpu, end); + guest_unlock_component(vcpu); + + return ret; } diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 2fabeceb889a96e4cb15bf87328be1f4de2d323c..4e86a2123c054ea4368e5f843e8a4618651a12ae 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -8,21 +8,25 @@ #include #include #include +#include #include #include #include #include +#include #include #include struct kvm_pgtable pkvm_pgtable; hyp_spinlock_t pkvm_pgd_lock; -u64 __io_map_base; struct memblock_region hyp_memory[HYP_MEMBLOCK_REGIONS]; unsigned int hyp_memblock_nr; +static u64 __io_map_base; +static DEFINE_PER_CPU(void *, hyp_fixmap_base); + static int __pkvm_create_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot) { @@ -35,6 +39,22 @@ static int __pkvm_create_mappings(unsigned long start, unsigned long size, return err; } +static unsigned long hyp_alloc_private_va_range(size_t size) +{ + unsigned long addr = __io_map_base; + + hyp_assert_lock_held(&pkvm_pgd_lock); + __io_map_base += PAGE_ALIGN(size); + + /* Are we overflowing on the vmemmap ? */ + if (__io_map_base > __hyp_vmemmap) { + __io_map_base = addr; + addr = (unsigned long)ERR_PTR(-ENOMEM); + } + + return addr; +} + unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, enum kvm_pgtable_prot prot) { @@ -43,16 +63,10 @@ unsigned long __pkvm_create_private_mapping(phys_addr_t phys, size_t size, hyp_spin_lock(&pkvm_pgd_lock); - size = PAGE_ALIGN(size + offset_in_page(phys)); - addr = __io_map_base; - __io_map_base += size; - - /* Are we overflowing on the vmemmap ? */ - if (__io_map_base > __hyp_vmemmap) { - __io_map_base -= size; - addr = (unsigned long)ERR_PTR(-ENOMEM); + size = size + offset_in_page(phys); + addr = hyp_alloc_private_va_range(size); + if (IS_ERR((void *)addr)) goto out; - } err = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, size, phys, prot); if (err) { @@ -103,13 +117,36 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return ret; } -int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back) +int hyp_back_vmemmap(phys_addr_t back) { - unsigned long start, end; + unsigned long i, start, size, end = 0; + int ret; - hyp_vmemmap_range(phys, size, &start, &end); + for (i = 0; i < hyp_memblock_nr; i++) { + start = hyp_memory[i].base; + start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE); + /* + * The begining of the hyp_vmemmap region for the current + * memblock may already be backed by the page backing the end + * the previous region, so avoid mapping it twice. + */ + start = max(start, end); + + end = hyp_memory[i].base + hyp_memory[i].size; + end = PAGE_ALIGN((u64)hyp_phys_to_page(end)); + if (start >= end) + continue; + + size = end - start; + ret = __pkvm_create_mappings(start, size, back, PAGE_HYP); + if (ret) + return ret; + + memset(hyp_phys_to_virt(back), 0, size); + back += size; + } - return __pkvm_create_mappings(start, end - start, back, PAGE_HYP); + return 0; } static void *__hyp_bp_vect_base; @@ -146,8 +183,10 @@ int hyp_map_vectors(void) phys_addr_t phys; void *bp_base; - if (!cpus_have_const_cap(ARM64_SPECTRE_V3A)) + if (!kvm_system_needs_idmapped_vectors()) { + __hyp_bp_vect_base = __bp_harden_hyp_vecs; return 0; + } phys = __hyp_pa(__bp_harden_hyp_vecs); bp_base = (void *)__pkvm_create_private_mapping(phys, @@ -161,6 +200,89 @@ int hyp_map_vectors(void) return 0; } +void *hyp_fixmap_map(phys_addr_t phys) +{ + void *addr = *this_cpu_ptr(&hyp_fixmap_base); + int ret = kvm_pgtable_hyp_map(&pkvm_pgtable, (u64)addr, PAGE_SIZE, + phys, PAGE_HYP); + return ret ? NULL : addr; +} + +int hyp_fixmap_unmap(void) +{ + void *addr = *this_cpu_ptr(&hyp_fixmap_base); + int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, (u64)addr, PAGE_SIZE); + + return (ret != PAGE_SIZE) ? -EINVAL : 0; +} + +static int __pin_pgtable_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1) + return -EINVAL; + hyp_page_ref_inc(hyp_virt_to_page(ptep)); + + return 0; +} + +static int hyp_pin_pgtable_pages(u64 addr) +{ + struct kvm_pgtable_walker walker = { + .cb = __pin_pgtable_cb, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker); +} + +int hyp_create_pcpu_fixmap(void) +{ + unsigned long i; + int ret = 0; + u64 addr; + + hyp_spin_lock(&pkvm_pgd_lock); + + for (i = 0; i < hyp_nr_cpus; i++) { + addr = hyp_alloc_private_va_range(PAGE_SIZE); + if (IS_ERR((void *)addr)) { + ret = -ENOMEM; + goto unlock; + } + + /* + * Create a dummy mapping, to get the intermediate page-table + * pages allocated, then take a reference on the last level + * page to keep it around at all times. + */ + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE, + __hyp_pa(__hyp_bss_start), PAGE_HYP); + if (ret) { + ret = -EINVAL; + goto unlock; + } + + ret = hyp_pin_pgtable_pages(addr); + if (ret) + goto unlock; + + ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, PAGE_SIZE); + if (ret != PAGE_SIZE) { + ret = -EINVAL; + goto unlock; + } else { + ret = 0; + } + + *per_cpu_ptr(&hyp_fixmap_base, i) = (void *)addr; + } +unlock: + hyp_spin_unlock(&pkvm_pgd_lock); + + return ret; +} + int hyp_create_idmap(u32 hyp_va_bits) { unsigned long start, end; @@ -185,3 +307,36 @@ int hyp_create_idmap(u32 hyp_va_bits) return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); } + +static void *admit_host_page(void *arg) +{ + struct kvm_hyp_memcache *host_mc = arg; + + if (!host_mc->nr_pages) + return NULL; + + /* + * The host still owns the pages in its memcache, so we need to go + * through a full host-to-hyp donation cycle to change it. Fortunately, + * __pkvm_host_donate_hyp() takes care of races for us, so if it + * succeeds we're good to go. + */ + if (__pkvm_host_donate_hyp(hyp_phys_to_pfn(host_mc->head), 1)) + return NULL; + + return pop_hyp_memcache(host_mc, hyp_phys_to_virt); +} + +/* Refill our local memcache by poping pages from the one provided by the host. */ +int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, + struct kvm_hyp_memcache *host_mc) +{ + struct kvm_hyp_memcache tmp = *host_mc; + int ret; + + ret = __topup_hyp_memcache(mc, min_pages, admit_host_page, + hyp_virt_to_phys, &tmp); + *host_mc = tmp; + + return ret; +} diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index 41fc25bdfb34660f470ef9dda78fe0a8a7aee989..80d447bcbfef6bcd8d13dd154083460f30e3bbe3 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -32,7 +32,7 @@ u64 __hyp_vmemmap; */ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, struct hyp_page *p, - unsigned short order) + u8 order) { phys_addr_t addr = hyp_page_to_phys(p); @@ -51,7 +51,7 @@ static struct hyp_page *__find_buddy_nocheck(struct hyp_pool *pool, /* Find a buddy page currently available for allocation */ static struct hyp_page *__find_buddy_avail(struct hyp_pool *pool, struct hyp_page *p, - unsigned short order) + u8 order) { struct hyp_page *buddy = __find_buddy_nocheck(pool, p, order); @@ -93,11 +93,15 @@ static inline struct hyp_page *node_to_page(struct list_head *node) static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { - unsigned short order = p->order; + phys_addr_t phys = hyp_page_to_phys(p); struct hyp_page *buddy; + u8 order = p->order; memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); + if (phys < pool->range_start || phys >= pool->range_end) + goto insert; + /* * Only the first struct hyp_page of a high-order page (otherwise known * as the 'head') should have p->order set. The non-head pages should @@ -116,6 +120,7 @@ static void __hyp_attach_page(struct hyp_pool *pool, p = min(p, buddy); } +insert: /* Mark the new head, and insert it */ p->order = order; page_add_to_list(p, &pool->free_area[order]); @@ -123,7 +128,7 @@ static void __hyp_attach_page(struct hyp_pool *pool, static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, struct hyp_page *p, - unsigned short order) + u8 order) { struct hyp_page *buddy; @@ -144,24 +149,6 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, return p; } -static inline void hyp_page_ref_inc(struct hyp_page *p) -{ - BUG_ON(p->refcount == USHRT_MAX); - p->refcount++; -} - -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) -{ - p->refcount--; - return (p->refcount == 0); -} - -static inline void hyp_set_page_refcounted(struct hyp_page *p) -{ - BUG_ON(p->refcount); - p->refcount = 1; -} - static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) { if (hyp_page_ref_dec_and_test(p)) @@ -193,10 +180,24 @@ void hyp_get_page(struct hyp_pool *pool, void *addr) hyp_spin_unlock(&pool->lock); } -void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order) +void hyp_split_page(struct hyp_page *p) +{ + u8 order = p->order; + unsigned int i; + + p->order = 0; + for (i = 1; i < (1 << order); i++) { + struct hyp_page *tail = p + i; + + tail->order = 0; + hyp_set_page_refcounted(tail); + } +} + +void *hyp_alloc_pages(struct hyp_pool *pool, u8 order) { - unsigned short i = order; struct hyp_page *p; + u8 i = order; hyp_spin_lock(&pool->lock); @@ -226,7 +227,7 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, int i; hyp_spin_lock_init(&pool->lock); - pool->max_order = min(MAX_ORDER, get_order(nr_pages << PAGE_SHIFT)); + pool->max_order = min(MAX_ORDER, get_order((nr_pages + 1) << PAGE_SHIFT)); for (i = 0; i < pool->max_order; i++) INIT_LIST_HEAD(&pool->free_area[i]); pool->range_start = phys; @@ -234,10 +235,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); - for (i = 0; i < nr_pages; i++) { - p[i].order = 0; + for (i = 0; i < nr_pages; i++) hyp_set_page_refcounted(&p[i]); - } /* Attach the unused pages to the buddy tree */ for (i = reserved_pages; i < nr_pages; i++) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c new file mode 100644 index 0000000000000000000000000000000000000000..511b5dd5a6fb1621406b679da7d63371c36abd07 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -0,0 +1,1307 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include +#include +#include + +/* Used by icache_is_vpipt(). */ +unsigned long __icache_flags; + +/* + * Set trap register values based on features in ID_AA64PFR0. + */ +static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1); + u64 hcr_set = HCR_RW; + u64 hcr_clear = 0; + u64 cptr_set = 0; + + /* Protected KVM does not support AArch32 guests. */ + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY); + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY); + + /* + * Linux guests assume support for floating-point and Advanced SIMD. Do + * not change the trapping behavior for these from the KVM default. + */ + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP), + PVM_ID_AA64PFR0_ALLOW)); + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD), + PVM_ID_AA64PFR0_ALLOW)); + + /* Trap RAS unless all current versions are supported */ + if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) < + ID_AA64PFR0_RAS_V1P1) { + hcr_set |= HCR_TERR | HCR_TEA; + hcr_clear |= HCR_FIEN; + } + + /* Trap AMU */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) { + hcr_clear |= HCR_AMVOFFEN; + cptr_set |= CPTR_EL2_TAM; + } + + /* Trap SVE */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids)) + cptr_set |= CPTR_EL2_TZ; + + vcpu->arch.hcr_el2 |= hcr_set; + vcpu->arch.hcr_el2 &= ~hcr_clear; + vcpu->arch.cptr_el2 |= cptr_set; +} + +/* + * Set trap register values based on features in ID_AA64PFR1. + */ +static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1); + u64 hcr_set = 0; + u64 hcr_clear = 0; + + /* Memory Tagging: Trap and Treat as Untagged if not supported. */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) { + hcr_set |= HCR_TID5; + hcr_clear |= HCR_DCT | HCR_ATA; + } + + vcpu->arch.hcr_el2 |= hcr_set; + vcpu->arch.hcr_el2 &= ~hcr_clear; +} + +/* + * Set trap register values based on features in ID_AA64DFR0. + */ +static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1); + u64 mdcr_set = 0; + u64 mdcr_clear = 0; + u64 cptr_set = 0; + + /* Trap/constrain PMU */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) { + mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR; + mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME | + MDCR_EL2_HPMN_MASK; + } + + /* Trap Debug */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids)) + mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA; + + /* Trap OS Double Lock */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids)) + mdcr_set |= MDCR_EL2_TDOSA; + + /* Trap SPE */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) { + mdcr_set |= MDCR_EL2_TPMS; + mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + } + + /* Trap Trace Filter */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids)) + mdcr_set |= MDCR_EL2_TTRF; + + /* Trap Trace */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids)) + cptr_set |= CPTR_EL2_TTA; + + vcpu->arch.mdcr_el2 |= mdcr_set; + vcpu->arch.mdcr_el2 &= ~mdcr_clear; + vcpu->arch.cptr_el2 |= cptr_set; +} + +/* + * Set trap register values based on features in ID_AA64MMFR0. + */ +static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1); + u64 mdcr_set = 0; + + /* Trap Debug Communications Channel registers */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids)) + mdcr_set |= MDCR_EL2_TDCC; + + vcpu->arch.mdcr_el2 |= mdcr_set; +} + +/* + * Set trap register values based on features in ID_AA64MMFR1. + */ +static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1); + u64 hcr_set = 0; + + /* Trap LOR */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids)) + hcr_set |= HCR_TLOR; + + vcpu->arch.hcr_el2 |= hcr_set; +} + +/* + * Set baseline trap register values. + */ +static void pvm_init_trap_regs(struct kvm_vcpu *vcpu) +{ + vcpu->arch.cptr_el2 = CPTR_EL2_DEFAULT; + vcpu->arch.mdcr_el2 = 0; + + /* + * Always trap: + * - Feature id registers: to control features exposed to guests + * - Implementation-defined features + */ + vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS | + HCR_TID3 | HCR_TACR | HCR_TIDCP | HCR_TID1; + + if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN)) { + /* route synchronous external abort exceptions to EL2 */ + vcpu->arch.hcr_el2 |= HCR_TEA; + /* trap error record accesses */ + vcpu->arch.hcr_el2 |= HCR_TERR; + } + + if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + vcpu->arch.hcr_el2 |= HCR_FWB; + + if (cpus_have_const_cap(ARM64_MISMATCHED_CACHE_TYPE)) + vcpu->arch.hcr_el2 |= HCR_TID2; +} + +/* + * Initialize trap register values for protected VMs. + */ +static void pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) +{ + pvm_init_trap_regs(vcpu); + pvm_init_traps_aa64pfr0(vcpu); + pvm_init_traps_aa64pfr1(vcpu); + pvm_init_traps_aa64dfr0(vcpu); + pvm_init_traps_aa64mmfr0(vcpu); + pvm_init_traps_aa64mmfr1(vcpu); +} + +/* + * Start the shadow table handle at the offset defined instead of at 0. + * Mainly for sanity checking and debugging. + */ +#define HANDLE_OFFSET 0x1000 + +static int shadow_handle_to_index(int shadow_handle) +{ + return shadow_handle - HANDLE_OFFSET; +} + +static int index_to_shadow_handle(int index) +{ + return index + HANDLE_OFFSET; +} + +extern unsigned long hyp_nr_cpus; + +/* + * Spinlock for protecting the shadow table related state. + * Protects writes to shadow_table, num_shadow_entries, and next_shadow_alloc, + * as well as reads and writes to last_shadow_vcpu_lookup. + */ +static DEFINE_HYP_SPINLOCK(shadow_lock); + +/* + * The table of shadow entries for protected VMs in hyp. + * Allocated at hyp initialization and setup. + */ +static struct kvm_shadow_vm **shadow_table; + +/* Current number of vms in the shadow table. */ +static int num_shadow_entries; + +/* The next entry index to try to allocate from. */ +static int next_shadow_alloc; + +void hyp_shadow_table_init(void *tbl) +{ + WARN_ON(shadow_table); + shadow_table = tbl; +} + +/* + * Return the shadow vm corresponding to the handle. + */ +static struct kvm_shadow_vm *find_shadow_by_handle(int shadow_handle) +{ + int shadow_index = shadow_handle_to_index(shadow_handle); + + if (unlikely(shadow_index < 0 || shadow_index >= KVM_MAX_PVMS)) + return NULL; + + return shadow_table[shadow_index]; +} + +struct kvm_vcpu *get_shadow_vcpu(int shadow_handle, unsigned int vcpu_idx) +{ + struct kvm_vcpu *vcpu = NULL; + struct kvm_shadow_vm *vm; + + hyp_spin_lock(&shadow_lock); + vm = find_shadow_by_handle(shadow_handle); + if (!vm || vm->created_vcpus <= vcpu_idx) + goto unlock; + vcpu = &vm->shadow_vcpus[vcpu_idx].vcpu; + + /* Ensure vcpu isn't loaded on more than one cpu simultaneously. */ + if (unlikely(vcpu->arch.pkvm.loaded_on_cpu)) { + vcpu = NULL; + goto unlock; + } + vcpu->arch.pkvm.loaded_on_cpu = true; + + hyp_page_ref_inc(hyp_virt_to_page(vm)); +unlock: + hyp_spin_unlock(&shadow_lock); + + return vcpu; +} + +void put_shadow_vcpu(struct kvm_vcpu *vcpu) +{ + struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm; + + hyp_spin_lock(&shadow_lock); + vcpu->arch.pkvm.loaded_on_cpu = false; + hyp_page_ref_dec(hyp_virt_to_page(vm)); + hyp_spin_unlock(&shadow_lock); +} + +/* Check and copy the supported features for the vcpu from the host. */ +static int copy_features(struct kvm_vcpu *shadow_vcpu, struct kvm_vcpu *host_vcpu) +{ + DECLARE_BITMAP(allowed_features, KVM_VCPU_MAX_FEATURES); + + bitmap_zero(allowed_features, KVM_VCPU_MAX_FEATURES); + + /* + * Always allowed: + * - CPU starting in poweroff state + * - PSCI v0.2 + */ + set_bit(KVM_ARM_VCPU_POWER_OFF, allowed_features); + set_bit(KVM_ARM_VCPU_PSCI_0_2, allowed_features); + + /* + * Check if remaining features are allowed: + * - Performance Monitoring + * - Scalable Vectors + * - Pointer Authentication + */ + if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), PVM_ID_AA64DFR0_ALLOW)) + set_bit(KVM_ARM_VCPU_PMU_V3, allowed_features); + + if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), PVM_ID_AA64PFR0_ALLOW)) + set_bit(KVM_ARM_VCPU_SVE, allowed_features); + + if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_API), PVM_ID_AA64ISAR1_ALLOW) && + FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA), PVM_ID_AA64ISAR1_ALLOW)) + set_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, allowed_features); + + if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI), PVM_ID_AA64ISAR1_ALLOW) && + FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA), PVM_ID_AA64ISAR1_ALLOW)) + set_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, allowed_features); + + bitmap_and(shadow_vcpu->arch.features, host_vcpu->arch.features, + allowed_features, KVM_VCPU_MAX_FEATURES); + + /* + * Check for system support for address/generic pointer authentication + * features if either are enabled. + */ + if ((test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, shadow_vcpu->arch.features) || + test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, shadow_vcpu->arch.features)) && + !system_has_full_ptr_auth()) + return -EINVAL; + + return 0; +} + +static void unpin_host_vcpus(struct shadow_vcpu_state *shadow_vcpus, int nr_vcpus) +{ + int i; + + for (i = 0; i < nr_vcpus; i++) { + struct kvm_vcpu *host_vcpu = shadow_vcpus[i].vcpu.arch.pkvm.host_vcpu; + struct kvm_vcpu *shadow_vcpu = &shadow_vcpus[i].vcpu; + size_t sve_state_size; + void *sve_state; + + hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1); + + if (!test_bit(KVM_ARM_VCPU_SVE, shadow_vcpu->arch.features)) + continue; + + sve_state = shadow_vcpu->arch.sve_state; + sve_state = kern_hyp_va(sve_state); + sve_state_size = vcpu_sve_state_size(shadow_vcpu); + hyp_unpin_shared_mem(sve_state, sve_state + sve_state_size); + } +} + +static int set_host_vcpus(struct shadow_vcpu_state *shadow_vcpus, int nr_vcpus, + struct kvm_vcpu **vcpu_array, size_t vcpu_array_size) +{ + int i; + + if (vcpu_array_size < sizeof(*vcpu_array) * nr_vcpus) + return -EINVAL; + + for (i = 0; i < nr_vcpus; i++) { + struct kvm_vcpu *host_vcpu = kern_hyp_va(vcpu_array[i]); + + if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1)) { + unpin_host_vcpus(shadow_vcpus, i); + return -EBUSY; + } + + shadow_vcpus[i].vcpu.arch.pkvm.host_vcpu = host_vcpu; + } + + return 0; +} + +static int init_shadow_structs(struct kvm *kvm, struct kvm_shadow_vm *vm, + struct kvm_vcpu **vcpu_array, int nr_vcpus) +{ + int i; + int ret; + + vm->host_kvm = kvm; + vm->created_vcpus = nr_vcpus; + vm->arch.pkvm.pvmfw_load_addr = kvm->arch.pkvm.pvmfw_load_addr; + vm->arch.pkvm.enabled = READ_ONCE(kvm->arch.pkvm.enabled); + + for (i = 0; i < nr_vcpus; i++) { + struct shadow_vcpu_state *shadow_state = &vm->shadow_vcpus[i]; + struct kvm_vcpu *shadow_vcpu = &shadow_state->vcpu; + struct kvm_vcpu *host_vcpu = shadow_vcpu->arch.pkvm.host_vcpu; + + shadow_vcpu->kvm = kvm; + shadow_vcpu->vcpu_id = host_vcpu->vcpu_id; + shadow_vcpu->vcpu_idx = i; + + ret = copy_features(shadow_vcpu, host_vcpu); + if (ret) + return ret; + + if (test_bit(KVM_ARM_VCPU_SVE, shadow_vcpu->arch.features)) { + size_t sve_state_size; + void *sve_state; + + shadow_vcpu->arch.sve_state = READ_ONCE(host_vcpu->arch.sve_state); + shadow_vcpu->arch.sve_max_vl = READ_ONCE(host_vcpu->arch.sve_max_vl); + + sve_state = kern_hyp_va(shadow_vcpu->arch.sve_state); + sve_state_size = vcpu_sve_state_size(shadow_vcpu); + + if (!shadow_vcpu->arch.sve_state || !sve_state_size || + hyp_pin_shared_mem(sve_state, + sve_state + sve_state_size)) { + clear_bit(KVM_ARM_VCPU_SVE, + shadow_vcpu->arch.features); + shadow_vcpu->arch.sve_state = NULL; + shadow_vcpu->arch.sve_max_vl = 0; + return -EINVAL; + } + } + + if (vm->arch.pkvm.enabled) + pkvm_vcpu_init_traps(shadow_vcpu); + kvm_reset_pvm_sys_regs(shadow_vcpu); + + vm->vcpus[i] = shadow_vcpu; + shadow_state->vm = vm; + + shadow_vcpu->arch.hw_mmu = &vm->arch.mmu; + shadow_vcpu->arch.pkvm.shadow_handle = vm->shadow_handle; + shadow_vcpu->arch.pkvm.shadow_vm = vm; + shadow_vcpu->arch.power_off = true; + + if (test_bit(KVM_ARM_VCPU_POWER_OFF, shadow_vcpu->arch.features)) { + shadow_vcpu->arch.pkvm.power_state = PSCI_0_2_AFFINITY_LEVEL_OFF; + } else if (pvm_has_pvmfw(vm)) { + if (vm->pvmfw_entry_vcpu) + return -EINVAL; + + vm->pvmfw_entry_vcpu = shadow_vcpu; + shadow_vcpu->arch.reset_state.reset = true; + shadow_vcpu->arch.pkvm.power_state = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING; + } else { + struct vcpu_reset_state *reset_state = &shadow_vcpu->arch.reset_state; + + reset_state->pc = *vcpu_pc(host_vcpu); + reset_state->r0 = vcpu_get_reg(host_vcpu, 0); + reset_state->reset = true; + shadow_vcpu->arch.pkvm.power_state = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING; + } + } + + return 0; +} + +static bool __exists_shadow(struct kvm *host_kvm) +{ + int i; + int num_checked = 0; + + for (i = 0; i < KVM_MAX_PVMS && num_checked < num_shadow_entries; i++) { + if (!shadow_table[i]) + continue; + + if (unlikely(shadow_table[i]->host_kvm == host_kvm)) + return true; + + num_checked++; + } + + return false; +} + +/* + * Allocate a shadow table entry and insert a pointer to the shadow vm. + * + * Return a unique handle to the protected VM on success, + * negative error code on failure. + */ +static int insert_shadow_table(struct kvm *kvm, struct kvm_shadow_vm *vm, + size_t shadow_size) +{ + struct kvm_s2_mmu *mmu = &vm->arch.mmu; + int shadow_handle; + int vmid; + + hyp_assert_lock_held(&shadow_lock); + + if (unlikely(num_shadow_entries >= KVM_MAX_PVMS)) + return -ENOMEM; + + /* + * Initializing protected state might have failed, yet a malicious host + * could trigger this function. Thus, ensure that shadow_table exists. + */ + if (unlikely(!shadow_table)) + return -EINVAL; + + /* Check that a shadow hasn't been created before for this host KVM. */ + if (unlikely(__exists_shadow(kvm))) + return -EEXIST; + + /* Find the next free entry in the shadow table. */ + while (shadow_table[next_shadow_alloc]) + next_shadow_alloc = (next_shadow_alloc + 1) % KVM_MAX_PVMS; + shadow_handle = index_to_shadow_handle(next_shadow_alloc); + + vm->shadow_handle = shadow_handle; + vm->shadow_area_size = shadow_size; + + /* VMID 0 is reserved for the host */ + vmid = next_shadow_alloc + 1; + if (vmid > 0xff) + return -ENOMEM; + + mmu->vmid.vmid = vmid; + mmu->vmid.vmid_gen = 0; + mmu->arch = &vm->arch; + mmu->pgt = &vm->pgt; + + shadow_table[next_shadow_alloc] = vm; + next_shadow_alloc = (next_shadow_alloc + 1) % KVM_MAX_PVMS; + num_shadow_entries++; + + return shadow_handle; +} + +/* + * Deallocate and remove the shadow table entry corresponding to the handle. + */ +static void remove_shadow_table(int shadow_handle) +{ + hyp_assert_lock_held(&shadow_lock); + shadow_table[shadow_handle_to_index(shadow_handle)] = NULL; + num_shadow_entries--; +} + +static size_t pkvm_get_shadow_size(int num_vcpus) +{ + /* Shadow space for the vm struct and all of its vcpu states. */ + return sizeof(struct kvm_shadow_vm) + + sizeof(struct shadow_vcpu_state) * num_vcpus; +} + +/* + * Check whether the size of the area donated by the host is sufficient for + * the shadow structues required for nr_vcpus as well as the shadow vm. + */ +static int check_shadow_size(int nr_vcpus, size_t shadow_size) +{ + if (nr_vcpus < 1 || nr_vcpus > KVM_MAX_VCPUS) + return -EINVAL; + + /* + * Shadow size is rounded up when allocated and donated by the host, + * so it's likely to be larger than the sum of the struct sizes. + */ + if (shadow_size < pkvm_get_shadow_size(nr_vcpus)) + return -EINVAL; + + return 0; +} + +/* + * Initialize the shadow copy of the protected VM state using the memory + * donated by the host. + * + * Unmaps the donated memory from the host at stage 2. + * + * kvm: A pointer to the host's struct kvm (host va). + * shadow_va: The host va of the area being donated for the shadow state. + * Must be page aligned. + * shadow_size: The size of the area being donated for the shadow state. + * Must be a multiple of the page size. + * pgd: The host va of the area being donated for the stage-2 PGD for the VM. + * Must be page aligned. Its size is implied by the VM's VTCR. + * Note: An array to the host KVM VCPUs (host VA) is passed via the pgd, as to + * not to be dependent on how the VCPU's are layed out in struct kvm. + * + * Return a unique handle to the protected VM on success, + * negative error code on failure. + */ +int __pkvm_init_shadow(struct kvm *kvm, + void *shadow_va, + size_t shadow_size, + void *pgd) +{ + struct kvm_shadow_vm *vm = kern_hyp_va(shadow_va); + phys_addr_t shadow_pa = hyp_virt_to_phys(vm); + u64 pfn = hyp_phys_to_pfn(shadow_pa); + u64 nr_shadow_pages = shadow_size >> PAGE_SHIFT; + u64 nr_pgd_pages; + size_t pgd_size; + int nr_vcpus = 0; + int ret = 0; + + /* Check that the donated memory is aligned to page boundaries. */ + if (!PAGE_ALIGNED(shadow_va) || + !PAGE_ALIGNED(shadow_size) || + !PAGE_ALIGNED(pgd)) + return -EINVAL; + + kvm = kern_hyp_va(kvm); + pgd = kern_hyp_va(pgd); + + ret = hyp_pin_shared_mem(kvm, kvm + 1); + if (ret) + return ret; + + /* Ensure the host has donated enough memory for the shadow structs. */ + nr_vcpus = kvm->created_vcpus; + ret = check_shadow_size(nr_vcpus, shadow_size); + if (ret) + goto err; + + ret = __pkvm_host_donate_hyp(pfn, nr_shadow_pages); + if (ret) + goto err; + + /* Ensure we're working with a clean slate. */ + memset(vm, 0, shadow_size); + + vm->arch.vtcr = host_kvm.arch.vtcr; + pgd_size = kvm_pgtable_stage2_pgd_size(host_kvm.arch.vtcr); + nr_pgd_pages = pgd_size >> PAGE_SHIFT; + ret = __pkvm_host_donate_hyp(hyp_virt_to_pfn(pgd), nr_pgd_pages); + if (ret) + goto err_remove_mappings; + + ret = set_host_vcpus(vm->shadow_vcpus, nr_vcpus, pgd, pgd_size); + if (ret) + goto err_remove_pgd; + + ret = init_shadow_structs(kvm, vm, pgd, nr_vcpus); + if (ret < 0) + goto err_unpin_host_vcpus; + + /* Add the entry to the shadow table. */ + hyp_spin_lock(&shadow_lock); + ret = insert_shadow_table(kvm, vm, shadow_size); + if (ret < 0) + goto err_unlock_unpin_host_vcpus; + + ret = kvm_guest_prepare_stage2(vm, pgd); + if (ret) + goto err_remove_shadow_table; + + hyp_spin_unlock(&shadow_lock); + return vm->shadow_handle; + +err_remove_shadow_table: + remove_shadow_table(vm->shadow_handle); +err_unlock_unpin_host_vcpus: + hyp_spin_unlock(&shadow_lock); +err_unpin_host_vcpus: + unpin_host_vcpus(vm->shadow_vcpus, nr_vcpus); +err_remove_pgd: + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pgd_pages)); + +err_remove_mappings: + /* Clear the donated shadow memory on failure to avoid data leaks. */ + memset(vm, 0, shadow_size); + WARN_ON(__pkvm_hyp_donate_host(hyp_phys_to_pfn(shadow_pa), + shadow_size >> PAGE_SHIFT)); + +err: + hyp_unpin_shared_mem(kvm, kvm + 1); + return ret; +} + +int __pkvm_teardown_shadow(int shadow_handle) +{ + struct kvm_hyp_memcache *mc; + struct kvm_shadow_vm *vm; + struct kvm *host_kvm; + size_t shadow_size; + int err; + u64 pfn; + u64 nr_pages; + void *addr; + + /* Lookup then remove entry from the shadow table. */ + hyp_spin_lock(&shadow_lock); + vm = find_shadow_by_handle(shadow_handle); + if (!vm) { + err = -ENOENT; + goto err_unlock; + } + + if (WARN_ON(hyp_page_count(vm))) { + err = -EBUSY; + goto err_unlock; + } + + /* Ensure the VMID is clean before it can be reallocated */ + __kvm_tlb_flush_vmid(&vm->arch.mmu); + remove_shadow_table(shadow_handle); + hyp_spin_unlock(&shadow_lock); + + /* Reclaim guest pages, and page-table pages */ + mc = &vm->host_kvm->arch.pkvm.teardown_mc; + reclaim_guest_pages(vm, mc); + unpin_host_vcpus(vm->shadow_vcpus, vm->created_vcpus); + + /* Push the metadata pages to the teardown memcache */ + shadow_size = vm->shadow_area_size; + host_kvm = vm->host_kvm; + memset(vm, 0, shadow_size); + for (addr = vm; addr < ((void *)vm + shadow_size); addr += PAGE_SIZE) + push_hyp_memcache(mc, addr, hyp_virt_to_phys); + hyp_unpin_shared_mem(host_kvm, host_kvm + 1); + + pfn = hyp_phys_to_pfn(__hyp_pa(vm)); + nr_pages = shadow_size >> PAGE_SHIFT; + WARN_ON(__pkvm_hyp_donate_host(pfn, nr_pages)); + return 0; + +err_unlock: + hyp_spin_unlock(&shadow_lock); + return err; +} + +int pkvm_load_pvmfw_pages(struct kvm_shadow_vm *vm, u64 ipa, phys_addr_t phys, + u64 size) +{ + struct kvm_protected_vm *pkvm = &vm->arch.pkvm; + u64 npages, offset = ipa - pkvm->pvmfw_load_addr; + void *src = hyp_phys_to_virt(pvmfw_base) + offset; + + if (offset >= pvmfw_size) + return -EINVAL; + + size = min(size, pvmfw_size - offset); + if (!PAGE_ALIGNED(size) || !PAGE_ALIGNED(src)) + return -EINVAL; + + npages = size >> PAGE_SHIFT; + while (npages--) { + void *dst; + + dst = hyp_fixmap_map(phys); + if (!dst) + return -EINVAL; + + /* + * No need for cache maintenance here, as the pgtable code will + * take care of this when installing the pte in the guest's + * stage-2 page table. + */ + memcpy(dst, src, PAGE_SIZE); + + hyp_fixmap_unmap(); + src += PAGE_SIZE; + phys += PAGE_SIZE; + } + + return 0; +} + +void pkvm_clear_pvmfw_pages(void) +{ + void *addr = hyp_phys_to_virt(pvmfw_base); + + memset(addr, 0, pvmfw_size); + kvm_flush_dcache_to_poc(addr, pvmfw_size); +} + +/* + * This function sets the registers on the vcpu to their architecturally defined + * reset values. + * + * Note: Can only be called by the vcpu on itself, after it has been turned on. + */ +void pkvm_reset_vcpu(struct kvm_vcpu *vcpu) +{ + struct vcpu_reset_state *reset_state = &vcpu->arch.reset_state; + struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm; + + WARN_ON(!reset_state->reset); + + if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || + test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) { + /* + * This call should not fail since we've already checked for + * feature support on initialization. + */ + WARN_ON(kvm_vcpu_enable_ptrauth(vcpu)); + } + + /* Reset core registers */ + memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu))); + memset(&vcpu->arch.ctxt.fp_regs, 0, sizeof(vcpu->arch.ctxt.fp_regs)); + vcpu_gp_regs(vcpu)->pstate = VCPU_RESET_PSTATE_EL1; + + /* Reset system registers */ + kvm_reset_pvm_sys_regs(vcpu); + + /* Propagate initiator's endianness, after kvm_reset_pvm_sys_regs. */ + if (reset_state->be) + kvm_vcpu_set_be(vcpu); + + if (vm->pvmfw_entry_vcpu == vcpu) { + struct kvm_vcpu *host_vcpu = vcpu->arch.pkvm.host_vcpu; + u64 entry = vm->arch.pkvm.pvmfw_load_addr; + int i; + + /* X0 - X14 provided by the VMM (preserved) */ + for (i = 0; i <= 14; ++i) + vcpu_set_reg(vcpu, i, vcpu_get_reg(host_vcpu, i)); + + /* X15: Boot protocol version */ + vcpu_set_reg(vcpu, 15, 0); + + /* PC: IPA of pvmfw base */ + *vcpu_pc(vcpu) = entry; + + vm->pvmfw_entry_vcpu = NULL; + + /* Auto enroll MMIO guard */ + set_bit(KVM_ARCH_FLAG_MMIO_GUARD, + &vcpu->arch.pkvm.shadow_vm->arch.flags); + } else { + *vcpu_pc(vcpu) = reset_state->pc; + vcpu_set_reg(vcpu, 0, reset_state->r0); + } + + reset_state->reset = false; + + vcpu->arch.pkvm.exit_code = 0; + + WARN_ON(vcpu->arch.pkvm.power_state != PSCI_0_2_AFFINITY_LEVEL_ON_PENDING); + WRITE_ONCE(vcpu->arch.power_off, false); + WRITE_ONCE(vcpu->arch.pkvm.power_state, PSCI_0_2_AFFINITY_LEVEL_ON); +} + +struct kvm_vcpu *pvm_mpidr_to_vcpu(struct kvm_shadow_vm *vm, unsigned long mpidr) +{ + struct kvm_vcpu *vcpu; + int i; + + mpidr &= MPIDR_HWID_BITMASK; + + for (i = 0; i < vm->created_vcpus; i++) { + vcpu = vm->vcpus[i]; + + if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu)) + return vcpu; + } + + return NULL; +} + +/* + * Returns true if the hypervisor handled PSCI call, and control should go back + * to the guest, or false if the host needs to do some additional work (i.e., + * wake up the vcpu). + */ +static bool pvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) +{ + struct kvm_shadow_vm *vm = source_vcpu->arch.pkvm.shadow_vm; + struct kvm_vcpu *vcpu; + struct vcpu_reset_state *reset_state; + unsigned long cpu_id; + unsigned long hvc_ret_val; + int power_state; + + cpu_id = smccc_get_arg1(source_vcpu); + if (!kvm_psci_valid_affinity(source_vcpu, cpu_id)) { + hvc_ret_val = PSCI_RET_INVALID_PARAMS; + goto error; + } + + vcpu = pvm_mpidr_to_vcpu(vm, cpu_id); + + /* Make sure the caller requested a valid vcpu. */ + if (!vcpu) { + hvc_ret_val = PSCI_RET_INVALID_PARAMS; + goto error; + } + + /* + * Make sure the requested vcpu is not on to begin with. + * Atomic to avoid race between vcpus trying to power on the same vcpu. + */ + power_state = cmpxchg(&vcpu->arch.pkvm.power_state, + PSCI_0_2_AFFINITY_LEVEL_OFF, + PSCI_0_2_AFFINITY_LEVEL_ON_PENDING); + switch (power_state) { + case PSCI_0_2_AFFINITY_LEVEL_ON_PENDING: + hvc_ret_val = PSCI_RET_ON_PENDING; + goto error; + case PSCI_0_2_AFFINITY_LEVEL_ON: + hvc_ret_val = PSCI_RET_ALREADY_ON; + goto error; + case PSCI_0_2_AFFINITY_LEVEL_OFF: + break; + default: + hvc_ret_val = PSCI_RET_INTERNAL_FAILURE; + goto error; + } + + reset_state = &vcpu->arch.reset_state; + + reset_state->pc = smccc_get_arg2(source_vcpu); + reset_state->r0 = smccc_get_arg3(source_vcpu); + + /* Propagate caller endianness */ + reset_state->be = kvm_vcpu_is_be(source_vcpu); + + reset_state->reset = true; + + /* + * Return to the host, which should make the KVM_REQ_VCPU_RESET request + * as well as kvm_vcpu_wake_up() to schedule the vcpu. + */ + return false; + +error: + /* If there's an error go back straight to the guest. */ + smccc_set_retval(source_vcpu, hvc_ret_val, 0, 0, 0); + return true; +} + +static bool pvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) +{ + int i, matching_cpus = 0; + unsigned long mpidr; + unsigned long target_affinity; + unsigned long target_affinity_mask; + unsigned long lowest_affinity_level; + struct kvm_shadow_vm *vm = vcpu->arch.pkvm.shadow_vm; + struct kvm_vcpu *tmp; + unsigned long hvc_ret_val; + + target_affinity = smccc_get_arg1(vcpu); + lowest_affinity_level = smccc_get_arg2(vcpu); + + if (!kvm_psci_valid_affinity(vcpu, target_affinity)) { + hvc_ret_val = PSCI_RET_INVALID_PARAMS; + goto done; + } + + /* Determine target affinity mask */ + target_affinity_mask = psci_affinity_mask(lowest_affinity_level); + if (!target_affinity_mask) { + hvc_ret_val = PSCI_RET_INVALID_PARAMS; + goto done; + } + + /* Ignore other bits of target affinity */ + target_affinity &= target_affinity_mask; + + hvc_ret_val = PSCI_0_2_AFFINITY_LEVEL_OFF; + + /* + * If at least one vcpu matching target affinity is ON then return ON, + * then if at least one is PENDING_ON then return PENDING_ON. + * Otherwise, return OFF. + */ + for (i = 0; i < vm->created_vcpus; i++) { + tmp = vm->vcpus[i]; + mpidr = kvm_vcpu_get_mpidr_aff(tmp); + + if ((mpidr & target_affinity_mask) == target_affinity) { + int power_state; + + matching_cpus++; + power_state = READ_ONCE(tmp->arch.pkvm.power_state); + switch (power_state) { + case PSCI_0_2_AFFINITY_LEVEL_ON_PENDING: + hvc_ret_val = PSCI_0_2_AFFINITY_LEVEL_ON_PENDING; + break; + case PSCI_0_2_AFFINITY_LEVEL_ON: + hvc_ret_val = PSCI_0_2_AFFINITY_LEVEL_ON; + goto done; + case PSCI_0_2_AFFINITY_LEVEL_OFF: + break; + default: + hvc_ret_val = PSCI_RET_INTERNAL_FAILURE; + goto done; + } + } + } + + if (!matching_cpus) + hvc_ret_val = PSCI_RET_INVALID_PARAMS; + +done: + /* Nothing to be handled by the host. Go back to the guest. */ + smccc_set_retval(vcpu, hvc_ret_val, 0, 0, 0); + return true; +} + +/* + * Returns true if the hypervisor has handled the PSCI call, and control should + * go back to the guest, or false if the host needs to do some additional work + * (e.g., turn off and update vcpu scheduling status). + */ +static bool pvm_psci_vcpu_off(struct kvm_vcpu *vcpu) +{ + WARN_ON(vcpu->arch.power_off); + WARN_ON(vcpu->arch.pkvm.power_state != PSCI_0_2_AFFINITY_LEVEL_ON); + + WRITE_ONCE(vcpu->arch.power_off, true); + WRITE_ONCE(vcpu->arch.pkvm.power_state, PSCI_0_2_AFFINITY_LEVEL_OFF); + + /* Return to the host so that it can finish powering off the vcpu. */ + return false; +} + +static bool pvm_psci_version(struct kvm_vcpu *vcpu) +{ + /* Nothing to be handled by the host. Go back to the guest. */ + smccc_set_retval(vcpu, KVM_ARM_PSCI_1_1, 0, 0, 0); + return true; +} + +static bool pvm_psci_not_supported(struct kvm_vcpu *vcpu) +{ + /* Nothing to be handled by the host. Go back to the guest. */ + smccc_set_retval(vcpu, PSCI_RET_NOT_SUPPORTED, 0, 0, 0); + return true; +} + +static bool pvm_psci_features(struct kvm_vcpu *vcpu) +{ + u32 feature = smccc_get_arg1(vcpu); + unsigned long val; + + switch (feature) { + case PSCI_0_2_FN_PSCI_VERSION: + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + case PSCI_0_2_FN_CPU_OFF: + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN64_AFFINITY_INFO: + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + case PSCI_1_0_FN_PSCI_FEATURES: + case PSCI_1_1_FN_SYSTEM_RESET2: + case PSCI_1_1_FN64_SYSTEM_RESET2: + case ARM_SMCCC_VERSION_FUNC_ID: + val = PSCI_RET_SUCCESS; + break; + default: + val = PSCI_RET_NOT_SUPPORTED; + break; + } + + /* Nothing to be handled by the host. Go back to the guest. */ + smccc_set_retval(vcpu, val, 0, 0, 0); + return true; +} + +static bool pkvm_handle_psci(struct kvm_vcpu *vcpu) +{ + u32 psci_fn = smccc_get_function(vcpu); + + switch (psci_fn) { + case PSCI_0_2_FN_CPU_ON: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_0_2_FN64_CPU_ON: + return pvm_psci_vcpu_on(vcpu); + case PSCI_0_2_FN_CPU_OFF: + return pvm_psci_vcpu_off(vcpu); + case PSCI_0_2_FN_AFFINITY_INFO: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_0_2_FN64_AFFINITY_INFO: + return pvm_psci_vcpu_affinity_info(vcpu); + case PSCI_0_2_FN_PSCI_VERSION: + return pvm_psci_version(vcpu); + case PSCI_1_0_FN_PSCI_FEATURES: + return pvm_psci_features(vcpu); + case PSCI_0_2_FN_SYSTEM_RESET: + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_1_1_FN_SYSTEM_RESET2: + case PSCI_1_1_FN64_SYSTEM_RESET2: + return false; /* Handled by the host. */ + default: + break; + } + + return pvm_psci_not_supported(vcpu); +} + +static u64 __pkvm_memshare_page_req(struct kvm_vcpu *vcpu, u64 ipa) +{ + u64 elr; + + /* Fake up a data abort (Level 3 translation fault on write) */ + vcpu->arch.fault.esr_el2 = (u32)ESR_ELx_EC_DABT_LOW << ESR_ELx_EC_SHIFT | + ESR_ELx_WNR | ESR_ELx_FSC_FAULT | + FIELD_PREP(ESR_ELx_FSC_LEVEL, 3); + + /* Shuffle the IPA around into the HPFAR */ + vcpu->arch.fault.hpfar_el2 = (ipa >> 8) & HPFAR_MASK; + + /* This is a virtual address. 0's good. Let's go with 0. */ + vcpu->arch.fault.far_el2 = 0; + + /* Rewind the ELR so we return to the HVC once the IPA is mapped */ + elr = read_sysreg(elr_el2); + elr -=4; + write_sysreg(elr, elr_el2); + + return ARM_EXCEPTION_TRAP; +} + +static bool pkvm_memshare_call(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + u64 ipa = smccc_get_arg1(vcpu); + u64 arg2 = smccc_get_arg2(vcpu); + u64 arg3 = smccc_get_arg3(vcpu); + int err; + + if (arg2 || arg3) + goto out_guest_err; + + err = __pkvm_guest_share_host(vcpu, ipa); + switch (err) { + case 0: + /* Success! Now tell the host. */ + goto out_host; + case -EFAULT: + /* + * Convert the exception into a data abort so that the page + * being shared is mapped into the guest next time. + */ + *exit_code = __pkvm_memshare_page_req(vcpu, ipa); + goto out_host; + } + +out_guest_err: + smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0); + return true; + +out_host: + return false; +} + +static bool pkvm_memunshare_call(struct kvm_vcpu *vcpu) +{ + u64 ipa = smccc_get_arg1(vcpu); + u64 arg2 = smccc_get_arg2(vcpu); + u64 arg3 = smccc_get_arg3(vcpu); + int err; + + if (arg2 || arg3) + goto out_guest_err; + + err = __pkvm_guest_unshare_host(vcpu, ipa); + if (err) + goto out_guest_err; + + return false; + +out_guest_err: + smccc_set_retval(vcpu, SMCCC_RET_INVALID_PARAMETER, 0, 0, 0); + return true; +} + +static bool pkvm_install_ioguard_page(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + u32 retval = SMCCC_RET_SUCCESS; + u64 ipa = smccc_get_arg1(vcpu); + int ret; + + ret = __pkvm_install_ioguard_page(vcpu, ipa); + if (ret == -ENOMEM) { + /* + * We ran out of memcache, let's ask for more. Cancel + * the effects of the HVC that took us here, and + * forward the hypercall to the host for page donation + * purposes. + */ + write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR); + return false; + } + + if (ret) + retval = SMCCC_RET_INVALID_PARAMETER; + + smccc_set_retval(vcpu, retval, 0, 0, 0); + return true; +} + +bool smccc_trng_available; + +static bool pkvm_forward_trng(struct kvm_vcpu *vcpu) +{ + u32 fn = smccc_get_function(vcpu); + struct arm_smccc_res res; + unsigned long arg1 = 0; + + /* + * Forward TRNG calls to EL3, as we can't trust the host to handle + * these for us. + */ + switch (fn) { + case ARM_SMCCC_TRNG_FEATURES: + case ARM_SMCCC_TRNG_RND32: + case ARM_SMCCC_TRNG_RND64: + arg1 = smccc_get_arg1(vcpu); + fallthrough; + case ARM_SMCCC_TRNG_VERSION: + case ARM_SMCCC_TRNG_GET_UUID: + arm_smccc_1_1_smc(fn, arg1, &res); + smccc_set_retval(vcpu, res.a0, res.a1, res.a2, res.a3); + memzero_explicit(&res, sizeof(res)); + break; + } + + return true; +} + +/* + * Handler for protected VM HVC calls. + * + * Returns true if the hypervisor has handled the exit, and control should go + * back to the guest, or false if it hasn't. + */ +bool kvm_handle_pvm_hvc64(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + u32 fn = smccc_get_function(vcpu); + u64 val[4] = { SMCCC_RET_NOT_SUPPORTED }; + + switch (fn) { + case ARM_SMCCC_VERSION_FUNC_ID: + /* Nothing to be handled by the host. Go back to the guest. */ + val[0] = ARM_SMCCC_VERSION_1_1; + break; + case ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID: + val[0] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0; + val[1] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1; + val[2] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2; + val[3] = ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3; + break; + case ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID: + val[0] = BIT(ARM_SMCCC_KVM_FUNC_FEATURES); + val[0] |= BIT(ARM_SMCCC_KVM_FUNC_HYP_MEMINFO); + val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_SHARE); + val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MEM_UNSHARE); + val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO); + val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL); + val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP); + val[0] |= BIT(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP); + break; + case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID: + set_bit(KVM_ARCH_FLAG_MMIO_GUARD, &vcpu->arch.pkvm.shadow_vm->arch.flags); + val[0] = SMCCC_RET_SUCCESS; + break; + case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID: + return pkvm_install_ioguard_page(vcpu, exit_code); + case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID: + if (__pkvm_remove_ioguard_page(vcpu, vcpu_get_reg(vcpu, 1))) + val[0] = SMCCC_RET_SUCCESS; + break; + case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID: + case ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID: + if (smccc_get_arg1(vcpu) || + smccc_get_arg2(vcpu) || + smccc_get_arg3(vcpu)) { + val[0] = SMCCC_RET_INVALID_PARAMETER; + } else { + val[0] = PAGE_SIZE; + } + break; + case ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID: + return pkvm_memshare_call(vcpu, exit_code); + case ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID: + return pkvm_memunshare_call(vcpu); + case ARM_SMCCC_TRNG_VERSION ... ARM_SMCCC_TRNG_RND32: + case ARM_SMCCC_TRNG_RND64: + if (smccc_trng_available) + return pkvm_forward_trng(vcpu); + break; + default: + return pkvm_handle_psci(vcpu); + } + + smccc_set_retval(vcpu, val[0], val[1], val[2], val[3]); + return true; +} diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index 08508783ec3d73ff18187365a22fd81900e29372..85f7cad956d54712d530bad380c528104dd89e36 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -12,6 +12,7 @@ #include #include +#include #include void kvm_hyp_cpu_entry(unsigned long r0); @@ -221,6 +222,44 @@ asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on) __host_enter(host_ctxt); } +static DEFINE_HYP_SPINLOCK(mem_protect_lock); + +static u64 psci_mem_protect(s64 offset) +{ + static u64 cnt; + u64 new = cnt + offset; + + hyp_assert_lock_held(&mem_protect_lock); + + if (!offset || kvm_host_psci_config.version < PSCI_VERSION(1, 1)) + return cnt; + + if (!cnt || !new) + psci_call(PSCI_1_1_FN64_MEM_PROTECT, offset < 0 ? 0 : 1, 0, 0); + + cnt = new; + return cnt; +} + +static bool psci_mem_protect_active(void) +{ + return psci_mem_protect(0); +} + +void psci_mem_protect_inc(void) +{ + hyp_spin_lock(&mem_protect_lock); + psci_mem_protect(1); + hyp_spin_unlock(&mem_protect_lock); +} + +void psci_mem_protect_dec(void) +{ + hyp_spin_lock(&mem_protect_lock); + psci_mem_protect(-1); + hyp_spin_unlock(&mem_protect_lock); +} + static unsigned long psci_0_1_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { if (is_psci_0_1(cpu_off, func_id) || is_psci_0_1(migrate, func_id)) @@ -249,6 +288,9 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ */ case PSCI_0_2_FN_SYSTEM_OFF: case PSCI_0_2_FN_SYSTEM_RESET: + pkvm_clear_pvmfw_pages(); + /* Avoid racing with a MEM_PROTECT call. */ + hyp_spin_lock(&mem_protect_lock); return psci_forward(host_ctxt); case PSCI_0_2_FN64_CPU_SUSPEND: return psci_cpu_suspend(func_id, host_ctxt); @@ -262,9 +304,16 @@ static unsigned long psci_0_2_handler(u64 func_id, struct kvm_cpu_context *host_ static unsigned long psci_1_0_handler(u64 func_id, struct kvm_cpu_context *host_ctxt) { switch (func_id) { + case PSCI_1_1_FN64_SYSTEM_RESET2: + pkvm_clear_pvmfw_pages(); + hyp_spin_lock(&mem_protect_lock); + if (psci_mem_protect_active()) { + return psci_0_2_handler(PSCI_0_2_FN_SYSTEM_RESET, + host_ctxt); + } + fallthrough; case PSCI_1_0_FN_PSCI_FEATURES: case PSCI_1_0_FN_SET_SUSPEND_MODE: - case PSCI_1_1_FN64_SYSTEM_RESET2: return psci_forward(host_ctxt); case PSCI_1_0_FN64_SYSTEM_SUSPEND: return psci_system_suspend(func_id, host_ctxt); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 57c27846320f426149b8fbae67303cb91d857a4f..1a6f3eba50358eff118f1d3ed1e5324b5ab3cd52 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -8,37 +8,50 @@ #include #include #include +#include #include +#include #include +#include #include #include #include +#include #include -struct hyp_pool hpool; unsigned long hyp_nr_cpus; +phys_addr_t pvmfw_base; +phys_addr_t pvmfw_size; + #define hyp_percpu_size ((unsigned long)__per_cpu_end - \ (unsigned long)__per_cpu_start) static void *vmemmap_base; +static void *shadow_table_base; static void *hyp_pgt_base; static void *host_s2_pgt_base; +static void *ffa_proxy_pages; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; +static struct hyp_pool hpool; static int divide_memory_pool(void *virt, unsigned long size) { - unsigned long vstart, vend, nr_pages; + unsigned long nr_pages; hyp_early_alloc_init(virt, size); - hyp_vmemmap_range(__hyp_pa(virt), size, &vstart, &vend); - nr_pages = (vend - vstart) >> PAGE_SHIFT; + nr_pages = hyp_vmemmap_pages(sizeof(struct hyp_page)); vmemmap_base = hyp_early_alloc_contig(nr_pages); if (!vmemmap_base) return -ENOMEM; + nr_pages = hyp_shadow_table_pages(sizeof(struct kvm_shadow_vm)); + shadow_table_base = hyp_early_alloc_contig(nr_pages); + if (!shadow_table_base) + return -ENOMEM; + nr_pages = hyp_s1_pgtable_pages(); hyp_pgt_base = hyp_early_alloc_contig(nr_pages); if (!hyp_pgt_base) @@ -49,6 +62,11 @@ static int divide_memory_pool(void *virt, unsigned long size) if (!host_s2_pgt_base) return -ENOMEM; + nr_pages = hyp_ffa_proxy_pages(); + ffa_proxy_pages = hyp_early_alloc_contig(nr_pages); + if (!ffa_proxy_pages) + return -ENOMEM; + return 0; } @@ -76,7 +94,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - ret = hyp_back_vmemmap(phys, size, hyp_virt_to_phys(vmemmap_base)); + ret = hyp_back_vmemmap(hyp_virt_to_phys(vmemmap_base)); if (ret) return ret; @@ -84,6 +102,10 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; + ret = pkvm_create_mappings(__hyp_data_start, __hyp_data_end, PAGE_HYP); + if (ret) + return ret; + ret = pkvm_create_mappings(__hyp_rodata_start, __hyp_rodata_end, PAGE_HYP_RO); if (ret) return ret; @@ -111,20 +133,23 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, } /* - * Map the host's .bss and .rodata sections RO in the hypervisor, but - * transfer the ownership from the host to the hypervisor itself to - * make sure it can't be donated or shared with another entity. + * Map the host sections RO in the hypervisor, but transfer the + * ownership from the host to the hypervisor itself to make sure they + * can't be donated or shared with another entity. * * The ownership transition requires matching changes in the host * stage-2. This will be done later (see finalize_host_mappings()) once * the hyp_vmemmap is addressable. */ prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED); - ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot); + ret = pkvm_create_mappings(&kvm_vgic_global_state, &kvm_vgic_global_state + 1, prot); if (ret) return ret; - ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot); + start = hyp_phys_to_virt(pvmfw_base); + end = start + pvmfw_size; + prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); + ret = pkvm_create_mappings(start, end, prot); if (ret) return ret; @@ -159,10 +184,10 @@ static void hpool_put_page(void *addr) hyp_put_page(&hpool, addr); } -static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int fix_host_ownership_walker(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) { enum kvm_pgtable_prot prot; enum pkvm_page_state state; @@ -177,7 +202,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, phys = kvm_pte_to_phys(pte); if (!addr_is_memory(phys)) - return 0; + return -EINVAL; /* * Adjust the host stage-2 mappings to match the ownership attributes @@ -200,14 +225,55 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); } -static int finalize_host_mappings(void) +static int fix_hyp_pgtable_refcnt_walker(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) +{ + struct kvm_pgtable_mm_ops *mm_ops = arg; + kvm_pte_t pte = *ptep; + + /* + * Fix-up the refcount for the page-table pages as the early allocator + * was unable to access the hyp_vmemmap and so the buddy allocator has + * initialised the refcount to '1'. + */ + if (kvm_pte_valid(pte)) + mm_ops->get_page(ptep); + + return 0; +} + +static int fix_host_ownership(void) { struct kvm_pgtable_walker walker = { - .cb = finalize_host_mappings_walker, + .cb = fix_host_ownership_walker, .flags = KVM_PGTABLE_WALK_LEAF, }; + int i, ret; + + for (i = 0; i < hyp_memblock_nr; i++) { + struct memblock_region *reg = &hyp_memory[i]; + u64 start = (u64)hyp_phys_to_virt(reg->base); - return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), &walker); + ret = kvm_pgtable_walk(&pkvm_pgtable, start, reg->size, &walker); + if (ret) + return ret; + } + + return 0; +} + +static int fix_hyp_pgtable_refcnt(void) +{ + struct kvm_pgtable_walker walker = { + .cb = fix_hyp_pgtable_refcnt_walker, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + .arg = pkvm_pgtable.mm_ops, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), + &walker); } void __noreturn __pkvm_init_finalise(void) @@ -229,19 +295,33 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; - ret = finalize_host_mappings(); - if (ret) - goto out; - pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .get_page = hpool_get_page, .put_page = hpool_put_page, + .page_count = hyp_page_count, }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; + ret = fix_host_ownership(); + if (ret) + goto out; + + ret = fix_hyp_pgtable_refcnt(); + if (ret) + goto out; + + ret = hyp_create_pcpu_fixmap(); + if (ret) + goto out; + + ret = hyp_ffa_init(ffa_proxy_pages); + if (ret) + goto out; + + hyp_shadow_table_init(shadow_table_base); out: /* * We tail-called to here from handle___pkvm_init() and will not return, @@ -260,6 +340,8 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, void (*fn)(phys_addr_t params_pa, void *finalize_fn_va); int ret; + BUG_ON(kvm_check_pvm_sysreg_table()); + if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size)) return -EINVAL; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index a34b01cc8ab9e8a3f2423e1d969f1f6af4c9fbe6..87338775288cd842f49425ec6f5066c53f7e78d8 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -25,9 +25,9 @@ #include #include #include -#include #include +#include /* Non-VHE specific context */ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); @@ -158,6 +158,97 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) write_sysreg(pmu->events_host, pmcntenset_el0); } +/** + * Handler for protected VM MSR, MRS or System instruction execution in AArch64. + * + * Returns true if the hypervisor has handled the exit, and control should go + * back to the guest, or false if it hasn't. + */ +static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + /* + * Make sure we handle the exit for workarounds and ptrauth + * before the pKVM handling, as the latter could decide to + * UNDEF. + */ + return (kvm_hyp_handle_sysreg(vcpu, exit_code) || + kvm_handle_pvm_sysreg(vcpu, exit_code)); +} + +/** + * Handler for protected floating-point and Advanced SIMD accesses. + * + * Returns true if the hypervisor has handled the exit, and control should go + * back to the guest, or false if it hasn't. + */ +static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + /* Linux guests assume support for floating-point and Advanced SIMD. */ + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP), + PVM_ID_AA64PFR0_ALLOW)); + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD), + PVM_ID_AA64PFR0_ALLOW)); + + return kvm_hyp_handle_fpsimd(vcpu, exit_code); +} + +static const exit_handler_fn hyp_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, + [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg, + [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, + [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, +}; + +static const exit_handler_fn pvm_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_HVC64] = kvm_handle_pvm_hvc64, + [ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64, + [ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted, + [ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd, + [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, + [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, +}; + +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) +{ + if (unlikely(vcpu_is_protected(vcpu))) + return pvm_exit_handlers; + + return hyp_exit_handlers; +} + +/* + * Some guests (e.g., protected VMs) are not be allowed to run in AArch32. + * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a + * guest from dropping to AArch32 EL0 if implemented by the CPU. If the + * hypervisor spots a guest in such a state ensure it is handled, and don't + * trust the host to spot or fix it. The check below is based on the one in + * kvm_arch_vcpu_ioctl_run(). + * + * Returns false if the guest ran in AArch32 when it shouldn't have, and + * thus should exit to the host, or true if a the guest run loop can continue. + */ +static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) { + /* + * As we have caught the guest red-handed, decide that it isn't + * fit for purpose anymore by making the vcpu invalid. The VMM + * can try and fix it by re-initializing the vcpu with + * KVM_ARM_VCPU_INIT, however, this is likely not possible for + * protected VMs. + */ + vcpu->arch.target = -1; + *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT); + *exit_code |= ARM_EXCEPTION_IL; + } +} + /* Switch to the guest for legacy non-VHE systems */ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c new file mode 100644 index 0000000000000000000000000000000000000000..1729b4d6059e021f0e0fa26366ab2984608d1eaa --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#include + +#include +#include +#include + +#include +#include + +#include "../../sys_regs.h" + +/* + * Copies of the host's CPU features registers holding sanitized values at hyp. + */ +u64 id_aa64pfr0_el1_sys_val; +u64 id_aa64pfr1_el1_sys_val; +u64 id_aa64isar0_el1_sys_val; +u64 id_aa64isar1_el1_sys_val; +u64 id_aa64mmfr0_el1_sys_val; +u64 id_aa64mmfr1_el1_sys_val; +u64 id_aa64mmfr2_el1_sys_val; + +/* + * Inject an unknown/undefined exception to an AArch64 guest while most of its + * sysregs are live. + */ +static void inject_undef64(struct kvm_vcpu *vcpu) +{ + u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); + + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); + + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); + + __kvm_adjust_pc(vcpu); + + write_sysreg_el1(esr, SYS_ESR); + write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); + write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); +} + +/* + * Returns the restricted features values of the feature register based on the + * limitations in restrict_fields. + * A feature id field value of 0b0000 does not impose any restrictions. + * Note: Use only for unsigned feature field values. + */ +static u64 get_restricted_features_unsigned(u64 sys_reg_val, + u64 restrict_fields) +{ + u64 value = 0UL; + u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + + /* + * According to the Arm Architecture Reference Manual, feature fields + * use increasing values to indicate increases in functionality. + * Iterate over the restricted feature fields and calculate the minimum + * unsigned value between the one supported by the system, and what the + * value is being restricted to. + */ + while (sys_reg_val && restrict_fields) { + value |= min(sys_reg_val & mask, restrict_fields & mask); + sys_reg_val &= ~mask; + restrict_fields &= ~mask; + mask <<= ARM64_FEATURE_FIELD_BITS; + } + + return value; +} + +/* + * Functions that return the value of feature id registers for protected VMs + * based on allowed features, system features, and KVM support. + */ + +static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) +{ + const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); + u64 set_mask = 0; + u64 allow_mask = PVM_ID_AA64PFR0_ALLOW; + + if (!vcpu_has_sve(vcpu)) + allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE); + + set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val, + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED); + + /* Spectre and Meltdown mitigation in KVM */ + set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), + (u64)kvm->arch.pfr0_csv2); + set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), + (u64)kvm->arch.pfr0_csv3); + + return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask; +} + +static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) +{ + const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); + u64 allow_mask = PVM_ID_AA64PFR1_ALLOW; + + if (!kvm_has_mte(kvm)) + allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); + + return id_aa64pfr1_el1_sys_val & allow_mask; +} + +static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) +{ + /* + * No support for Scalable Vectors, therefore, hyp has no sanitized + * copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL); + return 0; +} + +static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) +{ + /* + * No support for debug, including breakpoints, and watchpoints, + * therefore, pKVM has no sanitized copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL); + return 0; +} + +static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) +{ + /* + * No support for debug, therefore, hyp has no sanitized copy of the + * feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL); + return 0; +} + +static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) +{ + /* + * No support for implementation defined features, therefore, hyp has no + * sanitized copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL); + return 0; +} + +static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) +{ + /* + * No support for implementation defined features, therefore, hyp has no + * sanitized copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL); + return 0; +} + +static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu) +{ + return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW; +} + +static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) +{ + u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW; + + if (!vcpu_has_ptrauth(vcpu)) + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); + + return id_aa64isar1_el1_sys_val & allow_mask; +} + +static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) +{ + u64 set_mask; + + set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val, + PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED); + + return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask; +} + +static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu) +{ + return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW; +} + +static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu) +{ + return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW; +} + +/* Read a sanitized cpufeature ID register by its encoding */ +u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) +{ + switch (id) { + case SYS_ID_AA64PFR0_EL1: + return get_pvm_id_aa64pfr0(vcpu); + case SYS_ID_AA64PFR1_EL1: + return get_pvm_id_aa64pfr1(vcpu); + case SYS_ID_AA64ZFR0_EL1: + return get_pvm_id_aa64zfr0(vcpu); + case SYS_ID_AA64DFR0_EL1: + return get_pvm_id_aa64dfr0(vcpu); + case SYS_ID_AA64DFR1_EL1: + return get_pvm_id_aa64dfr1(vcpu); + case SYS_ID_AA64AFR0_EL1: + return get_pvm_id_aa64afr0(vcpu); + case SYS_ID_AA64AFR1_EL1: + return get_pvm_id_aa64afr1(vcpu); + case SYS_ID_AA64ISAR0_EL1: + return get_pvm_id_aa64isar0(vcpu); + case SYS_ID_AA64ISAR1_EL1: + return get_pvm_id_aa64isar1(vcpu); + case SYS_ID_AA64MMFR0_EL1: + return get_pvm_id_aa64mmfr0(vcpu); + case SYS_ID_AA64MMFR1_EL1: + return get_pvm_id_aa64mmfr1(vcpu); + case SYS_ID_AA64MMFR2_EL1: + return get_pvm_id_aa64mmfr2(vcpu); + default: + /* Unhandled ID register, RAZ */ + return 0; + } +} + +static u64 read_id_reg(const struct kvm_vcpu *vcpu, + struct sys_reg_desc const *r) +{ + return pvm_read_id_reg(vcpu, reg_to_encoding(r)); +} + +/* Handler to RAZ/WI sysregs */ +static bool pvm_access_raz_wi(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!p->is_write) + p->regval = 0; + + return true; +} + +/* + * Accessor for AArch32 feature id registers. + * + * The value of these registers is "unknown" according to the spec if AArch32 + * isn't supported. + */ +static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + inject_undef64(vcpu); + return false; + } + + /* + * No support for AArch32 guests, therefore, pKVM has no sanitized copy + * of AArch32 feature id registers. + */ + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY); + + return pvm_access_raz_wi(vcpu, p, r); +} + +/* + * Accessor for AArch64 feature id registers. + * + * If access is allowed, set the regval to the protected VM's view of the + * register and return true. + * Otherwise, inject an undefined exception and return false. + */ +static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + inject_undef64(vcpu); + return false; + } + + p->regval = read_id_reg(vcpu, r); + return true; +} + +static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + /* pVMs only support GICv3. 'nuf said. */ + if (!p->is_write) + p->regval = ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB | ICC_SRE_EL1_SRE; + + return true; +} + +/* Mark the specified system register as an AArch32 feature id register. */ +#define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 } + +/* Mark the specified system register as an AArch64 feature id register. */ +#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 } + +/* + * sys_reg_desc initialiser for architecturally unallocated cpufeature ID + * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2 + * (1 <= crm < 8, 0 <= Op2 < 8). + */ +#define ID_UNALLOCATED(crm, op2) { \ + Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ + .access = pvm_access_id_aarch64, \ +} + +/* Mark the specified system register as Read-As-Zero/Write-Ignored */ +#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi } + +/* Mark the specified system register as not being handled in hyp. */ +#define HOST_HANDLED(REG) { SYS_DESC(REG), .access = NULL } + +/* + * Architected system registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + * + * NOTE: Anything not explicitly listed here is *restricted by default*, i.e., + * it will lead to injecting an exception into the guest. + */ +static const struct sys_reg_desc pvm_sys_reg_descs[] = { + /* Cache maintenance by set/way operations are restricted. */ + + /* Debug and Trace Registers are restricted. */ + RAZ_WI(SYS_DBGBVRn_EL1(0)), + RAZ_WI(SYS_DBGBCRn_EL1(0)), + RAZ_WI(SYS_DBGWVRn_EL1(0)), + RAZ_WI(SYS_DBGWCRn_EL1(0)), + RAZ_WI(SYS_MDSCR_EL1), + RAZ_WI(SYS_OSLAR_EL1), + RAZ_WI(SYS_OSLSR_EL1), + RAZ_WI(SYS_OSDLR_EL1), + + /* Group 1 ID registers */ + RAZ_WI(SYS_REVIDR_EL1), + + /* AArch64 mappings of the AArch32 ID registers */ + /* CRm=1 */ + AARCH32(SYS_ID_PFR0_EL1), + AARCH32(SYS_ID_PFR1_EL1), + AARCH32(SYS_ID_DFR0_EL1), + AARCH32(SYS_ID_AFR0_EL1), + AARCH32(SYS_ID_MMFR0_EL1), + AARCH32(SYS_ID_MMFR1_EL1), + AARCH32(SYS_ID_MMFR2_EL1), + AARCH32(SYS_ID_MMFR3_EL1), + + /* CRm=2 */ + AARCH32(SYS_ID_ISAR0_EL1), + AARCH32(SYS_ID_ISAR1_EL1), + AARCH32(SYS_ID_ISAR2_EL1), + AARCH32(SYS_ID_ISAR3_EL1), + AARCH32(SYS_ID_ISAR4_EL1), + AARCH32(SYS_ID_ISAR5_EL1), + AARCH32(SYS_ID_MMFR4_EL1), + AARCH32(SYS_ID_ISAR6_EL1), + + /* CRm=3 */ + AARCH32(SYS_MVFR0_EL1), + AARCH32(SYS_MVFR1_EL1), + AARCH32(SYS_MVFR2_EL1), + ID_UNALLOCATED(3,3), + AARCH32(SYS_ID_PFR2_EL1), + AARCH32(SYS_ID_DFR1_EL1), + AARCH32(SYS_ID_MMFR5_EL1), + ID_UNALLOCATED(3,7), + + /* AArch64 ID registers */ + /* CRm=4 */ + AARCH64(SYS_ID_AA64PFR0_EL1), + AARCH64(SYS_ID_AA64PFR1_EL1), + ID_UNALLOCATED(4,2), + ID_UNALLOCATED(4,3), + AARCH64(SYS_ID_AA64ZFR0_EL1), + ID_UNALLOCATED(4,5), + ID_UNALLOCATED(4,6), + ID_UNALLOCATED(4,7), + AARCH64(SYS_ID_AA64DFR0_EL1), + AARCH64(SYS_ID_AA64DFR1_EL1), + ID_UNALLOCATED(5,2), + ID_UNALLOCATED(5,3), + AARCH64(SYS_ID_AA64AFR0_EL1), + AARCH64(SYS_ID_AA64AFR1_EL1), + ID_UNALLOCATED(5,6), + ID_UNALLOCATED(5,7), + AARCH64(SYS_ID_AA64ISAR0_EL1), + AARCH64(SYS_ID_AA64ISAR1_EL1), + AARCH64(SYS_ID_AA64ISAR2_EL1), + ID_UNALLOCATED(6,3), + ID_UNALLOCATED(6,4), + ID_UNALLOCATED(6,5), + ID_UNALLOCATED(6,6), + ID_UNALLOCATED(6,7), + AARCH64(SYS_ID_AA64MMFR0_EL1), + AARCH64(SYS_ID_AA64MMFR1_EL1), + AARCH64(SYS_ID_AA64MMFR2_EL1), + ID_UNALLOCATED(7,3), + ID_UNALLOCATED(7,4), + ID_UNALLOCATED(7,5), + ID_UNALLOCATED(7,6), + ID_UNALLOCATED(7,7), + + /* Scalable Vector Registers are restricted. */ + + RAZ_WI(SYS_ERRIDR_EL1), + RAZ_WI(SYS_ERRSELR_EL1), + RAZ_WI(SYS_ERXFR_EL1), + RAZ_WI(SYS_ERXCTLR_EL1), + RAZ_WI(SYS_ERXSTATUS_EL1), + RAZ_WI(SYS_ERXADDR_EL1), + RAZ_WI(SYS_ERXMISC0_EL1), + RAZ_WI(SYS_ERXMISC1_EL1), + + /* Performance Monitoring Registers are restricted. */ + + /* Limited Ordering Regions Registers are restricted. */ + + HOST_HANDLED(SYS_ICC_SGI1R_EL1), + HOST_HANDLED(SYS_ICC_ASGI1R_EL1), + HOST_HANDLED(SYS_ICC_SGI0R_EL1), + { SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, }, + + HOST_HANDLED(SYS_CCSIDR_EL1), + HOST_HANDLED(SYS_CLIDR_EL1), + HOST_HANDLED(SYS_CSSELR_EL1), + HOST_HANDLED(SYS_CTR_EL0), + + /* Performance Monitoring Registers are restricted. */ + + /* Activity Monitoring Registers are restricted. */ + + HOST_HANDLED(SYS_CNTP_TVAL_EL0), + HOST_HANDLED(SYS_CNTP_CTL_EL0), + HOST_HANDLED(SYS_CNTP_CVAL_EL0), + + /* Performance Monitoring Registers are restricted. */ +}; + +/* A structure to track reset values for system registers in protected vcpus. */ +struct sys_reg_desc_reset { + /* Index into sys_reg[]. */ + int reg; + + /* Reset function. */ + void (*reset)(struct kvm_vcpu *, const struct sys_reg_desc_reset *); + + /* Reset value. */ + u64 value; +}; + +static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc_reset *r) +{ + __vcpu_sys_reg(vcpu, r->reg) = read_sysreg(actlr_el1); +} + +static void reset_amair_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc_reset *r) +{ + __vcpu_sys_reg(vcpu, r->reg) = read_sysreg(amair_el1); +} + +static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc_reset *r) +{ + __vcpu_sys_reg(vcpu, r->reg) = calculate_mpidr(vcpu); +} + +static void reset_value(struct kvm_vcpu *vcpu, const struct sys_reg_desc_reset *r) +{ + __vcpu_sys_reg(vcpu, r->reg) = r->value; +} + +/* Specify the register's reset value. */ +#define RESET_VAL(REG, RESET_VAL) { REG, reset_value, RESET_VAL } + +/* Specify a function that calculates the register's reset value. */ +#define RESET_FUNC(REG, RESET_FUNC) { REG, RESET_FUNC, 0 } + +/* + * Architected system registers reset values for Protected VMs. + * Important: Must be sorted ascending by REG (index into sys_reg[]) + */ +static const struct sys_reg_desc_reset pvm_sys_reg_reset_vals[] = { + RESET_FUNC(MPIDR_EL1, reset_mpidr), + RESET_VAL(SCTLR_EL1, 0x00C50078), + RESET_FUNC(ACTLR_EL1, reset_actlr), + RESET_VAL(CPACR_EL1, 0), + RESET_VAL(TCR_EL1, 0), + RESET_VAL(VBAR_EL1, 0), + RESET_VAL(CONTEXTIDR_EL1, 0), + RESET_FUNC(AMAIR_EL1, reset_amair_el1), + RESET_VAL(CNTKCTL_EL1, 0), + RESET_VAL(DISR_EL1, 0), +}; + +/* + * Sets system registers to reset value + * + * This function finds the right entry and sets the registers on the protected + * vcpu to their architecturally defined reset values. + */ +void kvm_reset_pvm_sys_regs(struct kvm_vcpu *vcpu) +{ + unsigned long i; + + for (i = 0; i < ARRAY_SIZE(pvm_sys_reg_reset_vals); i++) { + const struct sys_reg_desc_reset *r = &pvm_sys_reg_reset_vals[i]; + + r->reset(vcpu, r); + } +} + +/* + * Checks that the sysreg tables are unique and in-order. + * + * Returns 0 if the table is consistent, or 1 otherwise. + */ +int kvm_check_pvm_sysreg_table(void) +{ + unsigned int i; + + for (i = 1; i < ARRAY_SIZE(pvm_sys_reg_descs); i++) { + if (cmp_sys_reg(&pvm_sys_reg_descs[i-1], &pvm_sys_reg_descs[i]) >= 0) + return 1; + } + + for (i = 1; i < ARRAY_SIZE(pvm_sys_reg_reset_vals); i++) { + if (pvm_sys_reg_reset_vals[i-1].reg >= pvm_sys_reg_reset_vals[i].reg) + return 1; + } + + return 0; +} + +/* + * Handler for protected VM MSR, MRS or System instruction execution. + * + * Returns true if the hypervisor has handled the exit, and control should go + * back to the guest, or false if it hasn't, to be handled by the host. + */ +bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + const struct sys_reg_desc *r; + struct sys_reg_params params; + unsigned long esr = kvm_vcpu_get_esr(vcpu); + int Rt = kvm_vcpu_sys_get_rt(vcpu); + + params = esr_sys64_to_params(esr); + params.regval = vcpu_get_reg(vcpu, Rt); + + r = find_reg(¶ms, pvm_sys_reg_descs, ARRAY_SIZE(pvm_sys_reg_descs)); + + /* Undefined (RESTRICTED). */ + if (r == NULL) { + inject_undef64(vcpu); + return true; + } + + /* Handled by the host (HOST_HANDLED) */ + if (r->access == NULL) + return false; + + /* Handled by hyp: skip instruction if instructed to do so. */ + if (r->access(vcpu, ¶ms, r)) + __kvm_skip_instr(vcpu); + + if (!params.is_write) + vcpu_set_reg(vcpu, Rt, params.regval); + + return true; +} + +/* + * Handler for protected VM restricted exceptions. + * + * Inject an undefined exception into the guest and return true to indicate that + * the hypervisor has handled the exit, and control should go back to the guest. + */ +bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + inject_undef64(vcpu); + return true; +} diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index d296d617f589633924fa11eddfb75ace470d703b..3f5601176fab5a0ebaf9166b614e62a816ae5285 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -11,26 +11,62 @@ #include struct tlb_inv_context { - u64 tcr; + struct kvm_s2_mmu *mmu; + u64 tcr; + u64 sctlr; }; -static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, - struct tlb_inv_context *cxt) +static void enter_vmid_context(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt) { + struct kvm_s2_mmu *host_mmu = &host_kvm.arch.mmu; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; + cxt->mmu = NULL; + + /* + * If we're already in the desired context, then there's nothing + * to do. + */ + if (vcpu) { + if (mmu == vcpu->arch.hw_mmu || WARN_ON(mmu != host_mmu)) + return; + } else if (mmu == host_mmu) { + return; + } + + cxt->mmu = mmu; if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; /* * For CPUs that are affected by ARM 1319367, we need to - * avoid a host Stage-1 walk while we have the guest's - * VMID set in the VTTBR in order to invalidate TLBs. - * We're guaranteed that the S1 MMU is enabled, so we can - * simply set the EPD bits to avoid any further TLB fill. + * avoid a Stage-1 walk with the old VMID while we have + * the new VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the host S1 MMU is enabled, so + * we can simply set the EPD bits to avoid any further + * TLB fill. For guests, we ensure that the S1 MMU is + * temporarily enabled in the next context. */ val = cxt->tcr = read_sysreg_el1(SYS_TCR); val |= TCR_EPD1_MASK | TCR_EPD0_MASK; write_sysreg_el1(val, SYS_TCR); isb(); + + if (vcpu) { + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); + if (!(val & SCTLR_ELx_M)) { + val |= SCTLR_ELx_M; + write_sysreg_el1(val, SYS_SCTLR); + isb(); + } + } else { + /* The host S1 MMU is always enabled. */ + cxt->sctlr = SCTLR_ELx_M; + } } /* @@ -39,20 +75,44 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * ensuring that we always have an ISB, but not two ISBs back * to back. */ - __load_stage2(mmu, kern_hyp_va(mmu->arch)); + if (vcpu) + __load_host_stage2(); + else + __load_stage2(mmu, kern_hyp_va(mmu->arch)); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +static void exit_vmid_context(struct tlb_inv_context *cxt) { - __load_host_stage2(); + struct kvm_s2_mmu *mmu = cxt->mmu; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; + + if (!mmu) + return; + + if (vcpu) + __load_stage2(mmu, kern_hyp_va(mmu->arch)); + else + __load_host_stage2(); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Ensure write of the host VMID */ + /* Ensure write of the old VMID */ isb(); - /* Restore the host's TCR_EL1 */ + + if (!(cxt->sctlr & SCTLR_ELx_M)) { + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); + isb(); + } + write_sysreg_el1(cxt->tcr, SYS_TCR); } + + cxt->mmu = NULL; } void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, @@ -63,7 +123,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ishst); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); /* * We could do so much better if we had the VA as well. @@ -106,7 +166,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, if (icache_is_vpipt()) icache_inval_all_pou(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) @@ -116,13 +176,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) dsb(ishst); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) @@ -130,14 +190,14 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt); + enter_vmid_context(mmu, &cxt); __tlbi(vmalle1); asm volatile("ic iallu"); dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_vm_context(void) diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f8ceebe4982eb130b884800bfea771cd1353d4db..03bdf4f5082601c28ff063c4e12c748ee39a4e1f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -46,9 +46,6 @@ KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W | \ KVM_PTE_LEAF_ATTR_HI_S2_XN) -#define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) -#define KVM_MAX_OWNER_ID 1 - struct kvm_pgtable_walk_data { struct kvm_pgtable *pgt; struct kvm_pgtable_walker *walker; @@ -167,11 +164,6 @@ static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level) return pte; } -static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id) -{ - return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id); -} - static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag) @@ -383,21 +375,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) return prot; } -static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new) -{ - /* - * Tolerate KVM recreating the exact same mapping, or changing software - * bits if the existing mapping was valid. - */ - if (old == new) - return false; - - if (!kvm_pte_valid(old)) - return true; - - return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW); -} - static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct hyp_map_data *data) { @@ -407,11 +384,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return false; + data->phys += granule; new = kvm_init_valid_leaf_pte(phys, data->attr, level); - if (hyp_pte_needs_update(old, new)) - smp_store_release(ptep, new); + if (old == new) + return true; + if (!kvm_pte_valid(old)) + data->mm_ops->get_page(ptep); + else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) + return false; - data->phys += granule; + smp_store_release(ptep, new); return true; } @@ -433,6 +415,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, return -ENOMEM; kvm_set_table_pte(ptep, childp, mm_ops); + mm_ops->get_page(ptep); return 0; } @@ -460,6 +443,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, return ret; } +struct hyp_unmap_data { + u64 unmapped; + struct kvm_pgtable_mm_ops *mm_ops; +}; + +static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + kvm_pte_t pte = *ptep, *childp = NULL; + u64 granule = kvm_granule_size(level); + struct hyp_unmap_data *data = arg; + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + + if (!kvm_pte_valid(pte)) + return -EINVAL; + + if (kvm_pte_table(pte, level)) { + childp = kvm_pte_follow(pte, mm_ops); + + if (mm_ops->page_count(childp) != 1) + return 0; + + kvm_clear_pte(ptep); + dsb(ishst); + __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level); + } else { + if (end - addr < granule) + return -EINVAL; + + kvm_clear_pte(ptep); + dsb(ishst); + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level); + data->unmapped += granule; + } + + dsb(ish); + isb(); + mm_ops->put_page(ptep); + + if (childp) + mm_ops->put_page(childp); + + return 0; +} + +u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) +{ + struct hyp_unmap_data unmap_data = { + .mm_ops = pgt->mm_ops, + }; + struct kvm_pgtable_walker walker = { + .cb = hyp_unmap_walker, + .arg = &unmap_data, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + }; + + if (!pgt->mm_ops->page_count) + return 0; + + kvm_pgtable_walk(pgt, addr, size, &walker); + return unmap_data.unmapped; +} + int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, struct kvm_pgtable_mm_ops *mm_ops) { @@ -482,8 +528,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) { struct kvm_pgtable_mm_ops *mm_ops = arg; + kvm_pte_t pte = *ptep; + + if (!kvm_pte_valid(pte)) + return 0; + + mm_ops->put_page(ptep); + + if (kvm_pte_table(pte, level)) + mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); - mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops)); return 0; } @@ -491,7 +545,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) { struct kvm_pgtable_walker walker = { .cb = hyp_free_walker, - .flags = KVM_PGTABLE_WALK_TABLE_POST, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, .arg = pgt->mm_ops, }; @@ -503,7 +557,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) struct stage2_map_data { u64 phys; kvm_pte_t attr; - u8 owner_id; + u64 annotation; kvm_pte_t *anchor; kvm_pte_t *childp; @@ -638,12 +692,12 @@ static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) { u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR; - return memattr == KVM_S2_MEMATTR(pgt, NORMAL); + return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL); } static bool stage2_pte_executable(kvm_pte_t pte) { - return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); + return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level, @@ -670,7 +724,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (kvm_phys_is_valid(phys)) new = kvm_init_valid_leaf_pte(phys, data->attr, level); else - new = kvm_init_invalid_leaf_owner(data->owner_id); + new = data->annotation; if (stage2_pte_is_counted(old)) { /* @@ -682,20 +736,28 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!stage2_pte_needs_update(old, new)) return -EAGAIN; + /* + * If we're only changing software bits, then we don't need to + * do anything else/ + */ + if (!((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) + goto out_set_pte; + stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); } /* Perform CMOs before installation of the guest stage-2 PTE */ if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new)) mm_ops->dcache_clean_inval_poc(kvm_pte_follow(new, mm_ops), - granule); - + granule); if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); - smp_store_release(ptep, new); if (stage2_pte_is_counted(new)) mm_ops->get_page(ptep); + +out_set_pte: + smp_store_release(ptep, new); if (kvm_phys_is_valid(phys)) data->phys += granule; return 0; @@ -860,8 +922,8 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, - void *mc, u8 owner_id) +int kvm_pgtable_stage2_annotate(struct kvm_pgtable *pgt, u64 addr, u64 size, + void *mc, kvm_pte_t annotation) { int ret; struct stage2_map_data map_data = { @@ -869,8 +931,8 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, .mmu = pgt->mmu, .memcache = mc, .mm_ops = pgt->mm_ops, - .owner_id = owner_id, .force_pte = true, + .annotation = annotation, }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, @@ -880,7 +942,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, .arg = &map_data, }; - if (owner_id > KVM_MAX_OWNER_ID) + if (annotation & PTE_VALID) return -EINVAL; ret = kvm_pgtable_walk(pgt, addr, size, &walker); @@ -921,13 +983,9 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, */ stage2_put_pte(ptep, mmu, addr, level, mm_ops); - if (need_flush) { - kvm_pte_t *pte_follow = kvm_pte_follow(pte, mm_ops); - - dcache_clean_inval_poc((unsigned long)pte_follow, - (unsigned long)pte_follow + - kvm_granule_size(level)); - } + if (need_flush && mm_ops->dcache_clean_inval_poc) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); if (childp) mm_ops->put_page(childp); @@ -1089,15 +1147,13 @@ static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct kvm_pgtable *pgt = arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; kvm_pte_t pte = *ptep; - kvm_pte_t *pte_follow; - if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) + if (!stage2_pte_cacheable(pgt, pte)) return 0; - pte_follow = kvm_pte_follow(pte, mm_ops); - dcache_clean_inval_poc((unsigned long)pte_follow, - (unsigned long)pte_follow + - kvm_granule_size(level)); + if (mm_ops->dcache_clean_inval_poc) + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), + kvm_granule_size(level)); return 0; } @@ -1116,13 +1172,13 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size) } -int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, +int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, struct kvm_pgtable_mm_ops *mm_ops, enum kvm_pgtable_stage2_flags flags, kvm_pgtable_force_pte_cb_t force_pte_cb) { size_t pgd_sz; - u64 vtcr = arch->vtcr; + u64 vtcr = mmu->arch->vtcr; u32 ia_bits = VTCR_EL2_IPA(vtcr); u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; @@ -1135,7 +1191,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, pgt->ia_bits = ia_bits; pgt->start_level = start_level; pgt->mm_ops = mm_ops; - pgt->mmu = &arch->mmu; + pgt->mmu = mmu; pgt->flags = flags; pgt->force_pte_cb = force_pte_cb; @@ -1144,6 +1200,15 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch, return 0; } +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) +{ + u32 ia_bits = VTCR_EL2_IPA(vtcr); + u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); + u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + + return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; +} + static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) diff --git a/arch/arm64/kvm/hyp/reserved_mem.c b/arch/arm64/kvm/hyp/reserved_mem.c deleted file mode 100644 index d654921dd09b5b19c07fa4b040de2d1c1845e1d7..0000000000000000000000000000000000000000 --- a/arch/arm64/kvm/hyp/reserved_mem.c +++ /dev/null @@ -1,112 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2020 - Google LLC - * Author: Quentin Perret - */ - -#include -#include -#include - -#include - -#include -#include - -static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory); -static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr); - -phys_addr_t hyp_mem_base; -phys_addr_t hyp_mem_size; - -static int cmp_hyp_memblock(const void *p1, const void *p2) -{ - const struct memblock_region *r1 = p1; - const struct memblock_region *r2 = p2; - - return r1->base < r2->base ? -1 : (r1->base > r2->base); -} - -static void __init sort_memblock_regions(void) -{ - sort(hyp_memory, - *hyp_memblock_nr_ptr, - sizeof(struct memblock_region), - cmp_hyp_memblock, - NULL); -} - -static int __init register_memblock_regions(void) -{ - struct memblock_region *reg; - - for_each_mem_region(reg) { - if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS) - return -ENOMEM; - - hyp_memory[*hyp_memblock_nr_ptr] = *reg; - (*hyp_memblock_nr_ptr)++; - } - sort_memblock_regions(); - - return 0; -} - -void __init kvm_hyp_reserve(void) -{ - u64 nr_pages, prev, hyp_mem_pages = 0; - int ret; - - if (!is_hyp_mode_available() || is_kernel_in_hyp_mode()) - return; - - if (kvm_get_mode() != KVM_MODE_PROTECTED) - return; - - ret = register_memblock_regions(); - if (ret) { - *hyp_memblock_nr_ptr = 0; - kvm_err("Failed to register hyp memblocks: %d\n", ret); - return; - } - - hyp_mem_pages += hyp_s1_pgtable_pages(); - hyp_mem_pages += host_s2_pgtable_pages(); - - /* - * The hyp_vmemmap needs to be backed by pages, but these pages - * themselves need to be present in the vmemmap, so compute the number - * of pages needed by looking for a fixed point. - */ - nr_pages = 0; - do { - prev = nr_pages; - nr_pages = hyp_mem_pages + prev; - nr_pages = DIV_ROUND_UP(nr_pages * sizeof(struct hyp_page), PAGE_SIZE); - nr_pages += __hyp_pgtable_max_pages(nr_pages); - } while (nr_pages != prev); - hyp_mem_pages += nr_pages; - - /* - * Try to allocate a PMD-aligned region to reduce TLB pressure once - * this is unmapped from the host stage-2, and fallback to PAGE_SIZE. - */ - hyp_mem_size = hyp_mem_pages << PAGE_SHIFT; - hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), - ALIGN(hyp_mem_size, PMD_SIZE), - PMD_SIZE); - if (!hyp_mem_base) - hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), - hyp_mem_size, PAGE_SIZE); - else - hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE); - - if (!hyp_mem_base) { - kvm_err("Failed to reserve hyp memory\n"); - return; - } - memblock_reserve(hyp_mem_base, hyp_mem_size); - - kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20, - hyp_mem_base); -} diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 39f8f7f9227c3eafabfea247bbe24b42ba9e6e0b..d5a51ea5459c936a0a748d679737ee6523f5e668 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -330,7 +330,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) write_gicreg(0, ICH_HCR_EL2); } -void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -363,7 +363,7 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) } } -void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +static void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) { u64 val; u32 nr_pre_bits; @@ -455,16 +455,35 @@ u64 __vgic_v3_get_gic_config(void) return val; } -u64 __vgic_v3_read_vmcr(void) +static u64 __vgic_v3_read_vmcr(void) { return read_gicreg(ICH_VMCR_EL2); } -void __vgic_v3_write_vmcr(u32 vmcr) +static void __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } +void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + __vgic_v3_save_aprs(cpu_if); + if (cpu_if->vgic_sre) + cpu_if->vgic_vmcr = __vgic_v3_read_vmcr(); +} + +void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + /* + * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen + * is dependent on ICC_SRE_EL1.SRE, and we have to perform the + * VMCR_EL2 save/restore in the world switch. + */ + if (cpu_if->vgic_sre) + __vgic_v3_write_vmcr(cpu_if->vgic_vmcr); + __vgic_v3_restore_aprs(cpu_if); +} + static int __vgic_v3_bpr_min(void) { /* See Pseudocode for VPriorityGroup */ @@ -695,9 +714,7 @@ static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) goto spurious; lr_val &= ~ICH_LR_STATE; - /* No active state for LPIs */ - if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI) - lr_val |= ICH_LR_ACTIVE_BIT; + lr_val |= ICH_LR_ACTIVE_BIT; __gic_v3_set_lr(lr_val, lr); __vgic_v3_set_active_priority(lr_prio, vmcr, grp); vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); @@ -764,20 +781,18 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) /* Drop priority in any case */ act_prio = __vgic_v3_clear_highest_active_priority(); - /* If EOIing an LPI, no deactivate to be performed */ - if (vid >= VGIC_MIN_LPI) - return; - - /* EOImode == 1, nothing to be done here */ - if (vmcr & ICH_VMCR_EOIM_MASK) - return; - lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); if (lr == -1) { - __vgic_v3_bump_eoicount(); + /* Do not bump EOIcount for LPIs that aren't in the LRs */ + if (!(vid >= VGIC_MIN_LPI)) + __vgic_v3_bump_eoicount(); return; } + /* EOImode == 1 and not an LPI, nothing to be done here */ + if ((vmcr & ICH_VMCR_EOIM_MASK) && !(vid >= VGIC_MIN_LPI)) + return; + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; /* If priorities or group do not match, the guest has fscked-up. */ @@ -988,7 +1003,8 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) /* IDbits */ val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; /* SEIS */ - val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT; + if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) + val |= BIT(ICC_CTLR_EL1_SEIS_SHIFT); /* A3V */ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; /* EOImode */ diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index ded2c66675f06fda66f8f666e28e6c901827c015..54af47005e456caf7d469eb8fb0e2433ec28353e 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,7 @@ #include #include #include +#include /* VHE specific context */ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); @@ -68,7 +70,7 @@ NOKPROBE_SYMBOL(__activate_traps); static void __deactivate_traps(struct kvm_vcpu *vcpu) { - extern char vectors[]; /* kernel exception vectors */ + const char *host_vectors = vectors; ___deactivate_traps(vcpu); @@ -82,7 +84,10 @@ static void __deactivate_traps(struct kvm_vcpu *vcpu) asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); + + if (!arm64_kernel_unmapped_at_el0()) + host_vectors = __this_cpu_read(this_cpu_vector); + write_sysreg(host_vectors, vbar_el1); } NOKPROBE_SYMBOL(__deactivate_traps); @@ -96,6 +101,26 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) __deactivate_traps_common(vcpu); } +static const exit_handler_fn hyp_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, + [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg, + [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, + [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, +}; + +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) +{ + return hyp_exit_handlers; +} + +static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) +{ +} + /* Switch to the guest for VHE systems running in EL2 */ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index 30da78f72b3b38f6abb427d9b2581b552bfd6f2c..bb04eb91e5e61b1bc42394190d0747d11d5ca6e5 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -107,6 +107,18 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) break; } break; + case ARM_SMCCC_ARCH_WORKAROUND_3: + switch (arm64_get_spectre_bhb_state()) { + case SPECTRE_VULNERABLE: + break; + case SPECTRE_MITIGATED: + val[0] = SMCCC_RET_SUCCESS; + break; + case SPECTRE_UNAFFECTED: + val[0] = SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED; + break; + } + break; case ARM_SMCCC_HV_PV_TIME_FEATURES: val[0] = SMCCC_RET_SUCCESS; break; @@ -133,6 +145,10 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) case ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID: kvm_ptp_get_time(vcpu, val); break; + case ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID: + if (kvm_vm_is_protected(vcpu->kvm) && !topup_hyp_memcache(vcpu)) + val[0] = SMCCC_RET_SUCCESS; + break; case ARM_SMCCC_TRNG_VERSION: case ARM_SMCCC_TRNG_FEATURES: case ARM_SMCCC_TRNG_GET_UUID: diff --git a/arch/arm64/kvm/iommu.c b/arch/arm64/kvm/iommu.c new file mode 100644 index 0000000000000000000000000000000000000000..6ca171327b289b01ba211c073c1c790632cf7ebe --- /dev/null +++ b/arch/arm64/kvm/iommu.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 - Google LLC + * Author: David Brazdil + */ + +#include + +static unsigned long dev_to_id(struct device *dev) +{ + /* Use the struct device pointer as a unique identifier. */ + return (unsigned long)dev; +} + +int pkvm_iommu_driver_init(enum pkvm_iommu_driver_id id, void *data, size_t size) +{ + return kvm_call_hyp_nvhe(__pkvm_iommu_driver_init, id, data, size); +} + +int pkvm_iommu_register(struct device *dev, enum pkvm_iommu_driver_id drv_id, + phys_addr_t pa, size_t size, struct device *parent) +{ + void *mem; + int ret; + + /* + * Hypcall to register the device. It will return -ENOMEM if it needs + * more memory. In that case allocate a page and retry. + * We assume that hyp never allocates more than a page per hypcall. + */ + ret = kvm_call_hyp_nvhe(__pkvm_iommu_register, dev_to_id(dev), + drv_id, pa, size, dev_to_id(parent), NULL, 0); + if (ret == -ENOMEM) { + mem = (void *)__get_free_page(GFP_KERNEL); + if (!mem) + return -ENOMEM; + + ret = kvm_call_hyp_nvhe(__pkvm_iommu_register, dev_to_id(dev), + drv_id, pa, size, dev_to_id(parent), + mem, PAGE_SIZE); + } + return ret; +} + +int pkvm_iommu_suspend(struct device *dev) +{ + return kvm_call_hyp_nvhe(__pkvm_iommu_pm_notify, dev_to_id(dev), + PKVM_IOMMU_PM_SUSPEND); +} +EXPORT_SYMBOL_GPL(pkvm_iommu_suspend); + +int pkvm_iommu_resume(struct device *dev) +{ + return kvm_call_hyp_nvhe(__pkvm_iommu_pm_notify, dev_to_id(dev), + PKVM_IOMMU_PM_RESUME); +} +EXPORT_SYMBOL_GPL(pkvm_iommu_resume); + +int pkvm_iommu_finalize(void) +{ + return kvm_call_hyp_nvhe(__pkvm_iommu_finalize); +} +EXPORT_SYMBOL_GPL(pkvm_iommu_finalize); diff --git a/arch/arm64/kvm/iommu/Makefile b/arch/arm64/kvm/iommu/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..cb3242c77b4cc664fa8dcf86681dc4d8149c3bd8 --- /dev/null +++ b/arch/arm64/kvm/iommu/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for Kernel-based Virtual Machine module +# + +obj-$(CONFIG_KVM_S2MPU) += s2mpu.o +obj-$(CONFIG_TEST_KVM_S2MPU) += test_kvm_s2mpu.o diff --git a/arch/arm64/kvm/iommu/s2mpu.c b/arch/arm64/kvm/iommu/s2mpu.c new file mode 100644 index 0000000000000000000000000000000000000000..733451d741007eb6a26023c48d6d695d687052f2 --- /dev/null +++ b/arch/arm64/kvm/iommu/s2mpu.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 - Google LLC + * Author: David Brazdil + */ + +#include +#include + +static int init_s2mpu_driver(void) +{ + static DEFINE_MUTEX(lock); + static bool init_done; + + struct mpt *mpt; + unsigned int gb; + unsigned long addr; + u64 pfn; + int ret = 0; + + mutex_lock(&lock); + if (init_done) + goto out; + + /* Allocate a page for driver data. Must fit MPT descriptor. */ + BUILD_BUG_ON(sizeof(*mpt) > PAGE_SIZE); + addr = __get_free_page(GFP_KERNEL); + if (!addr) { + ret = -ENOMEM; + goto out; + } + + mpt = (struct mpt *)addr; + + /* Allocate SMPT buffers. */ + for_each_gb(gb) { + addr = __get_free_pages(GFP_KERNEL, SMPT_ORDER); + if (!addr) { + ret = -ENOMEM; + goto out_free; + } + mpt->fmpt[gb].smpt = (u32 *)addr; + } + + /* Share MPT descriptor with hyp. */ + pfn = __pa(mpt) >> PAGE_SHIFT; + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn); + if (ret) + goto out_free; + + /* Hypercall to initialize EL2 driver. */ + ret = pkvm_iommu_driver_init(PKVM_IOMMU_DRIVER_S2MPU, mpt, sizeof(*mpt)); + if (ret) + goto out_unshare; + + init_done = true; + +out_unshare: + WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn)); +out_free: + /* TODO - will driver return the memory? */ + if (ret) { + for_each_gb(gb) + free_pages((unsigned long)mpt->fmpt[gb].smpt, SMPT_ORDER); + free_page((unsigned long)mpt); + } +out: + mutex_unlock(&lock); + return ret; +} + +int pkvm_iommu_s2mpu_register(struct device *dev, phys_addr_t addr) +{ + int ret; + + if (!is_protected_kvm_enabled()) + return -ENODEV; + + ret = init_s2mpu_driver(); + if (ret) + return ret; + + return pkvm_iommu_register(dev, PKVM_IOMMU_DRIVER_S2MPU, + addr, S2MPU_MMIO_SIZE, NULL); +} +EXPORT_SYMBOL_GPL(pkvm_iommu_s2mpu_register); + +static int init_sysmmu_sync_driver(void) +{ + static DEFINE_MUTEX(lock); + static bool init_done; + + int ret = 0; + + mutex_lock(&lock); + if (!init_done) { + ret = pkvm_iommu_driver_init(PKVM_IOMMU_DRIVER_SYSMMU_SYNC, NULL, 0); + init_done = !ret; + } + mutex_unlock(&lock); + return ret; +} + +int pkvm_iommu_sysmmu_sync_register(struct device *dev, phys_addr_t addr, + struct device *parent) +{ + int ret; + + if (!is_protected_kvm_enabled()) + return -ENODEV; + + ret = init_sysmmu_sync_driver(); + if (ret) + return ret; + + return pkvm_iommu_register(dev, PKVM_IOMMU_DRIVER_SYSMMU_SYNC, + addr + SYSMMU_SYNC_S2_OFFSET, + SYSMMU_SYNC_S2_MMIO_SIZE, parent); +} +EXPORT_SYMBOL_GPL(pkvm_iommu_sysmmu_sync_register); diff --git a/arch/arm64/kvm/iommu/test_kvm_s2mpu.c b/arch/arm64/kvm/iommu/test_kvm_s2mpu.c new file mode 100644 index 0000000000000000000000000000000000000000..fbc500de588e0295698871b90ffa49d5f113e09b --- /dev/null +++ b/arch/arm64/kvm/iommu/test_kvm_s2mpu.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 - Google LLC + * Author: David Brazdil + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include "../../../tools/testing/selftests/kselftest_module.h" + +#include + +#include + +KSTM_MODULE_GLOBALS(); + +#define ASSERT(cond) \ + do { \ + if (!(cond)) { \ + pr_err("line %d: assertion failed: %s\n", \ + __LINE__, #cond); \ + return -1; \ + } \ + } while (0) + +static struct fmpt g_fmpt; +static u32 g_smpt[SMPT_NUM_WORDS]; + +static void __init init_smpt(enum mpt_prot prot) +{ + memset(g_smpt, (char)mpt_prot_doubleword[prot], SMPT_SIZE); +} + +static void __init init_fmpt(enum mpt_prot prot, bool gran_1g) +{ + init_smpt(prot); + g_fmpt = (struct fmpt){ + .gran_1g = gran_1g, + .prot = prot, + .smpt = g_smpt, + }; +} + +static enum mpt_prot __init get_prot_at(size_t gb_byte_off) +{ + size_t page_idx = gb_byte_off / SMPT_GRAN; + size_t word_idx = page_idx / SMPT_ELEMS_PER_WORD; + size_t bit_shift = (page_idx % SMPT_ELEMS_PER_WORD) * MPT_PROT_BITS; + + return (g_smpt[word_idx] >> bit_shift) & MPT_PROT_MASK; +} + +static bool __init check_smpt(size_t start_byte, size_t end_byte, + enum mpt_prot prot_out, enum mpt_prot prot_in) +{ + size_t off; + + for (off = 0; off < start_byte; off += PAGE_SIZE) { + if (get_prot_at(off) != prot_out) + return false; + } + for (off = start_byte; off < end_byte; off += PAGE_SIZE) { + if (get_prot_at(off) != prot_in) + return false; + } + for (off = end_byte; off < SZ_1G; off += PAGE_SIZE) { + if (get_prot_at(off) != prot_out) + return false; + } + return true; +} + +/* Start with 1G granule, overwrite the whole 1G. */ +static int __init test_set_fmpt__fmpt_to_fmpt_whole(void) +{ + init_fmpt(MPT_PROT_NONE, /*gran_1g*/ true); + __set_fmpt_range(&g_fmpt, 0, SZ_1G, MPT_PROT_R); + ASSERT(g_fmpt.flags == MPT_UPDATE_L1); + ASSERT(g_fmpt.gran_1g); + ASSERT(g_fmpt.prot == MPT_PROT_R); + return 0; +} + +/* Start with 1G granule, overwrite the whole 1G with the same prot. */ +static int __init test_set_fmpt__fmpt_no_change_whole(void) +{ + init_fmpt(MPT_PROT_R, /*gran_1g*/ true); + __set_fmpt_range(&g_fmpt, 0, SZ_1G, MPT_PROT_R); + ASSERT(g_fmpt.flags == 0); + ASSERT(g_fmpt.gran_1g); + ASSERT(g_fmpt.prot == MPT_PROT_R); + return 0; +} + +/* Start with 1G granule, partially overwrite with the same prot. */ +static int __init test_set_fmpt__fmpt_no_change_partial(void) +{ + init_fmpt(MPT_PROT_R, /*gran_1g*/ true); + __set_fmpt_range(&g_fmpt, 0, PAGE_SIZE, MPT_PROT_R); + ASSERT(g_fmpt.flags == 0); + ASSERT(g_fmpt.gran_1g); + ASSERT(g_fmpt.prot == MPT_PROT_R); + return 0; +} + +/* Convert from 1G to PAGE_SIZE granule. */ +static int __init test_set_fmpt__fmpt_to_smpt(void) +{ + size_t start = 5 * SMPT_WORD_BYTE_RANGE / 2; + size_t end = 20 * SMPT_WORD_BYTE_RANGE; + + init_fmpt(MPT_PROT_R, /*gran_1g*/ true); + __set_fmpt_range(&g_fmpt, start, end, MPT_PROT_RW); + ASSERT(g_fmpt.flags == (MPT_UPDATE_L1 | MPT_UPDATE_L2)); + ASSERT(!g_fmpt.gran_1g); + return check_smpt(start, end, MPT_PROT_R, MPT_PROT_RW) ? 0 : 1; +} + +/* Convert from PAGE_SIZE to 1G granule by overwriting the whole 1G. */ +static int __init test_set_fmpt__smpt_to_fmpt_whole(void) +{ + init_fmpt(MPT_PROT_NONE, /*gran_1g*/ false); + __set_fmpt_range(&g_fmpt, 0, SZ_1G, MPT_PROT_R); + ASSERT(g_fmpt.flags == MPT_UPDATE_L1); + ASSERT(g_fmpt.gran_1g); + ASSERT(g_fmpt.prot == MPT_PROT_R); + return 0; +} + +/* Convert from PAGE_SIZE to 1G granule by making the SMPT uniform. */ +static int __init test_set_fmpt__smpt_to_fmpt_partial(void) +{ + size_t start = 5 * SMPT_WORD_BYTE_RANGE / 2; + size_t end = 20 * SMPT_WORD_BYTE_RANGE; + + /* Create SMPT with all PROT_W except a small subrange. */ + init_fmpt(MPT_PROT_W, /*gran_1g*/ false); + __set_smpt_range(g_smpt, start, end, MPT_PROT_RW); + + /* Fill the subrange with PROT_W to make the SMPT uniform. */ + __set_fmpt_range(&g_fmpt, start, end, MPT_PROT_W); + ASSERT(g_fmpt.flags == MPT_UPDATE_L1); + ASSERT(g_fmpt.gran_1g); + ASSERT(g_fmpt.prot == MPT_PROT_W); + return 0; +} + +/* Keep PAGE_SIZE granule when SMPT not uniform after update. */ +static int __init test_set_fmpt__smpt_to_smpt(void) +{ + size_t start = SZ_1G - SMPT_GRAN; + size_t end = SZ_1G; + + init_fmpt(MPT_PROT_NONE, /*gran_1g*/ false); + ASSERT(__is_smpt_uniform(g_smpt, MPT_PROT_NONE)); + + /* Fill the subrange with PROT_W to make the SMPT uniform. */ + __set_fmpt_range(&g_fmpt, start, end, MPT_PROT_RW); + ASSERT(g_fmpt.flags == MPT_UPDATE_L2); + ASSERT(!g_fmpt.gran_1g); + ASSERT(!__is_smpt_uniform(g_smpt, MPT_PROT_NONE)); + return 0; +} + +static int __init __test_set_smpt(size_t start_byte, size_t end_byte) +{ + init_smpt(MPT_PROT_NONE); + __set_smpt_range(g_smpt, start_byte, end_byte, MPT_PROT_W); + return check_smpt(start_byte, end_byte, MPT_PROT_NONE, MPT_PROT_W) ? 0 : 1; +} + +/* Range within one SMPT word, force a fallback to __set_smpt_range_slow. */ +static int __init test_set_smpt__within_one_word(void) +{ + return __test_set_smpt(3 * SMPT_WORD_BYTE_RANGE + 5 * PAGE_SIZE, + 3 * SMPT_WORD_BYTE_RANGE + 6 * PAGE_SIZE); +} + +/* No whole SMPT word, force a fallback to __set_smpt_range_slow. */ +static int __init test_set_smpt__no_whole_word(void) +{ + return __test_set_smpt(3 * SMPT_WORD_BYTE_RANGE + 5 * PAGE_SIZE, + 4 * SMPT_WORD_BYTE_RANGE + 2 * PAGE_SIZE); +} + +/* Both start and end aligned to SMPT word. */ +static int __init test_set_smpt__no_prologue_or_epilogue(void) +{ + return __test_set_smpt(10 * SMPT_WORD_BYTE_RANGE, + 20 * SMPT_WORD_BYTE_RANGE); +} + +/* Start not aligned to SMPT word. */ +static int __init test_set_smpt__prologue(void) +{ + return __test_set_smpt(17 * SMPT_WORD_BYTE_RANGE / 2, + 20 * SMPT_WORD_BYTE_RANGE); +} + +/* End not aligned to SMPT word. */ +static int __init test_set_smpt__epilogue(void) +{ + return __test_set_smpt(0, 17 * SMPT_WORD_BYTE_RANGE / 2); +} + +/* Neither start nor end aligned to SMPT word. */ +static int __init test_set_smpt__prologue_and_epilogue(void) +{ + return __test_set_smpt(17 * SMPT_WORD_BYTE_RANGE / 2, + 31 * SMPT_WORD_BYTE_RANGE / 2); +} + +static int __init __test_set_smpt_slow(size_t start_byte, size_t end_byte) +{ + init_smpt(MPT_PROT_NONE); + __set_smpt_range_slow(g_smpt, start_byte, end_byte, MPT_PROT_RW); + return check_smpt(start_byte, end_byte, MPT_PROT_NONE, MPT_PROT_RW) ? 0 : 1; +} + +static int __init test_set_smpt_slow__empty_word_align(void) +{ + return __test_set_smpt_slow(3 * SMPT_WORD_BYTE_RANGE, + 3 * SMPT_WORD_BYTE_RANGE); +} + +static int __init test_set_smpt_slow__empty_page_align(void) +{ + return __test_set_smpt_slow(3 * SMPT_WORD_BYTE_RANGE + PAGE_SIZE, + 3 * SMPT_WORD_BYTE_RANGE + PAGE_SIZE); +} + +static int __init test_set_smpt_slow__one_whole_word(void) +{ + return __test_set_smpt_slow(3 * SMPT_WORD_BYTE_RANGE, + 4 * SMPT_WORD_BYTE_RANGE); +} + +static int __init test_set_smpt_slow__one_partial_word(void) +{ + return __test_set_smpt_slow(3 * SMPT_WORD_BYTE_RANGE + PAGE_SIZE, + 4 * SMPT_WORD_BYTE_RANGE - PAGE_SIZE); +} + +static int __init test_set_smpt_slow__multiple_whole_words(void) +{ + return __test_set_smpt_slow(13 * SMPT_WORD_BYTE_RANGE, + 17 * SMPT_WORD_BYTE_RANGE); +} + +static int __init test_set_smpt_slow__multiple_partial_words(void) +{ + return __test_set_smpt_slow((13 * 2 + 1) * SMPT_WORD_BYTE_RANGE / 2, + (17 * 4 + 1) * SMPT_WORD_BYTE_RANGE / 4); +} + +static void __init selftest(void) +{ + KSTM_CHECK_ZERO(test_set_fmpt__fmpt_to_fmpt_whole()); + KSTM_CHECK_ZERO(test_set_fmpt__fmpt_no_change_whole()); + KSTM_CHECK_ZERO(test_set_fmpt__fmpt_no_change_partial()); + KSTM_CHECK_ZERO(test_set_fmpt__fmpt_to_smpt()); + KSTM_CHECK_ZERO(test_set_fmpt__smpt_to_fmpt_whole()); + KSTM_CHECK_ZERO(test_set_fmpt__smpt_to_fmpt_partial()); + KSTM_CHECK_ZERO(test_set_fmpt__smpt_to_smpt()); + + KSTM_CHECK_ZERO(test_set_smpt__within_one_word()); + KSTM_CHECK_ZERO(test_set_smpt__no_whole_word()); + KSTM_CHECK_ZERO(test_set_smpt__no_prologue_or_epilogue()); + KSTM_CHECK_ZERO(test_set_smpt__prologue()); + KSTM_CHECK_ZERO(test_set_smpt__epilogue()); + KSTM_CHECK_ZERO(test_set_smpt__prologue_and_epilogue()); + + KSTM_CHECK_ZERO(test_set_smpt_slow__empty_word_align()); + KSTM_CHECK_ZERO(test_set_smpt_slow__empty_page_align()); + KSTM_CHECK_ZERO(test_set_smpt_slow__one_whole_word()); + KSTM_CHECK_ZERO(test_set_smpt_slow__one_partial_word()); + KSTM_CHECK_ZERO(test_set_smpt_slow__multiple_whole_words()); + KSTM_CHECK_ZERO(test_set_smpt_slow__multiple_partial_words()); +} + +KSTM_MODULE_LOADERS(test_kvm_s2mpu); +MODULE_AUTHOR("David Brazdil "); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c index 3e2d8ba11a02780a3a1bf8da495d659cc913aae8..db6630c70f8b545076f8fb4e3db96b39e8f0a0cc 100644 --- a/arch/arm64/kvm/mmio.c +++ b/arch/arm64/kvm/mmio.c @@ -133,9 +133,19 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) /* * No valid syndrome? Ask userspace for help if it has * volunteered to do so, and bail out otherwise. + * + * In the protected VM case, there isn't much userspace can do + * though, so directly deliver an exception to the guest. */ if (!kvm_vcpu_dabt_isvalid(vcpu)) { - if (vcpu->kvm->arch.return_nisv_io_abort_to_user) { + if (is_protected_kvm_enabled() && + kvm_vm_is_protected(vcpu->kvm)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 1; + } + + if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER, + &vcpu->kvm->arch.flags)) { run->exit_reason = KVM_EXIT_ARM_NISV; run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu); run->arm_nisv.fault_ipa = fault_ipa; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 98c6c38ac8a85dd8fcca98997cf11cb495108db2..14f69c7fa141e1ac4c26b1586a404e07f41d823c 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -190,6 +190,22 @@ static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 si __unmap_stage2_range(mmu, start, size, true); } +static void pkvm_stage2_flush(struct kvm *kvm) +{ + struct kvm_pinned_page *ppage; + + /* + * Contrary to stage2_apply_range(), we don't need to check + * whether the VM is being torn down, as this is always called + * from a vcpu thread, and the list is only ever freed on VM + * destroy (which only occurs when all vcpu are gone). + */ + list_for_each_entry(ppage, &kvm->arch.pkvm.pinned_pages, link) { + __clean_dcache_guest_page(page_address(ppage->page), PAGE_SIZE); + cond_resched_lock(&kvm->mmu_lock); + } +} + static void stage2_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot) { @@ -215,9 +231,13 @@ static void stage2_flush_vm(struct kvm *kvm) idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); - slots = kvm_memslots(kvm); - kvm_for_each_memslot(memslot, slots) - stage2_flush_memslot(kvm, memslot); + if (!is_protected_kvm_enabled()) { + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_flush_memslot(kvm, memslot); + } else if (!kvm_vm_is_protected(kvm)) { + pkvm_stage2_flush(kvm); + } spin_unlock(&kvm->mmu_lock); srcu_read_unlock(&kvm->srcu, idx); @@ -239,6 +259,9 @@ void free_hyp_pgds(void) static bool kvm_host_owns_hyp_mappings(void) { + if (is_kernel_in_hyp_mode()) + return false; + if (static_branch_likely(&kvm_protected_mode_initialized)) return false; @@ -281,14 +304,117 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr) } } -static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end) +struct hyp_shared_pfn { + u64 pfn; + int count; + struct rb_node node; +}; + +static DEFINE_MUTEX(hyp_shared_pfns_lock); +static struct rb_root hyp_shared_pfns = RB_ROOT; + +static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node, + struct rb_node **parent) { - phys_addr_t addr; + struct hyp_shared_pfn *this; + + *node = &hyp_shared_pfns.rb_node; + *parent = NULL; + while (**node) { + this = container_of(**node, struct hyp_shared_pfn, node); + *parent = **node; + if (this->pfn < pfn) + *node = &((**node)->rb_left); + else if (this->pfn > pfn) + *node = &((**node)->rb_right); + else + return this; + } + + return NULL; +} + +static int share_pfn_hyp(u64 pfn) +{ + struct rb_node **node, *parent; + struct hyp_shared_pfn *this; + int ret = 0; + + mutex_lock(&hyp_shared_pfns_lock); + this = find_shared_pfn(pfn, &node, &parent); + if (this) { + this->count++; + goto unlock; + } + + this = kzalloc(sizeof(*this), GFP_KERNEL); + if (!this) { + ret = -ENOMEM; + goto unlock; + } + + this->pfn = pfn; + this->count = 1; + rb_link_node(&this->node, parent, node); + rb_insert_color(&this->node, &hyp_shared_pfns); + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1); +unlock: + mutex_unlock(&hyp_shared_pfns_lock); + + return ret; +} + +static int unshare_pfn_hyp(u64 pfn) +{ + struct rb_node **node, *parent; + struct hyp_shared_pfn *this; + int ret = 0; + + mutex_lock(&hyp_shared_pfns_lock); + this = find_shared_pfn(pfn, &node, &parent); + if (WARN_ON(!this)) { + ret = -ENOENT; + goto unlock; + } + + this->count--; + if (this->count) + goto unlock; + + rb_erase(&this->node, &hyp_shared_pfns); + kfree(this); + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1); +unlock: + mutex_unlock(&hyp_shared_pfns_lock); + + return ret; +} + +int kvm_share_hyp(void *from, void *to) +{ + phys_addr_t start, end, cur; + u64 pfn; int ret; - for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) { - ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, - __phys_to_pfn(addr)); + if (is_kernel_in_hyp_mode()) + return 0; + + /* + * The share hcall maps things in the 'fixed-offset' region of the hyp + * VA space, so we can only share physically contiguous data-structures + * for now. + */ + if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to)) + return -EINVAL; + + if (kvm_host_owns_hyp_mappings()) + return create_hyp_mappings(from, to, PAGE_HYP); + + start = ALIGN_DOWN(__pa(from), PAGE_SIZE); + end = PAGE_ALIGN(__pa(to)); + for (cur = start; cur < end; cur += PAGE_SIZE) { + pfn = __phys_to_pfn(cur); + ret = share_pfn_hyp(pfn); if (ret) return ret; } @@ -296,6 +422,22 @@ static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end) return 0; } +void kvm_unshare_hyp(void *from, void *to) +{ + phys_addr_t start, end, cur; + u64 pfn; + + if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from) + return; + + start = ALIGN_DOWN(__pa(from), PAGE_SIZE); + end = PAGE_ALIGN(__pa(to)); + for (cur = start; cur < end; cur += PAGE_SIZE) { + pfn = __phys_to_pfn(cur); + WARN_ON(unshare_pfn_hyp(pfn)); + } +} + /** * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range @@ -316,12 +458,8 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot) if (is_kernel_in_hyp_mode()) return 0; - if (!kvm_host_owns_hyp_mappings()) { - if (WARN_ON(prot != PAGE_HYP)) - return -EPERM; - return pkvm_share_hyp(kvm_kaddr_to_phys(from), - kvm_kaddr_to_phys(to)); - } + if (!kvm_host_owns_hyp_mappings()) + return -EPERM; start = start & PAGE_MASK; end = PAGE_ALIGN(end); @@ -407,6 +545,9 @@ int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, unsigned long addr; int ret; + if (is_protected_kvm_enabled()) + return -EPERM; + *kaddr = ioremap(phys_addr, size); if (!*kaddr) return -ENOMEM; @@ -497,26 +638,56 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { * kvm_init_stage2_mmu - Initialise a S2 MMU strucrure * @kvm: The pointer to the KVM structure * @mmu: The pointer to the s2 MMU structure + * @type: The machine type of the virtual machine * * Allocates only the stage-2 HW PGD level table(s). * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. */ -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type) { + u32 kvm_ipa_limit = get_kvm_ipa_limit(); int cpu, err; struct kvm_pgtable *pgt; + u64 mmfr0, mmfr1; + u32 phys_shift; + + phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); + if (is_protected_kvm_enabled()) { + phys_shift = kvm_ipa_limit; + } else if (phys_shift) { + if (phys_shift > kvm_ipa_limit || + phys_shift < ARM64_MIN_PARANGE_BITS) + return -EINVAL; + } else { + phys_shift = KVM_PHYS_SHIFT; + if (phys_shift > kvm_ipa_limit) { + pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", + current->comm); + return -EINVAL; + } + } + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); + INIT_LIST_HEAD(&kvm->arch.pkvm.pinned_pages); + mmu->arch = &kvm->arch; + + if (is_protected_kvm_enabled()) + return 0; if (mmu->pgt != NULL) { kvm_err("kvm_arch already initialized?\n"); return -EINVAL; } - pgt = kzalloc(sizeof(*pgt), GFP_KERNEL); + pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT); if (!pgt) return -ENOMEM; - err = kvm_pgtable_stage2_init(pgt, &kvm->arch, &kvm_s2_mm_ops); + mmu->arch = &kvm->arch; + err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops); if (err) goto out_free_pgtable; @@ -529,7 +700,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) for_each_possible_cpu(cpu) *per_cpu_ptr(mmu->last_vcpu_ran, cpu) = -1; - mmu->arch = &kvm->arch; mmu->pgt = pgt; mmu->pgd_phys = __pa(pgt->pgd); WRITE_ONCE(mmu->vmid.vmid_gen, 0); @@ -615,6 +785,9 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu); struct kvm_pgtable *pgt = NULL; + if (is_protected_kvm_enabled()) + return; + spin_lock(&kvm->mmu_lock); pgt = mmu->pgt; if (pgt) { @@ -630,6 +803,34 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) } } +static void hyp_mc_free_fn(void *addr, void *unused) +{ + free_page((unsigned long)addr); +} + +static void *hyp_mc_alloc_fn(void *unused) +{ + return (void *)__get_free_page(GFP_KERNEL_ACCOUNT); +} + +void free_hyp_memcache(struct kvm_hyp_memcache *mc) +{ + if (is_protected_kvm_enabled()) + __free_hyp_memcache(mc, hyp_mc_free_fn, + kvm_host_va, NULL); +} + +int topup_hyp_memcache(struct kvm_vcpu *vcpu) +{ + if (!is_protected_kvm_enabled()) + return 0; + + return __topup_hyp_memcache(&vcpu->arch.pkvm_memcache, + kvm_mmu_cache_min_pages(vcpu->kvm), + hyp_mc_alloc_fn, + kvm_host_pa, NULL); +} + /** * kvm_phys_addr_ioremap - map a device range to guest IPA * @@ -650,6 +851,9 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, KVM_PGTABLE_PROT_R | (writable ? KVM_PGTABLE_PROT_W : 0); + if (is_protected_kvm_enabled()) + return -EPERM; + size += offset_in_page(guest_ipa); guest_ipa &= PAGE_MASK; @@ -939,6 +1143,100 @@ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, return 0; } +static int pkvm_host_donate_guest(u64 pfn, u64 gfn) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_hvc(KVM_HOST_SMCCC_FUNC(__pkvm_host_donate_guest), + pfn, gfn, &res); + WARN_ON(res.a0 != SMCCC_RET_SUCCESS); + + /* + * Getting -EPERM at this point implies that the pfn has already been + * donated. This should only ever happen when two vCPUs faulted on the + * same page, and the current one lost the race to do the donation. + */ + return (res.a1 == -EPERM) ? -EAGAIN : res.a1; +} + +static int pkvm_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + unsigned long hva) +{ + struct mm_struct *mm = current->mm; + unsigned int flags = FOLL_HWPOISON | FOLL_LONGTERM | FOLL_WRITE; + struct kvm_pinned_page *ppage; + struct kvm *kvm = vcpu->kvm; + struct page *page; + u64 pfn; + int ret; + + ret = topup_hyp_memcache(vcpu); + if (ret) + return -ENOMEM; + + ppage = kmalloc(sizeof(*ppage), GFP_KERNEL_ACCOUNT); + if (!ppage) + return -ENOMEM; + + ret = account_locked_vm(mm, 1, true); + if (ret) + goto free_ppage; + + mmap_read_lock(mm); + ret = pin_user_pages(hva, 1, flags, &page, NULL); + mmap_read_unlock(mm); + + if (ret == -EHWPOISON) { + kvm_send_hwpoison_signal(hva, PAGE_SHIFT); + ret = 0; + goto dec_account; + } else if (ret != 1) { + ret = -EFAULT; + goto dec_account; + } else if (!PageSwapBacked(page)) { + /* + * We really can't deal with page-cache pages returned by GUP + * because (a) we may trigger writeback of a page for which we + * no longer have access and (b) page_mkclean() won't find the + * stage-2 mapping in the rmap so we can get out-of-whack with + * the filesystem when marking the page dirty during unpinning. + * + * Ideally we'd just restrict ourselves to anonymous pages, but + * we also want to allow memfd (i.e. shmem) pages, so check for + * pages backed by swap in the knowledge that the GUP pin will + * prevent try_to_unmap() from succeeding. + */ + ret = -EIO; + goto dec_account; + } + + spin_lock(&kvm->mmu_lock); + pfn = page_to_pfn(page); + ret = pkvm_host_donate_guest(pfn, fault_ipa >> PAGE_SHIFT); + if (ret) { + if (ret == -EAGAIN) + ret = 0; + goto unpin; + } + + ppage->page = page; + INIT_LIST_HEAD(&ppage->link); + list_add(&ppage->link, &kvm->arch.pkvm.pinned_pages); + spin_unlock(&kvm->mmu_lock); + + return 0; + +unpin: + spin_unlock(&kvm->mmu_lock); + unpin_user_pages(&page, 1); +dec_account: + account_locked_vm(mm, 1, false); +free_ppage: + kfree(ppage); + + return ret; +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -1269,7 +1567,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) * faulting VA. This is always 12 bits, irrespective * of the page size. */ - fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & FAR_MASK; ret = io_mem_abort(vcpu, fault_ipa); goto out_unlock; } @@ -1283,7 +1581,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) goto out_unlock; } - ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); + + if (is_protected_kvm_enabled()) + ret = pkvm_mem_abort(vcpu, fault_ipa, hva); + else + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; out: @@ -1555,6 +1857,13 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, change != KVM_MR_FLAGS_ONLY) return 0; + /* In protected mode, cannot modify memslots once a VM has run. */ + if (is_protected_kvm_enabled() && + (change == KVM_MR_DELETE || change == KVM_MR_MOVE) && + kvm->arch.pkvm.shadow_handle) { + return -EPERM; + } + /* * Prevent userspace from creating a memory region outside of the IPA * space addressable by the KVM guest IPA space. @@ -1586,8 +1895,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * when updating the PG_mte_tagged page flag, see * sanitise_mte_tags for more details. */ - if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) - return -EINVAL; + if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) { + ret = -EINVAL; + break; + } if (vma->vm_flags & VM_PFNMAP) { /* IO region dirty page logging not allowed */ @@ -1622,6 +1933,10 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, gpa_t gpa = slot->base_gfn << PAGE_SHIFT; phys_addr_t size = slot->npages << PAGE_SHIFT; + /* Stage-2 is managed by hyp in protected mode. */ + if (is_protected_kvm_enabled()) + return; + spin_lock(&kvm->mmu_lock); unmap_stage2_range(&kvm->arch.mmu, gpa, size); spin_unlock(&kvm->mmu_lock); diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c index f9bb3b14130eef9da9e6fe0214c65bc9d23f6861..c84fe24b2ea1e8f1f3a2ab6e1f6e6c9bb46801ed 100644 --- a/arch/arm64/kvm/perf.c +++ b/arch/arm64/kvm/perf.c @@ -50,9 +50,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { int kvm_perf_init(void) { - if (kvm_pmu_probe_pmuver() != ID_AA64DFR0_PMUVER_IMP_DEF && !is_protected_kvm_enabled()) - static_branch_enable(&kvm_arm_pmu_available); - return perf_register_guest_info_callbacks(&kvm_guest_cbs); } diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c new file mode 100644 index 0000000000000000000000000000000000000000..ed9d9b189bb853c7e553d6869c5020fad41abf36 --- /dev/null +++ b/arch/arm64/kvm/pkvm.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 - Google LLC + * Author: Quentin Perret + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "hyp_constants.h" + +static struct reserved_mem *pkvm_firmware_mem; +static phys_addr_t *pvmfw_base = &kvm_nvhe_sym(pvmfw_base); +static phys_addr_t *pvmfw_size = &kvm_nvhe_sym(pvmfw_size); + +static struct memblock_region *hyp_memory = kvm_nvhe_sym(hyp_memory); +static unsigned int *hyp_memblock_nr_ptr = &kvm_nvhe_sym(hyp_memblock_nr); + +phys_addr_t hyp_mem_base; +phys_addr_t hyp_mem_size; + +static int cmp_hyp_memblock(const void *p1, const void *p2) +{ + const struct memblock_region *r1 = p1; + const struct memblock_region *r2 = p2; + + return r1->base < r2->base ? -1 : (r1->base > r2->base); +} + +static void __init sort_memblock_regions(void) +{ + sort(hyp_memory, + *hyp_memblock_nr_ptr, + sizeof(struct memblock_region), + cmp_hyp_memblock, + NULL); +} + +static int __init register_memblock_regions(void) +{ + struct memblock_region *reg; + + for_each_mem_region(reg) { + if (*hyp_memblock_nr_ptr >= HYP_MEMBLOCK_REGIONS) + return -ENOMEM; + + hyp_memory[*hyp_memblock_nr_ptr] = *reg; + (*hyp_memblock_nr_ptr)++; + } + sort_memblock_regions(); + + return 0; +} + +void __init kvm_hyp_reserve(void) +{ + u64 hyp_mem_pages = 0; + int ret; + + if (!is_hyp_mode_available() || is_kernel_in_hyp_mode()) + return; + + if (kvm_get_mode() != KVM_MODE_PROTECTED) + return; + + ret = register_memblock_regions(); + if (ret) { + *hyp_memblock_nr_ptr = 0; + kvm_err("Failed to register hyp memblocks: %d\n", ret); + return; + } + + hyp_mem_pages += hyp_s1_pgtable_pages(); + hyp_mem_pages += host_s2_pgtable_pages(); + hyp_mem_pages += hyp_shadow_table_pages(KVM_SHADOW_VM_SIZE); + hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE); + hyp_mem_pages += hyp_ffa_proxy_pages(); + + /* + * Try to allocate a PMD-aligned region to reduce TLB pressure once + * this is unmapped from the host stage-2, and fallback to PAGE_SIZE. + */ + hyp_mem_size = hyp_mem_pages << PAGE_SHIFT; + hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), + ALIGN(hyp_mem_size, PMD_SIZE), + PMD_SIZE); + if (!hyp_mem_base) + hyp_mem_base = memblock_find_in_range(0, memblock_end_of_DRAM(), + hyp_mem_size, PAGE_SIZE); + else + hyp_mem_size = ALIGN(hyp_mem_size, PMD_SIZE); + + if (!hyp_mem_base) { + kvm_err("Failed to reserve hyp memory\n"); + return; + } + memblock_reserve(hyp_mem_base, hyp_mem_size); + + kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20, + hyp_mem_base); +} + +/* + * Updates the state of the host's version of the vcpu state. + */ +static void update_vcpu_state(struct kvm_vcpu *vcpu, int shadow_handle) +{ + vcpu->arch.pkvm.shadow_handle = shadow_handle; +} + +/* + * Allocates and donates memory for EL2 shadow structs. + * + * Allocates space for the shadow state, which includes the shadow vm as well as + * the shadow vcpu states. + * + * Stores an opaque handler in the kvm struct for future reference. + * + * Return 0 on success, negative error code on failure. + */ +static int __create_el2_shadow(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu, **vcpu_array; + size_t pgd_sz, shadow_sz; + void *pgd, *shadow_addr; + unsigned long idx; + int shadow_handle; + int ret, i; + + if (kvm->created_vcpus < 1) + return -EINVAL; + + pgd_sz = kvm_pgtable_stage2_pgd_size(kvm->arch.vtcr); + /* + * The PGD pages will be reclaimed using a hyp_memcache which implies + * page granularity. So, use alloc_pages_exact() to get individual + * refcounts. + */ + pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT); + if (!pgd) + return -ENOMEM; + + /* Allocate memory to donate to hyp for the kvm and vcpu state. */ + shadow_sz = PAGE_ALIGN(KVM_SHADOW_VM_SIZE + + SHADOW_VCPU_STATE_SIZE * kvm->created_vcpus); + shadow_addr = alloc_pages_exact(shadow_sz, GFP_KERNEL_ACCOUNT); + if (!shadow_addr) { + ret = -ENOMEM; + goto free_pgd; + } + + /* Stash the vcpu pointers into the PGD */ + BUILD_BUG_ON(KVM_MAX_VCPUS > (PAGE_SIZE / sizeof(u64))); + vcpu_array = pgd; + kvm_for_each_vcpu(idx, vcpu, kvm) + vcpu_array[idx] = vcpu; + + /* Donate the shadow memory to hyp and let hyp initialize it. */ + ret = kvm_call_hyp_nvhe(__pkvm_init_shadow, kvm, shadow_addr, shadow_sz, + pgd); + if (ret < 0) + goto free_shadow; + + shadow_handle = ret; + + /* Store the shadow handle given by hyp for future call reference. */ + kvm->arch.pkvm.shadow_handle = shadow_handle; + + /* Adjust host's vcpu state as it doesn't control it anymore. */ + for (i = 0; i < kvm->created_vcpus; i++) + update_vcpu_state(kvm->vcpus[i], shadow_handle); + + return 0; + +free_shadow: + free_pages_exact(shadow_addr, shadow_sz); +free_pgd: + free_pages_exact(pgd, pgd_sz); + return ret; +} + +int create_el2_shadow(struct kvm *kvm) +{ + int ret = 0; + + mutex_lock(&kvm->arch.pkvm.shadow_lock); + if (!kvm->arch.pkvm.shadow_handle) + ret = __create_el2_shadow(kvm); + mutex_unlock(&kvm->arch.pkvm.shadow_lock); + + return ret; +} + +void kvm_shadow_destroy(struct kvm *kvm) +{ + struct kvm_pinned_page *ppage, *tmp; + struct mm_struct *mm = current->mm; + struct list_head *ppages; + + if (kvm->arch.pkvm.shadow_handle) + WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_shadow, + kvm->arch.pkvm.shadow_handle)); + + free_hyp_memcache(&kvm->arch.pkvm.teardown_mc); + + ppages = &kvm->arch.pkvm.pinned_pages; + list_for_each_entry_safe(ppage, tmp, ppages, link) { + WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_reclaim_page, + page_to_pfn(ppage->page))); + cond_resched(); + + account_locked_vm(mm, 1, false); + unpin_user_pages_dirty_lock(&ppage->page, 1, true); + list_del(&ppage->link); + kfree(ppage); + } +} + +static int __init pkvm_firmware_rmem_err(struct reserved_mem *rmem, + const char *reason) +{ + phys_addr_t end = rmem->base + rmem->size; + + kvm_err("Ignoring pkvm guest firmware memory reservation [%pa - %pa]: %s\n", + &rmem->base, &end, reason); + return -EINVAL; +} + +static int __init pkvm_firmware_rmem_init(struct reserved_mem *rmem) +{ + unsigned long node = rmem->fdt_node; + + if (pkvm_firmware_mem) + return pkvm_firmware_rmem_err(rmem, "duplicate reservation"); + + if (!of_get_flat_dt_prop(node, "no-map", NULL)) + return pkvm_firmware_rmem_err(rmem, "missing \"no-map\" property"); + + if (of_get_flat_dt_prop(node, "reusable", NULL)) + return pkvm_firmware_rmem_err(rmem, "\"reusable\" property unsupported"); + + if (!PAGE_ALIGNED(rmem->base)) + return pkvm_firmware_rmem_err(rmem, "base is not page-aligned"); + + if (!PAGE_ALIGNED(rmem->size)) + return pkvm_firmware_rmem_err(rmem, "size is not page-aligned"); + + *pvmfw_size = rmem->size; + *pvmfw_base = rmem->base; + pkvm_firmware_mem = rmem; + return 0; +} +RESERVEDMEM_OF_DECLARE(pkvm_firmware, "linux,pkvm-guest-firmware-memory", + pkvm_firmware_rmem_init); + +static int __init pkvm_firmware_rmem_clear(void) +{ + void *addr; + phys_addr_t size; + + if (likely(!pkvm_firmware_mem) || is_protected_kvm_enabled()) + return 0; + + kvm_info("Clearing unused pKVM firmware memory\n"); + size = pkvm_firmware_mem->size; + addr = memremap(pkvm_firmware_mem->base, size, MEMREMAP_WB); + if (!addr) + return -EINVAL; + + memset(addr, 0, size); + dcache_clean_poc((unsigned long)addr, (unsigned long)addr + size); + memunmap(addr); + return 0; +} +device_initcall_sync(pkvm_firmware_rmem_clear); + +static int pkvm_vm_ioctl_set_fw_ipa(struct kvm *kvm, u64 ipa) +{ + int ret = 0; + + if (!pkvm_firmware_mem) + return -EINVAL; + + mutex_lock(&kvm->arch.pkvm.shadow_lock); + if (kvm->arch.pkvm.shadow_handle) { + ret = -EBUSY; + goto out_unlock; + } + + kvm->arch.pkvm.pvmfw_load_addr = ipa; +out_unlock: + mutex_unlock(&kvm->arch.pkvm.shadow_lock); + return ret; +} + +static int pkvm_vm_ioctl_info(struct kvm *kvm, + struct kvm_protected_vm_info __user *info) +{ + struct kvm_protected_vm_info kinfo = { + .firmware_size = pkvm_firmware_mem ? + pkvm_firmware_mem->size : + 0, + }; + + return copy_to_user(info, &kinfo, sizeof(kinfo)) ? -EFAULT : 0; +} + +int kvm_arm_vm_ioctl_pkvm(struct kvm *kvm, struct kvm_enable_cap *cap) +{ + if (cap->args[1] || cap->args[2] || cap->args[3]) + return -EINVAL; + + switch (cap->flags) { + case KVM_CAP_ARM_PROTECTED_VM_FLAGS_SET_FW_IPA: + return pkvm_vm_ioctl_set_fw_ipa(kvm, cap->args[0]); + case KVM_CAP_ARM_PROTECTED_VM_FLAGS_INFO: + return pkvm_vm_ioctl_info(kvm, (void __force __user *)cap->args[0]); + default: + return -EINVAL; + } + + return 0; +} + +int kvm_init_pvm(struct kvm *kvm, unsigned long type) +{ + mutex_init(&kvm->arch.pkvm.shadow_lock); + kvm->arch.pkvm.pvmfw_load_addr = PVMFW_INVALID_LOAD_ADDR; + + if (!(type & KVM_VM_TYPE_ARM_PROTECTED)) + return 0; + + if (!is_protected_kvm_enabled()) + return -EINVAL; + + kvm->arch.pkvm.enabled = true; + return 0; +} diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index f5065f23b413faf606137b3ceabc1bc81b87a27b..ca92cc5c71c681a01ee7022b5723cff60dae4f05 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -28,6 +28,7 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) case ID_AA64DFR0_PMUVER_8_1: case ID_AA64DFR0_PMUVER_8_4: case ID_AA64DFR0_PMUVER_8_5: + case ID_AA64DFR0_PMUVER_8_7: return GENMASK(15, 0); default: /* Shouldn't be here, just for sanity */ WARN_ONCE(1, "Unknown PMU version %d\n", kvm->arch.pmuver); @@ -740,7 +741,14 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, kvm_pmu_create_perf_event(vcpu, select_idx); } -int kvm_pmu_probe_pmuver(void) +void kvm_host_pmu_init(struct arm_pmu *pmu) +{ + if (pmu->pmuver != 0 && pmu->pmuver != ID_AA64DFR0_PMUVER_IMP_DEF && + !kvm_arm_support_pmu_v3() && !is_protected_kvm_enabled()) + static_branch_enable(&kvm_arm_pmu_available); +} + +static int kvm_pmu_probe_pmuver(void) { struct perf_event_attr attr = { }; struct perf_event *event; @@ -971,7 +979,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) mutex_lock(&vcpu->kvm->lock); if (!vcpu->kvm->arch.pmu_filter) { - vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL); + vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); if (!vcpu->kvm->arch.pmu_filter) { mutex_unlock(&vcpu->kvm->lock); return -ENOMEM; diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c index 03a6c1f4a09af0a3e2321054e31640163cf79650..a8878fd8b696caa9ba68ffd996ff0ae91b74bcc8 100644 --- a/arch/arm64/kvm/pmu.c +++ b/arch/arm64/kvm/pmu.c @@ -31,9 +31,13 @@ static bool kvm_pmu_switch_needed(struct perf_event_attr *attr) */ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) { - struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data); + struct kvm_host_data *ctx; - if (!kvm_arm_support_pmu_v3() || !ctx || !kvm_pmu_switch_needed(attr)) + if (!kvm_arm_support_pmu_v3()) + return; + + ctx = this_cpu_ptr_hyp_sym(kvm_host_data); + if (!ctx || !kvm_pmu_switch_needed(attr)) return; if (!attr->exclude_host) @@ -47,9 +51,13 @@ void kvm_set_pmu_events(u32 set, struct perf_event_attr *attr) */ void kvm_clr_pmu_events(u32 clr) { - struct kvm_host_data *ctx = this_cpu_ptr_hyp_sym(kvm_host_data); + struct kvm_host_data *ctx; + + if (!kvm_arm_support_pmu_v3()) + return; - if (!kvm_arm_support_pmu_v3() || !ctx) + ctx = this_cpu_ptr_hyp_sym(kvm_host_data); + if (!ctx) return; ctx->pmu_events.events_host &= ~clr; diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c index 74c47d420253443c8c116e5f4fab182e37d3019f..371ebbc6b529c9e4e382659c5cc100d9b9a715b8 100644 --- a/arch/arm64/kvm/psci.c +++ b/arch/arm64/kvm/psci.c @@ -21,16 +21,6 @@ * as described in ARM document number ARM DEN 0022A. */ -#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) - -static unsigned long psci_affinity_mask(unsigned long affinity_level) -{ - if (affinity_level <= 3) - return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); - - return 0; -} - static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) { /* @@ -59,12 +49,6 @@ static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) kvm_vcpu_kick(vcpu); } -static inline bool kvm_psci_valid_affinity(struct kvm_vcpu *vcpu, - unsigned long affinity) -{ - return !(affinity & ~MPIDR_HWID_BITMASK); -} - static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) { struct vcpu_reset_state *reset_state; @@ -109,7 +93,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) /* * Make sure the reset request is observed if the change to - * power_state is observed. + * power_off is observed. */ smp_wmb(); @@ -162,7 +146,7 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) return PSCI_0_2_AFFINITY_LEVEL_OFF; } -static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) +static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type, u64 flags) { int i; struct kvm_vcpu *tmp; @@ -182,29 +166,24 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; + vcpu->run->system_event.flags = flags; vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; } static void kvm_psci_system_off(struct kvm_vcpu *vcpu) { - kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN, 0); } static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) { - kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET, 0); } -static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) +static void kvm_psci_system_reset2(struct kvm_vcpu *vcpu) { - int i; - - /* - * Zero the input registers' upper 32 bits. They will be fully - * zeroed on exit, so we're fine changing them in place. - */ - for (i = 1; i < 4; i++) - vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i))); + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET, + KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2); } static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) @@ -305,24 +284,27 @@ out: return ret; } -static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) +static int kvm_psci_1_x_call(struct kvm_vcpu *vcpu, u32 minor) { u32 psci_fn = smccc_get_function(vcpu); - u32 feature; + u32 arg; unsigned long val; int ret = 1; + if (minor > 1) + return -EINVAL; + switch(psci_fn) { case PSCI_0_2_FN_PSCI_VERSION: - val = KVM_ARM_PSCI_1_0; + val = minor == 0 ? KVM_ARM_PSCI_1_0 : KVM_ARM_PSCI_1_1; break; case PSCI_1_0_FN_PSCI_FEATURES: - feature = smccc_get_arg1(vcpu); - val = kvm_psci_check_allowed_function(vcpu, feature); + arg = smccc_get_arg1(vcpu); + val = kvm_psci_check_allowed_function(vcpu, arg); if (val) break; - switch(feature) { + switch(arg) { case PSCI_0_2_FN_PSCI_VERSION: case PSCI_0_2_FN_CPU_SUSPEND: case PSCI_0_2_FN64_CPU_SUSPEND: @@ -338,11 +320,36 @@ static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) case ARM_SMCCC_VERSION_FUNC_ID: val = 0; break; + case PSCI_1_1_FN_SYSTEM_RESET2: + case PSCI_1_1_FN64_SYSTEM_RESET2: + if (minor >= 1) { + val = 0; + break; + } + fallthrough; default: val = PSCI_RET_NOT_SUPPORTED; break; } break; + case PSCI_1_1_FN_SYSTEM_RESET2: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_1_1_FN64_SYSTEM_RESET2: + if (minor >= 1) { + arg = smccc_get_arg1(vcpu); + + if (arg <= PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET || + arg >= PSCI_1_1_RESET_TYPE_VENDOR_START) { + kvm_psci_system_reset2(vcpu); + vcpu_set_reg(vcpu, 0, PSCI_RET_INTERNAL_FAILURE); + return 0; + } + + val = PSCI_RET_INVALID_PARAMS; + break; + }; + fallthrough; default: return kvm_psci_0_2_call(vcpu); } @@ -393,8 +400,10 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) int kvm_psci_call(struct kvm_vcpu *vcpu) { switch (kvm_psci_version(vcpu, vcpu->kvm)) { + case KVM_ARM_PSCI_1_1: + return kvm_psci_1_x_call(vcpu, 1); case KVM_ARM_PSCI_1_0: - return kvm_psci_1_0_call(vcpu); + return kvm_psci_1_x_call(vcpu, 0); case KVM_ARM_PSCI_0_2: return kvm_psci_0_2_call(vcpu); case KVM_ARM_PSCI_0_1: @@ -406,7 +415,7 @@ int kvm_psci_call(struct kvm_vcpu *vcpu) int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) { - return 3; /* PSCI version and two workaround registers */ + return 4; /* PSCI version and three workaround registers */ } int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) @@ -420,6 +429,9 @@ int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) return -EFAULT; + if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3, uindices++)) + return -EFAULT; + return 0; } @@ -459,6 +471,17 @@ static int get_kernel_wa_level(u64 regid) case SPECTRE_VULNERABLE: return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; } + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: + switch (arm64_get_spectre_bhb_state()) { + case SPECTRE_VULNERABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; + case SPECTRE_MITIGATED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL; + case SPECTRE_UNAFFECTED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED; + } + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL; } return -EINVAL; @@ -475,6 +498,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) break; case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; break; default: @@ -511,6 +535,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return 0; case KVM_ARM_PSCI_0_2: case KVM_ARM_PSCI_1_0: + case KVM_ARM_PSCI_1_1: if (!wants_02) return -EINVAL; vcpu->kvm->arch.psci_version = val; @@ -520,6 +545,7 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) } case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3: if (val & ~KVM_REG_FEATURE_LEVEL_MASK) return -EINVAL; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5ce36b0a3343963924e28317acf6b865801dfa9f..0eee9ea0ab245615cff902e3885ee01bae48a819 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -32,15 +32,6 @@ /* Maximum phys_shift supported for any VM on this host */ static u32 kvm_ipa_limit; -/* - * ARMv8 Reset Values - */ -#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \ - PSR_F_BIT | PSR_D_BIT) - -#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ - PSR_AA32_I_BIT | PSR_AA32_F_BIT) - unsigned int kvm_sve_max_vl; int kvm_arm_init_sve(void) @@ -94,22 +85,31 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) { void *buf; unsigned int vl; + size_t reg_sz; + int ret; vl = vcpu->arch.sve_max_vl; /* * Responsibility for these properties is shared between - * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and + * kvm_arm_init_sve(), kvm_vcpu_enable_sve() and * set_sve_vls(). Double-check here just to be sure: */ if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl || vl > SVE_VL_ARCH_MAX)) return -EIO; - buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL); + reg_sz = vcpu_sve_state_size(vcpu); + buf = kzalloc(reg_sz, GFP_KERNEL_ACCOUNT); if (!buf) return -ENOMEM; + ret = kvm_share_hyp(buf, buf + reg_sz); + if (ret) { + kfree(buf); + return ret; + } + vcpu->arch.sve_state = buf; vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED; return 0; @@ -141,7 +141,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { - kfree(vcpu->arch.sve_state); + void *sve_state = vcpu->arch.sve_state; + + kvm_vcpu_unshare_task_fp(vcpu); + kvm_unshare_hyp(vcpu, vcpu + 1); + if (sve_state) + kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu)); + kfree(sve_state); } static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) @@ -150,22 +156,6 @@ static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu)); } -static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) -{ - /* - * For now make sure that both address/generic pointer authentication - * features are requested by the userspace together and the system - * supports these capabilities. - */ - if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) || - !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features) || - !system_has_full_ptr_auth()) - return -EINVAL; - - vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH; - return 0; -} - static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) { struct kvm_vcpu *tmp; @@ -193,10 +183,9 @@ static bool vcpu_allowed_register_width(struct kvm_vcpu *vcpu) * kvm_reset_vcpu - sets core registers and sys_regs to reset value * @vcpu: The VCPU pointer * - * This function finds the right table above and sets the registers on - * the virtual CPU struct to their architecturally defined reset - * values, except for registers whose reset is deferred until - * kvm_arm_vcpu_finalize(). + * This function sets the registers on the virtual CPU struct to their + * architecturally defined reset values, except for registers whose reset is + * deferred until kvm_arm_vcpu_finalize(). * * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT * ioctl or as part of handling a request issued by another VCPU in the PSCI @@ -356,32 +345,3 @@ int kvm_set_ipa_limit(void) return 0; } - -int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) -{ - u64 mmfr0, mmfr1; - u32 phys_shift; - - if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) - return -EINVAL; - - phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); - if (phys_shift) { - if (phys_shift > kvm_ipa_limit || - phys_shift < ARM64_MIN_PARANGE_BITS) - return -EINVAL; - } else { - phys_shift = KVM_PHYS_SHIFT; - if (phys_shift > kvm_ipa_limit) { - pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", - current->comm); - return -EINVAL; - } - } - - mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); - mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); - - return 0; -} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1d46e185f31e195766692df7224104d6bca4d6ec..95b5caad48e9bda9fdc3045671748a1b14dfb23d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -64,26 +64,6 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, return false; } -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) -{ - u64 val = 0x8badf00d8badf00d; - - if (vcpu->arch.sysregs_loaded_on_cpu && - __vcpu_read_sys_reg_from_cpu(reg, &val)) - return val; - - return __vcpu_sys_reg(vcpu, reg); -} - -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) -{ - if (vcpu->arch.sysregs_loaded_on_cpu && - __vcpu_write_sys_reg_to_cpu(val, reg)) - return; - - __vcpu_sys_reg(vcpu, reg) = val; -} - /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ static u32 cache_levels; @@ -575,19 +555,7 @@ static void reset_actlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static void reset_mpidr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 mpidr; - - /* - * Map the vcpu_id into the first three affinity level fields of - * the MPIDR. We limit the number of VCPUs in level 0 due to a - * limitation to 16 CPUs in that level in the ICC_SGIxR registers - * of the GICv3 to be able to address each CPU directly when - * sending IPIs. - */ - mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0); - mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1); - mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2); - vcpu_write_sys_reg(vcpu, (1ULL << 31) | mpidr, MPIDR_EL1); + vcpu_write_sys_reg(vcpu, calculate_mpidr(vcpu), MPIDR_EL1); } static unsigned int pmu_visibility(const struct kvm_vcpu *vcpu, @@ -1064,7 +1032,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r, bool raz) { u32 id = reg_to_encoding(r); - u64 val = raz ? 0 : read_sanitised_ftr_reg(id); + u64 val; + + if (raz) + return 0; + + val = read_sanitised_ftr_reg(id); switch (id) { case SYS_ID_AA64PFR0_EL1: @@ -1075,16 +1048,15 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); + if (irqchip_in_kernel(vcpu->kvm) && + vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1); + } break; case SYS_ID_AA64PFR1_EL1: - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); - if (kvm_has_mte(vcpu->kvm)) { - u64 pfr, mte; - - pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); - mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte); - } + if (!kvm_has_mte(vcpu->kvm)) + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) @@ -1268,16 +1240,19 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return __set_id_reg(vcpu, rd, uaddr, raz); } -static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, +static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { - return __get_id_reg(vcpu, rd, uaddr, true); + return __set_id_reg(vcpu, rd, uaddr, true); } -static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) +static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) { - return __set_id_reg(vcpu, rd, uaddr, true); + const u64 id = sys_reg_to_index(rd); + const u64 val = 0; + + return reg_to_user(uaddr, &val, id); } static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, @@ -1388,7 +1363,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, #define ID_UNALLOCATED(crm, op2) { \ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ .access = access_raz_id_reg, \ - .get_user = get_raz_id_reg, \ + .get_user = get_raz_reg, \ .set_user = set_raz_id_reg, \ } @@ -1400,7 +1375,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, #define ID_HIDDEN(name) { \ SYS_DESC(SYS_##name), \ .access = access_raz_id_reg, \ - .get_user = get_raz_id_reg, \ + .get_user = get_raz_reg, \ .set_user = set_raz_id_reg, \ } @@ -1518,7 +1493,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* CRm=6 */ ID_SANITISED(ID_AA64ISAR0_EL1), ID_SANITISED(ID_AA64ISAR1_EL1), - ID_UNALLOCATED(6,2), + ID_SANITISED(ID_AA64ISAR2_EL1), ID_UNALLOCATED(6,3), ID_UNALLOCATED(6,4), ID_UNALLOCATED(6,5), @@ -1642,7 +1617,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { * previously (and pointlessly) advertised in the past... */ { PMU_SYS_REG(SYS_PMSWINC_EL0), - .get_user = get_raz_id_reg, .set_user = set_wi_reg, + .get_user = get_raz_reg, .set_user = set_wi_reg, .access = access_pmswinc, .reset = NULL }, { PMU_SYS_REG(SYS_PMSELR_EL0), .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 }, diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h index cc0cc95a0280519d04c64a409668f3d505e5f53d..9b32772f398e5d825ced69c9946a8bb6913aaae4 100644 --- a/arch/arm64/kvm/sys_regs.h +++ b/arch/arm64/kvm/sys_regs.h @@ -183,6 +183,25 @@ find_reg(const struct sys_reg_params *params, const struct sys_reg_desc table[], return __inline_bsearch((void *)pval, table, num, sizeof(table[0]), match_sys_reg); } +static inline u64 calculate_mpidr(const struct kvm_vcpu *vcpu) +{ + u64 mpidr; + + /* + * Map the vcpu_id into the first three affinity level fields of + * the MPIDR. We limit the number of VCPUs in level 0 due to a + * limitation to 16 CPUs in that level in the ICC_SGIxR registers + * of the GICv3 to be able to address each CPU directly when + * sending IPIs. + */ + mpidr = (vcpu->vcpu_id & 0x0f) << MPIDR_LEVEL_SHIFT(0); + mpidr |= ((vcpu->vcpu_id >> 4) & 0xff) << MPIDR_LEVEL_SHIFT(1); + mpidr |= ((vcpu->vcpu_id >> 12) & 0xff) << MPIDR_LEVEL_SHIFT(2); + mpidr |= (1ULL << 31); + + return mpidr; +} + const struct sys_reg_desc *find_reg_by_id(u64 id, struct sys_reg_params *params, const struct sys_reg_desc table[], diff --git a/arch/arm64/kvm/va_layout.c b/arch/arm64/kvm/va_layout.c index acdb7b3cc97d65aaa9a540ea3773e46af1ea04cf..f0b678d24e25de98966fcecd2458fa3557de86e5 100644 --- a/arch/arm64/kvm/va_layout.c +++ b/arch/arm64/kvm/va_layout.c @@ -295,3 +295,15 @@ void kvm_compute_final_ctr_el0(struct alt_instr *alt, generate_mov_q(read_sanitised_ftr_reg(SYS_CTR_EL0), origptr, updptr, nr_inst); } + +void kvm_get__text(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q((u64)_text, origptr, updptr, nr_inst); +} + +void kvm_get__etext(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + generate_mov_q((u64)_etext, origptr, updptr, nr_inst); +} diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 340c51d87677caf9165773a7e59869aa5da002c7..ce2b42c38e62a283e24ab6c337472c12e90d4c49 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -91,7 +91,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type) return ret; kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu->arch.has_run_once) + if (vcpu_has_run_once(vcpu)) goto out_unlock; } ret = 0; @@ -134,7 +134,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); int i; - dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); + dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT); if (!dist->spis) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 79f8899b234cf769afd5d47b08a5c0ac1a532efe..475059bacedf892f33c1ce8287c0a1f9dcd91a1a 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -139,7 +139,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) u32 nr = dist->nr_spis; int i, ret; - entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL_ACCOUNT); if (!entries) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 61728c543eb9c1165588b975033ec0f7b91048fd..089fc2ffcb43d1c6c1269036762edf669068ad24 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -48,7 +48,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (irq) return irq; - irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); + irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT); if (!irq) return ERR_PTR(-ENOMEM); @@ -332,7 +332,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) * we must be careful not to overrun the array. */ irq_count = READ_ONCE(dist->lpi_list_count); - intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); + intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT); if (!intids) return -ENOMEM; @@ -985,7 +985,7 @@ static int vgic_its_alloc_collection(struct vgic_its *its, if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) return E_ITS_MAPC_COLLECTION_OOR; - collection = kzalloc(sizeof(*collection), GFP_KERNEL); + collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT); if (!collection) return -ENOMEM; @@ -1029,7 +1029,7 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device, { struct its_ite *ite; - ite = kzalloc(sizeof(*ite), GFP_KERNEL); + ite = kzalloc(sizeof(*ite), GFP_KERNEL_ACCOUNT); if (!ite) return ERR_PTR(-ENOMEM); @@ -1150,7 +1150,7 @@ static struct its_device *vgic_its_alloc_device(struct vgic_its *its, { struct its_device *device; - device = kzalloc(sizeof(*device), GFP_KERNEL); + device = kzalloc(sizeof(*device), GFP_KERNEL_ACCOUNT); if (!device) return ERR_PTR(-ENOMEM); @@ -1847,7 +1847,7 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm) struct vgic_translation_cache_entry *cte; /* An allocation failure is not fatal */ - cte = kzalloc(sizeof(*cte), GFP_KERNEL); + cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT); if (WARN_ON(!cte)) break; @@ -1888,7 +1888,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) return -ENODEV; - its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL); + its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL_ACCOUNT); if (!its) return -ENOMEM; @@ -2710,8 +2710,8 @@ static int vgic_its_set_attr(struct kvm_device *dev, if (copy_from_user(&addr, uaddr, sizeof(addr))) return -EFAULT; - ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base, - addr, SZ_64K); + ret = vgic_check_iorange(dev->kvm, its->vgic_its_base, + addr, SZ_64K, KVM_VGIC_V3_ITS_SIZE); if (ret) return ret; diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 7740995de982e1078a8ce04732942b6b8ada0246..0d000d2fe8d284f02f9641332ada7e6ddd9c3649 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -14,17 +14,21 @@ /* common helpers */ -int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t alignment) +int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, + phys_addr_t addr, phys_addr_t alignment, + phys_addr_t size) { - if (addr & ~kvm_phys_mask(kvm)) - return -E2BIG; + if (!IS_VGIC_ADDR_UNDEF(ioaddr)) + return -EEXIST; - if (!IS_ALIGNED(addr, alignment)) + if (!IS_ALIGNED(addr, alignment) || !IS_ALIGNED(size, alignment)) return -EINVAL; - if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) - return -EEXIST; + if (addr + size < addr) + return -EINVAL; + + if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm)) + return -E2BIG; return 0; } @@ -57,7 +61,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) { int r = 0; struct vgic_dist *vgic = &kvm->arch.vgic; - phys_addr_t *addr_ptr, alignment; + phys_addr_t *addr_ptr, alignment, size; u64 undef_value = VGIC_ADDR_UNDEF; mutex_lock(&kvm->lock); @@ -66,16 +70,19 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); addr_ptr = &vgic->vgic_dist_base; alignment = SZ_4K; + size = KVM_VGIC_V2_DIST_SIZE; break; case KVM_VGIC_V2_ADDR_TYPE_CPU: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); addr_ptr = &vgic->vgic_cpu_base; alignment = SZ_4K; + size = KVM_VGIC_V2_CPU_SIZE; break; case KVM_VGIC_V3_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); addr_ptr = &vgic->vgic_dist_base; alignment = SZ_64K; + size = KVM_VGIC_V3_DIST_SIZE; break; case KVM_VGIC_V3_ADDR_TYPE_REDIST: { struct vgic_redist_region *rdreg; @@ -140,7 +147,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) goto out; if (write) { - r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment); + r = vgic_check_iorange(kvm, *addr_ptr, *addr, alignment, size); if (!r) *addr_ptr = *addr; } else { diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 03a253785700a939b5d10a65619ec148aa2a7c12..aba1b6fca293e649acc8f44d3531a70943ab0008 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -763,10 +763,12 @@ static int vgic_register_all_redist_iodevs(struct kvm *kvm) } if (ret) { - /* The current c failed, so we start with the previous one. */ + /* The current c failed, so iterate over the previous ones. */ + int i; + mutex_lock(&kvm->slots_lock); - for (c--; c >= 0; c--) { - vcpu = kvm_get_vcpu(kvm, c); + for (i = 0; i < c; i++) { + vcpu = kvm_get_vcpu(kvm, i); vgic_unregister_redist_iodev(vcpu); } mutex_unlock(&kvm->slots_lock); @@ -796,7 +798,9 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index, struct vgic_dist *d = &kvm->arch.vgic; struct vgic_redist_region *rdreg; struct list_head *rd_regions = &d->rd_regions; - size_t size = count * KVM_VGIC_V3_REDIST_SIZE; + int nr_vcpus = atomic_read(&kvm->online_vcpus); + size_t size = count ? count * KVM_VGIC_V3_REDIST_SIZE + : nr_vcpus * KVM_VGIC_V3_REDIST_SIZE; int ret; /* cross the end of memory ? */ @@ -834,13 +838,13 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index, if (vgic_v3_rdist_overlap(kvm, base, size)) return -EINVAL; - rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL); + rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL_ACCOUNT); if (!rdreg) return -ENOMEM; rdreg->base = VGIC_ADDR_UNDEF; - ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K); + ret = vgic_check_iorange(kvm, rdreg->base, base, SZ_64K, size); if (ret) goto free; diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index 48c6067fc5ecb0dd916727d8b4d8e3dc906ed224..49837d3a3ef5625ba0c5e4d2fed57e362b55ffc0 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -248,6 +248,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, IRQCHIP_STATE_PENDING, &val); WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + } else if (vgic_irq_is_mapped_level(irq)) { + val = vgic_get_phys_line_level(irq); } else { val = irq_is_pending(irq); } @@ -1050,7 +1052,7 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, return 0; } -struct kvm_io_device_ops kvm_io_gic_ops = { +const struct kvm_io_device_ops kvm_io_gic_ops = { .read = dispatch_mmio_read, .write = dispatch_mmio_write, }; diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h index fefcca2b14dc7297db682d523b0326de249622ad..3fa696f198a37ab9aad81832db4b76d61bcf1ece 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.h +++ b/arch/arm64/kvm/vgic/vgic-mmio.h @@ -34,7 +34,7 @@ struct vgic_register_region { }; }; -extern struct kvm_io_device_ops kvm_io_gic_ops; +extern const struct kvm_io_device_ops kvm_io_gic_ops; #define VGIC_ACCESS_8bit 1 #define VGIC_ACCESS_32bit 2 diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index 95a18cec14a35036ca4c6c2e3446f5a1cb646935..4e8bb90bd96f795bc6d3124a5f9e4bab52c29f91 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -293,12 +293,12 @@ int vgic_v2_map_resources(struct kvm *kvm) if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { - kvm_err("Need to set vgic cpu and dist addresses first\n"); + kvm_debug("Need to set vgic cpu and dist addresses first\n"); return -ENXIO; } if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) { - kvm_err("VGIC CPU and dist frames overlap\n"); + kvm_debug("VGIC CPU and dist frames overlap\n"); return -EINVAL; } @@ -345,6 +345,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info) int ret; u32 vtr; + if (is_protected_kvm_enabled()) { + kvm_err("GICv2 not supported in protected mode\n"); + return -ENXIO; + } + if (!info->vctrl.start) { kvm_err("GICH not present in the firmware table\n"); return -ENXIO; @@ -465,17 +470,10 @@ void vgic_v2_load(struct kvm_vcpu *vcpu) kvm_vgic_global_state.vctrl_base + GICH_APR); } -void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) +void vgic_v2_put(struct kvm_vcpu *vcpu, bool blocking) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); -} - -void vgic_v2_put(struct kvm_vcpu *vcpu) -{ - struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - - vgic_v2_vmcr_sync(vcpu); cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); } diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 21a6207fb2eed08e94c1c9bc4d83922fa15e73d8..9be4fed60bd479e36b04af92a532c07e503d70da 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -15,6 +15,7 @@ static bool group0_trap; static bool group1_trap; static bool common_trap; +static bool dir_trap; static bool gicv4_enable; void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) @@ -296,6 +297,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) vgic_v3->vgic_hcr |= ICH_HCR_TALL1; if (common_trap) vgic_v3->vgic_hcr |= ICH_HCR_TC; + if (dir_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TDIR; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) @@ -483,8 +486,10 @@ bool vgic_v3_check_base(struct kvm *kvm) return false; list_for_each_entry(rdreg, &d->rd_regions, list) { - if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) < - rdreg->base) + size_t sz = vgic_v3_rd_region_size(kvm, rdreg); + + if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF, + rdreg->base, SZ_64K, sz)) return false; } @@ -549,12 +554,12 @@ int vgic_v3_map_resources(struct kvm *kvm) } if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) { - kvm_err("Need to set vgic distributor addresses first\n"); + kvm_debug("Need to set vgic distributor addresses first\n"); return -ENXIO; } if (!vgic_v3_check_base(kvm)) { - kvm_err("VGIC redist and dist frames overlap\n"); + kvm_debug("VGIC redist and dist frames overlap\n"); return -EINVAL; } @@ -604,6 +609,18 @@ static int __init early_gicv4_enable(char *buf) } early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); +static const struct midr_range broken_seis[] = { + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM), + MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM), + {}, +}; + +static bool vgic_v3_broken_seis(void) +{ + return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) && + is_midr_in_range_list(read_cpuid_id(), broken_seis)); +} + /** * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller * @info: pointer to the GIC description @@ -646,7 +663,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) } else if (!PAGE_ALIGNED(info->vcpu.start)) { pr_warn("GICV physical address 0x%llx not page aligned\n", (unsigned long long)info->vcpu.start); - } else { + } else if (kvm_get_mode() != KVM_MODE_PROTECTED) { kvm_vgic_global_state.vcpu_base = info->vcpu.start; kvm_vgic_global_state.can_emulate_gicv2 = true; ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); @@ -671,11 +688,24 @@ int vgic_v3_probe(const struct gic_kvm_info *info) group1_trap = true; } - if (group0_trap || group1_trap || common_trap) { - kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", + if (vgic_v3_broken_seis()) { + kvm_info("GICv3 with broken locally generated SEI\n"); + + kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK; + group0_trap = true; + group1_trap = true; + if (ich_vtr_el2 & ICH_VTR_TDS_MASK) + dir_trap = true; + else + common_trap = true; + } + + if (group0_trap || group1_trap || common_trap | dir_trap) { + kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n", group0_trap ? "G0" : "", group1_trap ? "G1" : "", - common_trap ? "C" : ""); + common_trap ? "C" : "", + dir_trap ? "D" : ""); static_branch_enable(&vgic_v3_cpuif_trap); } @@ -690,15 +720,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - /* - * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen - * is dependent on ICC_SRE_EL1.SRE, and we have to perform the - * VMCR_EL2 save/restore in the world switch. - */ - if (likely(cpu_if->vgic_sre)) - kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); - - kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if); + if (likely(!is_protected_kvm_enabled())) + kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); if (has_vhe()) __vgic_v3_activate_traps(cpu_if); @@ -706,23 +729,14 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) WARN_ON(vgic_v4_load(vcpu)); } -void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - - if (likely(cpu_if->vgic_sre)) - cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); -} - -void vgic_v3_put(struct kvm_vcpu *vcpu) +void vgic_v3_put(struct kvm_vcpu *vcpu, bool blocking) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; - WARN_ON(vgic_v4_put(vcpu, false)); - - vgic_v3_vmcr_sync(vcpu); + WARN_ON(vgic_v4_put(vcpu, blocking)); - kvm_call_hyp(__vgic_v3_save_aprs, cpu_if); + if (likely(!is_protected_kvm_enabled())) + kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); if (has_vhe()) __vgic_v3_deactivate_traps(cpu_if); diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index c1845d8f5f7e712d4ea7c29f67741855a0a4d70e..772dd15a22c7ccd32f9faf1f621f7d0616dbb5d9 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -246,7 +246,7 @@ int vgic_v4_init(struct kvm *kvm) nr_vcpus = atomic_read(&kvm->online_vcpus); dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!dist->its_vm.vpes) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index 5dad4996cfb22bd6d71940bb86ccc3c5e38167a9..0bf570eb46222206f0ba896af311ee2b8456619e 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -931,26 +931,15 @@ void kvm_vgic_load(struct kvm_vcpu *vcpu) vgic_v3_load(vcpu); } -void kvm_vgic_put(struct kvm_vcpu *vcpu) +void kvm_vgic_put(struct kvm_vcpu *vcpu, bool blocking) { if (unlikely(!vgic_initialized(vcpu->kvm))) return; if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_put(vcpu); + vgic_v2_put(vcpu, blocking); else - vgic_v3_put(vcpu); -} - -void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) -{ - if (unlikely(!irqchip_in_kernel(vcpu->kvm))) - return; - - if (kvm_vgic_global_state.type == VGIC_V2) - vgic_v2_vmcr_sync(vcpu); - else - vgic_v3_vmcr_sync(vcpu); + vgic_v3_put(vcpu, blocking); } int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 14a9218641f57d70b1125cd9fb006bbe1a9f9379..947985b3b03e959fb8ed95f4ffad1f58f480c55e 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -172,8 +172,9 @@ void vgic_kick_vcpus(struct kvm *kvm); void vgic_irq_handle_resampling(struct vgic_irq *irq, bool lr_deactivated, bool lr_pending); -int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t alignment); +int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, + phys_addr_t addr, phys_addr_t alignment, + phys_addr_t size); void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); @@ -195,8 +196,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, void vgic_v2_init_lrs(void); void vgic_v2_load(struct kvm_vcpu *vcpu); -void vgic_v2_put(struct kvm_vcpu *vcpu); -void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); +void vgic_v2_put(struct kvm_vcpu *vcpu, bool blocking); void vgic_v2_save_state(struct kvm_vcpu *vcpu); void vgic_v2_restore_state(struct kvm_vcpu *vcpu); @@ -226,8 +226,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); bool vgic_v3_check_base(struct kvm *kvm); void vgic_v3_load(struct kvm_vcpu *vcpu); -void vgic_v3_put(struct kvm_vcpu *vcpu); -void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); +void vgic_v3_put(struct kvm_vcpu *vcpu, bool blocking); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index d31e1169d9b8e94a98f8034fc3bd32720f90898f..8e60d76a1b473be3422cc9888c17f1e02068c8bb 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -18,3 +18,5 @@ obj-$(CONFIG_CRC32) += crc32.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o obj-$(CONFIG_ARM64_MTE) += mte.o + +obj-$(CONFIG_KASAN_SW_TAGS) += kasan_sw_tags.o diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index b84b179edba3a62c8b513ed133157fd1e669cfaf..1fd5d790ab800321786324dbc4ea8804920e07f1 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -16,6 +16,7 @@ */ SYM_FUNC_START_PI(clear_page) mrs x1, dczid_el0 + tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */ and w1, w1, #0xf mov x2, #4 lsl x1, x2, x1 @@ -25,5 +26,14 @@ SYM_FUNC_START_PI(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 1b ret + +2: stnp xzr, xzr, [x0] + stnp xzr, xzr, [x0, #16] + stnp xzr, xzr, [x0, #32] + stnp xzr, xzr, [x0, #48] + add x0, x0, #64 + tst x0, #(PAGE_SIZE - 1) + b.ne 2b + ret SYM_FUNC_END_PI(clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 0f8a3a9e3795b64b6cb0e53c0953e3b1833884e4..957a6d092d7af83771ea09395f73d87f1015235e 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -29,7 +29,7 @@ .endm .macro ldrh1 reg, ptr, val - uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val + uao_user_alternative 9997f, ldrh, ldtrh, \reg, \ptr, \val .endm .macro strh1 reg, ptr, val @@ -37,7 +37,7 @@ .endm .macro ldr1 reg, ptr, val - uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val + uao_user_alternative 9997f, ldr, ldtr, \reg, \ptr, \val .endm .macro str1 reg, ptr, val @@ -45,7 +45,7 @@ .endm .macro ldp1 reg1, reg2, ptr, val - uao_ldp 9998f, \reg1, \reg2, \ptr, \val + uao_ldp 9997f, \reg1, \reg2, \ptr, \val .endm .macro stp1 reg1, reg2, ptr, val @@ -53,8 +53,10 @@ .endm end .req x5 +srcin .req x15 SYM_FUNC_START(__arch_copy_from_user) add end, x0, x2 + mov srcin, x1 #include "copy_template.S" mov x0, #0 // Nothing to copy ret @@ -63,6 +65,11 @@ EXPORT_SYMBOL(__arch_copy_from_user) .section .fixup,"ax" .align 2 +9997: cmp dst, dstin + b.ne 9998f + // Before being absolutely sure we couldn't copy anything, try harder +USER(9998f, ldtrb tmp1w, [srcin]) + strb tmp1w, [dst], #1 9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 80e37ada0ee1a501d5d4fb9fe87db9fc5853af2b..35c01da09323ee6de85494efed8e760fbe453206 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -30,33 +30,34 @@ .endm .macro ldrh1 reg, ptr, val - uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val + uao_user_alternative 9997f, ldrh, ldtrh, \reg, \ptr, \val .endm .macro strh1 reg, ptr, val - uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val + uao_user_alternative 9997f, strh, sttrh, \reg, \ptr, \val .endm .macro ldr1 reg, ptr, val - uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val + uao_user_alternative 9997f, ldr, ldtr, \reg, \ptr, \val .endm .macro str1 reg, ptr, val - uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val + uao_user_alternative 9997f, str, sttr, \reg, \ptr, \val .endm .macro ldp1 reg1, reg2, ptr, val - uao_ldp 9998f, \reg1, \reg2, \ptr, \val + uao_ldp 9997f, \reg1, \reg2, \ptr, \val .endm .macro stp1 reg1, reg2, ptr, val - uao_stp 9998f, \reg1, \reg2, \ptr, \val + uao_stp 9997f, \reg1, \reg2, \ptr, \val .endm end .req x5 - +srcin .req x15 SYM_FUNC_START(__arch_copy_in_user) add end, x0, x2 + mov srcin, x1 #include "copy_template.S" mov x0, #0 ret @@ -65,6 +66,12 @@ EXPORT_SYMBOL(__arch_copy_in_user) .section .fixup,"ax" .align 2 +9997: cmp dst, dstin + b.ne 9998f + // Before being absolutely sure we couldn't copy anything, try harder +USER(9998f, ldtrb tmp1w, [srcin]) +USER(9998f, sttrb tmp1w, [dst]) + add dst, dst, #1 9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 4ec59704b8f2d19933ec1f39ca48e76315309609..85705350ff35457c03320bce3192a626732cd16a 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -32,7 +32,7 @@ .endm .macro strh1 reg, ptr, val - uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val + uao_user_alternative 9997f, strh, sttrh, \reg, \ptr, \val .endm .macro ldr1 reg, ptr, val @@ -40,7 +40,7 @@ .endm .macro str1 reg, ptr, val - uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val + uao_user_alternative 9997f, str, sttr, \reg, \ptr, \val .endm .macro ldp1 reg1, reg2, ptr, val @@ -48,12 +48,14 @@ .endm .macro stp1 reg1, reg2, ptr, val - uao_stp 9998f, \reg1, \reg2, \ptr, \val + uao_stp 9997f, \reg1, \reg2, \ptr, \val .endm end .req x5 +srcin .req x15 SYM_FUNC_START(__arch_copy_to_user) add end, x0, x2 + mov srcin, x1 #include "copy_template.S" mov x0, #0 ret @@ -62,6 +64,12 @@ EXPORT_SYMBOL(__arch_copy_to_user) .section .fixup,"ax" .align 2 +9997: cmp dst, dstin + b.ne 9998f + // Before being absolutely sure we couldn't copy anything, try harder + ldrb tmp1w, [srcin] +USER(9998f, sttrb tmp1w, [dst]) + add dst, dst, #1 9998: sub x0, end, dst // bytes not copied ret .previous diff --git a/arch/arm64/lib/kasan_sw_tags.S b/arch/arm64/lib/kasan_sw_tags.S new file mode 100644 index 0000000000000000000000000000000000000000..5b04464c045eb552fc6416826ca9f4cd9b1820f8 --- /dev/null +++ b/arch/arm64/lib/kasan_sw_tags.S @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Google LLC + */ + +#include +#include + +/* + * Report a tag mismatch detected by tag-based KASAN. + * + * A compiler-generated thunk calls this with a non-AAPCS calling + * convention. Upon entry to this function, registers are as follows: + * + * x0: fault address (see below for restore) + * x1: fault description (see below for restore) + * x2 to x15: callee-saved + * x16 to x17: safe to clobber + * x18 to x30: callee-saved + * sp: pre-decremented by 256 bytes (see below for restore) + * + * The caller has decremented the SP by 256 bytes, and created a + * structure on the stack as follows: + * + * sp + 0..15: x0 and x1 to be restored + * sp + 16..231: free for use + * sp + 232..247: x29 and x30 (same as in GPRs) + * sp + 248..255: free for use + * + * Note that this is not a struct pt_regs. + * + * To call a regular AAPCS function we must save x2 to x15 (which we can + * store in the gaps), and create a frame record (for which we can use + * x29 and x30 spilled by the caller as those match the GPRs). + * + * The caller expects x0 and x1 to be restored from the structure, and + * for the structure to be removed from the stack (i.e. the SP must be + * incremented by 256 prior to return). + */ +SYM_CODE_START(__hwasan_tag_mismatch) +#ifdef BTI_C + BTI_C +#endif + add x29, sp, #232 + stp x2, x3, [sp, #8 * 2] + stp x4, x5, [sp, #8 * 4] + stp x6, x7, [sp, #8 * 6] + stp x8, x9, [sp, #8 * 8] + stp x10, x11, [sp, #8 * 10] + stp x12, x13, [sp, #8 * 12] + stp x14, x15, [sp, #8 * 14] +#ifndef CONFIG_SHADOW_CALL_STACK + str x18, [sp, #8 * 18] +#endif + + mov x2, x30 + bl kasan_tag_mismatch + + ldp x0, x1, [sp] + ldp x2, x3, [sp, #8 * 2] + ldp x4, x5, [sp, #8 * 4] + ldp x6, x7, [sp, #8 * 6] + ldp x8, x9, [sp, #8 * 8] + ldp x10, x11, [sp, #8 * 10] + ldp x12, x13, [sp, #8 * 12] + ldp x14, x15, [sp, #8 * 14] +#ifndef CONFIG_SHADOW_CALL_STACK + ldr x18, [sp, #8 * 18] +#endif + ldp x29, x30, [sp, #8 * 29] + + /* remove the structure from the stack */ + add sp, sp, #256 + ret +SYM_CODE_END(__hwasan_tag_mismatch) +EXPORT_SYMBOL(__hwasan_tag_mismatch) diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index ebb6337faaf92690a22ea7432bb1fd7464b47d6e..a754e15e629fa4fed2d6ab7045b1786cea5eea7d 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -43,17 +43,23 @@ SYM_FUNC_END(mte_clear_page_tags) * x0 - address to the beginning of the page */ SYM_FUNC_START(mte_zero_clear_page_tags) + and x0, x0, #(1 << MTE_TAG_SHIFT) - 1 // clear the tag mrs x1, dczid_el0 + tbnz x1, #4, 2f // Branch if DC GZVA is prohibited and w1, w1, #0xf mov x2, #4 lsl x1, x2, x1 - and x0, x0, #(1 << MTE_TAG_SHIFT) - 1 // clear the tag 1: dc gzva, x0 add x0, x0, x1 tst x0, #(PAGE_SIZE - 1) b.ne 1b ret + +2: stz2g x0, [x0], #(MTE_GRANULE_SIZE * 2) + tst x0, #(PAGE_SIZE - 1) + b.ne 2b + ret SYM_FUNC_END(mte_zero_clear_page_tags) /* diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 5ead3c3de3b6115deab05a1255d5bf3644871308..ca650b2dce43f1f00bfbe5b2eac449262c18d4ea 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-y := dma-mapping.o extable.o fault.o init.o \ cache.o copypage.o flush.o \ - ioremap.o mmap.o pgd.o mmu.o \ + ioremap.o mem_encrypt.o mmap.o pgd.o mmu.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += ptdump.o diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 5051b3c1a4f1263fbd8b7619c88d5f76bed87867..7835c22db175065c0166dbf1e1028688c726781d 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -88,7 +88,7 @@ SYM_FUNC_END(caches_clean_inval_user_pou) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START(icache_inval_pou) +SYM_FUNC_START_PI(icache_inval_pou) alternative_if ARM64_HAS_CACHE_DIC isb ret @@ -96,7 +96,7 @@ alternative_else_nop_endif invalidate_icache_by_line x0, x1, x2, x3 ret -SYM_FUNC_END(icache_inval_pou) +SYM_FUNC_END_PI(icache_inval_pou) /* * dcache_clean_inval_poc(start, end) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index c834a6445842fcf79c19d00387bedf3100f53a46..deb45e8edf7ce9f7f9cbe817aed8ff23f1205fcf 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -52,7 +52,7 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, dev->dma_coherent = coherent; if (iommu) { iommu_setup_dma_ops(dev, dma_base, size); - trace_android_vh_iommu_setup_dma_ops(dev, dma_base, size); + trace_android_rvh_iommu_setup_dma_ops(dev, dma_base, size); } #ifdef CONFIG_XEN diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 37806735f7224050f4a0d9f4786beb57bb8fd026..9af359f68356102a1524b76b363f0c2c62abaa02 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -253,6 +254,15 @@ static inline bool is_el1_permission_fault(unsigned long addr, unsigned int esr, return false; } +static bool is_pkvm_stage2_abort(unsigned int esr) +{ + /* + * S1PTW should only ever be set in ESR_EL1 if the pkvm hypervisor + * injected a stage-2 abort -- see host_inject_abort(). + */ + return is_pkvm_initialized() && (esr & ESR_ELx_S1PTW); +} + static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) @@ -264,6 +274,9 @@ static bool __kprobes is_spurious_el1_translation_fault(unsigned long addr, (esr & ESR_ELx_FSC_TYPE) != ESR_ELx_FSC_FAULT) return false; + if (is_pkvm_stage2_abort(esr)) + return false; + local_irq_save(flags); asm volatile("at s1e1r, %0" :: "r" (addr)); isb(); @@ -306,24 +319,11 @@ static void die_kernel_fault(const char *msg, unsigned long addr, static void report_tag_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - static bool reported; - bool is_write; - - if (READ_ONCE(reported)) - return; - - /* - * This is used for KASAN tests and assumes that no MTE faults - * happened before running the tests. - */ - if (mte_report_once()) - WRITE_ONCE(reported, true); - /* * SAS bits aren't set for all faults reported in EL1, so we can't * find out access size. */ - is_write = !!(esr & ESR_ELx_WNR); + bool is_write = !!(esr & ESR_ELx_WNR); kasan_report(addr, 0, is_write, regs->pc); } #else @@ -392,6 +392,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, msg = "read from unreadable memory"; } else if (addr < PAGE_SIZE) { msg = "NULL pointer dereference"; + } else if (is_pkvm_stage2_abort(esr)) { + msg = "access to hypervisor-protected memory"; } else { if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs)) return; @@ -570,6 +572,13 @@ static int __kprobes do_page_fault(unsigned long far, unsigned int esr, addr, esr, regs); } + if (is_pkvm_stage2_abort(esr)) { + if (!user_mode(regs)) + goto no_context; + arm64_force_sig_fault(SIGSEGV, SEGV_ACCERR, far, "stage-2 fault"); + return 0; + } + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); /* diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 58987a98e17986811a6fa9803da47d0199fc3534..54009d45b11e1a5674098c633370cd3015bca0c2 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -43,7 +43,7 @@ void __init arm64_hugetlb_cma_reserve(void) #ifdef CONFIG_ARM64_4K_PAGES order = PUD_SHIFT - PAGE_SHIFT; #else - order = CONT_PMD_SHIFT + PMD_SHIFT - PAGE_SHIFT; + order = CONT_PMD_SHIFT - PAGE_SHIFT; #endif /* * HugeTLB CMA reservation is required for gigantic diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 71a12660f7472eeb3d469281d5b539c4ff604edc..33d93f800734a88ea568961ebfd403e6a9c36070 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -62,6 +62,12 @@ EXPORT_SYMBOL(memstart_addr); */ phys_addr_t arm64_dma_phys_limit __ro_after_init; +/* + * Provide a run-time mean of disabling ZONE_DMA32 if it is enabled via + * CONFIG_ZONE_DMA32. + */ +static bool disable_dma32 __ro_after_init; + #ifdef CONFIG_KEXEC_CORE /* * reserve_crashkernel() - reserves memory for crash kernel @@ -207,7 +213,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit); + max_zone_pfns[ZONE_DMA32] = disable_dma32 ? 0 : PFN_DOWN(dma32_phys_limit); if (!arm64_dma_phys_limit) arm64_dma_phys_limit = dma32_phys_limit; #endif @@ -218,6 +224,18 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) free_area_init(max_zone_pfns); } +static int __init early_disable_dma32(char *buf) +{ + if (!buf) + return -EINVAL; + + if (!strcmp(buf, "on")) + disable_dma32 = true; + + return 0; +} +early_param("disable_dma32", early_disable_dma32); + int pfn_valid(unsigned long pfn) { phys_addr_t addr = pfn << PAGE_SHIFT; diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index b5e83c46b23e7cf9cf5c3883e67527eb90dc60cd..d4a0a24fcfe16c4ddefc72fad5f5aa21cc9c4940 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -9,13 +9,175 @@ * Copyright (C) 2012 ARM Ltd. */ +#define pr_fmt(fmt) "ioremap: " fmt + #include #include #include +#include #include +#include #include #include +#include + +struct ioremap_guard_ref { + refcount_t count; +}; + +static DEFINE_STATIC_KEY_FALSE(ioremap_guard_key); +static DEFINE_XARRAY(ioremap_guard_array); +static DEFINE_MUTEX(ioremap_guard_lock); + +static bool ioremap_guard; +static int __init ioremap_guard_setup(char *str) +{ + ioremap_guard = true; + + return 0; +} +early_param("ioremap_guard", ioremap_guard_setup); + +static void fixup_fixmap(void) +{ + pte_t *ptep = __get_fixmap_pte(FIX_EARLYCON_MEM_BASE); + + if (!ptep) + return; + + ioremap_phys_range_hook(__pte_to_phys(*ptep), PAGE_SIZE, + __pgprot(pte_val(*ptep) & PTE_ATTRINDX_MASK)); +} + +void kvm_init_ioremap_services(void) +{ + struct arm_smccc_res res; + + if (!ioremap_guard) + return; + + /* We need all the functions to be implemented */ + if (!kvm_arm_hyp_service_available(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO) || + !kvm_arm_hyp_service_available(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL) || + !kvm_arm_hyp_service_available(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP) || + !kvm_arm_hyp_service_available(ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP)) + return; + + arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID, + 0, 0, 0, &res); + if (res.a0 != PAGE_SIZE) + return; + + arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID, + &res); + if (res.a0 == SMCCC_RET_SUCCESS) { + static_branch_enable(&ioremap_guard_key); + fixup_fixmap(); + pr_info("Using KVM MMIO guard for ioremap\n"); + } else { + pr_warn("KVM MMIO guard registration failed (%ld)\n", res.a0); + } +} + +void ioremap_phys_range_hook(phys_addr_t phys_addr, size_t size, pgprot_t prot) +{ + if (!static_branch_unlikely(&ioremap_guard_key)) + return; + + if (pfn_valid(__phys_to_pfn(phys_addr))) + return; + + mutex_lock(&ioremap_guard_lock); + + while (size) { + u64 pfn = phys_addr >> PAGE_SHIFT; + struct ioremap_guard_ref *ref; + struct arm_smccc_res res; + + ref = xa_load(&ioremap_guard_array, pfn); + if (ref) { + refcount_inc(&ref->count); + goto next; + } + + /* + * It is acceptable for the allocation to fail, specially + * if trying to ioremap something very early on, like with + * earlycon, which happens long before kmem_cache_init. + * This page will be permanently accessible, similar to a + * saturated refcount. + */ + ref = kzalloc(sizeof(*ref), GFP_KERNEL); + if (ref) { + refcount_set(&ref->count, 1); + if (xa_err(xa_store(&ioremap_guard_array, pfn, ref, + GFP_KERNEL))) { + kfree(ref); + ref = NULL; + } + } + + arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID, + phys_addr, prot, &res); + if (res.a0 != SMCCC_RET_SUCCESS) { + pr_warn_ratelimited("Failed to register %llx\n", + phys_addr); + xa_erase(&ioremap_guard_array, pfn); + kfree(ref); + goto out; + } + + next: + size -= PAGE_SIZE; + phys_addr += PAGE_SIZE; + } +out: + mutex_unlock(&ioremap_guard_lock); +} + +void iounmap_phys_range_hook(phys_addr_t phys_addr, size_t size) +{ + if (!static_branch_unlikely(&ioremap_guard_key)) + return; + + VM_BUG_ON(phys_addr & ~PAGE_MASK || size & ~PAGE_MASK); + + mutex_lock(&ioremap_guard_lock); + + while (size) { + u64 pfn = phys_addr >> PAGE_SHIFT; + struct ioremap_guard_ref *ref; + struct arm_smccc_res res; + + ref = xa_load(&ioremap_guard_array, pfn); + if (!ref) { + pr_warn_ratelimited("%llx not tracked, left mapped\n", + phys_addr); + goto next; + } + + if (!refcount_dec_and_test(&ref->count)) + goto next; + + xa_erase(&ioremap_guard_array, pfn); + kfree(ref); + + arm_smccc_1_1_hvc(ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID, + phys_addr, &res); + if (res.a0 != SMCCC_RET_SUCCESS) { + pr_warn_ratelimited("Failed to unregister %llx\n", + phys_addr); + goto out; + } + + next: + size -= PAGE_SIZE; + phys_addr += PAGE_SIZE; + } +out: + mutex_unlock(&ioremap_guard_lock); +} static void __iomem *__ioremap_caller(phys_addr_t phys_addr, size_t size, pgprot_t prot, void *caller) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index d8e66c78440ec38a9a8d2e9304a0f01dfc3cbd00..dbe4101d038698b25cde858b472be808b3eeeb93 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -36,7 +36,7 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node) { void *p = memblock_alloc_try_nid(PAGE_SIZE, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_KASAN, node); + MEMBLOCK_ALLOC_NOLEAKTRACE, node); if (!p) panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n", __func__, PAGE_SIZE, PAGE_SIZE, node, @@ -49,7 +49,8 @@ static phys_addr_t __init kasan_alloc_raw_page(int node) { void *p = memblock_alloc_try_nid_raw(PAGE_SIZE, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), - MEMBLOCK_ALLOC_KASAN, node); + MEMBLOCK_ALLOC_NOLEAKTRACE, + node); if (!p) panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%llx\n", __func__, PAGE_SIZE, PAGE_SIZE, node, @@ -79,7 +80,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, phys_addr_t pmd_phys = early ? __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); - __pud_populate(pudp, pmd_phys, PMD_TYPE_TABLE); + __pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE); } return early ? pmd_offset_kimg(pudp, addr) : pmd_offset(pudp, addr); @@ -92,7 +93,7 @@ static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, phys_addr_t pud_phys = early ? __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); - __p4d_populate(p4dp, pud_phys, PMD_TYPE_TABLE); + __p4d_populate(p4dp, pud_phys, P4D_TYPE_TABLE); } return early ? pud_offset_kimg(p4dp, addr) : pud_offset(p4dp, addr); @@ -214,15 +215,18 @@ static void __init kasan_init_shadow(void) { u64 kimg_shadow_start, kimg_shadow_end; u64 mod_shadow_start, mod_shadow_end; + u64 vmalloc_shadow_end; phys_addr_t pa_start, pa_end; u64 i; - kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK; - kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end)); + kimg_shadow_start = (u64)kasan_mem_to_shadow(KERNEL_START) & PAGE_MASK; + kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END)); mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); + vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END); + /* * We are going to perform proper setup of shadow memory. * At first we should unmap early shadow (clear_pgds() call below). @@ -237,16 +241,22 @@ static void __init kasan_init_shadow(void) clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); kasan_map_populate(kimg_shadow_start, kimg_shadow_end, - early_pfn_to_nid(virt_to_pfn(lm_alias(_text)))); + early_pfn_to_nid(virt_to_pfn(lm_alias(KERNEL_START)))); kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); - kasan_populate_early_shadow((void *)kimg_shadow_end, - (void *)KASAN_SHADOW_END); - if (kimg_shadow_start > mod_shadow_end) - kasan_populate_early_shadow((void *)mod_shadow_end, - (void *)kimg_shadow_start); + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { + BUILD_BUG_ON(VMALLOC_START != MODULES_END); + kasan_populate_early_shadow((void *)vmalloc_shadow_end, + (void *)KASAN_SHADOW_END); + } else { + kasan_populate_early_shadow((void *)kimg_shadow_end, + (void *)KASAN_SHADOW_END); + if (kimg_shadow_start > mod_shadow_end) + kasan_populate_early_shadow((void *)mod_shadow_end, + (void *)kimg_shadow_start); + } for_each_mem_range(i, &pa_start, &pa_end) { void *start = (void *)__phys_to_virt(pa_start); @@ -284,7 +294,7 @@ void __init kasan_init(void) kasan_init_depth(); #if defined(CONFIG_KASAN_GENERIC) /* CONFIG_KASAN_SW_TAGS also requires kasan_init_sw_tags(). */ - pr_info("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized (generic)\n"); #endif } diff --git a/arch/arm64/mm/mem_encrypt.c b/arch/arm64/mm/mem_encrypt.c new file mode 100644 index 0000000000000000000000000000000000000000..ff75dd3200bc6d5fee272d94161a606950fe896e --- /dev/null +++ b/arch/arm64/mm/mem_encrypt.c @@ -0,0 +1,102 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Implementation of the memory encryption/decryption API. + * + * Amusingly, no crypto is actually performed. Rather, we call into the + * hypervisor component of KVM to expose pages selectively to the host + * for virtio "DMA" operations. In other words, "encrypted" pages are + * not accessible to the host, whereas "decrypted" pages are. + * + * Author: Will Deacon + */ +#include +#include +#include +#include +#include +#include + +#include + +static unsigned long memshare_granule_sz; + +bool mem_encrypt_active(void) +{ + return memshare_granule_sz; +} +EXPORT_SYMBOL(mem_encrypt_active); + +void kvm_init_memshare_services(void) +{ + int i; + struct arm_smccc_res res; + const u32 funcs[] = { + ARM_SMCCC_KVM_FUNC_HYP_MEMINFO, + ARM_SMCCC_KVM_FUNC_MEM_SHARE, + ARM_SMCCC_KVM_FUNC_MEM_UNSHARE, + }; + + for (i = 0; i < ARRAY_SIZE(funcs); ++i) { + if (!kvm_arm_hyp_service_available(funcs[i])) + return; + } + + arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID, + 0, 0, 0, &res); + if (res.a0 > PAGE_SIZE) /* Includes error codes */ + return; + + memshare_granule_sz = res.a0; +} + +static int arm_smccc_share_unshare_page(u32 func_id, phys_addr_t phys) +{ + phys_addr_t end = phys + PAGE_SIZE; + + while (phys < end) { + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(func_id, phys, 0, 0, &res); + if (res.a0 != SMCCC_RET_SUCCESS) + return -EPERM; + + phys += memshare_granule_sz; + } + + return 0; +} + +static int set_memory_xcrypted(u32 func_id, unsigned long start, int numpages) +{ + void *addr = (void *)start, *end = addr + numpages * PAGE_SIZE; + + while (addr < end) { + int err; + + err = arm_smccc_share_unshare_page(func_id, virt_to_phys(addr)); + if (err) + return err; + + addr += PAGE_SIZE; + } + + return 0; +} + +int set_memory_encrypted(unsigned long addr, int numpages) +{ + if (!memshare_granule_sz || WARN_ON(!PAGE_ALIGNED(addr))) + return 0; + + return set_memory_xcrypted(ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID, + addr, numpages); +} + +int set_memory_decrypted(unsigned long addr, int numpages) +{ + if (!memshare_granule_sz || WARN_ON(!PAGE_ALIGNED(addr))) + return 0; + + return set_memory_xcrypted(ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID, + addr, numpages); +} diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 57d4539a7d2ca9cecd5332b77a75e36f7361fa0c..52483efde97f2984d9c029ecb82b4a8888bf5268 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -94,7 +94,8 @@ static phys_addr_t __init early_pgtable_alloc(int shift) phys_addr_t phys; void *ptr; - phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, + MEMBLOCK_ALLOC_NOLEAKTRACE); if (!phys) panic("Failed to allocate page table page\n"); @@ -309,7 +310,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, phys_addr_t pud_phys; BUG_ON(!pgtable_alloc); pud_phys = pgtable_alloc(PUD_SHIFT); - __p4d_populate(p4dp, pud_phys, PUD_TYPE_TABLE); + __p4d_populate(p4dp, pud_phys, P4D_TYPE_TABLE); p4d = READ_ONCE(*p4dp); } BUG_ON(p4d_bad(p4d)); @@ -593,6 +594,8 @@ early_param("rodata", parse_rodata); #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { + int i; + pgprot_t prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); @@ -601,11 +604,15 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, - prot, __pgd_pgtable_alloc, 0); + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, + entry_tramp_text_size(), prot, + __pgd_pgtable_alloc, NO_BLOCK_MAPPINGS); /* Map both the text and data into the kernel page table */ - __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, prot); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern char __entry_tramp_data_start[]; @@ -1209,11 +1216,11 @@ void __init early_fixmap_init(void) pudp = pud_offset_kimg(p4dp, addr); } else { if (p4d_none(p4d)) - __p4d_populate(p4dp, __pa_symbol(bm_pud), PUD_TYPE_TABLE); + __p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE); pudp = fixmap_pud(addr); } if (pud_none(READ_ONCE(*pudp))) - __pud_populate(pudp, __pa_symbol(bm_pmd), PMD_TYPE_TABLE); + __pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE); pmdp = fixmap_pmd(addr); __pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE); @@ -1262,6 +1269,21 @@ void __set_fixmap(enum fixed_addresses idx, } } +pte_t *__get_fixmap_pte(enum fixed_addresses idx) +{ + unsigned long addr = __fix_to_virt(idx); + pte_t *ptep; + + BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); + + ptep = fixmap_pte(addr); + + if (!pte_valid(*ptep)) + return NULL; + + return ptep; +} + void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot) { const u64 dt_virt_base = __fix_to_virt(FIX_FDT); @@ -1501,6 +1523,11 @@ int arch_add_memory(int nid, u64 start, u64 size, if (ret) __remove_pgd_mapping(swapper_pg_dir, __phys_to_virt(start), size); + else { + max_pfn = PFN_UP(start + size); + max_low_pfn = max_pfn; + } + return ret; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 1b94f5b8265451f7160d158324230cb2c1c7f919..8cacfbc2932bd0c60eb6cf1a8696b2c50fa2eaeb 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -80,7 +80,7 @@ static int change_memory_common(unsigned long addr, int numpages, */ area = find_vm_area((void *)addr); if (!area || - end > (unsigned long)area->addr + area->size || + end > (unsigned long)kasan_reset_tag(area->addr) + area->size || !(area->flags & VM_ALLOC)) return -EINVAL; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 6e175dbbb474230edf0bc98dae93938e79b51d59..c188a9eb3ea66c7bc6c483ab650def3bb5c144c1 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -445,8 +445,7 @@ SYM_FUNC_START(__cpu_setup) mov x10, #MAIR_ATTR_NORMAL_TAGGED bfi x5, x10, #(8 * MT_NORMAL_TAGGED), #8 - /* initialize GCR_EL1: all non-zero tags excluded by default */ - mov x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK) + mov x10, #KERNEL_GCR_EL1 msr_s SYS_GCR_EL1, x10 /* diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 4b5dbde3ff11c05ca0effc2e4b1b4bff348b2bea..8e1d5a37e5c94e14c283031607014c889fc35818 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1137,12 +1137,19 @@ out: return prog; } +u64 bpf_jit_alloc_exec_limit(void) +{ + return BPF_JIT_REGION_SIZE; +} + void *bpf_jit_alloc_exec(unsigned long size) { - return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, - BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + void *p = __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, + BPF_JIT_REGION_END, GFP_KERNEL, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + /* Memory is intended to be executable, reset the pointer tag. */ + return kasan_reset_tag(p); } void bpf_jit_free_exec(void *addr) diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c index ab55e98ee8f62dc3f98f4d6529d7c05775670878..35318a635a5fae7b6de24723f6f43338bab85b67 100644 --- a/arch/csky/kernel/perf_callchain.c +++ b/arch/csky/kernel/perf_callchain.c @@ -86,10 +86,11 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); unsigned long fp = 0; /* C-SKY does not support virtualization. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + if (guest_cbs && guest_cbs->is_in_guest()) return; fp = regs->regs[4]; @@ -110,10 +111,11 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe fr; /* C-SKY does not support virtualization. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { pr_warn("C-SKY does not support perf in guest mode!"); return; } diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index a4cf2e2ac15acd4364e46e618fa82a02d6ff0697..ac07695bc5418adc12a616be55d8533894bc3305 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -98,7 +98,8 @@ static int gpr_set(struct task_struct *target, if (ret) return ret; - regs.sr = task_pt_regs(target)->sr; + /* BIT(0) of regs.sr is Condition Code/Carry bit */ + regs.sr = (regs.sr & BIT(0)) | (task_pt_regs(target)->sr & ~BIT(0)); #ifdef CONFIG_CPU_HAS_HILO regs.dcsr = task_pt_regs(target)->dcsr; #endif diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c index 8b068cf37447839152227f3df7629a06fcfa6ae9..0ca49b5e3dd378145dda5f3fbbf52b5587ed0275 100644 --- a/arch/csky/kernel/signal.c +++ b/arch/csky/kernel/signal.c @@ -52,10 +52,14 @@ static long restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) { int err = 0; + unsigned long sr = regs->sr; /* sc_pt_regs is structured the same as the start of pt_regs */ err |= __copy_from_user(regs, &sc->sc_pt_regs, sizeof(struct pt_regs)); + /* BIT(0) of regs->sr is Condition Code/Carry bit */ + regs->sr = (sr & ~1) | (regs->sr & 1); + /* Restore the floating-point state. */ err |= restore_fpu_state(sc); diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c index 041d0de6a1b6771de342bf1ed74292f93f87d872..1a8d7eaf1ff718a71788f644fa2aa5de105f1466 100644 --- a/arch/csky/kernel/smp.c +++ b/arch/csky/kernel/smp.c @@ -282,7 +282,6 @@ void csky_start_secondary(void) pr_info("CPU%u Online: %s...\n", cpu, __func__); local_irq_enable(); - preempt_disable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c index 959a917c989d85c0587f67a39e404899c929612a..22721468a04b28acee2f4795c165586f1d1c3315 100644 --- a/arch/csky/kernel/traps.c +++ b/arch/csky/kernel/traps.c @@ -211,7 +211,7 @@ asmlinkage void do_trap_illinsn(struct pt_regs *regs) asmlinkage void do_trap_fpe(struct pt_regs *regs) { -#ifdef CONFIG_CPU_HAS_FP +#ifdef CONFIG_CPU_HAS_FPU return fpu_fpe(regs); #else do_trap_error(regs, SIGILL, ILL_ILLOPC, regs->pc, @@ -221,7 +221,7 @@ asmlinkage void do_trap_fpe(struct pt_regs *regs) asmlinkage void do_trap_priv(struct pt_regs *regs) { -#ifdef CONFIG_CPU_HAS_FP +#ifdef CONFIG_CPU_HAS_FPU if (user_mode(regs) && fpu_libc_helper(regs)) return; #endif diff --git a/arch/hexagon/include/asm/timer-regs.h b/arch/hexagon/include/asm/timer-regs.h deleted file mode 100644 index ee6c61423a0589d005ec4d14c7c918a7c0e15d98..0000000000000000000000000000000000000000 --- a/arch/hexagon/include/asm/timer-regs.h +++ /dev/null @@ -1,26 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Timer support for Hexagon - * - * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved. - */ - -#ifndef _ASM_TIMER_REGS_H -#define _ASM_TIMER_REGS_H - -/* This stuff should go into a platform specific file */ -#define TCX0_CLK_RATE 19200 -#define TIMER_ENABLE 0 -#define TIMER_CLR_ON_MATCH 1 - -/* - * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until - * release 1.1, and then it's "adjustable" and probably not defaulted. - */ -#define RTOS_TIMER_INT 3 -#ifdef CONFIG_HEXAGON_COMET -#define RTOS_TIMER_REGS_ADDR 0xAB000000UL -#endif -#define SLEEP_CLK_RATE 32000 - -#endif diff --git a/arch/hexagon/include/asm/timex.h b/arch/hexagon/include/asm/timex.h index 8d4ec76fceb458ab3f21e4fa858fae264d1ef166..dfe69e118b2beb238aaac1700f2622530f077eee 100644 --- a/arch/hexagon/include/asm/timex.h +++ b/arch/hexagon/include/asm/timex.h @@ -7,11 +7,10 @@ #define _ASM_TIMEX_H #include -#include #include /* Using TCX0 as our clock. CLOCK_TICK_RATE scheduled to be removed. */ -#define CLOCK_TICK_RATE TCX0_CLK_RATE +#define CLOCK_TICK_RATE 19200 #define ARCH_HAS_READ_CURRENT_TIMER diff --git a/arch/hexagon/kernel/time.c b/arch/hexagon/kernel/time.c index feffe527ac92939311fe61ee1d247572752ce41d..febc95714d756de139f526ee0771c5da8773ed16 100644 --- a/arch/hexagon/kernel/time.c +++ b/arch/hexagon/kernel/time.c @@ -17,9 +17,10 @@ #include #include -#include #include +#define TIMER_ENABLE BIT(0) + /* * For the clocksource we need: * pcycle frequency (600MHz) @@ -33,6 +34,13 @@ cycles_t pcycle_freq_mhz; cycles_t thread_freq_mhz; cycles_t sleep_clk_freq; +/* + * 8x50 HDD Specs 5-8. Simulator co-sim not fixed until + * release 1.1, and then it's "adjustable" and probably not defaulted. + */ +#define RTOS_TIMER_INT 3 +#define RTOS_TIMER_REGS_ADDR 0xAB000000UL + static struct resource rtos_timer_resources[] = { { .start = RTOS_TIMER_REGS_ADDR, @@ -80,7 +88,7 @@ static int set_next_event(unsigned long delta, struct clock_event_device *evt) iowrite32(0, &rtos_timer->clear); iowrite32(delta, &rtos_timer->match); - iowrite32(1 << TIMER_ENABLE, &rtos_timer->enable); + iowrite32(TIMER_ENABLE, &rtos_timer->enable); return 0; } diff --git a/arch/hexagon/lib/io.c b/arch/hexagon/lib/io.c index d35d69d6588c4a182106f4951be34c4826f99211..55f75392857b00717bc4999bd491319743b8a5a2 100644 --- a/arch/hexagon/lib/io.c +++ b/arch/hexagon/lib/io.c @@ -27,6 +27,7 @@ void __raw_readsw(const void __iomem *addr, void *data, int len) *dst++ = *src; } +EXPORT_SYMBOL(__raw_readsw); /* * __raw_writesw - read words a short at a time @@ -47,6 +48,7 @@ void __raw_writesw(void __iomem *addr, const void *data, int len) } +EXPORT_SYMBOL(__raw_writesw); /* Pretty sure len is pre-adjusted for the length of the access already */ void __raw_readsl(const void __iomem *addr, void *data, int len) @@ -62,6 +64,7 @@ void __raw_readsl(const void __iomem *addr, void *data, int len) } +EXPORT_SYMBOL(__raw_readsl); void __raw_writesl(void __iomem *addr, const void *data, int len) { @@ -76,3 +79,4 @@ void __raw_writesl(void __iomem *addr, const void *data, int len) } +EXPORT_SYMBOL(__raw_writesl); diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug index 40ca23bd228d69a8e16a0aa060ef0ab3e2b909f6..2ce008e2d1644b84b5b99b51c2c5be8098d21d70 100644 --- a/arch/ia64/Kconfig.debug +++ b/arch/ia64/Kconfig.debug @@ -39,7 +39,7 @@ config DISABLE_VHPT config IA64_DEBUG_CMPXCHG bool "Turn on compare-and-exchange bug checking (slow!)" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && PRINTK help Selecting this option turns on bug checking for the IA-64 compare-and-exchange instructions. This is slow! Itaniums diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index a5636524af7693a5f7900c3e92f87a31d9e0323c..e2af6b172200ed40ddd2e7aee4de301c0a6c9feb 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -446,7 +446,8 @@ void __init acpi_numa_fixup(void) if (srat_num_cpus == 0) { node_set_online(0); node_cpuid[0].phys_id = hard_smp_processor_id(); - return; + slit_distance(0, 0) = LOCAL_DISTANCE; + goto out; } /* @@ -489,7 +490,7 @@ void __init acpi_numa_fixup(void) for (j = 0; j < MAX_NUMNODES; j++) slit_distance(i, j) = i == j ? LOCAL_DISTANCE : REMOTE_DISTANCE; - return; + goto out; } memset(numa_slit, -1, sizeof(numa_slit)); @@ -514,6 +515,8 @@ void __init acpi_numa_fixup(void) printk("\n"); } #endif +out: + node_possible_map = node_online_map; } #endif /* CONFIG_ACPI_NUMA */ diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index fc1ff8a4d7de6c0b6abf22bc4f4e97e9d47cb33a..ca4b4fa45aef51686c34557f96d338270785eab1 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -398,7 +398,8 @@ static void kretprobe_trampoline(void) int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - regs->cr_iip = __kretprobe_trampoline_handler(regs, kretprobe_trampoline, NULL); + regs->cr_iip = __kretprobe_trampoline_handler(regs, + dereference_function_descriptor(kretprobe_trampoline), NULL); /* * By returning a non-zero value, we are telling * kprobe_handler() that we don't want the post_handler @@ -414,7 +415,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, ri->fp = NULL; /* Replace the return addr with trampoline addr */ - regs->b0 = ((struct fnptr *)kretprobe_trampoline)->ip; + regs->b0 = (unsigned long)dereference_function_descriptor(kretprobe_trampoline); } /* Check the instruction in the slot is break */ @@ -918,14 +919,14 @@ static struct kprobe trampoline_p = { int __init arch_init_kprobes(void) { trampoline_p.addr = - (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip; + dereference_function_descriptor(kretprobe_trampoline); return register_kprobe(&trampoline_p); } int __kprobes arch_trampoline_kprobe(struct kprobe *p) { if (p->addr == - (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip) + dereference_function_descriptor(kretprobe_trampoline)) return 1; return 0; diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c index 093040f7e626a1ef79aaed1cdfa17f513808cc2a..0cad990385c04acb239d074a710727b1e8455bf0 100644 --- a/arch/ia64/kernel/smpboot.c +++ b/arch/ia64/kernel/smpboot.c @@ -440,7 +440,6 @@ start_secondary (void *unused) #endif efi_map_pal_code(); cpu_init(); - preempt_disable(); smp_callin(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index b96ed8b8a508999112a1d8065db035d90efaec0c..f2b90e45c9c2836e35f11d33e21eba758c22d7c9 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -361,3 +361,5 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/m68k/Kconfig.machine b/arch/m68k/Kconfig.machine index e161a4e1493b42a2064319c972d5e41eac9706f5..51a878803fb6dcb09f5a8722739d6fb7694b5d6c 100644 --- a/arch/m68k/Kconfig.machine +++ b/arch/m68k/Kconfig.machine @@ -191,6 +191,7 @@ config INIT_LCD config MEMORY_RESERVE int "Memory reservation (MiB)" depends on (UCSIMM || UCDIMM) + default 0 help Reserve certain memory regions on 68x328 based boards. diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index 46f91e0f6a0820715ac37fd7f3e320782725a8df..fd916844a683f9393d4b601d79785451726d75cc 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -447,7 +447,7 @@ static inline void save_fpu_state(struct sigcontext *sc, struct pt_regs *regs) if (CPU_IS_060 ? sc->sc_fpstate[2] : sc->sc_fpstate[0]) { fpu_version = sc->sc_fpstate[0]; - if (CPU_IS_020_OR_030 && + if (CPU_IS_020_OR_030 && !regs->stkadj && regs->vector >= (VEC_FPBRUC * 4) && regs->vector <= (VEC_FPNAN * 4)) { /* Clear pending exception in 68882 idle frame */ @@ -510,7 +510,7 @@ static inline int rt_save_fpu_state(struct ucontext __user *uc, struct pt_regs * if (!(CPU_IS_060 || CPU_IS_COLDFIRE)) context_size = fpstate[1]; fpu_version = fpstate[0]; - if (CPU_IS_020_OR_030 && + if (CPU_IS_020_OR_030 && !regs->stkadj && regs->vector >= (VEC_FPBRUC * 4) && regs->vector <= (VEC_FPNAN * 4)) { /* Clear pending exception in 68882 idle frame */ @@ -828,18 +828,24 @@ badframe: return 0; } +static inline struct pt_regs *rte_regs(struct pt_regs *regs) +{ + return (void *)regs + regs->stkadj; +} + static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, unsigned long mask) { + struct pt_regs *tregs = rte_regs(regs); sc->sc_mask = mask; sc->sc_usp = rdusp(); sc->sc_d0 = regs->d0; sc->sc_d1 = regs->d1; sc->sc_a0 = regs->a0; sc->sc_a1 = regs->a1; - sc->sc_sr = regs->sr; - sc->sc_pc = regs->pc; - sc->sc_formatvec = regs->format << 12 | regs->vector; + sc->sc_sr = tregs->sr; + sc->sc_pc = tregs->pc; + sc->sc_formatvec = tregs->format << 12 | tregs->vector; save_a5_state(sc, regs); save_fpu_state(sc, regs); } @@ -847,6 +853,7 @@ static void setup_sigcontext(struct sigcontext *sc, struct pt_regs *regs, static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs *regs) { struct switch_stack *sw = (struct switch_stack *)regs - 1; + struct pt_regs *tregs = rte_regs(regs); greg_t __user *gregs = uc->uc_mcontext.gregs; int err = 0; @@ -867,9 +874,9 @@ static inline int rt_setup_ucontext(struct ucontext __user *uc, struct pt_regs * err |= __put_user(sw->a5, &gregs[13]); err |= __put_user(sw->a6, &gregs[14]); err |= __put_user(rdusp(), &gregs[15]); - err |= __put_user(regs->pc, &gregs[16]); - err |= __put_user(regs->sr, &gregs[17]); - err |= __put_user((regs->format << 12) | regs->vector, &uc->uc_formatvec); + err |= __put_user(tregs->pc, &gregs[16]); + err |= __put_user(tregs->sr, &gregs[17]); + err |= __put_user((tregs->format << 12) | tregs->vector, &uc->uc_formatvec); err |= rt_save_fpu_state(uc, regs); return err; } @@ -886,13 +893,14 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct sigframe __user *frame; - int fsize = frame_extra_sizes(regs->format); + struct pt_regs *tregs = rte_regs(regs); + int fsize = frame_extra_sizes(tregs->format); struct sigcontext context; int err = 0, sig = ksig->sig; if (fsize < 0) { pr_debug("setup_frame: Unknown frame format %#x\n", - regs->format); + tregs->format); return -EFAULT; } @@ -903,7 +911,7 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, err |= __put_user(sig, &frame->sig); - err |= __put_user(regs->vector, &frame->code); + err |= __put_user(tregs->vector, &frame->code); err |= __put_user(&frame->sc, &frame->psc); if (_NSIG_WORDS > 1) @@ -929,34 +937,28 @@ static int setup_frame(struct ksignal *ksig, sigset_t *set, push_cache ((unsigned long) &frame->retcode); - /* - * Set up registers for signal handler. All the state we are about - * to destroy is successfully copied to sigframe. - */ - wrusp ((unsigned long) frame); - regs->pc = (unsigned long) ksig->ka.sa.sa_handler; - adjustformat(regs); - /* * This is subtle; if we build more than one sigframe, all but the * first one will see frame format 0 and have fsize == 0, so we won't * screw stkadj. */ - if (fsize) + if (fsize) { regs->stkadj = fsize; - - /* Prepare to skip over the extra stuff in the exception frame. */ - if (regs->stkadj) { - struct pt_regs *tregs = - (struct pt_regs *)((ulong)regs + regs->stkadj); + tregs = rte_regs(regs); pr_debug("Performing stackadjust=%04lx\n", regs->stkadj); - /* This must be copied with decreasing addresses to - handle overlaps. */ tregs->vector = 0; tregs->format = 0; - tregs->pc = regs->pc; tregs->sr = regs->sr; } + + /* + * Set up registers for signal handler. All the state we are about + * to destroy is successfully copied to sigframe. + */ + wrusp ((unsigned long) frame); + tregs->pc = (unsigned long) ksig->ka.sa.sa_handler; + adjustformat(regs); + return 0; } @@ -964,7 +966,8 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - int fsize = frame_extra_sizes(regs->format); + struct pt_regs *tregs = rte_regs(regs); + int fsize = frame_extra_sizes(tregs->format); int err = 0, sig = ksig->sig; if (fsize < 0) { @@ -1014,34 +1017,27 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, push_cache ((unsigned long) &frame->retcode); - /* - * Set up registers for signal handler. All the state we are about - * to destroy is successfully copied to sigframe. - */ - wrusp ((unsigned long) frame); - regs->pc = (unsigned long) ksig->ka.sa.sa_handler; - adjustformat(regs); - /* * This is subtle; if we build more than one sigframe, all but the * first one will see frame format 0 and have fsize == 0, so we won't * screw stkadj. */ - if (fsize) + if (fsize) { regs->stkadj = fsize; - - /* Prepare to skip over the extra stuff in the exception frame. */ - if (regs->stkadj) { - struct pt_regs *tregs = - (struct pt_regs *)((ulong)regs + regs->stkadj); + tregs = rte_regs(regs); pr_debug("Performing stackadjust=%04lx\n", regs->stkadj); - /* This must be copied with decreasing addresses to - handle overlaps. */ tregs->vector = 0; tregs->format = 0; - tregs->pc = regs->pc; tregs->sr = regs->sr; } + + /* + * Set up registers for signal handler. All the state we are about + * to destroy is successfully copied to sigframe. + */ + wrusp ((unsigned long) frame); + tregs->pc = (unsigned long) ksig->ka.sa.sa_handler; + adjustformat(regs); return 0; } diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 625fb6d3284240e0d8bc96556c0b3ff92b350540..a2715bc6d4091acc05f117acc5505b1ae38a2173 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -440,3 +440,5 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index aae729c95cf99d23e056c510651c6f3126d4ceff..5b9f1166dfff33243e9d13bb0d4d5dfcdd86e5aa 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -446,3 +446,5 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 1a63f592034ebda5a6cbe5ff7e10eaa877a265f4..3442bdd4314cbac824ae468782de331326aa8a86 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -320,6 +320,9 @@ config BCM63XX select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN select SYS_HAS_EARLY_PRINTK + select SYS_HAS_CPU_BMIPS32_3300 + select SYS_HAS_CPU_BMIPS4350 + select SYS_HAS_CPU_BMIPS4380 select SWAP_IO_SPACE select GPIOLIB select MIPS_L1_CACHE_SHIFT_4 @@ -1380,6 +1383,7 @@ config CPU_LOONGSON64 select MIPS_ASID_BITS_VARIABLE select MIPS_PGD_C0_CONTEXT select MIPS_L1_CACHE_SHIFT_6 + select MIPS_FP_SUPPORT select GPIOLIB select SWIOTLB select HAVE_KVM @@ -1981,6 +1985,10 @@ config SYS_HAS_CPU_MIPS64_R1 config SYS_HAS_CPU_MIPS64_R2 bool +config SYS_HAS_CPU_MIPS64_R5 + bool + select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT + config SYS_HAS_CPU_MIPS64_R6 bool select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT @@ -2142,7 +2150,7 @@ config CPU_SUPPORTS_ADDRWINCFG bool config CPU_SUPPORTS_HUGEPAGES bool - depends on !(32BIT && (ARCH_PHYS_ADDR_T_64BIT || EVA)) + depends on !(32BIT && (PHYS_ADDR_T_64BIT || EVA)) config MIPS_PGD_C0_CONTEXT bool default y if 64BIT && (CPU_MIPSR2 || CPU_MIPSR6) && !CPU_XLP @@ -3185,7 +3193,7 @@ config STACKTRACE_SUPPORT config PGTABLE_LEVELS int default 4 if PAGE_SIZE_4KB && MIPS_VA_BITS_48 - default 3 if 64BIT && !PAGE_SIZE_64KB + default 3 if 64BIT && (!PAGE_SIZE_64KB || MIPS_VA_BITS_48) default 2 config MIPS_AUTO_PFN_OFFSET diff --git a/arch/mips/bcm63xx/clk.c b/arch/mips/bcm63xx/clk.c index 164115944a7fd6a76c9f9aad84c04f970156446a..dcfa0ea912fe1633a10d0acbb87fe1cb85612ee4 100644 --- a/arch/mips/bcm63xx/clk.c +++ b/arch/mips/bcm63xx/clk.c @@ -381,6 +381,18 @@ void clk_disable(struct clk *clk) EXPORT_SYMBOL(clk_disable); +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + unsigned long clk_get_rate(struct clk *clk) { if (!clk) diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index d56e9b9d2e434d72ec1c8ed398dd70dac8e7b6e0..a994022e32c9f0ee0ed4f91612d5fbd26071e218 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -328,6 +328,7 @@ static int __init octeon_ehci_device_init(void) pd->dev.platform_data = &octeon_ehci_pdata; octeon_ehci_hw_start(&pd->dev); + put_device(&pd->dev); return ret; } @@ -391,6 +392,7 @@ static int __init octeon_ohci_device_init(void) pd->dev.platform_data = &octeon_ohci_pdata; octeon_ohci_hw_start(&pd->dev); + put_device(&pd->dev); return ret; } diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c index 950e6c6e862973b421393f9afcc02f9fdb1f3f0f..fa87e5aa1811d8876713e048d4ccc2fd8e89fd57 100644 --- a/arch/mips/cavium-octeon/octeon-usb.c +++ b/arch/mips/cavium-octeon/octeon-usb.c @@ -544,6 +544,7 @@ static int __init dwc3_octeon_device_init(void) devm_iounmap(&pdev->dev, base); devm_release_mem_region(&pdev->dev, res->start, resource_size(res)); + put_device(&pdev->dev); } } while (node != NULL); diff --git a/arch/mips/generic/yamon-dt.c b/arch/mips/generic/yamon-dt.c index a3aa22c77cadc2507eaff4b067cd34ff62ab1618..a07a5edbcda780f7d22d51833356ee6109da8404 100644 --- a/arch/mips/generic/yamon-dt.c +++ b/arch/mips/generic/yamon-dt.c @@ -75,7 +75,7 @@ static unsigned int __init gen_fdt_mem_array( __init int yamon_dt_append_memory(void *fdt, const struct yamon_mem_region *regions) { - unsigned long phys_memsize, memsize; + unsigned long phys_memsize = 0, memsize; __be32 mem_array[2 * MAX_MEM_ARRAY_ENTRIES]; unsigned int mem_entries; int i, err, mem_off; diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index ed8f3f3c4304a2ec9a8d9e3d155735da3bf88744..3e9c41f6916591ece50ec7adeda9be88b8b06235 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -249,6 +249,7 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, /* Load 64 bits from ptr */ " " __SYNC(full, loongson3_war) " \n" "1: lld %L0, %3 # __cmpxchg64 \n" + " .set pop \n" /* * Split the 64 bit value we loaded into the 2 registers that hold the * ret variable. @@ -276,12 +277,14 @@ static inline unsigned long __cmpxchg64(volatile void *ptr, " or %L1, %L1, $at \n" " .set at \n" # endif + " .set push \n" + " .set " MIPS_ISA_ARCH_LEVEL " \n" /* Attempt to store new at ptr */ " scd %L1, %2 \n" /* If we failed, loop! */ "\t" __SC_BEQZ "%L1, 1b \n" - " .set pop \n" "2: " __SYNC(full, loongson3_war) " \n" + " .set pop \n" : "=&r"(ret), "=&r"(tmp), "=" GCC_OFF_SMALL_ASM() (*(unsigned long long *)ptr) diff --git a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h index 87a5bfbf8cfe9b925c351f42de612be1b8dcaf82..28572ddfb004a2b384d4ac33ebb865bd4c4e413e 100644 --- a/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h +++ b/arch/mips/include/asm/mach-loongson64/kernel-entry-init.h @@ -36,7 +36,7 @@ nop /* Loongson-3A R2/R3 */ andi t0, (PRID_IMP_MASK | PRID_REV_MASK) - slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) + slti t0, t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) bnez t0, 2f nop 1: @@ -71,7 +71,7 @@ nop /* Loongson-3A R2/R3 */ andi t0, (PRID_IMP_MASK | PRID_REV_MASK) - slti t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) + slti t0, t0, (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) bnez t0, 2f nop 1: diff --git a/arch/mips/include/asm/mips-cm.h b/arch/mips/include/asm/mips-cm.h index aeae2effa123d7e27fcd335f36e35273aa66663e..23c67c0871b17c91969b0c04169cb90f38bfdd86 100644 --- a/arch/mips/include/asm/mips-cm.h +++ b/arch/mips/include/asm/mips-cm.h @@ -11,6 +11,7 @@ #ifndef __MIPS_ASM_MIPS_CM_H__ #define __MIPS_ASM_MIPS_CM_H__ +#include #include #include @@ -153,8 +154,8 @@ GCR_ACCESSOR_RO(32, 0x030, rev) #define CM_GCR_REV_MINOR GENMASK(7, 0) #define CM_ENCODE_REV(major, minor) \ - (((major) << __ffs(CM_GCR_REV_MAJOR)) | \ - ((minor) << __ffs(CM_GCR_REV_MINOR))) + (FIELD_PREP(CM_GCR_REV_MAJOR, major) | \ + FIELD_PREP(CM_GCR_REV_MINOR, minor)) #define CM_REV_CM2 CM_ENCODE_REV(6, 0) #define CM_REV_CM2_5 CM_ENCODE_REV(7, 0) @@ -362,10 +363,10 @@ static inline int mips_cm_revision(void) static inline unsigned int mips_cm_max_vp_width(void) { extern int smp_num_siblings; - uint32_t cfg; if (mips_cm_revision() >= CM_REV_CM3) - return read_gcr_sys_config2() & CM_GCR_SYS_CONFIG2_MAXVPW; + return FIELD_GET(CM_GCR_SYS_CONFIG2_MAXVPW, + read_gcr_sys_config2()); if (mips_cm_present()) { /* @@ -373,8 +374,7 @@ static inline unsigned int mips_cm_max_vp_width(void) * number of VP(E)s, and if that ever changes then this will * need revisiting. */ - cfg = read_gcr_cl_config() & CM_GCR_Cx_CONFIG_PVPE; - return (cfg >> __ffs(CM_GCR_Cx_CONFIG_PVPE)) + 1; + return FIELD_GET(CM_GCR_Cx_CONFIG_PVPE, read_gcr_cl_config()) + 1; } if (IS_ENABLED(CONFIG_SMP)) diff --git a/arch/mips/include/asm/octeon/cvmx-bootinfo.h b/arch/mips/include/asm/octeon/cvmx-bootinfo.h index c114a7ba0badd28ddf9dfa41d600774e03d943ec..e77e8b7c00838a1fc1ff30176ac6a7d5d01386dd 100644 --- a/arch/mips/include/asm/octeon/cvmx-bootinfo.h +++ b/arch/mips/include/asm/octeon/cvmx-bootinfo.h @@ -317,7 +317,7 @@ enum cvmx_chip_types_enum { /* Functions to return string based on type */ #define ENUM_BRD_TYPE_CASE(x) \ - case x: return(#x + 16); /* Skip CVMX_BOARD_TYPE_ */ + case x: return (&#x[16]); /* Skip CVMX_BOARD_TYPE_ */ static inline const char *cvmx_board_type_to_string(enum cvmx_board_types_enum type) { @@ -408,7 +408,7 @@ static inline const char *cvmx_board_type_to_string(enum } #define ENUM_CHIP_TYPE_CASE(x) \ - case x: return(#x + 15); /* Skip CVMX_CHIP_TYPE */ + case x: return (&#x[15]); /* Skip CVMX_CHIP_TYPE */ static inline const char *cvmx_chip_type_to_string(enum cvmx_chip_types_enum type) { diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index 067cb3eb16141650cf64085d07cf8ce65e39fd45..d120201910acffceba89853bcf9e9f8a69e0d328 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1721,8 +1721,6 @@ static inline void decode_cpucfg(struct cpuinfo_mips *c) static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { - decode_configs(c); - /* All Loongson processors covered here define ExcCode 16 as GSExc. */ c->options |= MIPS_CPU_GSEXCEX; @@ -1783,6 +1781,8 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) panic("Unknown Loongson Processor ID!"); break; } + + decode_configs(c); } #else static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { } diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index f60af512c8773929e2ec55f07bb6c60d88ec6bf8..72c8374a39002ed3abe1d3498c2ecd6303874961 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -221,8 +221,7 @@ static void mips_cm_probe_l2sync(void) phys_addr_t addr; /* L2-only sync was introduced with CM major revision 6 */ - major_rev = (read_gcr_rev() & CM_GCR_REV_MAJOR) >> - __ffs(CM_GCR_REV_MAJOR); + major_rev = FIELD_GET(CM_GCR_REV_MAJOR, read_gcr_rev()); if (major_rev < 6) return; @@ -305,13 +304,13 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core, preempt_disable(); if (cm_rev >= CM_REV_CM3) { - val = core << __ffs(CM3_GCR_Cx_OTHER_CORE); - val |= vp << __ffs(CM3_GCR_Cx_OTHER_VP); + val = FIELD_PREP(CM3_GCR_Cx_OTHER_CORE, core) | + FIELD_PREP(CM3_GCR_Cx_OTHER_VP, vp); if (cm_rev >= CM_REV_CM3_5) { val |= CM_GCR_Cx_OTHER_CLUSTER_EN; - val |= cluster << __ffs(CM_GCR_Cx_OTHER_CLUSTER); - val |= block << __ffs(CM_GCR_Cx_OTHER_BLOCK); + val |= FIELD_PREP(CM_GCR_Cx_OTHER_CLUSTER, cluster); + val |= FIELD_PREP(CM_GCR_Cx_OTHER_BLOCK, block); } else { WARN_ON(cluster != 0); WARN_ON(block != CM_GCR_Cx_OTHER_BLOCK_LOCAL); @@ -341,7 +340,7 @@ void mips_cm_lock_other(unsigned int cluster, unsigned int core, spin_lock_irqsave(&per_cpu(cm_core_lock, curr_core), per_cpu(cm_core_lock_flags, curr_core)); - val = core << __ffs(CM_GCR_Cx_OTHER_CORENUM); + val = FIELD_PREP(CM_GCR_Cx_OTHER_CORENUM, core); } write_gcr_cl_other(val); @@ -385,8 +384,8 @@ void mips_cm_error_report(void) cm_other = read_gcr_error_mult(); if (revision < CM_REV_CM3) { /* CM2 */ - cause = cm_error >> __ffs(CM_GCR_ERROR_CAUSE_ERRTYPE); - ocause = cm_other >> __ffs(CM_GCR_ERROR_MULT_ERR2ND); + cause = FIELD_GET(CM_GCR_ERROR_CAUSE_ERRTYPE, cm_error); + ocause = FIELD_GET(CM_GCR_ERROR_MULT_ERR2ND, cm_other); if (!cause) return; @@ -444,8 +443,8 @@ void mips_cm_error_report(void) ulong core_id_bits, vp_id_bits, cmd_bits, cmd_group_bits; ulong cm3_cca_bits, mcp_bits, cm3_tr_bits, sched_bit; - cause = cm_error >> __ffs64(CM3_GCR_ERROR_CAUSE_ERRTYPE); - ocause = cm_other >> __ffs(CM_GCR_ERROR_MULT_ERR2ND); + cause = FIELD_GET(CM3_GCR_ERROR_CAUSE_ERRTYPE, cm_error); + ocause = FIELD_GET(CM_GCR_ERROR_MULT_ERR2ND, cm_other); if (!cause) return; diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index 12e58053544fca151e1311006c340c4fd46826f2..cbf6db98cfb38ac3e43364eef2cb4bf0d6455b39 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -29,8 +29,8 @@ #define EX2(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,bad_stack; \ - PTR 9b+4,bad_stack; \ + PTR 9b,fault; \ + PTR 9b+4,fault; \ .previous .set mips1 diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 48d84d5fcc36157d8a47f7a82183272bd2cfd568..14db66dbcdad9254f51aa28ea3e3e3d4f95e828f 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -348,10 +348,12 @@ asmlinkage void start_secondary(void) */ calibrate_delay(); - preempt_disable(); cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; + set_cpu_sibling_map(cpu); + set_cpu_core_map(cpu); + cpumask_set_cpu(cpu, &cpu_coherent_mask); notify_cpu_starting(cpu); @@ -363,9 +365,6 @@ asmlinkage void start_secondary(void) /* The CPU is running and counters synchronised, now mark it online */ set_cpu_online(cpu, true); - set_cpu_sibling_map(cpu); - set_cpu_core_map(cpu); - calculate_cpu_foreign_map(); /* diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index 2afa3eef486a92e0536e98046202bc0fa9b89217..5512cd586e6e89cf7897536f7dee78400b21c01e 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -240,12 +240,3 @@ SYSCALL_DEFINE3(cachectl, char *, addr, int, nbytes, int, op) { return -ENOSYS; } - -/* - * If we ever come here the user sp is bad. Zap the process right away. - * Due to the bad stack signaling wouldn't work. - */ -asmlinkage void bad_stack(void) -{ - do_exit(SIGSEGV); -} diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 32817c954435d7c7dba4a126b35054f18ae0d9ee..2ee36d91dbf08c52b2ab1d5d25f521a8213548d0 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -379,3 +379,5 @@ 438 n32 pidfd_getfd sys_pidfd_getfd 439 n32 faccessat2 sys_faccessat2 440 n32 process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 n32 process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 9e4ea3c31b1ce026fdad266866b60be6426d882b..d7ee167969e72ca5305199b10c58ea948cefb0d1 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -355,3 +355,5 @@ 438 n64 pidfd_getfd sys_pidfd_getfd 439 n64 faccessat2 sys_faccessat2 440 n64 process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 n64 process_mrelease sys_process_mrelease diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 29f5f28cf5cea83d6119637ea57d40ad6abe4d40..03f0ff8f841c7cbc6a090a7fdab36dc1f3022147 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -428,3 +428,5 @@ 438 o32 pidfd_getfd sys_pidfd_getfd 439 o32 faccessat2 sys_faccessat2 440 o32 process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 o32 process_mrelease sys_process_mrelease diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c index dd819e31fcbbfa3f51c6e29867082955e0bfe8af..7a623684d9b5ed415e167af74e0b9af22ea5d74c 100644 --- a/arch/mips/lantiq/clk.c +++ b/arch/mips/lantiq/clk.c @@ -158,6 +158,18 @@ void clk_deactivate(struct clk *clk) } EXPORT_SYMBOL(clk_deactivate); +struct clk *clk_get_parent(struct clk *clk) +{ + return NULL; +} +EXPORT_SYMBOL(clk_get_parent); + +int clk_set_parent(struct clk *clk, struct clk *parent) +{ + return 0; +} +EXPORT_SYMBOL(clk_set_parent); + static inline u32 get_counter_resolution(void) { u32 res; diff --git a/arch/mips/lantiq/xway/dma.c b/arch/mips/lantiq/xway/dma.c index aeb1b989cd4ee02f5fbe5d3c5aad665c68be17c8..ab13e257132af7b08c8a01d98057b355dc1df3f3 100644 --- a/arch/mips/lantiq/xway/dma.c +++ b/arch/mips/lantiq/xway/dma.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -29,6 +30,7 @@ #define LTQ_DMA_PCTRL 0x44 #define LTQ_DMA_IRNEN 0xf4 +#define DMA_ID_CHNR GENMASK(26, 20) /* channel number */ #define DMA_DESCPT BIT(3) /* descriptor complete irq */ #define DMA_TX BIT(8) /* TX channel direction */ #define DMA_CHAN_ON BIT(0) /* channel on / off bit */ @@ -38,8 +40,11 @@ #define DMA_IRQ_ACK 0x7e /* IRQ status register */ #define DMA_POLL BIT(31) /* turn on channel polling */ #define DMA_CLK_DIV4 BIT(6) /* polling clock divider */ -#define DMA_2W_BURST BIT(1) /* 2 word burst length */ -#define DMA_MAX_CHANNEL 20 /* the soc has 20 channels */ +#define DMA_PCTRL_2W_BURST 0x1 /* 2 word burst length */ +#define DMA_PCTRL_4W_BURST 0x2 /* 4 word burst length */ +#define DMA_PCTRL_8W_BURST 0x3 /* 8 word burst length */ +#define DMA_TX_BURST_SHIFT 4 /* tx burst shift */ +#define DMA_RX_BURST_SHIFT 2 /* rx burst shift */ #define DMA_ETOP_ENDIANNESS (0xf << 8) /* endianness swap etop channels */ #define DMA_WEIGHT (BIT(17) | BIT(16)) /* default channel wheight */ @@ -190,7 +195,8 @@ ltq_dma_init_port(int p) break; case DMA_PORT_DEU: - ltq_dma_w32((DMA_2W_BURST << 4) | (DMA_2W_BURST << 2), + ltq_dma_w32((DMA_PCTRL_2W_BURST << DMA_TX_BURST_SHIFT) | + (DMA_PCTRL_2W_BURST << DMA_RX_BURST_SHIFT), LTQ_DMA_PCTRL); break; @@ -205,7 +211,7 @@ ltq_dma_init(struct platform_device *pdev) { struct clk *clk; struct resource *res; - unsigned id; + unsigned int id, nchannels; int i; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -221,21 +227,24 @@ ltq_dma_init(struct platform_device *pdev) clk_enable(clk); ltq_dma_w32_mask(0, DMA_RESET, LTQ_DMA_CTRL); + usleep_range(1, 10); + /* disable all interrupts */ ltq_dma_w32(0, LTQ_DMA_IRNEN); /* reset/configure each channel */ - for (i = 0; i < DMA_MAX_CHANNEL; i++) { + id = ltq_dma_r32(LTQ_DMA_ID); + nchannels = ((id & DMA_ID_CHNR) >> 20); + for (i = 0; i < nchannels; i++) { ltq_dma_w32(i, LTQ_DMA_CS); ltq_dma_w32(DMA_CHAN_RST, LTQ_DMA_CCTRL); ltq_dma_w32(DMA_POLL | DMA_CLK_DIV4, LTQ_DMA_CPOLL); ltq_dma_w32_mask(DMA_CHAN_ON, 0, LTQ_DMA_CCTRL); } - id = ltq_dma_r32(LTQ_DMA_ID); dev_info(&pdev->dev, "Init done - hw rev: %X, ports: %d, channels: %d\n", - id & 0x1f, (id >> 16) & 0xf, id >> 20); + id & 0x1f, (id >> 16) & 0xf, nchannels); return 0; } diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index e2354e128d9a05a2514b3ea28b790005bbda9348..3e660d6d3c2bd5b3f5dd511c08b86ea811b11777 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -13,7 +13,7 @@ cflags-$(CONFIG_CPU_LOONGSON64) += -Wa,--trap # can't easily be used safely within the kbuild framework. # ifeq ($(call cc-ifversion, -ge, 0409, y), y) - ifeq ($(call ld-ifversion, -ge, 225000000, y), y) + ifeq ($(call ld-ifversion, -ge, 22500, y), y) cflags-$(CONFIG_CPU_LOONGSON64) += \ $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64) else diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 0af88622c619253d1a5284739c709d24af32f9a7..cb6d22439f71b0e3460b5e1b40bad9459f33bc71 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -662,6 +662,11 @@ static void build_epilogue(struct jit_ctx *ctx) ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \ func##_positive) +static bool is_bad_offset(int b_off) +{ + return b_off > 0x1ffff || b_off < -0x20000; +} + static int build_body(struct jit_ctx *ctx) { const struct bpf_prog *prog = ctx->skf; @@ -728,7 +733,10 @@ load_common: /* Load return register on DS for failures */ emit_reg_move(r_ret, r_zero, ctx); /* Return with error */ - emit_b(b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); emit_nop(ctx); break; case BPF_LD | BPF_W | BPF_IND: @@ -775,8 +783,10 @@ load_ind: emit_jalr(MIPS_R_RA, r_s0, ctx); emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */ /* Check the error value */ - emit_bcond(MIPS_COND_NE, r_ret, 0, - b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx); emit_reg_move(r_ret, r_zero, ctx); /* We are good */ /* X <- P[1:K] & 0xf */ @@ -855,8 +865,10 @@ load_ind: /* A /= X */ ctx->flags |= SEEN_X | SEEN_A; /* Check if r_X is zero */ - emit_bcond(MIPS_COND_EQ, r_X, r_zero, - b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); emit_load_imm(r_ret, 0, ctx); /* delay slot */ emit_div(r_A, r_X, ctx); break; @@ -864,8 +876,10 @@ load_ind: /* A %= X */ ctx->flags |= SEEN_X | SEEN_A; /* Check if r_X is zero */ - emit_bcond(MIPS_COND_EQ, r_X, r_zero, - b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx); emit_load_imm(r_ret, 0, ctx); /* delay slot */ emit_mod(r_A, r_X, ctx); break; @@ -926,7 +940,10 @@ load_ind: break; case BPF_JMP | BPF_JA: /* pc += K */ - emit_b(b_imm(i + k + 1, ctx), ctx); + b_off = b_imm(i + k + 1, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); emit_nop(ctx); break; case BPF_JMP | BPF_JEQ | BPF_K: @@ -1056,12 +1073,16 @@ jmp_cmp: break; case BPF_RET | BPF_A: ctx->flags |= SEEN_A; - if (i != prog->len - 1) + if (i != prog->len - 1) { /* * If this is not the last instruction * then jump to the epilogue */ - emit_b(b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); + } emit_reg_move(r_ret, r_A, ctx); /* delay slot */ break; case BPF_RET | BPF_K: @@ -1075,7 +1096,10 @@ jmp_cmp: * If this is not the last instruction * then jump to the epilogue */ - emit_b(b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_b(b_off, ctx); emit_nop(ctx); } break; @@ -1133,8 +1157,10 @@ jmp_cmp: /* Load *dev pointer */ emit_load_ptr(r_s0, r_skb, off, ctx); /* error (0) in the delay slot */ - emit_bcond(MIPS_COND_EQ, r_s0, r_zero, - b_imm(prog->len, ctx), ctx); + b_off = b_imm(prog->len, ctx); + if (is_bad_offset(b_off)) + return -E2BIG; + emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx); emit_reg_move(r_ret, r_zero, ctx); if (code == (BPF_ANC | SKF_AD_IFINDEX)) { BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4); @@ -1244,7 +1270,10 @@ void bpf_jit_compile(struct bpf_prog *fp) /* Generate the actual JIT code */ build_prologue(&ctx); - build_body(&ctx); + if (build_body(&ctx)) { + module_memfree(ctx.target); + goto out; + } build_epilogue(&ctx); /* Update the icache */ diff --git a/arch/mips/sni/time.c b/arch/mips/sni/time.c index 240bb68ec247818bfd1d9d17f7b7025a4ae8a267..ff3ba7e7789017ecc0aac708763297249757bfc4 100644 --- a/arch/mips/sni/time.c +++ b/arch/mips/sni/time.c @@ -18,14 +18,14 @@ static int a20r_set_periodic(struct clock_event_device *evt) { *(volatile u8 *)(A20R_PT_CLOCK_BASE + 12) = 0x34; wmb(); - *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV; + *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV & 0xff; wmb(); *(volatile u8 *)(A20R_PT_CLOCK_BASE + 0) = SNI_COUNTER0_DIV >> 8; wmb(); *(volatile u8 *)(A20R_PT_CLOCK_BASE + 12) = 0xb4; wmb(); - *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV; + *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV & 0xff; wmb(); *(volatile u8 *)(A20R_PT_CLOCK_BASE + 8) = SNI_COUNTER2_DIV >> 8; wmb(); diff --git a/arch/mips/vdso/Kconfig b/arch/mips/vdso/Kconfig index 7aec721398d59e4b5d271d98d7de0a9a43a4a039..a665f6108cb5a3c477fb0f2ce06e081c92055489 100644 --- a/arch/mips/vdso/Kconfig +++ b/arch/mips/vdso/Kconfig @@ -12,7 +12,7 @@ # the lack of relocations. As such, we disable the VDSO for microMIPS builds. config MIPS_LD_CAN_LINK_VDSO - def_bool LD_VERSION >= 225000000 || LD_IS_LLD + def_bool LD_VERSION >= 22500 || LD_IS_LLD config MIPS_DISABLE_VDSO def_bool CPU_MICROMIPS || (!CPU_MIPSR6 && !MIPS_LD_CAN_LINK_VDSO) diff --git a/arch/nds32/kernel/perf_event_cpu.c b/arch/nds32/kernel/perf_event_cpu.c index 0ce6f9f307e6ad34188ced46a9f8f5c71fc1c564..f3879196078133d6645f667d0b900cfc41deb1f5 100644 --- a/arch/nds32/kernel/perf_event_cpu.c +++ b/arch/nds32/kernel/perf_event_cpu.c @@ -1363,6 +1363,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); unsigned long fp = 0; unsigned long gp = 0; unsigned long lp = 0; @@ -1371,7 +1372,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, leaf_fp = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -1479,9 +1480,10 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stackframe fr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* We don't support guest os callchain now */ return; } @@ -1493,20 +1495,23 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, unsigned long perf_instruction_pointer(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + /* However, NDS32 does not support virtualization */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return instruction_pointer(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; /* However, NDS32 does not support virtualization */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/nios2/include/asm/irqflags.h b/arch/nios2/include/asm/irqflags.h index b3ec3e510706d5560e9513b53f1576d7f3cfb6d5..25acf27862f9155a7bff299e1544e22943b07a58 100644 --- a/arch/nios2/include/asm/irqflags.h +++ b/arch/nios2/include/asm/irqflags.h @@ -9,7 +9,7 @@ static inline unsigned long arch_local_save_flags(void) { - return RDCTL(CTL_STATUS); + return RDCTL(CTL_FSTATUS); } /* @@ -18,7 +18,7 @@ static inline unsigned long arch_local_save_flags(void) */ static inline void arch_local_irq_restore(unsigned long flags) { - WRCTL(CTL_STATUS, flags); + WRCTL(CTL_FSTATUS, flags); } static inline void arch_local_irq_disable(void) diff --git a/arch/nios2/include/asm/registers.h b/arch/nios2/include/asm/registers.h index 183c720e454d915ad4690aa4e9184e4bf8f17270..95b67dd16f818866fb466ec27830371f3e2fcd00 100644 --- a/arch/nios2/include/asm/registers.h +++ b/arch/nios2/include/asm/registers.h @@ -11,7 +11,7 @@ #endif /* control register numbers */ -#define CTL_STATUS 0 +#define CTL_FSTATUS 0 #define CTL_ESTATUS 1 #define CTL_BSTATUS 2 #define CTL_IENABLE 3 diff --git a/arch/nios2/platform/Kconfig.platform b/arch/nios2/platform/Kconfig.platform index 9e32fb7f3d4ce92f6e9c9876fcf1891354af99f2..e849daff6fd1622563abbcea7920de85613bec18 100644 --- a/arch/nios2/platform/Kconfig.platform +++ b/arch/nios2/platform/Kconfig.platform @@ -37,6 +37,7 @@ config NIOS2_DTB_PHYS_ADDR config NIOS2_DTB_SOURCE_BOOL bool "Compile and link device tree into kernel image" + depends on !COMPILE_TEST help This allows you to specify a dts (device tree source) file which will be compiled and linked into the kernel image. diff --git a/arch/openrisc/include/asm/syscalls.h b/arch/openrisc/include/asm/syscalls.h index 3a7eeae6f56a8259714670dd00154ee8da8657d4..aa1c7e98722e3a6bfb39903d59162bfb1c611da9 100644 --- a/arch/openrisc/include/asm/syscalls.h +++ b/arch/openrisc/include/asm/syscalls.h @@ -22,9 +22,11 @@ asmlinkage long sys_or1k_atomic(unsigned long type, unsigned long *v1, asmlinkage long __sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, int tls); +asmlinkage long __sys_clone3(struct clone_args __user *uargs, size_t size); asmlinkage long __sys_fork(void); #define sys_clone __sys_clone +#define sys_clone3 __sys_clone3 #define sys_fork __sys_fork #endif /* __ASM_OPENRISC_SYSCALLS_H */ diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index 1b16d97e7da7f96eb6ab80f4162eb9a581b8f9ab..a82b2caaa560d6e3dc8e2b5bbcfc2240cfb54291 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -33,7 +33,7 @@ page_set_nocache(pte_t *pte, unsigned long addr, * Flush the page out of the TLB so that the new page flags get * picked up next time there's an access */ - flush_tlb_page(NULL, addr); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); /* Flush page out of dcache */ for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size) @@ -56,7 +56,7 @@ page_clear_nocache(pte_t *pte, unsigned long addr, * Flush the page out of the TLB so that the new page flags get * picked up next time there's an access */ - flush_tlb_page(NULL, addr); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); return 0; } diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index 98e4f97db51596050fc7d02416ebf655286f9366..b42d32d79b2e614d9ea62fa0117f2fee49846c75 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -1170,6 +1170,11 @@ ENTRY(__sys_clone) l.j _fork_save_extra_regs_and_call l.nop +ENTRY(__sys_clone3) + l.movhi r29,hi(sys_clone3) + l.j _fork_save_extra_regs_and_call + l.ori r29,r29,lo(sys_clone3) + ENTRY(__sys_fork) l.movhi r29,hi(sys_fork) l.ori r29,r29,lo(sys_fork) diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c index 29c82ef2e207ce098918a6a7a682311ab2d75eba..18b320a06fe5625a639d5ee54ec80b322585f781 100644 --- a/arch/openrisc/kernel/smp.c +++ b/arch/openrisc/kernel/smp.c @@ -134,8 +134,6 @@ asmlinkage __init void secondary_start_kernel(void) set_cpu_online(cpu, true); local_irq_enable(); - - preempt_disable(); /* * OK, it's off to the idle thread for us */ @@ -263,7 +261,7 @@ static inline void ipi_flush_tlb_range(void *info) local_flush_tlb_range(NULL, fd->addr1, fd->addr2); } -static void smp_flush_tlb_range(struct cpumask *cmask, unsigned long start, +static void smp_flush_tlb_range(const struct cpumask *cmask, unsigned long start, unsigned long end) { unsigned int cpuid; @@ -311,7 +309,9 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - smp_flush_tlb_range(mm_cpumask(vma->vm_mm), start, end); + const struct cpumask *cmask = vma ? mm_cpumask(vma->vm_mm) + : cpu_online_mask; + smp_flush_tlb_range(cmask, start, end); } /* Instruction cache invalidate - performed on each cpu */ diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 5140c602207f6b0d99bc2f00310bdb840acce7b6..0cf86ed2b7c17fdf20d7dcfa55ae9495b1296c2b 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -17,7 +17,12 @@ # Mike Shaver, Helge Deller and Martin K. Petersen # +ifdef CONFIG_PARISC_SELF_EXTRACT +boot := arch/parisc/boot +KBUILD_IMAGE := $(boot)/bzImage +else KBUILD_IMAGE := vmlinuz +endif NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 diff --git a/arch/parisc/include/asm/futex.h b/arch/parisc/include/asm/futex.h index fceb9cf02fb3a70a5e1bf771cd54446cd3802ad7..71aa0921d6c72ff8eaaa4f31710fe92b2285a0ae 100644 --- a/arch/parisc/include/asm/futex.h +++ b/arch/parisc/include/asm/futex.h @@ -16,7 +16,7 @@ static inline void _futex_spin_lock_irqsave(u32 __user *uaddr, unsigned long int *flags) { extern u32 lws_lock_start[]; - long index = ((long)uaddr & 0x3f8) >> 1; + long index = ((long)uaddr & 0x7f8) >> 1; arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index]; local_irq_save(*flags); arch_spin_lock(s); @@ -26,7 +26,7 @@ static inline void _futex_spin_unlock_irqrestore(u32 __user *uaddr, unsigned long int *flags) { extern u32 lws_lock_start[]; - long index = ((long)uaddr & 0x3f8) >> 1; + long index = ((long)uaddr & 0x7f8) >> 1; arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index]; arch_spin_unlock(s); local_irq_restore(*flags); diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index a303ae9a77f41fe8ceef5490ab2be5b394fc199d..16ee41e77174f14e016dac77ab6e512c49b02816 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -2,28 +2,32 @@ #ifndef __PARISC_SPECIAL_INSNS_H #define __PARISC_SPECIAL_INSNS_H -#define lpa(va) ({ \ - unsigned long pa; \ - __asm__ __volatile__( \ - "copy %%r0,%0\n\t" \ - "lpa %%r0(%1),%0" \ - : "=r" (pa) \ - : "r" (va) \ - : "memory" \ - ); \ - pa; \ +#define lpa(va) ({ \ + unsigned long pa; \ + __asm__ __volatile__( \ + "copy %%r0,%0\n" \ + "8:\tlpa %%r0(%1),%0\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + : "=&r" (pa) \ + : "r" (va) \ + : "memory" \ + ); \ + pa; \ }) -#define lpa_user(va) ({ \ - unsigned long pa; \ - __asm__ __volatile__( \ - "copy %%r0,%0\n\t" \ - "lpa %%r0(%%sr3,%1),%0" \ - : "=r" (pa) \ - : "r" (va) \ - : "memory" \ - ); \ - pa; \ +#define lpa_user(va) ({ \ + unsigned long pa; \ + __asm__ __volatile__( \ + "copy %%r0,%0\n" \ + "8:\tlpa %%r0(%%sr3,%1),%0\n" \ + "9:\n" \ + ASM_EXCEPTIONTABLE_ENTRY(8b, 9b) \ + : "=&r" (pa) \ + : "r" (va) \ + : "memory" \ + ); \ + pa; \ }) #define mfctl(reg) ({ \ diff --git a/arch/parisc/install.sh b/arch/parisc/install.sh index 056d588befdd63671fbaf5a2e3f1afc498d9f8b0..70d3cffb02515c7e8f99a1f493994fca16ce648b 100644 --- a/arch/parisc/install.sh +++ b/arch/parisc/install.sh @@ -39,6 +39,7 @@ verify "$3" if [ -n "${INSTALLKERNEL}" ]; then if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + if [ -x /usr/sbin/${INSTALLKERNEL} ]; then exec /usr/sbin/${INSTALLKERNEL} "$@"; fi fi # Default install diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 5d8123eb38ec59e37d17ebb54aa7e5c0e9b901a6..3da39140babcf2836f8f7fc3adeddc0e6de9c2ff 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1848,8 +1848,8 @@ syscall_restore: LDREG TI_TASK-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r1 /* Are we being ptraced? */ - ldw TASK_FLAGS(%r1),%r19 - ldi _TIF_SYSCALL_TRACE_MASK,%r2 + LDREG TI_FLAGS-THREAD_SZ_ALGN-FRAME_SIZE(%r30),%r19 + ldi _TIF_SINGLESTEP|_TIF_BLOCKSTEP,%r2 and,COND(=) %r19,%r2,%r0 b,n syscall_restore_rfi diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 10227f667c8a6ed09f68dc2e9e3e75f6d7a4a171..cf92ece20b75733c8cf084841e7e11a08a3db0f1 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -69,7 +70,10 @@ enum ipi_message_type { IPI_CALL_FUNC, IPI_CPU_START, IPI_CPU_STOP, - IPI_CPU_TEST + IPI_CPU_TEST, +#ifdef CONFIG_KGDB + IPI_ENTER_KGDB, +#endif }; @@ -167,7 +171,12 @@ ipi_interrupt(int irq, void *dev_id) case IPI_CPU_TEST: smp_debug(100, KERN_DEBUG "CPU%d is alive!\n", this_cpu); break; - +#ifdef CONFIG_KGDB + case IPI_ENTER_KGDB: + smp_debug(100, KERN_DEBUG "CPU%d ENTER_KGDB\n", this_cpu); + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); + break; +#endif default: printk(KERN_CRIT "Unknown IPI num on CPU%d: %lu\n", this_cpu, which); @@ -226,6 +235,12 @@ send_IPI_allbutself(enum ipi_message_type op) } } +#ifdef CONFIG_KGDB +void kgdb_roundup_cpus(void) +{ + send_IPI_allbutself(IPI_ENTER_KGDB); +} +#endif inline void smp_send_stop(void) { send_IPI_allbutself(IPI_CPU_STOP); } @@ -302,7 +317,6 @@ void __init smp_callin(unsigned long pdce_proc) #endif smp_cpu_init(slave_id); - preempt_disable(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 322503780db614a37e2d88dbca8114a7fc640f23..4e53515cf81f1eb8adb3f7fd7a1e0f852db32306 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -478,7 +478,7 @@ lws_start: extrd,u %r1,PSW_W_BIT,1,%r1 /* sp must be aligned on 4, so deposit the W bit setting into * the bottom of sp temporarily */ - or,ev %r1,%r30,%r30 + or,od %r1,%r30,%r30 /* Clip LWS number to a 32-bit value for 32-bit processes */ depdi 0, 31, 32, %r20 diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index f375ea528e59c34e799e16f9cc9ec8a97cc32fec..084e3ddba4346bf0fd556d0070cd4b12db45130e 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -438,3 +438,5 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 13d94f0f94a08d19234240a5ff11ce62489320cf..42be3ff1315e5d4be02ecd7590fc7c597183fd20 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -252,27 +252,13 @@ void __init time_init(void) static int __init init_cr16_clocksource(void) { /* - * The cr16 interval timers are not syncronized across CPUs on - * different sockets, so mark them unstable and lower rating on - * multi-socket SMP systems. + * The cr16 interval timers are not syncronized across CPUs, even if + * they share the same socket. */ if (num_online_cpus() > 1 && !running_on_qemu) { - int cpu; - unsigned long cpu0_loc; - cpu0_loc = per_cpu(cpu_data, 0).cpu_loc; - - for_each_online_cpu(cpu) { - if (cpu == 0) - continue; - if ((cpu0_loc != 0) && - (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc)) - continue; - - clocksource_cr16.name = "cr16_unstable"; - clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; - clocksource_cr16.rating = 0; - break; - } + clocksource_cr16.name = "cr16_unstable"; + clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE; + clocksource_cr16.rating = 0; } /* XXX: We may want to mark sched_clock stable here if cr16 clocks are diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index a52c7abf2ca49284ce495eda92270e7aefada485..269b737d26299cf09dda74abb22266f457df0766 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -729,6 +729,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) } mmap_read_unlock(current->mm); } + /* CPU could not fetch instruction, so clear stale IIR value. */ + regs->iir = 0xbaadf00d; fallthrough; case 27: /* Data memory protection ID trap */ @@ -782,7 +784,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) * unless pagefault_disable() was called before. */ - if (fault_space == 0 && !faulthandler_disabled()) + if (faulthandler_disabled() || fault_space == 0) { /* Clean up and return if in exception table. */ if (fixup_exception(regs)) diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 237d20dd5622de53fb207ec5888f972c7237a9e3..286cec4d86d7b0165a7b81ffcfdc48512a568acf 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -340,7 +340,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); - return 0; + return ret; } static int emulate_std(struct pt_regs *regs, int frreg, int flop) { @@ -397,7 +397,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) __asm__ __volatile__ ( " mtsp %4, %%sr1\n" " zdep %2, 29, 2, %%r19\n" -" dep %%r0, 31, 2, %2\n" +" dep %%r0, 31, 2, %3\n" " mtsar %%r19\n" " zvdepi -2, 32, %%r19\n" "1: ldw 0(%%sr1,%3),%%r20\n" @@ -409,7 +409,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) " andcm %%r21, %%r19, %%r21\n" " or %1, %%r20, %1\n" " or %2, %%r21, %2\n" -"3: stw %1,0(%%sr1,%1)\n" +"3: stw %1,0(%%sr1,%3)\n" "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" " copy %%r0, %0\n" @@ -596,7 +596,6 @@ void handle_unaligned(struct pt_regs *regs) ret = ERR_NOTHANDLED; /* "undefined", but lets kill them. */ break; } -#ifdef CONFIG_PA20 switch (regs->iir & OPCODE2_MASK) { case OPCODE_FLDD_L: @@ -607,22 +606,23 @@ void handle_unaligned(struct pt_regs *regs) flop=1; ret = emulate_std(regs, R2(regs->iir),1); break; +#ifdef CONFIG_PA20 case OPCODE_LDD_L: ret = emulate_ldd(regs, R2(regs->iir),0); break; case OPCODE_STD_L: ret = emulate_std(regs, R2(regs->iir),0); break; - } #endif + } switch (regs->iir & OPCODE3_MASK) { case OPCODE_FLDW_L: flop=1; - ret = emulate_ldw(regs, R2(regs->iir),0); + ret = emulate_ldw(regs, R2(regs->iir), 1); break; case OPCODE_LDW_M: - ret = emulate_ldw(regs, R2(regs->iir),1); + ret = emulate_ldw(regs, R2(regs->iir), 0); break; case OPCODE_FSTW_L: diff --git a/arch/parisc/kernel/unwind.c b/arch/parisc/kernel/unwind.c index 87ae476d1c4f5d17b24d8d088c71ac261bae0998..86a57fb0e6faebe01a63d7eb0927c31713f79d3c 100644 --- a/arch/parisc/kernel/unwind.c +++ b/arch/parisc/kernel/unwind.c @@ -21,6 +21,8 @@ #include #include +#include +#include /* #define DEBUG 1 */ #ifdef DEBUG @@ -203,6 +205,11 @@ int __init unwind_init(void) return 0; } +static bool pc_is_kernel_fn(unsigned long pc, void *fn) +{ + return (unsigned long)dereference_kernel_function_descriptor(fn) == pc; +} + static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int frame_size) { /* @@ -221,7 +228,7 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int extern void * const _call_on_stack; #endif /* CONFIG_IRQSTACKS */ - if (pc == (unsigned long) &handle_interruption) { + if (pc_is_kernel_fn(pc, handle_interruption)) { struct pt_regs *regs = (struct pt_regs *)(info->sp - frame_size - PT_SZ_ALGN); dbg("Unwinding through handle_interruption()\n"); info->prev_sp = regs->gr[30]; @@ -229,13 +236,13 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc == (unsigned long) &ret_from_kernel_thread || - pc == (unsigned long) &syscall_exit) { + if (pc_is_kernel_fn(pc, ret_from_kernel_thread) || + pc_is_kernel_fn(pc, syscall_exit)) { info->prev_sp = info->prev_ip = 0; return 1; } - if (pc == (unsigned long) &intr_return) { + if (pc_is_kernel_fn(pc, intr_return)) { struct pt_regs *regs; dbg("Found intr_return()\n"); @@ -246,20 +253,20 @@ static int unwind_special(struct unwind_frame_info *info, unsigned long pc, int return 1; } - if (pc == (unsigned long) &_switch_to_ret) { + if (pc_is_kernel_fn(pc, _switch_to) || + pc_is_kernel_fn(pc, _switch_to_ret)) { info->prev_sp = info->sp - CALLEE_SAVE_FRAME_SIZE; info->prev_ip = *(unsigned long *)(info->prev_sp - RP_OFFSET); return 1; } #ifdef CONFIG_IRQSTACKS - if (pc == (unsigned long) &_call_on_stack) { + if (pc_is_kernel_fn(pc, _call_on_stack)) { info->prev_sp = *(unsigned long *)(info->sp - FRAME_SIZE - REG_SZ); info->prev_ip = *(unsigned long *)(info->sp - FRAME_SIZE - RP_OFFSET); return 1; } #endif - return 0; } diff --git a/arch/parisc/lib/iomap.c b/arch/parisc/lib/iomap.c index f03adb1999e77ed05f1282be1b24aa5c601403ce..e362d6a147311264462d2c2789d26ea39822da3a 100644 --- a/arch/parisc/lib/iomap.c +++ b/arch/parisc/lib/iomap.c @@ -346,6 +346,16 @@ u64 ioread64be(const void __iomem *addr) return *((u64 *)addr); } +u64 ioread64_lo_hi(const void __iomem *addr) +{ + u32 low, high; + + low = ioread32(addr); + high = ioread32(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} + u64 ioread64_hi_lo(const void __iomem *addr) { u32 low, high; @@ -419,6 +429,12 @@ void iowrite64be(u64 datum, void __iomem *addr) } } +void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32(val, addr); + iowrite32(val >> 32, addr + sizeof(u32)); +} + void iowrite64_hi_lo(u64 val, void __iomem *addr) { iowrite32(val >> 32, addr + sizeof(u32)); @@ -527,6 +543,7 @@ EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); EXPORT_SYMBOL(ioread64); EXPORT_SYMBOL(ioread64be); +EXPORT_SYMBOL(ioread64_lo_hi); EXPORT_SYMBOL(ioread64_hi_lo); EXPORT_SYMBOL(iowrite8); EXPORT_SYMBOL(iowrite16); @@ -535,6 +552,7 @@ EXPORT_SYMBOL(iowrite32); EXPORT_SYMBOL(iowrite32be); EXPORT_SYMBOL(iowrite64); EXPORT_SYMBOL(iowrite64be); +EXPORT_SYMBOL(iowrite64_lo_hi); EXPORT_SYMBOL(iowrite64_hi_lo); EXPORT_SYMBOL(ioread8_rep); EXPORT_SYMBOL(ioread16_rep); diff --git a/arch/parisc/math-emu/fpudispatch.c b/arch/parisc/math-emu/fpudispatch.c index 7c46969ead9b1d17df3613fac35ceaa610adb803..01ed133227c25d8cb984b503704390364e7e721a 100644 --- a/arch/parisc/math-emu/fpudispatch.c +++ b/arch/parisc/math-emu/fpudispatch.c @@ -310,12 +310,15 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1]; return(NOEXCEPTION); } + BUG(); case 3: /* FABS */ switch (fmt) { case 2: /* illegal */ @@ -325,13 +328,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and clear sign bit */ fpregs[t] = fpregs[r1] & 0x7fffffff; return(NOEXCEPTION); } + BUG(); case 6: /* FNEG */ switch (fmt) { case 2: /* illegal */ @@ -341,13 +347,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and invert sign bit */ fpregs[t] = fpregs[r1] ^ 0x80000000; return(NOEXCEPTION); } + BUG(); case 7: /* FNEGABS */ switch (fmt) { case 2: /* illegal */ @@ -357,13 +366,16 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) r1 &= ~3; fpregs[t+3] = fpregs[r1+3]; fpregs[t+2] = fpregs[r1+2]; + fallthrough; case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ /* copy and set sign bit */ fpregs[t] = fpregs[r1] | 0x80000000; return(NOEXCEPTION); } + BUG(); case 4: /* FSQRT */ switch (fmt) { case 0: @@ -376,6 +388,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 5: /* FRND */ switch (fmt) { case 0: @@ -389,7 +402,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(MAJOR_0C_EXCP); } } /* end of switch (subop) */ - + BUG(); case 1: /* class 1 */ df = extru(ir,fpdfpos,2); /* get dest format */ if ((df & 2) || (fmt & 2)) { @@ -419,6 +432,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* dbl/dbl */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FCNVXF */ switch(fmt) { case 0: /* sgl/sgl */ @@ -434,6 +448,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 2: /* FCNVFX */ switch(fmt) { case 0: /* sgl/sgl */ @@ -449,6 +464,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 3: /* FCNVFXT */ switch(fmt) { case 0: /* sgl/sgl */ @@ -464,6 +480,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 5: /* FCNVUF (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -479,6 +496,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 6: /* FCNVFU (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -494,6 +512,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 7: /* FCNVFUT (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -509,10 +528,11 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 4: /* undefined */ return(MAJOR_0C_EXCP); } /* end of switch subop */ - + BUG(); case 2: /* class 2 */ fpu_type_flags=fpregs[FPU_TYPE_FLAG_POS]; r2 = extru(ir, fpr2pos, 5) * sizeof(double)/sizeof(u_int); @@ -590,6 +610,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FTEST */ switch (fmt) { case 0: @@ -609,8 +630,10 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: return(MAJOR_0C_EXCP); } + BUG(); } /* end of switch subop */ } /* end of else for PA1.0 & PA1.1 */ + BUG(); case 3: /* class 3 */ r2 = extru(ir,fpr2pos,5) * sizeof(double)/sizeof(u_int); if (r2 == 0) @@ -633,6 +656,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 1: /* FSUB */ switch (fmt) { case 0: @@ -645,6 +669,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 2: /* FMPY */ switch (fmt) { case 0: @@ -657,6 +682,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 3: /* FDIV */ switch (fmt) { case 0: @@ -669,6 +695,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); case 4: /* FREM */ switch (fmt) { case 0: @@ -681,6 +708,7 @@ decode_0c(u_int ir, u_int class, u_int subop, u_int fpregs[]) case 3: /* quad not implemented */ return(MAJOR_0C_EXCP); } + BUG(); } /* end of class 3 switch */ } /* end of switch(class) */ @@ -736,10 +764,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1]; return(NOEXCEPTION); } + BUG(); case 3: /* FABS */ switch (fmt) { case 2: @@ -747,10 +777,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] & 0x7fffffff; return(NOEXCEPTION); } + BUG(); case 6: /* FNEG */ switch (fmt) { case 2: @@ -758,10 +790,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] ^ 0x80000000; return(NOEXCEPTION); } + BUG(); case 7: /* FNEGABS */ switch (fmt) { case 2: @@ -769,10 +803,12 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); case 1: /* double */ fpregs[t+1] = fpregs[r1+1]; + fallthrough; case 0: /* single */ fpregs[t] = fpregs[r1] | 0x80000000; return(NOEXCEPTION); } + BUG(); case 4: /* FSQRT */ switch (fmt) { case 0: @@ -785,6 +821,7 @@ u_int fpregs[]; case 3: return(MAJOR_0E_EXCP); } + BUG(); case 5: /* FRMD */ switch (fmt) { case 0: @@ -798,7 +835,7 @@ u_int fpregs[]; return(MAJOR_0E_EXCP); } } /* end of switch (subop */ - + BUG(); case 1: /* class 1 */ df = extru(ir,fpdfpos,2); /* get dest format */ /* @@ -826,6 +863,7 @@ u_int fpregs[]; case 3: /* dbl/dbl */ return(MAJOR_0E_EXCP); } + BUG(); case 1: /* FCNVXF */ switch(fmt) { case 0: /* sgl/sgl */ @@ -841,6 +879,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvxf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 2: /* FCNVFX */ switch(fmt) { case 0: /* sgl/sgl */ @@ -856,6 +895,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfx(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 3: /* FCNVFXT */ switch(fmt) { case 0: /* sgl/sgl */ @@ -871,6 +911,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfxt(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 5: /* FCNVUF (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -886,6 +927,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvuf(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 6: /* FCNVFU (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -901,6 +943,7 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfu(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 7: /* FCNVFUT (PA2.0 only) */ switch(fmt) { case 0: /* sgl/sgl */ @@ -916,9 +959,11 @@ u_int fpregs[]; return(dbl_to_dbl_fcnvfut(&fpregs[r1],0, &fpregs[t],status)); } + BUG(); case 4: /* undefined */ return(MAJOR_0C_EXCP); } /* end of switch subop */ + BUG(); case 2: /* class 2 */ /* * Be careful out there. @@ -994,6 +1039,7 @@ u_int fpregs[]; } } /* end of switch subop */ } /* end of else for PA1.0 & PA1.1 */ + BUG(); case 3: /* class 3 */ /* * Be careful out there. @@ -1026,6 +1072,7 @@ u_int fpregs[]; return(dbl_fadd(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 1: /* FSUB */ switch (fmt) { case 0: @@ -1035,6 +1082,7 @@ u_int fpregs[]; return(dbl_fsub(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 2: /* FMPY or XMPYU */ /* * check for integer multiply (x bit set) @@ -1071,6 +1119,7 @@ u_int fpregs[]; &fpregs[r2],&fpregs[t],status)); } } + BUG(); case 3: /* FDIV */ switch (fmt) { case 0: @@ -1080,6 +1129,7 @@ u_int fpregs[]; return(dbl_fdiv(&fpregs[r1],&fpregs[r2], &fpregs[t],status)); } + BUG(); case 4: /* FREM */ switch (fmt) { case 0: diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c index 24426a7e1a5e5a43191675435b5d92d0ee2842c3..cc15d737fda64b1db0cf883dfbe622a7b0d16ad3 100644 --- a/arch/parisc/mm/fixmap.c +++ b/arch/parisc/mm/fixmap.c @@ -20,12 +20,9 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys) pte_t *pte; if (pmd_none(*pmd)) - pmd = pmd_alloc(NULL, pud, vaddr); - - pte = pte_offset_kernel(pmd, vaddr); - if (pte_none(*pte)) pte = pte_alloc_kernel(pmd, vaddr); + pte = pte_offset_kernel(pmd, vaddr); set_pte_at(&init_mm, vaddr, pte, __mk_pte(phys, PAGE_KERNEL_RWX)); flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); } diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 3ec633b11b542d5dc893c63f1465dc6bfdc16140..319afa00cdf7bfa4686c2bcb5f72750c746d5cf3 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -346,9 +346,9 @@ static void __init setup_bootmem(void) static bool kernel_set_to_readonly; -static void __init map_pages(unsigned long start_vaddr, - unsigned long start_paddr, unsigned long size, - pgprot_t pgprot, int force) +static void __ref map_pages(unsigned long start_vaddr, + unsigned long start_paddr, unsigned long size, + pgprot_t pgprot, int force) { pmd_t *pmd; pte_t *pg_table; @@ -458,7 +458,7 @@ void __init set_kernel_text_rw(int enable_read_write) flush_tlb_all(); } -void __ref free_initmem(void) +void free_initmem(void) { unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; @@ -472,7 +472,6 @@ void __ref free_initmem(void) /* The init text pages are marked R-X. We have to * flush the icache and mark them RW- * - * This is tricky, because map_pages is in the init section. * Do a dummy remap of the data section first (the data * section is already PAGE_KERNEL) to pull in the TLB entries * for map_kernel */ @@ -844,9 +843,9 @@ void flush_tlb_all(void) { int do_recycle; - __inc_irq_stat(irq_tlb_count); do_recycle = 0; spin_lock(&sid_lock); + __inc_irq_stat(irq_tlb_count); if (dirty_space_ids > RECYCLE_THRESHOLD) { BUG_ON(recycle_inuse); /* FIXME: Use a semaphore/wait queue here */ get_dirty_sids(&recycle_ndirty,recycle_dirty_array); @@ -865,8 +864,8 @@ void flush_tlb_all(void) #else void flush_tlb_all(void) { - __inc_irq_stat(irq_tlb_count); spin_lock(&sid_lock); + __inc_irq_stat(irq_tlb_count); flush_tlb_all_local(NULL); recycle_sids(); spin_unlock(&sid_lock); diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 5c8c06215dd4ba7c6efefe1e948620ba6e4f4095..6a9a852c3d5667e3063738287078b4215940d914 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -65,7 +65,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y)) ifdef CONFIG_PPC32 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o else -ifeq ($(call ld-ifversion, -ge, 225000000, y),y) +ifeq ($(call ld-ifversion, -ge, 22500, y),y) # Have the linker provide sfpr if possible. # There is a corresponding test in arch/powerpc/lib/Makefile KBUILD_LDFLAGS_MODULE += --save-restore-funcs diff --git a/arch/powerpc/boot/dts/charon.dts b/arch/powerpc/boot/dts/charon.dts index 408b486b13dffc7098e387be2bc87fc47a76a775..cd589539f313f01a71ca812ba46f703216f94ad2 100644 --- a/arch/powerpc/boot/dts/charon.dts +++ b/arch/powerpc/boot/dts/charon.dts @@ -35,7 +35,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/digsy_mtc.dts b/arch/powerpc/boot/dts/digsy_mtc.dts index 0e5e9d3acf79fd684d4b87f795550fc36312120d..19a14e62e65f4ec3606a6a27752e9e6e448a90a8 100644 --- a/arch/powerpc/boot/dts/digsy_mtc.dts +++ b/arch/powerpc/boot/dts/digsy_mtc.dts @@ -16,7 +16,7 @@ model = "intercontrol,digsy-mtc"; compatible = "intercontrol,digsy-mtc"; - memory { + memory@0 { reg = <0x00000000 0x02000000>; // 32MB }; diff --git a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi index c90702b04a530f798886e71be187cf36574d94b1..48e5cd61599c6ba0409f106fda093bf19f40b5a9 100644 --- a/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi +++ b/arch/powerpc/boot/dts/fsl/qoriq-fman3l-0.dtsi @@ -79,6 +79,7 @@ fman0: fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfc000 0x1000>; + fsl,erratum-a009885; }; xmdio0: mdio@fd000 { @@ -86,6 +87,7 @@ fman0: fman@400000 { #size-cells = <0>; compatible = "fsl,fman-memac-mdio", "fsl,fman-xmdio"; reg = <0xfd000 0x1000>; + fsl,erratum-a009885; }; }; diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts index 5ba6fbfca2742b9d8f8f62a96e6a2874f201eaed..f82f85c65964cc477ad3b3b8a965e52ef1d490d0 100644 --- a/arch/powerpc/boot/dts/fsl/t1023rdb.dts +++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts @@ -154,7 +154,7 @@ fm1mac3: ethernet@e4000 { phy-handle = <&sgmii_aqr_phy3>; - phy-connection-type = "sgmii-2500"; + phy-connection-type = "2500base-x"; sleep = <&rcpm 0x20000000>; }; diff --git a/arch/powerpc/boot/dts/lite5200.dts b/arch/powerpc/boot/dts/lite5200.dts index cb2782dd6132c1ffb94825c6dc679aae5063cda4..e7b194775d783b69122a6615f27f46ec4d6fd9fe 100644 --- a/arch/powerpc/boot/dts/lite5200.dts +++ b/arch/powerpc/boot/dts/lite5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/lite5200b.dts b/arch/powerpc/boot/dts/lite5200b.dts index 2b86c81f9048564becfc47120351df7c46b6a436..547cbe726ff23f751e59598cfc01b776e2664040 100644 --- a/arch/powerpc/boot/dts/lite5200b.dts +++ b/arch/powerpc/boot/dts/lite5200b.dts @@ -31,7 +31,7 @@ led4 { gpios = <&gpio_simple 2 1>; }; }; - memory { + memory@0 { reg = <0x00000000 0x10000000>; // 256MB }; diff --git a/arch/powerpc/boot/dts/media5200.dts b/arch/powerpc/boot/dts/media5200.dts index 61cae9dcddef4f3c814b32ed3197dd65dd0141eb..f3188018faceb16751b63fdbe89e75276dc67f00 100644 --- a/arch/powerpc/boot/dts/media5200.dts +++ b/arch/powerpc/boot/dts/media5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB RAM }; diff --git a/arch/powerpc/boot/dts/mpc5200b.dtsi b/arch/powerpc/boot/dts/mpc5200b.dtsi index 648fe31795f494093df0fe570b4ea0215351c3a1..8b796f3b11da7e2658a6348ca66cfeaae7119b53 100644 --- a/arch/powerpc/boot/dts/mpc5200b.dtsi +++ b/arch/powerpc/boot/dts/mpc5200b.dtsi @@ -33,7 +33,7 @@ }; }; - memory: memory { + memory: memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/o2d.dts b/arch/powerpc/boot/dts/o2d.dts index 24a46f65e529933e75af46186e4f48de7cb16012..e0a8d3034417fff58258912633143b93d7fd0532 100644 --- a/arch/powerpc/boot/dts/o2d.dts +++ b/arch/powerpc/boot/dts/o2d.dts @@ -12,7 +12,7 @@ model = "ifm,o2d"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/o2d.dtsi b/arch/powerpc/boot/dts/o2d.dtsi index 6661955a2be47899324db0f5edf22d9086ca3dcb..b55a9e5bd828c1178c551689fc75e50eb8d20cfd 100644 --- a/arch/powerpc/boot/dts/o2d.dtsi +++ b/arch/powerpc/boot/dts/o2d.dtsi @@ -19,7 +19,7 @@ model = "ifm,o2d"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/o2dnt2.dts b/arch/powerpc/boot/dts/o2dnt2.dts index eeba7f5507d5df20d6e4fcec1bc0d4d56a4acd95..c2eedbd1f5fcb9f1244f9fe6d591b9e0462352e0 100644 --- a/arch/powerpc/boot/dts/o2dnt2.dts +++ b/arch/powerpc/boot/dts/o2dnt2.dts @@ -12,7 +12,7 @@ model = "ifm,o2dnt2"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/o3dnt.dts b/arch/powerpc/boot/dts/o3dnt.dts index fd00396b0593eb30098aa5f149645263961ff877..e4c1bdd412716e442fc8ba9c6650c1051495bdd4 100644 --- a/arch/powerpc/boot/dts/o3dnt.dts +++ b/arch/powerpc/boot/dts/o3dnt.dts @@ -12,7 +12,7 @@ model = "ifm,o3dnt"; compatible = "ifm,o2d"; - memory { + memory@0 { reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/boot/dts/pcm032.dts b/arch/powerpc/boot/dts/pcm032.dts index 780e13d99e7b8dd8fc13c3858552e4886a60af5c..1895bc95900cc59ee25459acb378e92e970d6dcb 100644 --- a/arch/powerpc/boot/dts/pcm032.dts +++ b/arch/powerpc/boot/dts/pcm032.dts @@ -20,7 +20,7 @@ model = "phytec,pcm032"; compatible = "phytec,pcm032"; - memory { + memory@0 { reg = <0x00000000 0x08000000>; // 128MB }; diff --git a/arch/powerpc/boot/dts/tqm5200.dts b/arch/powerpc/boot/dts/tqm5200.dts index 9ed0bc78967e191afab7c417ddf65a573e2f8ad1..5bb25a9e40a01f6a5615986c03a8fe680ee24a14 100644 --- a/arch/powerpc/boot/dts/tqm5200.dts +++ b/arch/powerpc/boot/dts/tqm5200.dts @@ -32,7 +32,7 @@ }; }; - memory { + memory@0 { device_type = "memory"; reg = <0x00000000 0x04000000>; // 64MB }; diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h index a8982d52f6b1d6c850600d343c632961e2a8502c..cbde06d0fb380c02ccd1a903c3213abd77efbfd9 100644 --- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h @@ -102,6 +102,8 @@ extern s32 patch__hash_page_B, patch__hash_page_C; extern s32 patch__flush_hash_A0, patch__flush_hash_A1, patch__flush_hash_A2; extern s32 patch__flush_hash_B; +int __init find_free_bat(void); +unsigned int bat_block_size(unsigned long base, unsigned long top); #endif /* !__ASSEMBLY__ */ /* We happily ignore the smaller BATs on 601, we don't actually use diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 523d3e6e2400917f95bbcda21cec6b6b8b10d453..94c5c66231a8c394b00ab24127e9041ee95c9ef6 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -142,6 +142,7 @@ static inline bool pte_user(pte_t pte) #ifndef __ASSEMBLY__ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 4a3dca0271f1e71e388bd1e400a8fd35c354c497..71e2c524f1eead2d4c5e5c27a26023812e8b5869 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1054,6 +1054,8 @@ static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t p return hash__map_kernel_page(ea, pa, prot); } +void unmap_kernel_page(unsigned long va); + static inline int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys) diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index d5b3c3bb95b400310df4879094684160bcaba764..fa8746c320067a51b50696207228a9140ed690a6 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -23,6 +23,7 @@ #define BRANCH_ABSOLUTE 0x2 bool is_offset_in_branch_range(long offset); +bool is_offset_in_cond_branch_range(long offset); int create_branch(struct ppc_inst *instr, const struct ppc_inst *addr, unsigned long target, int flags); int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, diff --git a/arch/powerpc/include/asm/cpu_setup_power.h b/arch/powerpc/include/asm/cpu_setup_power.h new file mode 100644 index 0000000000000000000000000000000000000000..24be9131f8032d80b002f2bf0b055e54981717bf --- /dev/null +++ b/arch/powerpc/include/asm/cpu_setup_power.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 IBM Corporation + */ +void __setup_cpu_power7(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power7(void); +void __setup_cpu_power8(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power8(void); +void __setup_cpu_power9(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power9(void); +void __setup_cpu_power10(unsigned long offset, struct cpu_spec *spec); +void __restore_cpu_power10(void); diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h index 0b295bdb201e82d0ce3b61d6732fae0e64710942..aa6a5ef5d4830c8ff97983c392f9ae604139ef87 100644 --- a/arch/powerpc/include/asm/firmware.h +++ b/arch/powerpc/include/asm/firmware.h @@ -134,12 +134,6 @@ extern int ibm_nmi_interlock_token; extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void); -#else -static inline bool is_kvm_guest(void) { return false; } -#endif - #ifdef CONFIG_PPC_PSERIES void pseries_probe_fw_features(void); #else diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 591b2f4deed533df0832db93bec2441fd738ec9b..897cc68758d44aca52afb362b0e018ceb7d1ea82 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -111,8 +111,10 @@ static inline void __set_fixmap(enum fixed_addresses idx, BUILD_BUG_ON(idx >= __end_of_fixed_addresses); else if (WARN_ON(idx >= __end_of_fixed_addresses)) return; - - map_kernel_page(__fix_to_virt(idx), phys, flags); + if (pgprot_val(flags)) + map_kernel_page(__fix_to_virt(idx), phys, flags); + else + unmap_kernel_page(__fix_to_virt(idx)); } #define __early_set_fixmap __set_fixmap diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 3e8e19f5746c7bf18b515402801b90a13b958a39..00c8cda1c9c3179e41ae91691fced33f58b9ac70 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -382,6 +382,8 @@ #define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 #define H_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) // IBM bit 5 #define H_CPU_BEHAV_FLUSH_LINK_STACK (1ull << 57) // IBM bit 6 +#define H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY (1ull << 56) // IBM bit 7 +#define H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS (1ull << 55) // IBM bit 8 /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 0363734ff56e0f9fc0e2c6ddb013cea24d7cbbd6..0f2acbb9667400226d98d18509c36f7e57c37b69 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -38,6 +38,8 @@ #define PACA_IRQ_MUST_HARD_MASK (PACA_IRQ_EE) #endif +#endif /* CONFIG_PPC64 */ + /* * flags for paca->irq_soft_mask */ @@ -46,8 +48,6 @@ #define IRQS_PMI_DISABLED 2 #define IRQS_ALL_DISABLED (IRQS_DISABLED | IRQS_PMI_DISABLED) -#endif /* CONFIG_PPC64 */ - #ifndef __ASSEMBLY__ extern void replay_system_reset(void); @@ -175,6 +175,42 @@ static inline bool arch_irqs_disabled(void) return arch_irqs_disabled_flags(arch_local_save_flags()); } +static inline void set_pmi_irq_pending(void) +{ + /* + * Invoked from PMU callback functions to set PMI bit in the paca. + * This has to be called with irq's disabled (via hard_irq_disable()). + */ + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + get_paca()->irq_happened |= PACA_IRQ_PMI; +} + +static inline void clear_pmi_irq_pending(void) +{ + /* + * Invoked from PMU callback functions to clear the pending PMI bit + * in the paca. + */ + if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) + WARN_ON_ONCE(mfmsr() & MSR_EE); + + get_paca()->irq_happened &= ~PACA_IRQ_PMI; +} + +static inline bool pmi_irq_pending(void) +{ + /* + * Invoked from PMU callback functions to check if there is a pending + * PMI bit in the paca. + */ + if (get_paca()->irq_happened & PACA_IRQ_PMI) + return true; + + return false; +} + #ifdef CONFIG_PPC_BOOK3S /* * To support disabling and enabling of irq with PMI, set of @@ -296,6 +332,10 @@ extern void irq_set_pending_from_srr1(unsigned long srr1); extern void force_external_irq_replay(void); +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) +{ + regs->softe = val; +} #else /* CONFIG_PPC64 */ static inline unsigned long arch_local_save_flags(void) @@ -364,6 +404,13 @@ static inline bool arch_irq_disabled_regs(struct pt_regs *regs) static inline void may_hard_irq_enable(void) { } +static inline void clear_pmi_irq_pending(void) { } +static inline void set_pmi_irq_pending(void) { } +static inline bool pmi_irq_pending(void) { return false; } + +static inline void irq_soft_mask_regs_set_state(struct pt_regs *regs, unsigned long val) +{ +} #endif /* CONFIG_PPC64 */ #define ARCH_IRQ_INIT_FLAGS IRQ_NOREQUEST diff --git a/arch/powerpc/include/asm/kvm_guest.h b/arch/powerpc/include/asm/kvm_guest.h new file mode 100644 index 0000000000000000000000000000000000000000..c63105d2c9e7c3cf8fec91f130088a1a6418ea92 --- /dev/null +++ b/arch/powerpc/include/asm/kvm_guest.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 IBM Corporation + */ + +#ifndef _ASM_POWERPC_KVM_GUEST_H_ +#define _ASM_POWERPC_KVM_GUEST_H_ + +#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) +#include + +DECLARE_STATIC_KEY_FALSE(kvm_guest); + +static inline bool is_kvm_guest(void) +{ + return static_branch_unlikely(&kvm_guest); +} + +int check_kvm_guest(void); +#else +static inline bool is_kvm_guest(void) { return false; } +static inline int check_kvm_guest(void) { return 0; } +#endif + +#endif /* _ASM_POWERPC_KVM_GUEST_H_ */ diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h index 744612054c94ca9e6266aaa8e919463074351d13..abe1b5e82547b1137f18cdd62c4959e519980236 100644 --- a/arch/powerpc/include/asm/kvm_para.h +++ b/arch/powerpc/include/asm/kvm_para.h @@ -8,7 +8,7 @@ #ifndef __POWERPC_KVM_PARA_H__ #define __POWERPC_KVM_PARA_H__ -#include +#include #include diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 96522f7f0618a0eb9096858f2b63ff2b98bc3046..e53cc07e6b9ec9d433f4969d9100d277e7ac0c3a 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -65,6 +65,7 @@ extern int icache_44x_need_flush; #ifndef __ASSEMBLY__ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 57cd3892bfe05237231e929fb1284612d9d13cbe..1eacff0fff02930042793416483dc06da2c37b7c 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -311,6 +311,7 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma, #define __swp_entry_to_pte(x) __pte((x).val) int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot); +void unmap_kernel_page(unsigned long va); extern int __meminit vmemmap_create_mapping(unsigned long start, unsigned long page_size, unsigned long phys); diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index a6e3700c4566adbe194f76643f2464bfdc16d2b1..f0c0816f572707a2f0467c6b78a21b869c17e539 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -449,6 +449,7 @@ #define PPC_RAW_LDX(r, base, b) (0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_LHZ(r, base, i) (0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i)) #define PPC_RAW_LHBRX(r, base, b) (0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) +#define PPC_RAW_LWBRX(r, base, b) (0x7c00042c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_LDBRX(r, base, b) (0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_RAW_STWCX(s, a, b) (0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_CMPWI(a, i) (0x2c000000 | ___PPC_RA(a) | IMM_L(i)) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index f4b98903064f5ea9f22953d50e85f418ba6ea8d0..6afb14b6bbc26ca80646015a51f72095cfb73bb6 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -865,6 +865,7 @@ #define MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */ #define MMCR0_EBE 0x00100000UL /* Event based branch enable */ #define MMCR0_PMCC 0x000c0000UL /* PMC control */ +#define MMCR0_PMCCEXT ASM_CONST(0x00000200) /* PMCCEXT control */ #define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */ #define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCjCE ASM_CONST(0x00004000) /* PMCj count enable*/ diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index b774a4477d5f1a256895c1778af1e7372fb7eeb9..e380acc6e41327302b300c119185f6c1689fc491 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -39,6 +39,11 @@ static inline bool security_ftr_enabled(u64 feature) return !!(powerpc_security_features & feature); } +#ifdef CONFIG_PPC_BOOK3S_64 +enum stf_barrier_type stf_barrier_type_get(void); +#else +static inline enum stf_barrier_type stf_barrier_type_get(void) { return STF_BARRIER_NONE; } +#endif // Features indicating support for Spectre/Meltdown mitigations diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fe2ef598e2ead0a5f429cd30a90e0edd48e22664..376104c166fcf337bd4aa19b50a1e4f588ae8daf 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -11,6 +11,7 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC endif +CFLAGS_early_32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_cputable.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) diff --git a/arch/powerpc/kernel/btext.c b/arch/powerpc/kernel/btext.c index 803c2a45b22ac2092137e1b2a715d9f51a23b434..1cffb5e7c38d6c0f1315dcb9c729842b351a68d9 100644 --- a/arch/powerpc/kernel/btext.c +++ b/arch/powerpc/kernel/btext.c @@ -241,8 +241,10 @@ int __init btext_find_display(int allow_nonstdout) rc = btext_initialize(np); printk("result: %d\n", rc); } - if (rc == 0) + if (rc == 0) { + of_node_put(np); break; + } } return rc; } diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S deleted file mode 100644 index 704e8b9501eee84ade894358b57986268c2ac17f..0000000000000000000000000000000000000000 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ /dev/null @@ -1,252 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * This file contains low level CPU setup functions. - * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) - */ - -#include -#include -#include -#include -#include -#include -#include - -/* Entry: r3 = crap, r4 = ptr to cputable entry - * - * Note that we can be called twice for pseudo-PVRs - */ -_GLOBAL(__setup_cpu_power7) - mflr r11 - bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR_ISA206 - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power7) - mflr r11 - mfmsr r3 - rldicl. r0,r3,4,63 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - li r4,(LPCR_LPES1 >> LPCR_LPES_SH) - bl __init_LPCR_ISA206 - mtlr r11 - blr - -_GLOBAL(__setup_cpu_power8) - mflr r11 - bl __init_FSCR - bl __init_PMU - bl __init_PMU_ISA207 - bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA206 - bl __init_HFSCR - bl __init_PMU_HV - bl __init_PMU_HV_ISA207 - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power8) - mflr r11 - bl __init_FSCR - bl __init_PMU - bl __init_PMU_ISA207 - mfmsr r3 - rldicl. r0,r3,4,63 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_LPID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - ori r3, r3, LPCR_PECEDH - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA206 - bl __init_HFSCR - bl __init_PMU_HV - bl __init_PMU_HV_ISA207 - mtlr r11 - blr - -_GLOBAL(__setup_cpu_power10) - mflr r11 - bl __init_FSCR_power10 - bl __init_PMU - bl __init_PMU_ISA31 - b 1f - -_GLOBAL(__setup_cpu_power9) - mflr r11 - bl __init_FSCR_power9 - bl __init_PMU -1: bl __init_hvmode_206 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_PSSCR,r0 - mtspr SPRN_LPID,r0 - mtspr SPRN_PID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) - or r3, r3, r4 - LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) - andc r3, r3, r4 - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA300 - bl __init_HFSCR - bl __init_PMU_HV - mtlr r11 - blr - -_GLOBAL(__restore_cpu_power10) - mflr r11 - bl __init_FSCR_power10 - bl __init_PMU - bl __init_PMU_ISA31 - b 1f - -_GLOBAL(__restore_cpu_power9) - mflr r11 - bl __init_FSCR_power9 - bl __init_PMU -1: mfmsr r3 - rldicl. r0,r3,4,63 - mtlr r11 - beqlr - li r0,0 - mtspr SPRN_PSSCR,r0 - mtspr SPRN_LPID,r0 - mtspr SPRN_PID,r0 - LOAD_REG_IMMEDIATE(r0, PCR_MASK) - mtspr SPRN_PCR,r0 - mfspr r3,SPRN_LPCR - LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) - or r3, r3, r4 - LOAD_REG_IMMEDIATE(r4, LPCR_UPRT | LPCR_HR) - andc r3, r3, r4 - li r4,0 /* LPES = 0 */ - bl __init_LPCR_ISA300 - bl __init_HFSCR - bl __init_PMU_HV - mtlr r11 - blr - -__init_hvmode_206: - /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */ - mfmsr r3 - rldicl. r0,r3,4,63 - bnelr - ld r5,CPU_SPEC_FEATURES(r4) - LOAD_REG_IMMEDIATE(r6,CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST) - andc r5,r5,r6 - std r5,CPU_SPEC_FEATURES(r4) - blr - -__init_LPCR_ISA206: - /* Setup a sane LPCR: - * Called with initial LPCR in R3 and desired LPES 2-bit value in R4 - * - * LPES = 0b01 (HSRR0/1 used for 0x500) - * PECE = 0b111 - * DPFD = 4 - * HDICE = 0 - * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) - * VRMASD = 0b10000 (L=1, LP=00) - * - * Other bits untouched for now - */ - li r5,0x10 - rldimi r3,r5, LPCR_VRMASD_SH, 64-LPCR_VRMASD_SH-5 - - /* POWER9 has no VRMASD */ -__init_LPCR_ISA300: - rldimi r3,r4, LPCR_LPES_SH, 64-LPCR_LPES_SH-2 - ori r3,r3,(LPCR_PECE0|LPCR_PECE1|LPCR_PECE2) - li r5,4 - rldimi r3,r5, LPCR_DPFD_SH, 64-LPCR_DPFD_SH-3 - clrrdi r3,r3,1 /* clear HDICE */ - li r5,4 - rldimi r3,r5, LPCR_VC_SH, 0 - mtspr SPRN_LPCR,r3 - isync - blr - -__init_FSCR_power10: - mfspr r3, SPRN_FSCR - ori r3, r3, FSCR_PREFIX - mtspr SPRN_FSCR, r3 - // fall through - -__init_FSCR_power9: - mfspr r3, SPRN_FSCR - ori r3, r3, FSCR_SCV - mtspr SPRN_FSCR, r3 - // fall through - -__init_FSCR: - mfspr r3,SPRN_FSCR - ori r3,r3,FSCR_TAR|FSCR_EBB - mtspr SPRN_FSCR,r3 - blr - -__init_HFSCR: - mfspr r3,SPRN_HFSCR - ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\ - HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP - mtspr SPRN_HFSCR,r3 - blr - -__init_PMU_HV: - li r5,0 - mtspr SPRN_MMCRC,r5 - blr - -__init_PMU_HV_ISA207: - li r5,0 - mtspr SPRN_MMCRH,r5 - blr - -__init_PMU: - li r5,0 - mtspr SPRN_MMCRA,r5 - mtspr SPRN_MMCR0,r5 - mtspr SPRN_MMCR1,r5 - mtspr SPRN_MMCR2,r5 - blr - -__init_PMU_ISA207: - li r5,0 - mtspr SPRN_MMCRS,r5 - blr - -__init_PMU_ISA31: - li r5,0 - mtspr SPRN_MMCR3,r5 - LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE) - mtspr SPRN_MMCRA,r5 - blr diff --git a/arch/powerpc/kernel/cpu_setup_power.c b/arch/powerpc/kernel/cpu_setup_power.c new file mode 100644 index 0000000000000000000000000000000000000000..3cca88ee96d711813cb64430b838d95ec63bb98b --- /dev/null +++ b/arch/powerpc/kernel/cpu_setup_power.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright 2020, Jordan Niethe, IBM Corporation. + * + * This file contains low level CPU setup functions. + * Originally written in assembly by Benjamin Herrenschmidt & various other + * authors. + */ + +#include +#include +#include +#include +#include + +/* Disable CPU_FTR_HVMODE and return false if MSR:HV is not set */ +static bool init_hvmode_206(struct cpu_spec *t) +{ + u64 msr; + + msr = mfmsr(); + if (msr & MSR_HV) + return true; + + t->cpu_features &= ~(CPU_FTR_HVMODE | CPU_FTR_P9_TM_HV_ASSIST); + return false; +} + +static void init_LPCR_ISA300(u64 lpcr, u64 lpes) +{ + /* POWER9 has no VRMASD */ + lpcr |= (lpes << LPCR_LPES_SH) & LPCR_LPES; + lpcr |= LPCR_PECE0|LPCR_PECE1|LPCR_PECE2; + lpcr |= (4ull << LPCR_DPFD_SH) & LPCR_DPFD; + lpcr &= ~LPCR_HDICE; /* clear HDICE */ + lpcr |= (4ull << LPCR_VC_SH); + mtspr(SPRN_LPCR, lpcr); + isync(); +} + +/* + * Setup a sane LPCR: + * Called with initial LPCR and desired LPES 2-bit value + * + * LPES = 0b01 (HSRR0/1 used for 0x500) + * PECE = 0b111 + * DPFD = 4 + * HDICE = 0 + * VC = 0b100 (VPM0=1, VPM1=0, ISL=0) + * VRMASD = 0b10000 (L=1, LP=00) + * + * Other bits untouched for now + */ +static void init_LPCR_ISA206(u64 lpcr, u64 lpes) +{ + lpcr |= (0x10ull << LPCR_VRMASD_SH) & LPCR_VRMASD; + init_LPCR_ISA300(lpcr, lpes); +} + +static void init_FSCR(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_TAR|FSCR_EBB; + mtspr(SPRN_FSCR, fscr); +} + +static void init_FSCR_power9(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_SCV; + mtspr(SPRN_FSCR, fscr); + init_FSCR(); +} + +static void init_FSCR_power10(void) +{ + u64 fscr; + + fscr = mfspr(SPRN_FSCR); + fscr |= FSCR_PREFIX; + mtspr(SPRN_FSCR, fscr); + init_FSCR_power9(); +} + +static void init_HFSCR(void) +{ + u64 hfscr; + + hfscr = mfspr(SPRN_HFSCR); + hfscr |= HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|HFSCR_DSCR|\ + HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP; + mtspr(SPRN_HFSCR, hfscr); +} + +static void init_PMU_HV(void) +{ + mtspr(SPRN_MMCRC, 0); +} + +static void init_PMU_HV_ISA207(void) +{ + mtspr(SPRN_MMCRH, 0); +} + +static void init_PMU(void) +{ + mtspr(SPRN_MMCRA, 0); + mtspr(SPRN_MMCR0, 0); + mtspr(SPRN_MMCR1, 0); + mtspr(SPRN_MMCR2, 0); +} + +static void init_PMU_ISA207(void) +{ + mtspr(SPRN_MMCRS, 0); +} + +static void init_PMU_ISA31(void) +{ + mtspr(SPRN_MMCR3, 0); + mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); + mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); +} + +/* + * Note that we can be called twice of pseudo-PVRs. + * The parameter offset is not used. + */ + +void __setup_cpu_power7(unsigned long offset, struct cpu_spec *t) +{ + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); +} + +void __restore_cpu_power7(void) +{ + u64 msr; + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR), LPCR_LPES1 >> LPCR_LPES_SH); +} + +void __setup_cpu_power8(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR(); + init_PMU(); + init_PMU_ISA207(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ + init_HFSCR(); + init_PMU_HV(); + init_PMU_HV_ISA207(); +} + +void __restore_cpu_power8(void) +{ + u64 msr; + + init_FSCR(); + init_PMU(); + init_PMU_ISA207(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA206(mfspr(SPRN_LPCR) | LPCR_PECEDH, 0); /* LPES = 0 */ + init_HFSCR(); + init_PMU_HV(); + init_PMU_HV_ISA207(); +} + +void __setup_cpu_power9(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR_power9(); + init_PMU(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __restore_cpu_power9(void) +{ + u64 msr; + + init_FSCR_power9(); + init_PMU(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __setup_cpu_power10(unsigned long offset, struct cpu_spec *t) +{ + init_FSCR_power10(); + init_PMU(); + init_PMU_ISA31(); + + if (!init_hvmode_206(t)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} + +void __restore_cpu_power10(void) +{ + u64 msr; + + init_FSCR_power10(); + init_PMU(); + init_PMU_ISA31(); + + msr = mfmsr(); + if (!(msr & MSR_HV)) + return; + + mtspr(SPRN_PSSCR, 0); + mtspr(SPRN_LPID, 0); + mtspr(SPRN_PID, 0); + mtspr(SPRN_PCR, PCR_MASK); + init_LPCR_ISA300((mfspr(SPRN_LPCR) | LPCR_PECEDH | LPCR_PECE_HVEE |\ + LPCR_HVICE | LPCR_HEIC) & ~(LPCR_UPRT | LPCR_HR), 0); + init_HFSCR(); + init_PMU_HV(); +} diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 29de58d4dfb767e01409629796e955b7c7a80841..8fdb40ee86d11930e4bd25220d16672ffa1f21ce 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -60,19 +60,15 @@ extern void __setup_cpu_7410(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_745x(unsigned long offset, struct cpu_spec* spec); #endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 +#include extern void __setup_cpu_ppc970(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_ppc970MP(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_pa6t(unsigned long offset, struct cpu_spec* spec); extern void __restore_cpu_pa6t(void); extern void __restore_cpu_ppc970(void); -extern void __setup_cpu_power7(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power7(void); -extern void __setup_cpu_power8(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power8(void); -extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power9(void); -extern void __setup_cpu_power10(unsigned long offset, struct cpu_spec* spec); -extern void __restore_cpu_power10(void); +extern long __machine_check_early_realmode_p7(struct pt_regs *regs); +extern long __machine_check_early_realmode_p8(struct pt_regs *regs); +extern long __machine_check_early_realmode_p9(struct pt_regs *regs); #endif /* CONFIG_PPC64 */ #if defined(CONFIG_E500) extern void __setup_cpu_e5500(unsigned long offset, struct cpu_spec* spec); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index a1c744194018448281b08dfe987a42126051871d..9ac0651795cf6de50631a3ecd59690699e00d810 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -117,6 +117,15 @@ u64 dma_iommu_get_required_mask(struct device *dev) struct iommu_table *tbl = get_iommu_table_base(dev); u64 mask; + if (dev_is_pci(dev)) { + u64 bypass_mask = dma_direct_get_required_mask(dev); + + if (dma_iommu_dma_supported(dev, bypass_mask)) { + dev_info(dev, "%s: returning bypass mask 0x%llx\n", __func__, bypass_mask); + return bypass_mask; + } + } + if (!tbl) return 0; diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 1098863e17ee8d90cdd9b1363ed16b40a2748e95..9d079659b24d344698f33451935ad68b2cef75be 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -454,6 +454,7 @@ static void init_pmu_power10(void) mtspr(SPRN_MMCR3, 0); mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE); + mtspr(SPRN_MMCR0, MMCR0_PMCCEXT); } static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9d3b468bd2d7a75a3fa44d452198b648ad08002f..10df278dc3fbe1339b5b54056c8d4c43bd45b269 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1715,27 +1715,30 @@ EXC_COMMON_BEGIN(program_check_common) */ andi. r10,r12,MSR_PR - bne 2f /* If userspace, go normal path */ + bne .Lnormal_stack /* If userspace, go normal path */ andis. r10,r12,(SRR1_PROGTM)@h - bne 1f /* If TM, emergency */ + bne .Lemergency_stack /* If TM, emergency */ cmpdi r1,-INT_FRAME_SIZE /* check if r1 is in userspace */ - blt 2f /* normal path if not */ + blt .Lnormal_stack /* normal path if not */ /* Use the emergency stack */ -1: andi. r10,r12,MSR_PR /* Set CR0 correctly for label */ +.Lemergency_stack: + andi. r10,r12,MSR_PR /* Set CR0 correctly for label */ /* 3 in EXCEPTION_PROLOG_COMMON */ mr r10,r1 /* Save r1 */ ld r1,PACAEMERGSP(r13) /* Use emergency stack */ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */ __ISTACK(program_check)=0 __GEN_COMMON_BODY program_check - b 3f -2: + b .Ldo_program_check + +.Lnormal_stack: __ISTACK(program_check)=1 __GEN_COMMON_BODY program_check -3: + +.Ldo_program_check: addi r3,r1,STACK_FRAME_OVERHEAD bl program_check_exception REST_NVGPRS(r1) /* instruction emulation may change GPRs */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index eddf362caedce8091bcc417bde5a99547d836019..c3bb800dc4352398dcad2186fb6c6fdb624fd1e3 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1641,6 +1641,14 @@ int __init setup_fadump(void) else if (fw_dump.reserve_dump_area_size) fw_dump.ops->fadump_init_mem_struct(&fw_dump); + /* + * In case of panic, fadump is triggered via ppc_panic_event() + * panic notifier. Setting crash_kexec_post_notifiers to 'true' + * lets panic() function take crash friendly path before panic + * notifiers are invoked. + */ + crash_kexec_post_notifiers = true; + return 1; } subsys_initcall(setup_fadump); diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c index fe48d319d490e651b44c7806efd20fcbcc6b6001..20328f72f9f2ba2965da1f3148ff4f533f4564cd 100644 --- a/arch/powerpc/kernel/firmware.c +++ b/arch/powerpc/kernel/firmware.c @@ -14,6 +14,7 @@ #include #include +#include #ifdef CONFIG_PPC64 unsigned long powerpc_firmware_features __read_mostly; @@ -21,7 +22,8 @@ EXPORT_SYMBOL_GPL(powerpc_firmware_features); #endif #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST) -bool is_kvm_guest(void) +DEFINE_STATIC_KEY_FALSE(kvm_guest); +int __init check_kvm_guest(void) { struct device_node *hyper_node; @@ -29,9 +31,11 @@ bool is_kvm_guest(void) if (!hyper_node) return 0; - if (!of_device_is_compatible(hyper_node, "linux,kvm")) - return 0; + if (of_device_is_compatible(hyper_node, "linux,kvm")) + static_branch_enable(&kvm_guest); - return 1; + of_node_put(hyper_node); + return 0; } +core_initcall(check_kvm_guest); // before kvm_guest_init() #endif diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index f8e3d15ddf694c0cfc72c148174016716dc1807e..abb057a86739b791c6fa8c50f8d8fa850b6e2543 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -333,11 +333,11 @@ label: mfspr r1, SPRN_SPRG_THREAD lwz r1, TASK_CPU - THREAD(r1) slwi r1, r1, 3 - addis r1, r1, emergency_ctx@ha + addis r1, r1, emergency_ctx-PAGE_OFFSET@ha #else - lis r1, emergency_ctx@ha + lis r1, emergency_ctx-PAGE_OFFSET@ha #endif - lwz r1, emergency_ctx@l(r1) + lwz r1, emergency_ctx-PAGE_OFFSET@l(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE EXCEPTION_PROLOG_2 SAVE_NVGPRS(r11) diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a1ae00689e0f4898d17a6782a9e82afc7756edd7..aeb9bc9958749e8a67a4887bf09b3f6fae51172d 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -27,6 +27,7 @@ #include #include +#include #include #include #include @@ -626,7 +627,7 @@ start_here: b . /* prevent prefetch past rfi */ /* Set up the initial MMU state so we can do the first level of - * kernel initialization. This maps the first 16 MBytes of memory 1:1 + * kernel initialization. This maps the first 32 MBytes of memory 1:1 * virtual to physical and more importantly sets the cache mode. */ initial_mmu: @@ -663,6 +664,12 @@ initial_mmu: tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + li r0,62 /* TLB slot 62 */ + addis r4,r4,SZ_16M@h + addis r3,r3,SZ_16M@h + tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ + tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + isync /* Establish the exception vector base diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ce5fd93499a74b9e769f2abb62b653af17e41560..a61b4ff3b7102ad9c6225a4ffd65221dc21defd3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -766,6 +766,7 @@ _GLOBAL(mmu_pin_tlb) #ifdef CONFIG_PIN_TLB_DATA LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) + li r8, 0 #ifdef CONFIG_PIN_TLB_IMMR li r0, 3 #else @@ -774,26 +775,26 @@ _GLOBAL(mmu_pin_tlb) mtctr r0 cmpwi r4, 0 beq 4f - LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) LOAD_REG_ADDR(r9, _sinittext) 2: ori r0, r6, MD_EVALID + ori r12, r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT mtspr SPRN_MD_CTR, r5 mtspr SPRN_MD_EPN, r0 mtspr SPRN_MD_TWC, r7 - mtspr SPRN_MD_RPN, r8 + mtspr SPRN_MD_RPN, r12 addi r5, r5, 0x100 addis r6, r6, SZ_8M@h addis r8, r8, SZ_8M@h cmplw r6, r9 bdnzt lt, 2b - -4: LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT) +4: 2: ori r0, r6, MD_EVALID + ori r12, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT mtspr SPRN_MD_CTR, r5 mtspr SPRN_MD_EPN, r0 mtspr SPRN_MD_TWC, r7 - mtspr SPRN_MD_RPN, r8 + mtspr SPRN_MD_RPN, r12 addi r5, r5, 0x100 addis r6, r6, SZ_8M@h addis r8, r8, SZ_8M@h @@ -814,7 +815,7 @@ _GLOBAL(mmu_pin_tlb) #endif #if defined(CONFIG_PIN_TLB_IMMR) || defined(CONFIG_PIN_TLB_DATA) lis r0, (MD_RSV4I | MD_TWAM)@h - mtspr SPRN_MI_CTR, r0 + mtspr SPRN_MD_CTR, r0 #endif mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 22f249b6f58de0bd37187b66fd70aaa039698252..b16aecaaa91261aaeb1118ba5c1059a76e5919dd 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -52,28 +52,32 @@ _GLOBAL(isa300_idle_stop_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) /* 168 bytes */ PPC_STOP b . /* catch bugs */ @@ -89,8 +93,8 @@ _GLOBAL(isa300_idle_stop_mayloss) */ _GLOBAL(idle_return_gpr_loss) ld r1,PACAR1(r13) - ld r4,-8*19(r1) - ld r5,-8*20(r1) + ld r4,-8*20(r1) + ld r5,-8*21(r1) mtlr r4 mtcr r5 /* @@ -98,38 +102,40 @@ _GLOBAL(idle_return_gpr_loss) * from PACATOC. This could be avoided for that less common case * if KVM saved its r2. */ - ld r2,-8*0(r1) - ld r14,-8*1(r1) - ld r15,-8*2(r1) - ld r16,-8*3(r1) - ld r17,-8*4(r1) - ld r18,-8*5(r1) - ld r19,-8*6(r1) - ld r20,-8*7(r1) - ld r21,-8*8(r1) - ld r22,-8*9(r1) - ld r23,-8*10(r1) - ld r24,-8*11(r1) - ld r25,-8*12(r1) - ld r26,-8*13(r1) - ld r27,-8*14(r1) - ld r28,-8*15(r1) - ld r29,-8*16(r1) - ld r30,-8*17(r1) - ld r31,-8*18(r1) + ld r2,-8*1(r1) + ld r14,-8*2(r1) + ld r15,-8*3(r1) + ld r16,-8*4(r1) + ld r17,-8*5(r1) + ld r18,-8*6(r1) + ld r19,-8*7(r1) + ld r20,-8*8(r1) + ld r21,-8*9(r1) + ld r22,-8*10(r1) + ld r23,-8*11(r1) + ld r24,-8*12(r1) + ld r25,-8*13(r1) + ld r26,-8*14(r1) + ld r27,-8*15(r1) + ld r28,-8*16(r1) + ld r29,-8*17(r1) + ld r30,-8*18(r1) + ld r31,-8*19(r1) blr /* * This is the sequence required to execute idle instructions, as * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0. - * - * The 0(r1) slot is used to save r2 in isa206, so use that here. + * We have to store a GPR somewhere, ptesync, then reload it, and create + * a false dependency on the result of the load. It doesn't matter which + * GPR we store, or where we store it. We have already stored r2 to the + * stack at -8(r1) in isa206_idle_insn_mayloss, so use that. */ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ /* Magic NAP/SLEEP/WINKLE mode enter sequence */ \ - std r2,0(r1); \ + std r2,-8(r1); \ ptesync; \ - ld r2,0(r1); \ + ld r2,-8(r1); \ 236: cmpd cr0,r2,r2; \ bne 236b; \ IDLE_INST; \ @@ -154,28 +160,32 @@ _GLOBAL(isa206_idle_insn_mayloss) std r1,PACAR1(r13) mflr r4 mfcr r5 - /* use stack red zone rather than a new frame for saving regs */ - std r2,-8*0(r1) - std r14,-8*1(r1) - std r15,-8*2(r1) - std r16,-8*3(r1) - std r17,-8*4(r1) - std r18,-8*5(r1) - std r19,-8*6(r1) - std r20,-8*7(r1) - std r21,-8*8(r1) - std r22,-8*9(r1) - std r23,-8*10(r1) - std r24,-8*11(r1) - std r25,-8*12(r1) - std r26,-8*13(r1) - std r27,-8*14(r1) - std r28,-8*15(r1) - std r29,-8*16(r1) - std r30,-8*17(r1) - std r31,-8*18(r1) - std r4,-8*19(r1) - std r5,-8*20(r1) + /* + * Use the stack red zone rather than a new frame for saving regs since + * in the case of no GPR loss the wakeup code branches directly back to + * the caller without deallocating the stack frame first. + */ + std r2,-8*1(r1) + std r14,-8*2(r1) + std r15,-8*3(r1) + std r16,-8*4(r1) + std r17,-8*5(r1) + std r18,-8*6(r1) + std r19,-8*7(r1) + std r20,-8*8(r1) + std r21,-8*9(r1) + std r22,-8*10(r1) + std r23,-8*11(r1) + std r24,-8*12(r1) + std r25,-8*13(r1) + std r26,-8*14(r1) + std r27,-8*15(r1) + std r28,-8*16(r1) + std r29,-8*17(r1) + std r30,-8*18(r1) + std r31,-8*19(r1) + std r4,-8*20(r1) + std r5,-8*21(r1) cmpwi r3,PNV_THREAD_NAP bne 1f IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 7e337c570ea6b3ea4b0f7a1b4f372835cb5e8ae5..9e71c0739f08dd32a70a56a5a7fb76a09364f665 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2956,7 +2956,7 @@ static void __init fixup_device_tree_efika_add_phy(void) /* Check if the phy-handle property exists - bail if it does */ rv = prom_getprop(node, "phy-handle", prop, sizeof(prop)); - if (!rv) + if (rv <= 0) return; /* diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index e4e1a94ccf6a6fa541ab9f9dd6193f11ed509c6d..3f510c911b1076a716582f7be3c41d6eb7f4c96e 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -261,6 +261,11 @@ static int __init handle_no_stf_barrier(char *p) early_param("no_stf_barrier", handle_no_stf_barrier); +enum stf_barrier_type stf_barrier_type_get(void) +{ + return stf_enabled_flush_types; +} + /* This is the generic flag used by other architectures */ static int __init handle_ssbd(char *p) { diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index aa0a35ebc04abdb774b3a420cc7a2863562f4139..cf99f57aed8220b15544a08cb534e3a11880bc2f 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -60,6 +60,7 @@ #include #include #include +#include #ifdef DEBUG #include @@ -594,6 +595,45 @@ void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *)) } #endif +#ifdef CONFIG_NMI_IPI +static void crash_stop_this_cpu(struct pt_regs *regs) +#else +static void crash_stop_this_cpu(void *dummy) +#endif +{ + /* + * Just busy wait here and avoid marking CPU as offline to ensure + * register data is captured appropriately. + */ + while (1) + cpu_relax(); +} + +void crash_smp_send_stop(void) +{ + static bool stopped = false; + + /* + * In case of fadump, register data for all CPUs is captured by f/w + * on ibm,os-term rtas call. Skip IPI callbacks to other CPUs before + * this rtas call to avoid tricky post processing of those CPUs' + * backtraces. + */ + if (should_fadump_crash()) + return; + + if (stopped) + return; + + stopped = true; + +#ifdef CONFIG_NMI_IPI + smp_send_nmi_ipi(NMI_IPI_ALL_OTHERS, crash_stop_this_cpu, 1000000); +#else + smp_call_function(crash_stop_this_cpu, NULL, 0); +#endif /* CONFIG_NMI_IPI */ +} + #ifdef CONFIG_NMI_IPI static void nmi_stop_this_cpu(struct pt_regs *regs) { @@ -1436,7 +1476,6 @@ void start_secondary(void *unused) smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); rcu_cpu_starting(cpu); - preempt_disable(); cpu_callin_map[cpu] = 1; if (smp_ops->setup_cpu) @@ -1489,10 +1528,12 @@ void start_secondary(void *unused) BUG(); } +#ifdef CONFIG_PROFILING int setup_profiling_timer(unsigned int multiplier) { return 0; } +#endif static void fixup_topology(void) { @@ -1579,8 +1620,6 @@ void __cpu_die(unsigned int cpu) void arch_cpu_idle_dead(void) { - sched_preempt_enable_no_resched(); - /* * Disable on the down path. This will be re-enabled by * start_secondary() via start_secondary_resume() below diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 1275daec7fec365024fd50d2b43d42616d829b20..1249444185b708e28340158a642dbc2209559459 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -530,3 +530,5 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 77dffea3d5373d69b886577edd9f9d88b978ab2c..069d451240fa4c961fc85fed91a3eb5777d57676 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1922,11 +1922,40 @@ void vsx_unavailable_tm(struct pt_regs *regs) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -void performance_monitor_exception(struct pt_regs *regs) +static void performance_monitor_exception_nmi(struct pt_regs *regs) +{ + nmi_enter(); + + __this_cpu_inc(irq_stat.pmu_irqs); + + perf_irq(regs); + + nmi_exit(); +} + +static void performance_monitor_exception_async(struct pt_regs *regs) { + irq_enter(); + __this_cpu_inc(irq_stat.pmu_irqs); perf_irq(regs); + + irq_exit(); +} + +void performance_monitor_exception(struct pt_regs *regs) +{ + /* + * On 64-bit, if perf interrupts hit in a local_irq_disable + * (soft-masked) region, we consider them as NMIs. This is required to + * prevent hash faults on user addresses when reading callchains (and + * looks better from an irq tracing perspective). + */ + if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs))) + performance_monitor_exception_nmi(regs); + else + performance_monitor_exception_async(regs); } #ifdef CONFIG_PPC_ADV_DEBUG_REGS diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index af3c15a1d41eb159670be90218c12b9966afa7f1..75b2a6c4db5a5001313ddb2e0764a42b7656ee9c 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -132,6 +132,10 @@ static void set_cpumask_stuck(const struct cpumask *cpumask, u64 tb) { cpumask_or(&wd_smp_cpus_stuck, &wd_smp_cpus_stuck, cpumask); cpumask_andnot(&wd_smp_cpus_pending, &wd_smp_cpus_pending, cpumask); + /* + * See wd_smp_clear_cpu_pending() + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; cpumask_andnot(&wd_smp_cpus_pending, @@ -217,13 +221,44 @@ static void wd_smp_clear_cpu_pending(int cpu, u64 tb) cpumask_clear_cpu(cpu, &wd_smp_cpus_stuck); wd_smp_unlock(&flags); + } else { + /* + * The last CPU to clear pending should have reset the + * watchdog so we generally should not find it empty + * here if our CPU was clear. However it could happen + * due to a rare race with another CPU taking the + * last CPU out of the mask concurrently. + * + * We can't add a warning for it. But just in case + * there is a problem with the watchdog that is causing + * the mask to not be reset, try to kick it along here. + */ + if (unlikely(cpumask_empty(&wd_smp_cpus_pending))) + goto none_pending; } return; } + cpumask_clear_cpu(cpu, &wd_smp_cpus_pending); + + /* + * Order the store to clear pending with the load(s) to check all + * words in the pending mask to check they are all empty. This orders + * with the same barrier on another CPU. This prevents two CPUs + * clearing the last 2 pending bits, but neither seeing the other's + * store when checking if the mask is empty, and missing an empty + * mask, which ends with a false positive. + */ + smp_mb(); if (cpumask_empty(&wd_smp_cpus_pending)) { unsigned long flags; +none_pending: + /* + * Double check under lock because more than one CPU could see + * a clear mask with the lockless check after clearing their + * pending bits. + */ wd_smp_lock(&flags); if (cpumask_empty(&wd_smp_cpus_pending)) { wd_smp_last_reset_tb = tb; @@ -314,8 +349,12 @@ void arch_touch_nmi_watchdog(void) { unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000; int cpu = smp_processor_id(); - u64 tb = get_tb(); + u64 tb; + if (!cpumask_test_cpu(cpu, &watchdog_cpumask)) + return; + + tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) { per_cpu(wd_timer_tb, cpu) = tb; wd_smp_clear_cpu_pending(cpu, tb); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 175967a195c4459075cdb7cc3c70841a64a7a4cc..527c205d5a5f5f4f05d7416ef83ede53d5479d23 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4557,8 +4557,12 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, unsigned long npages = mem->memory_size >> PAGE_SHIFT; if (change == KVM_MR_CREATE) { - slot->arch.rmap = vzalloc(array_size(npages, - sizeof(*slot->arch.rmap))); + unsigned long size = array_size(npages, sizeof(*slot->arch.rmap)); + + if ((size >> PAGE_SHIFT) > totalram_pages()) + return -ENOMEM; + + slot->arch.rmap = vzalloc(size); if (!slot->arch.rmap) return -ENOMEM; } diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 4621905bdd9ea05bbe0cfaf578958d1e3f6fb7a1..121fca2bcd82b61399d4a979d7589b1d63e99c2c 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -867,6 +867,7 @@ static void flush_guest_tlb(struct kvm *kvm) "r" (0) : "memory"); } asm volatile("ptesync": : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. asm volatile(PPC_RADIX_INVALIDATE_ERAT_GUEST : : :"memory"); } else { for (set = 0; set < kvm->arch.tlb_sets; ++set) { @@ -877,7 +878,9 @@ static void flush_guest_tlb(struct kvm *kvm) rb += PPC_BIT(51); /* increment set number */ } asm volatile("ptesync": : :"memory"); - asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); + // POWER9 congruence-class TLBIEL leaves ERAT. Flush it now. + if (cpu_has_feature(CPU_FTR_ARCH_300)) + asm volatile(PPC_ISA_3_0_INVALIDATE_ERAT : : :"memory"); } } diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index a5f1ae892ba68ddd1d6dca6728d10339fe3c950c..d0b6c8c16c48a5ed7e475ce11af48af2acf124b4 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -510,7 +510,7 @@ long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) if (eaddr & (0xFFFUL << 52)) return H_PARAMETER; - buf = kzalloc(n, GFP_KERNEL); + buf = kzalloc(n, GFP_KERNEL | __GFP_NOWARN); if (!buf) return H_NO_MEM; diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 5777b72bb8b624593133ea14c8c2cd7b0e0a193c..b1d9afffd8419ef5cab67c70de4e2f0631a49c18 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -292,13 +292,16 @@ kvm_novcpu_exit: * r3 contains the SRR1 wakeup value, SRR1 is trashed. */ _GLOBAL(idle_kvm_start_guest) - ld r4,PACAEMERGSP(r13) mfcr r5 mflr r0 - std r1,0(r4) - std r5,8(r4) - std r0,16(r4) - subi r1,r4,STACK_FRAME_OVERHEAD + std r5, 8(r1) // Save CR in caller's frame + std r0, 16(r1) // Save LR in caller's frame + // Create frame on emergency stack + ld r4, PACAEMERGSP(r13) + stdu r1, -SWITCH_FRAME_SIZE(r4) + // Switch to new frame on emergency stack + mr r1, r4 + std r3, 32(r1) // Save SRR1 wakeup value SAVE_NVGPRS(r1) /* @@ -350,6 +353,10 @@ kvm_unsplit_wakeup: kvm_secondary_got_guest: + // About to go to guest, clear saved SRR1 + li r0, 0 + std r0, 32(r1) + /* Set HSTATE_DSCR(r13) to something sensible */ ld r6, PACA_DSCR_DEFAULT(r13) std r6, HSTATE_DSCR(r13) @@ -441,13 +448,12 @@ kvm_no_guest: mfspr r4, SPRN_LPCR rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1 mtspr SPRN_LPCR, r4 - /* set up r3 for return */ - mfspr r3,SPRN_SRR1 + // Return SRR1 wakeup value, or 0 if we went into the guest + ld r3, 32(r1) REST_NVGPRS(r1) - addi r1, r1, STACK_FRAME_OVERHEAD - ld r0, 16(r1) - ld r5, 8(r1) - ld r1, 0(r1) + ld r1, 0(r1) // Switch back to caller stack + ld r0, 16(r1) // Reload LR + ld r5, 8(r1) // Reload CR mtlr r0 mtcr r5 blr @@ -2533,7 +2539,7 @@ hcall_real_table: .globl hcall_real_table_end hcall_real_table_end: -_GLOBAL(kvmppc_h_set_xdabr) +_GLOBAL_TOC(kvmppc_h_set_xdabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) andi. r0, r5, DABRX_USER | DABRX_KERNEL beq 6f @@ -2543,7 +2549,7 @@ EXPORT_SYMBOL_GPL(kvmppc_h_set_xdabr) 6: li r3, H_PARAMETER blr -_GLOBAL(kvmppc_h_set_dabr) +_GLOBAL_TOC(kvmppc_h_set_dabr) EXPORT_SYMBOL_GPL(kvmppc_h_set_dabr) li r5, DABRX_USER | DABRX_KERNEL 3: diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 58991233381ed6d0d1f730b3c6836e53e006d9a7..379b6dbb326b371db90015efef791ad9739977b5 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -19,6 +19,9 @@ CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING endif +CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) +CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + obj-y += alloc.o code-patching.o feature-fixups.o pmem.o inst.o test_code-patching.o ifndef CONFIG_KASAN @@ -34,7 +37,7 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o # 64-bit linker creates .sfpr on demand for final link (vmlinux), # so it is only needed for modules, and only for older linkers which # do not support --save-restore-funcs -ifeq ($(call ld-ifversion, -lt, 225000000, y),y) +ifeq ($(call ld-ifversion, -lt, 22500, y),y) extra-$(CONFIG_PPC64) += crtsavres.o endif diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 2333625b5e315006ac9d5b97477957fc2cf76aff..a2e4f864b63d2ea1c4e10c185849b4e06350bc39 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -230,6 +230,11 @@ bool is_offset_in_branch_range(long offset) return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3)); } +bool is_offset_in_cond_branch_range(long offset) +{ + return offset >= -0x8000 && offset <= 0x7fff && !(offset & 0x3); +} + /* * Helper to check if a given instruction is a conditional branch * Derived from the conditional checks in analyse_instr() @@ -283,7 +288,7 @@ int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr, offset = offset - (unsigned long)addr; /* Check we can represent the target in the instruction format */ - if (offset < -0x8000 || offset > 0x7FFF || offset & 0x3) + if (!is_offset_in_cond_branch_range(offset)) return 1; /* Mask out the flags and target, so they don't step on each other. */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index a2e067f68dee867762fb4657d4e8aaf5ec68e62c..0edebbbffcdcaa00f41f86f4e1d8e56f8e9feae9 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -3062,12 +3062,14 @@ void emulate_update_regs(struct pt_regs *regs, struct instruction_op *op) case BARRIER_EIEIO: eieio(); break; +#ifdef CONFIG_PPC64 case BARRIER_LWSYNC: asm volatile("lwsync" : : : "memory"); break; case BARRIER_PTESYNC: asm volatile("ptesync" : : : "memory"); break; +#endif } break; diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index a59e7ec9818039de71e921ea9dcee6063415ad1a..602ab13127b4077a6fc81dafa002019432f67436 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -72,7 +72,7 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; } -static int find_free_bat(void) +int __init find_free_bat(void) { int b; int n = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; @@ -96,7 +96,7 @@ static int find_free_bat(void) * - block size has to be a power of two. This is calculated by finding the * highest bit set to 1. */ -static unsigned int block_size(unsigned long base, unsigned long top) +unsigned int bat_block_size(unsigned long base, unsigned long top) { unsigned int max_size = SZ_256M; unsigned int base_shift = (ffs(base) - 1) & 31; @@ -141,7 +141,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to int idx; while ((idx = find_free_bat()) != -1 && base != top) { - unsigned int size = block_size(base, top); + unsigned int size = bat_block_size(base, top); if (size < 128 << 10) break; @@ -201,18 +201,17 @@ void mmu_mark_initmem_nx(void) int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; int i; unsigned long base = (unsigned long)_stext - PAGE_OFFSET; - unsigned long top = (unsigned long)_etext - PAGE_OFFSET; + unsigned long top = ALIGN((unsigned long)_etext - PAGE_OFFSET, SZ_128K); unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET; unsigned long size; - for (i = 0; i < nb - 1 && base < top && top - base > (128 << 10);) { - size = block_size(base, top); + for (i = 0; i < nb - 1 && base < top;) { + size = bat_block_size(base, top); setibat(i++, PAGE_OFFSET + base, base, size, PAGE_KERNEL_TEXT); base += size; } if (base < top) { - size = block_size(base, top); - size = max(size, 128UL << 10); + size = bat_block_size(base, top); if ((top - base) > size) { size <<= 1; if (strict_kernel_rwx_enabled() && base + size > border) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index 1d5eec847b883e8eda9e2eb7ba09a39193219c4f..295959487b76d606766962bd29a5184ffec2d5ab 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1152,7 +1152,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) int pud_clear_huge(pud_t *pud) { - if (pud_huge(*pud)) { + if (pud_is_leaf(*pud)) { pud_clear(pud); return 1; } @@ -1199,7 +1199,7 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) int pmd_clear_huge(pmd_t *pmd) { - if (pmd_huge(*pmd)) { + if (pmd_is_leaf(*pmd)) { pmd_clear(pmd); return 1; } diff --git a/arch/powerpc/mm/kasan/book3s_32.c b/arch/powerpc/mm/kasan/book3s_32.c index 202bd260a009567b242b7b0177d4f47bfc53f364..450a67ef0bbe1eb4fc4f897e897f1ff24727f44c 100644 --- a/arch/powerpc/mm/kasan/book3s_32.c +++ b/arch/powerpc/mm/kasan/book3s_32.c @@ -10,47 +10,51 @@ int __init kasan_init_region(void *start, size_t size) { unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start); unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size); - unsigned long k_cur = k_start; - int k_size = k_end - k_start; - int k_size_base = 1 << (ffs(k_size) - 1); + unsigned long k_nobat = k_start; + unsigned long k_cur; + phys_addr_t phys; int ret; - void *block; - block = memblock_alloc(k_size, k_size_base); - - if (block && k_size_base >= SZ_128K && k_start == ALIGN(k_start, k_size_base)) { - int k_size_more = 1 << (ffs(k_size - k_size_base) - 1); - - setbat(-1, k_start, __pa(block), k_size_base, PAGE_KERNEL); - if (k_size_more >= SZ_128K) - setbat(-1, k_start + k_size_base, __pa(block) + k_size_base, - k_size_more, PAGE_KERNEL); - if (v_block_mapped(k_start)) - k_cur = k_start + k_size_base; - if (v_block_mapped(k_start + k_size_base)) - k_cur = k_start + k_size_base + k_size_more; - - update_bats(); + while (k_nobat < k_end) { + unsigned int k_size = bat_block_size(k_nobat, k_end); + int idx = find_free_bat(); + + if (idx == -1) + break; + if (k_size < SZ_128K) + break; + phys = memblock_phys_alloc_range(k_size, k_size, 0, + MEMBLOCK_ALLOC_ANYWHERE); + if (!phys) + break; + + setbat(idx, k_nobat, phys, k_size, PAGE_KERNEL); + k_nobat += k_size; } + if (k_nobat != k_start) + update_bats(); - if (!block) - block = memblock_alloc(k_size, PAGE_SIZE); - if (!block) - return -ENOMEM; + if (k_nobat < k_end) { + phys = memblock_phys_alloc_range(k_end - k_nobat, PAGE_SIZE, 0, + MEMBLOCK_ALLOC_ANYWHERE); + if (!phys) + return -ENOMEM; + } ret = kasan_init_shadow_page_tables(k_start, k_end); if (ret) return ret; - kasan_update_early_region(k_start, k_cur, __pte(0)); + kasan_update_early_region(k_start, k_nobat, __pte(0)); - for (; k_cur < k_end; k_cur += PAGE_SIZE) { + for (k_cur = k_nobat; k_cur < k_end; k_cur += PAGE_SIZE) { pmd_t *pmd = pmd_off_k(k_cur); - void *va = block + k_cur - k_start; - pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL); + pte_t pte = pfn_pte(PHYS_PFN(phys + k_cur - k_nobat), PAGE_KERNEL); __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0); } flush_tlb_kernel_range(k_start, k_end); + memset(kasan_mem_to_shadow(start), 0, k_end - k_start); + return 0; } diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 15555c95cebc70ea708e41753148f73142af0367..faaf33e204de1064af5843c03058b05428e40271 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -194,6 +194,15 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, __set_pte_at(mm, addr, ptep, pte, 0); } +void unmap_kernel_page(unsigned long va) +{ + pmd_t *pmdp = pmd_off_k(va); + pte_t *ptep = pte_offset_kernel(pmdp, va); + + pte_clear(&init_mm, va, ptep); + flush_tlb_kernel_range(va, va + PAGE_SIZE); +} + /* * This is called when relaxing access to a PTE. It's also called in the page * fault path when we don't hit any of the major fault cases, ie, a minor diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index cc6e2f94517fcbae716c9b1c6119002603497327..aefc2bfdf10494b0358ef531c899f51be083132f 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -102,7 +102,8 @@ EXPORT_SYMBOL(__pte_frag_size_shift); struct page *p4d_page(p4d_t p4d) { if (p4d_is_leaf(p4d)) { - VM_WARN_ON(!p4d_huge(p4d)); + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!p4d_huge(p4d)); return pte_page(p4d_pte(p4d)); } return virt_to_page(p4d_page_vaddr(p4d)); @@ -112,7 +113,8 @@ struct page *p4d_page(p4d_t p4d) struct page *pud_page(pud_t pud) { if (pud_is_leaf(pud)) { - VM_WARN_ON(!pud_huge(pud)); + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!pud_huge(pud)); return pte_page(pud_pte(pud)); } return virt_to_page(pud_page_vaddr(pud)); @@ -125,7 +127,13 @@ struct page *pud_page(pud_t pud) struct page *pmd_page(pmd_t pmd) { if (pmd_is_leaf(pmd)) { - VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); + /* + * vmalloc_to_page may be called on any vmap address (not only + * vmalloc), and it uses pmd_page() etc., when huge vmap is + * enabled so these checks can't be used. + */ + if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP)) + VM_WARN_ON(!(pmd_large(pmd) || pmd_huge(pmd))); return pte_page(pmd_pte(pmd)); } return virt_to_page(pmd_page_vaddr(pmd)); diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index d0a67a1bbaf1884d0f7f2574cb71d39ae4e8f2e2..1a5b4da8a23555c88b2fa4b14fee375fa68285f3 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -12,6 +12,7 @@ #include #include +#include #ifdef PPC64_ELF_ABI_v1 #define FUNCTION_DESCR_SIZE 24 @@ -24,13 +25,26 @@ #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) /* Long jump; (unconditional 'branch') */ -#define PPC_JMP(dest) EMIT(PPC_INST_BRANCH | \ - (((dest) - (ctx->idx * 4)) & 0x03fffffc)) +#define PPC_JMP(dest) \ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_branch_range(offset)) { \ + pr_err_ratelimited("Branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH | (offset & 0x03fffffc)); \ + } while (0) /* "cond" here covers BO:BI fields. */ -#define PPC_BCC_SHORT(cond, dest) EMIT(PPC_INST_BRANCH_COND | \ - (((cond) & 0x3ff) << 16) | \ - (((dest) - (ctx->idx * 4)) & \ - 0xfffc)) +#define PPC_BCC_SHORT(cond, dest) \ + do { \ + long offset = (long)(dest) - (ctx->idx * 4); \ + if (!is_offset_in_cond_branch_range(offset)) { \ + pr_err_ratelimited("Conditional branch offset 0x%lx (@%u) out of range\n", offset, ctx->idx); \ + return -ERANGE; \ + } \ + EMIT(PPC_INST_BRANCH_COND | (((cond) & 0x3ff) << 16) | (offset & 0xfffc)); \ + } while (0) + /* Sign-extended 32-bit immediate load */ #define PPC_LI32(d, i) do { \ if ((int)(uintptr_t)(i) >= -32768 && \ @@ -71,11 +85,6 @@ #define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0) #endif -static inline bool is_nearbranch(int offset) -{ - return (offset < 32768) && (offset >= -32768); -} - /* * The fly in the ointment of code size changing from pass to pass is * avoided by padding the short branch case with a NOP. If code size differs @@ -84,7 +93,7 @@ static inline bool is_nearbranch(int offset) * state. */ #define PPC_BCC(cond, dest) do { \ - if (is_nearbranch((dest) - (ctx->idx * 4))) { \ + if (is_offset_in_cond_branch_range((long)(dest) - (ctx->idx * 4))) { \ PPC_BCC_SHORT(cond, dest); \ EMIT(PPC_RAW_NOP()); \ } else { \ diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 2e33c6673ff9571b25187919f65e923bc7c27e20..4d164e865b392eb271bd612fa5b6d11a2c5320e4 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -16,18 +16,18 @@ * with our redzone usage. * * [ prev sp ] <------------- - * [ nv gpr save area ] 6*8 | + * [ nv gpr save area ] 5*8 | * [ tail_call_cnt ] 8 | - * [ local_tmp_var ] 8 | + * [ local_tmp_var ] 16 | * fp (r31) --> [ ebpf stack space ] upto 512 | * [ frame header ] 32/112 | * sp (r1) ---> [ stack pointer ] -------------- */ /* for gpr non volatile registers BPG_REG_6 to 10 */ -#define BPF_PPC_STACK_SAVE (6*8) +#define BPF_PPC_STACK_SAVE (5*8) /* for bpf JIT code internal usage */ -#define BPF_PPC_STACK_LOCALS 16 +#define BPF_PPC_STACK_LOCALS 24 /* stack frame excluding BPF stack, ensure this is quadword aligned */ #define BPF_PPC_STACKFRAME (STACK_FRAME_MIN_SIZE + \ BPF_PPC_STACK_LOCALS + BPF_PPC_STACK_SAVE) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 658ca2bab13cc56bbf7c4bc4e7fe4d92712e4bd6..0d47514e8870db6919a615a72a3b15c82ef52c75 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "bpf_jit64.h" @@ -56,9 +57,9 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ prev sp ] <------------- * [ ... ] | * sp (r1) ---> [ stack pointer ] -------------- - * [ nv gpr save area ] 6*8 + * [ nv gpr save area ] 5*8 * [ tail_call_cnt ] 8 - * [ local_tmp_var ] 8 + * [ local_tmp_var ] 16 * [ unused red zone ] 208 bytes protected */ static int bpf_jit_stack_local(struct codegen_context *ctx) @@ -66,12 +67,12 @@ static int bpf_jit_stack_local(struct codegen_context *ctx) if (bpf_has_stack_frame(ctx)) return STACK_FRAME_MIN_SIZE + ctx->stack_size; else - return -(BPF_PPC_STACK_SAVE + 16); + return -(BPF_PPC_STACK_SAVE + 24); } static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx) { - return bpf_jit_stack_local(ctx) + 8; + return bpf_jit_stack_local(ctx) + 16; } static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) @@ -224,7 +225,7 @@ static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, EMIT(PPC_RAW_BLRL()); } -static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) +static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* * By now, the eBPF program has already setup parameters in r3, r4 and r5 @@ -285,14 +286,39 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 bpf_jit_emit_common_epilogue(image, ctx); EMIT(PPC_RAW_BCTR()); + /* out: */ + return 0; } +/* + * We spill into the redzone always, even if the bpf program has its own stackframe. + * Offsets hardcoded based on BPF_PPC_STACK_SAVE -- see bpf_jit_stack_local() + */ +void bpf_stf_barrier(void); + +asm ( +" .global bpf_stf_barrier ;" +" bpf_stf_barrier: ;" +" std 21,-64(1) ;" +" std 22,-56(1) ;" +" sync ;" +" ld 21,-64(1) ;" +" ld 22,-56(1) ;" +" ori 31,31,0 ;" +" .rept 14 ;" +" b 1f ;" +" 1: ;" +" .endr ;" +" blr ;" +); + /* Assemble the body code between the prologue & epilogue */ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, u32 *addrs, bool extra_pass) { + enum stf_barrier_type stf_barrier = stf_barrier_type_get(); const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; int i, ret; @@ -347,18 +373,25 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg)); goto bpf_alu32_trunc; case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ - case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ + if (!imm) { + goto bpf_alu32_trunc; + } else if (imm >= -32768 && imm < 32768) { + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); + } else { + PPC_LI32(b2p[TMP_REG_1], imm); + EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); + } + goto bpf_alu32_trunc; + case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ - if (BPF_OP(code) == BPF_SUB) - imm = -imm; - if (imm) { - if (imm >= -32768 && imm < 32768) - EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm))); - else { - PPC_LI32(b2p[TMP_REG_1], imm); - EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1])); - } + if (!imm) { + goto bpf_alu32_trunc; + } else if (imm > -32768 && imm <= 32768) { + EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(-imm))); + } else { + PPC_LI32(b2p[TMP_REG_1], imm); + EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1])); } goto bpf_alu32_trunc; case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ @@ -408,8 +441,14 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ if (imm == 0) return -EINVAL; - else if (imm == 1) - goto bpf_alu32_trunc; + if (imm == 1) { + if (BPF_OP(code) == BPF_DIV) { + goto bpf_alu32_trunc; + } else { + EMIT(PPC_RAW_LI(dst_reg, 0)); + break; + } + } PPC_LI32(b2p[TMP_REG_1], imm); switch (BPF_CLASS(code)) { @@ -612,17 +651,21 @@ bpf_alu32_trunc: EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1])); break; case 64: - /* - * Way easier and faster(?) to store the value - * into stack and then use ldbrx - * - * ctx->seen will be reliable in pass2, but - * the instructions generated will remain the - * same across all passes - */ + /* Store the value to stack and then use byte-reverse loads */ PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx))); - EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + if (cpu_has_feature(CPU_FTR_ARCH_206)) { + EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1])); + } else { + EMIT(PPC_RAW_LWBRX(dst_reg, 0, b2p[TMP_REG_1])); + if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) + EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, 32)); + EMIT(PPC_RAW_LI(b2p[TMP_REG_2], 4)); + EMIT(PPC_RAW_LWBRX(b2p[TMP_REG_2], b2p[TMP_REG_2], b2p[TMP_REG_1])); + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)) + EMIT(PPC_RAW_SLDI(b2p[TMP_REG_2], b2p[TMP_REG_2], 32)); + EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_2])); + } break; } break; @@ -650,6 +693,30 @@ emit_clear: * BPF_ST NOSPEC (speculation barrier) */ case BPF_ST | BPF_NOSPEC: + if (!security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) || + (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) && + (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) || !cpu_has_feature(CPU_FTR_HVMODE)))) + break; + + switch (stf_barrier) { + case STF_BARRIER_EIEIO: + EMIT(0x7c0006ac | 0x02000000); + break; + case STF_BARRIER_SYNC_ORI: + EMIT(PPC_INST_SYNC); + EMIT(PPC_RAW_LD(b2p[TMP_REG_1], 13, 0)); + EMIT(PPC_RAW_ORI(31, 31, 0)); + break; + case STF_BARRIER_FALLBACK: + EMIT(PPC_INST_MFLR | ___PPC_RT(b2p[TMP_REG_1])); + PPC_LI64(12, dereference_kernel_function_descriptor(bpf_stf_barrier)); + EMIT(PPC_RAW_MTCTR(12)); + EMIT(PPC_INST_BCTR | 0x1); + EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1])); + break; + case STF_BARRIER_NONE: + break; + } break; /* @@ -997,7 +1064,9 @@ cond_branch: */ case BPF_JMP | BPF_TAIL_CALL: ctx->seen |= SEEN_TAILCALL; - bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + ret = bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); + if (ret < 0) + return ret; break; default: diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 91452313489f1ae7f5e990aedae09a5b24788f43..e49aa8fc6a491a972829788d91583e4fbbbc0eb5 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -95,6 +95,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define SPRN_SIER3 0 #define MMCRA_SAMPLE_ENABLE 0 #define MMCRA_BHRB_DISABLE 0 +#define MMCR0_PMCCEXT 0 static inline unsigned long perf_ip_adjust(struct pt_regs *regs) { @@ -109,10 +110,6 @@ static inline void perf_read_regs(struct pt_regs *regs) { regs->result = 0; } -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return 0; -} static inline int siar_valid(struct pt_regs *regs) { @@ -331,15 +328,6 @@ static inline void perf_read_regs(struct pt_regs *regs) regs->result = use_siar; } -/* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. - */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ - return (regs->softe & IRQS_DISABLED); -} - /* * On processors like P7+ that have the SIAR-Valid bit, marked instructions * must be sampled only if the SIAR-valid bit is set. @@ -817,6 +805,19 @@ static void write_pmc(int idx, unsigned long val) } } +static int any_pmc_overflown(struct cpu_hw_events *cpuhw) +{ + int i, idx; + + for (i = 0; i < cpuhw->n_events; i++) { + idx = cpuhw->event[i]->hw.idx; + if ((idx) && ((int)read_pmc(idx) < 0)) + return idx; + } + + return 0; +} + /* Called from sysrq_handle_showregs() */ void perf_event_print_debug(void) { @@ -1240,11 +1241,16 @@ static void power_pmu_disable(struct pmu *pmu) /* * Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56 + * Also clear PMXE to disable PMI's getting triggered in some + * corner cases during PMU disable. */ val = mmcr0 = mfspr(SPRN_MMCR0); val |= MMCR0_FC; val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO | - MMCR0_FC56); + MMCR0_PMXE | MMCR0_FC56); + /* Set mmcr0 PMCCEXT for p10 */ + if (ppmu->flags & PPMU_ARCH_31) + val |= MMCR0_PMCCEXT; /* * The barrier is to make sure the mtspr has been @@ -1255,6 +1261,34 @@ static void power_pmu_disable(struct pmu *pmu) mb(); isync(); + /* + * Some corner cases could clear the PMU counter overflow + * while a masked PMI is pending. One such case is when + * a PMI happens during interrupt replay and perf counter + * values are cleared by PMU callbacks before replay. + * + * If any PMC corresponding to the active PMU events are + * overflown, disable the interrupt by clearing the paca + * bit for PMI since we are disabling the PMU now. + * Otherwise provide a warning if there is PMI pending, but + * no counter is found overflown. + */ + if (any_pmc_overflown(cpuhw)) { + /* + * Since power_pmu_disable runs under local_irq_save, it + * could happen that code hits a PMC overflow without PMI + * pending in paca. Hence only clear PMI pending if it was + * set. + * + * If a PMI is pending, then MSR[EE] must be disabled (because + * the masked PMI handler disabling EE). So it is safe to + * call clear_pmi_irq_pending(). + */ + if (pmi_irq_pending()) + clear_pmi_irq_pending(); + } else + WARN_ON(pmi_irq_pending()); + val = mmcra = cpuhw->mmcr.mmcra; /* @@ -1346,6 +1380,15 @@ static void power_pmu_enable(struct pmu *pmu) * (possibly updated for removal of events). */ if (!cpuhw->n_added) { + /* + * If there is any active event with an overflown PMC + * value, set back PACA_IRQ_PMI which would have been + * cleared in power_pmu_disable(). + */ + hard_irq_disable(); + if (any_pmc_overflown(cpuhw)) + set_pmi_irq_pending(); + mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE); mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1); if (ppmu->flags & PPMU_ARCH_31) @@ -2250,7 +2293,6 @@ static void __perf_event_interrupt(struct pt_regs *regs) struct perf_event *event; unsigned long val[8]; int found, active; - int nmi; if (cpuhw->n_limited) freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), @@ -2258,18 +2300,6 @@ static void __perf_event_interrupt(struct pt_regs *regs) perf_read_regs(regs); - /* - * If perf interrupts hit in a local_irq_disable (soft-masked) region, - * we consider them as NMIs. This is required to prevent hash faults on - * user addresses when reading callchains. See the NMI test in - * do_hash_page. - */ - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); - /* Read all the PMCs since we'll need them a bunch of times */ for (i = 0; i < ppmu->n_counter; ++i) val[i] = read_pmc(i + 1); @@ -2296,6 +2326,14 @@ static void __perf_event_interrupt(struct pt_regs *regs) break; } } + + /* + * Clear PACA_IRQ_PMI in case it was set by + * set_pmi_irq_pending() when PMU was enabled + * after accounting for interrupts. + */ + clear_pmi_irq_pending(); + if (!active) /* reset non active counters that have overflowed */ write_pmc(i + 1, 0); @@ -2315,8 +2353,15 @@ static void __perf_event_interrupt(struct pt_regs *regs) } } } - if (!found && !nmi && printk_ratelimit()) - printk(KERN_WARNING "Can't find PMC that caused IRQ\n"); + + /* + * During system wide profling or while specific CPU is monitored for an + * event, some corner cases could cause PMC to overflow in idle path. This + * will trigger a PMI after waking up from idle. Since counter values are _not_ + * saved/restored in idle path, can lead to below "Can't find PMC" message. + */ + if (unlikely(!found) && !arch_irq_disabled_regs(regs)) + printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n"); /* * Reset MMCR0 to its normal value. This will set PMXE and @@ -2326,11 +2371,6 @@ static void __perf_event_interrupt(struct pt_regs *regs) * we get back out of this interrupt. */ write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0); - - if (nmi) - nmi_exit(); - else - irq_exit(); } static void perf_event_interrupt(struct pt_regs *regs) diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index e0e7e276bfd25aeecf709ee49399ec87692bcb16..ee721f420a7bac495c3363b74bf78a8c6e0b044e 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -31,19 +31,6 @@ static atomic_t num_events; /* Used to avoid races in calling reserve/release_pmc_hardware */ static DEFINE_MUTEX(pmc_reserve_mutex); -/* - * If interrupts were soft-disabled when a PMU interrupt occurs, treat - * it as an NMI. - */ -static inline int perf_intr_is_nmi(struct pt_regs *regs) -{ -#ifdef __powerpc64__ - return (regs->softe & IRQS_DISABLED); -#else - return 0; -#endif -} - static void perf_event_interrupt(struct pt_regs *regs); /* @@ -659,13 +646,6 @@ static void perf_event_interrupt(struct pt_regs *regs) struct perf_event *event; unsigned long val; int found = 0; - int nmi; - - nmi = perf_intr_is_nmi(regs); - if (nmi) - nmi_enter(); - else - irq_enter(); for (i = 0; i < ppmu->n_counter; ++i) { event = cpuhw->event[i]; @@ -690,11 +670,6 @@ static void perf_event_interrupt(struct pt_regs *regs) mtmsr(mfmsr() | MSR_PMM); mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); isync(); - - if (nmi) - nmi_exit(); - else - irq_exit(); } void hw_perf_event_setup(int cpu) diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 5e8eedda45d39fdd48da8ee9ce3b5970348e1f77..58448f0e472132e9fa24163e10b0162120ae83b3 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -561,6 +561,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev, if (!(pmc_inuse & 0x60)) mmcr->mmcr0 |= MMCR0_FC56; + /* + * Set mmcr0 (PMCCEXT) for p10 which + * will restrict access to group B registers + * when MMCR0 PMCC=0b00. + */ + if (cpu_has_feature(CPU_FTR_ARCH_31)) + mmcr->mmcr0 |= MMCR0_PMCCEXT; + mmcr->mmcr1 = mmcr1; mmcr->mmcra = mmcra; mmcr->mmcr2 = mmcr2; diff --git a/arch/powerpc/platforms/44x/fsp2.c b/arch/powerpc/platforms/44x/fsp2.c index b299e43f5ef944684f0217b5da1e9bbb68f6bd7f..823397c802defe52e7d2fed90c8d587d19cb3d8f 100644 --- a/arch/powerpc/platforms/44x/fsp2.c +++ b/arch/powerpc/platforms/44x/fsp2.c @@ -208,6 +208,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) if (irq == NO_IRQ) { pr_err("device tree node %pOFn is missing a interrupt", np); + of_node_put(np); return; } @@ -215,6 +216,7 @@ static void node_irq_request(const char *compat, irq_handler_t errirq_handler) if (rc) { pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d", np, rc); + of_node_put(np); return; } } diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile index d1dd0dca5ebf028187b40cec4dbc29d46b4cdb3e..bd750edeb105b37186a767dfe21c4d558bd70ae5 100644 --- a/arch/powerpc/platforms/85xx/Makefile +++ b/arch/powerpc/platforms/85xx/Makefile @@ -3,7 +3,9 @@ # Makefile for the PowerPC 85xx linux kernel. # obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_FSL_PMC) += mpc85xx_pm_ops.o +ifneq ($(CONFIG_FSL_CORENET_RCPM),y) +obj-$(CONFIG_SMP) += mpc85xx_pm_ops.o +endif obj-y += common.o diff --git a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c index 7c0133f558d028742ae9c5d62b7e990f66c0fa5c..4a8af80011a6f1886d8dc5c392d5e6192698f476 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_pm_ops.c @@ -17,6 +17,7 @@ static struct ccsr_guts __iomem *guts; +#ifdef CONFIG_FSL_PMC static void mpc85xx_irq_mask(int cpu) { @@ -49,6 +50,7 @@ static void mpc85xx_cpu_up_prepare(int cpu) { } +#endif static void mpc85xx_freeze_time_base(bool freeze) { @@ -76,10 +78,12 @@ static const struct of_device_id mpc85xx_smp_guts_ids[] = { static const struct fsl_pm_ops mpc85xx_pm_ops = { .freeze_time_base = mpc85xx_freeze_time_base, +#ifdef CONFIG_FSL_PMC .irq_mask = mpc85xx_irq_mask, .irq_unmask = mpc85xx_irq_unmask, .cpu_die = mpc85xx_cpu_die, .cpu_up_prepare = mpc85xx_cpu_up_prepare, +#endif }; int __init mpc85xx_setup_pmc(void) @@ -94,9 +98,8 @@ int __init mpc85xx_setup_pmc(void) pr_err("Could not map guts node address\n"); return -ENOMEM; } + qoriq_pm_ops = &mpc85xx_pm_ops; } - qoriq_pm_ops = &mpc85xx_pm_ops; - return 0; } diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index c6df294054fe90b15086d945702d2a89dcba7d86..d7081e9af65c73781bce016c65841208ef91d177 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -40,7 +40,6 @@ struct epapr_spin_table { u32 pir; }; -#ifdef CONFIG_HOTPLUG_CPU static u64 timebase; static int tb_req; static int tb_valid; @@ -112,6 +111,7 @@ static void mpc85xx_take_timebase(void) local_irq_restore(flags); } +#ifdef CONFIG_HOTPLUG_CPU static void smp_85xx_cpu_offline_self(void) { unsigned int cpu = smp_processor_id(); @@ -220,7 +220,7 @@ static int smp_85xx_start_cpu(int cpu) local_irq_save(flags); hard_irq_disable(); - if (qoriq_pm_ops) + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare) qoriq_pm_ops->cpu_up_prepare(cpu); /* if cpu is not spinning, reset it */ @@ -292,7 +292,7 @@ static int smp_85xx_kick_cpu(int nr) booting_thread_hwid = cpu_thread_in_core(nr); primary = cpu_first_thread_sibling(nr); - if (qoriq_pm_ops) + if (qoriq_pm_ops && qoriq_pm_ops->cpu_up_prepare) qoriq_pm_ops->cpu_up_prepare(nr); /* @@ -495,21 +495,21 @@ void __init mpc85xx_smp_init(void) smp_85xx_ops.probe = NULL; } -#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_FSL_CORENET_RCPM + /* Assign a value to qoriq_pm_ops on PPC_E500MC */ fsl_rcpm_init(); -#endif - -#ifdef CONFIG_FSL_PMC +#else + /* Assign a value to qoriq_pm_ops on !PPC_E500MC */ mpc85xx_setup_pmc(); #endif if (qoriq_pm_ops) { smp_85xx_ops.give_timebase = mpc85xx_give_timebase; smp_85xx_ops.take_timebase = mpc85xx_take_timebase; +#ifdef CONFIG_HOTPLUG_CPU smp_85xx_ops.cpu_offline_self = smp_85xx_cpu_offline_self; smp_85xx_ops.cpu_die = qoriq_cpu_kill; - } #endif + } smp_ops = &smp_85xx_ops; #ifdef CONFIG_KEXEC_CORE diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 2124831cf57c081e7625be24fc25ea9c0e0ba286..d04079b34d7c2881e9229807111629f28a03fd18 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -976,6 +976,7 @@ static int __init cell_iommu_fixed_mapping_init(void) if (hbase < dbase || (hend > (dbase + dsize))) { pr_debug("iommu: hash window doesn't fit in" "real DMA window\n"); + of_node_put(np); return -1; } } diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 9068edef71f7832c375be25cfb0754e92553b146..59999902e4a6aff631b1ba7efab56efb6080ef23 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -77,6 +77,7 @@ static int cbe_system_reset_exception(struct pt_regs *regs) switch (regs->msr & SRR1_WAKEMASK) { case SRR1_WAKEDEC: set_dec(1); + break; case SRR1_WAKEEE: /* * Handle these when interrupts get re-enabled and we take diff --git a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c index a1b7f79a8a152eae5e9ae783f9c3d69654719012..de10c13de15c6655e4ca39e433874c789d5704fa 100644 --- a/arch/powerpc/platforms/embedded6xx/hlwd-pic.c +++ b/arch/powerpc/platforms/embedded6xx/hlwd-pic.c @@ -215,6 +215,7 @@ void hlwd_pic_probe(void) irq_set_chained_handler(cascade_virq, hlwd_pic_irq_cascade); hlwd_irq_host = host; + of_node_put(np); break; } } diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index f77a59b5c2e1a9dcc7a810fc3e112ef03e7b8b32..df89d916236d965595702eef3a5f7a813deae485 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -582,6 +582,7 @@ static void __init kw_i2c_add(struct pmac_i2c_host_kw *host, bus->close = kw_i2c_close; bus->xfer = kw_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); if (controller == busnode) bus->flags = pmac_i2c_multibus; @@ -810,6 +811,7 @@ static void __init pmu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = pmu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = pmac_i2c_multibus; list_add(&bus->link, &pmac_i2c_busses); @@ -933,6 +935,7 @@ static void __init smu_i2c_probe(void) bus->hostdata = bus + 1; bus->xfer = smu_i2c_xfer; mutex_init(&bus->mutex); + lockdep_register_key(&bus->lock_key); lockdep_set_class(&bus->mutex, &bus->lock_key); bus->flags = 0; list_add(&bus->link, &pmac_i2c_busses); diff --git a/arch/powerpc/platforms/powernv/opal-lpc.c b/arch/powerpc/platforms/powernv/opal-lpc.c index 608569082ba0bb12e555c0b4b2ad4c392bcdd6ba..123a0e799b7bdbbd3dc507e3a5016f5728eab47a 100644 --- a/arch/powerpc/platforms/powernv/opal-lpc.c +++ b/arch/powerpc/platforms/powernv/opal-lpc.c @@ -396,6 +396,7 @@ void __init opal_lpc_init(void) if (!of_get_property(np, "primary", NULL)) continue; opal_lpc_chip_id = of_get_ibm_chip_id(np); + of_node_put(np); break; } if (opal_lpc_chip_id < 0) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index deddaebf8c14f80f33948487529551ad5e8cf8a9..17a2874d1256177e64e5caaffd3a91bf755d2daa 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -372,6 +372,12 @@ static struct notifier_block opal_prd_event_nb = { .priority = 0, }; +static struct notifier_block opal_prd_event_nb2 = { + .notifier_call = opal_prd_msg_notifier, + .next = NULL, + .priority = 0, +}; + static int opal_prd_probe(struct platform_device *pdev) { int rc; @@ -393,9 +399,10 @@ static int opal_prd_probe(struct platform_device *pdev) return rc; } - rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb); + rc = opal_message_notifier_register(OPAL_MSG_PRD2, &opal_prd_event_nb2); if (rc) { pr_err("Couldn't register PRD2 event notifier\n"); + opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); return rc; } @@ -404,6 +411,8 @@ static int opal_prd_probe(struct platform_device *pdev) pr_err("failed to register miscdev\n"); opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, + &opal_prd_event_nb2); return rc; } @@ -414,6 +423,7 @@ static int opal_prd_remove(struct platform_device *pdev) { misc_deregister(&opal_prd_dev); opal_message_notifier_unregister(OPAL_MSG_PRD, &opal_prd_event_nb); + opal_message_notifier_unregister(OPAL_MSG_PRD2, &opal_prd_event_nb2); return 0; } diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index cf024fa37bda09f277fdad5e69e46d5b45d41da2..7ed38ebd0c7b629e6c29fc6f274d4b3a76e57868 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -868,6 +868,10 @@ static int __init eeh_pseries_init(void) if (is_kdump_kernel() || reset_devices) { pr_info("Issue PHB reset ...\n"); list_for_each_entry(phb, &hose_list, list_node) { + // Skip if the slot is empty + if (list_empty(&PCI_DN(phb->dn)->child_list)) + continue; + pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list); config_addr = pseries_eeh_get_pe_config_addr(pdn); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index e4198700ed1a32e48af40f001d1b11a654a61fa7..245f1f8df6563ddbc96fdc20146ec0838e29db01 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -1034,15 +1034,6 @@ static phys_addr_t ddw_memory_hotplug_max(void) phys_addr_t max_addr = memory_hotplug_max(); struct device_node *memory; - /* - * The "ibm,pmemory" can appear anywhere in the address space. - * Assuming it is still backed by page structs, set the upper limit - * for the huge DMA window as MAX_PHYSMEM_BITS. - */ - if (of_find_node_by_type(NULL, "ibm,pmemory")) - return (sizeof(phys_addr_t) * 8 <= MAX_PHYSMEM_BITS) ? - (phys_addr_t) -1 : (1ULL << MAX_PHYSMEM_BITS); - for_each_node_by_type(memory, "memory") { unsigned long start, size; int n_mem_addr_cells, n_mem_size_cells, len; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 5f0d446a2325e746046c0db642ee685ff7b0b920..47dfada140e19d2334ab09505bb6b34a26ad3624 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -538,6 +538,12 @@ static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_ENTRY) + security_ftr_clear(SEC_FTR_L1D_FLUSH_ENTRY); + + if (result->behaviour & H_CPU_BEHAV_NO_L1D_FLUSH_UACCESS) + security_ftr_clear(SEC_FTR_L1D_FLUSH_UACCESS); + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); } diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c index 624e80b00eb1882c28e00f5f4cd6bf90b1c32c81..f47429323eee946994014c985b110616dbf35b63 100644 --- a/arch/powerpc/platforms/pseries/smp.c +++ b/arch/powerpc/platforms/pseries/smp.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "pseries.h" @@ -207,6 +208,8 @@ static __init void pSeries_smp_probe(void) if (!cpu_has_feature(CPU_FTR_SMT)) return; + check_kvm_guest(); + if (is_kvm_guest()) { /* * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S index efeeb1b885a17e4ac6c5f8e7c2394c59460179aa..329b9c4ae5429534e85e4435fbe0cc6553c88794 100644 --- a/arch/powerpc/sysdev/dcr-low.S +++ b/arch/powerpc/sysdev/dcr-low.S @@ -11,7 +11,7 @@ #include #define DCR_ACCESS_PROLOG(table) \ - cmpli cr0,r3,1024; \ + cmplwi cr0,r3,1024; \ rlwinm r3,r3,4,18,27; \ lis r5,table@h; \ ori r5,r5,table@l; \ diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 5b0f6b6278e3d3332e4d178b8fa31b08856f546c..6018f73d947daa73d709ff25f1af098f589a60e5 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -997,7 +997,8 @@ static int xive_get_irqchip_state(struct irq_data *data, * interrupt to be inactive in that case. */ *state = (pq != XIVE_ESB_INVALID) && !xd->stale_p && - (xd->saved_p || !!(pq & XIVE_ESB_VAL_P)); + (xd->saved_p || (!!(pq & XIVE_ESB_VAL_P) && + !irqd_irq_disabled(data))); return 0; default: return -EINVAL; diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index 1e3674d7ea7bce396f791b0558b52597d8f938a5..b57eeaff7bb33395289788d0b88832b0c46ca248 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -658,6 +658,9 @@ static int xive_spapr_debug_show(struct seq_file *m, void *private) struct xive_irq_bitmap *xibm; char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + list_for_each_entry(xibm, &xive_irq_bitmaps, list) { memset(buf, 0, PAGE_SIZE); bitmap_print_to_pagebuf(true, buf, xibm->bitmap, xibm->count); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index f7abd118d23d356078c09a1b96ac7fb28860436f..1b894c327578168a561669d70777fc71067b5bac 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -138,6 +138,12 @@ config PAGE_OFFSET default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB +config KASAN_SHADOW_OFFSET + hex + depends on KASAN_GENERIC + default 0xdfffffc800000000 if 64BIT + default 0xffffffff if 32BIT + config ARCH_FLATMEM_ENABLE def_bool y diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 226c366072da3be17be66608b97376cf8bd6a6eb..4fa0b07d83c646b886ad475035b2b8ec1030d820 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -39,7 +39,7 @@ endif ifeq ($(CONFIG_LD_IS_LLD),y) KBUILD_CFLAGS += -mno-relax KBUILD_AFLAGS += -mno-relax -ifneq ($(LLVM_IAS),1) +ifndef CONFIG_AS_IS_LLVM KBUILD_CFLAGS += -Wa,-mno-relax KBUILD_AFLAGS += -Wa,-mno-relax endif @@ -50,6 +50,12 @@ riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima riscv-march-$(CONFIG_FPU) := $(riscv-march-y)fd riscv-march-$(CONFIG_RISCV_ISA_C) := $(riscv-march-y)c + +# Newer binutils versions default to ISA spec version 20191213 which moves some +# instructions from the I extension to the Zicsr and Zifencei extensions. +toolchain-need-zicsr-zifencei := $(call cc-option-yn, -march=$(riscv-march-y)_zicsr_zifencei) +riscv-march-$(toolchain-need-zicsr-zifencei) := $(riscv-march-y)_zicsr_zifencei + KBUILD_CFLAGS += -march=$(subst fd,,$(riscv-march-y)) KBUILD_AFLAGS += -march=$(riscv-march-y) diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h index b04028c6218c9f39fc086fe5a40f94459d58f0cd..db10a64e3b98111334c885a1f417cd7eb72a936a 100644 --- a/arch/riscv/include/asm/kasan.h +++ b/arch/riscv/include/asm/kasan.h @@ -14,8 +14,7 @@ #define KASAN_SHADOW_START KERN_VIRT_START /* 2^64 - 2^38 */ #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) -#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ - (64 - KASAN_SHADOW_SCALE_SHIFT))) +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) void kasan_init(void); asmlinkage void kasan_early_init(void); diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h index 4b989ae15d59f7b8e4765e3ad93b07a2923b2e6e..8062996c2dfd077eca892b3ce7bc0fa8a91f2281 100644 --- a/arch/riscv/include/uapi/asm/unistd.h +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -18,9 +18,10 @@ #ifdef __LP64__ #define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_SYS_CLONE3 #endif /* __LP64__ */ +#define __ARCH_WANT_SYS_CLONE3 + #include /* diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 62de075fc60c0a617b7230e7063a6f44e0917824..bc49d5f2302b605ca72e28d43b71f35259ec8996 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -44,6 +44,8 @@ obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o +obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o + obj-$(CONFIG_RISCV_BASE_PMU) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 76274a4a1d8e6596dbf3069df999847a94238a7e..5214c578a602392d7af7f2d741096ca534b9afae 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -98,7 +98,7 @@ _save_context: .option pop #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_CONTEXT_TRACKING @@ -131,7 +131,7 @@ skip_context_tracking: andi t0, s1, SR_PIE beqz t0, 1f #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_on + call __trace_hardirqs_on #endif csrs CSR_STATUS, SR_IE @@ -222,7 +222,7 @@ ret_from_exception: REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS - call trace_hardirqs_off + call __trace_hardirqs_off #endif #ifdef CONFIG_RISCV_M_MODE /* the MPP value is too large to be used as an immediate arg for addi */ @@ -258,10 +258,10 @@ restore_all: REG_L s1, PT_STATUS(sp) andi t0, s1, SR_PIE beqz t0, 1f - call trace_hardirqs_on + call __trace_hardirqs_on j 2f 1: - call trace_hardirqs_off + call __trace_hardirqs_off 2: #endif REG_L a0, PT_STATUS(sp) diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 7e849797c9c38ddc81311787dcf5188ba9de7a02..1a819c18bedecc43fa49affaea174ddf4e9b1de2 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -175,6 +175,7 @@ setup_trap_vector: csrw CSR_SCRATCH, zero ret +.align 2 .Lsecondary_park: /* We lack SMP support or have too many harts, so park this hart */ wfi diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 104fba889cf767cb159840954de99f985f7f0929..c3310a68ac463d092ea8362c9df8fbc07de80ae1 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -13,6 +13,19 @@ #include #include +/* + * The auipc+jalr instruction pair can reach any PC-relative offset + * in the range [-2^31 - 2^11, 2^31 - 2^11) + */ +static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) +{ +#ifdef CONFIG_32BIT + return true; +#else + return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11)); +#endif +} + static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { @@ -95,7 +108,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, ptrdiff_t offset = (void *)v - (void *)location; s32 hi20; - if (offset != (s32)offset) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); @@ -197,10 +210,9 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { /* Only emit the plt entry if offset over 32-bit range */ if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { offset = module_emit_plt_entry(me, v); @@ -224,10 +236,9 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, Elf_Addr v) { ptrdiff_t offset = (void *)v - (void *)location; - s32 fill_v = offset; u32 hi20, lo12; - if (offset != fill_v) { + if (!riscv_insn_valid_32bit_offset(offset)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", me->name, (long long)v, location); diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c index cf190197a22f6556b2b05fe686589b3442e7a090..ad3001cbdf6186778e09d33f285070c5d3f06ad9 100644 --- a/arch/riscv/kernel/perf_callchain.c +++ b/arch/riscv/kernel/perf_callchain.c @@ -60,10 +60,11 @@ static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); unsigned long fp = 0; /* RISC-V does not support perf in guest mode. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + if (guest_cbs && guest_cbs->is_in_guest()) return; fp = regs->s0; @@ -84,8 +85,10 @@ void notrace walk_stackframe(struct task_struct *task, void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + /* RISC-V does not support perf in guest mode. */ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { pr_warn("RISC-V does not support perf in guest mode!"); return; } diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 96167d55ed9845a23debad56fcbeb009c600c7d1..0b04e0eae3ab5bfe0e9ff714e6ebef512b83dafd 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -166,7 +166,6 @@ asmlinkage __visible void smp_callin(void) * Disable preemption before enabling interrupts, so we don't try to * schedule a CPU that hasn't actually started yet. */ - preempt_disable(); local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c new file mode 100644 index 0000000000000000000000000000000000000000..095ac976d7da1092cbbec8d1a2c11282236d7c36 --- /dev/null +++ b/arch/riscv/kernel/trace_irq.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Changbin Du + */ + +#include +#include +#include "trace_irq.h" + +/* + * trace_hardirqs_on/off require the caller to setup frame pointer properly. + * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel. + * Here we add one extra level so they can be safely called by low + * level entry code which $fp is used for other purpose. + */ + +void __trace_hardirqs_on(void) +{ + trace_hardirqs_on(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_on); + +void __trace_hardirqs_off(void) +{ + trace_hardirqs_off(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_off); diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h new file mode 100644 index 0000000000000000000000000000000000000000..99fe67377e5ed6795b1a45bf6b2a0d8af6fef41d --- /dev/null +++ b/arch/riscv/kernel/trace_irq.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Changbin Du + */ +#ifndef __TRACE_IRQ_H +#define __TRACE_IRQ_H + +void __trace_hardirqs_on(void); +void __trace_hardirqs_off(void); + +#endif /* __TRACE_IRQ_H */ diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 3f1d35e7c98a62d9b3113999e45938e2b97dfbab..73d45931a053a337724fee63d8677444834637c3 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -65,7 +65,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, vdso_len = (vdso_pages + 1) << PAGE_SHIFT; - mmap_write_lock(mm); + if (mmap_write_lock_killable(mm)) + return -EINTR; + vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { ret = vdso_base; diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 7ebaef10ea1b69e1557c9d08fec4d288ac31e842..ac7a25298a04af665ff0552d99a95e1671013ddd 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -24,6 +24,9 @@ obj-$(CONFIG_KASAN) += kasan_init.o ifdef CONFIG_KASAN KASAN_SANITIZE_kasan_init.o := n KASAN_SANITIZE_init.o := n +ifdef CONFIG_DEBUG_VIRTUAL +KASAN_SANITIZE_physaddr.o := n +endif endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 094118663285d545c101ac518a3b517bcec2ca8b..89f81067e09edba0c9cacdafd41cc97ea2b0e19e 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -16,6 +16,8 @@ static void ipi_remote_fence_i(void *info) void flush_icache_all(void) { + local_flush_icache_all(); + if (IS_ENABLED(CONFIG_RISCV_SBI)) sbi_remote_fence_i(NULL); else diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index a8a2ffd9114aaa22c60661c7cd2d80023747cb2e..2db442701ee28f528602f9d7aca5d7174938322f 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -16,10 +16,12 @@ asmlinkage void __init kasan_early_init(void) uintptr_t i; pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START); + BUILD_BUG_ON(KASAN_SHADOW_OFFSET != + KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT))); + for (i = 0; i < PTRS_PER_PTE; ++i) set_pte(kasan_early_shadow_pte + i, - mk_pte(virt_to_page(kasan_early_shadow_page), - PAGE_KERNEL)); + pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL)); for (i = 0; i < PTRS_PER_PMD; ++i) set_pmd(kasan_early_shadow_pmd + i, diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 3630d447352c63b02caf2b88bd6f4610f9b38517..cbf7d2414886e4b741c40766787555102ae6e783 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -125,7 +125,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (i == NR_JIT_ITERATIONS) { pr_err("bpf-jit: image did not converge in <%d passes!\n", i); - bpf_jit_binary_free(jit_data->header); + if (jit_data->header) + bpf_jit_binary_free(jit_data->header); prog = orig_prog; goto out_offset; } diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index e1fcc03159ef25c42e90fa55e072f9914602ee5c..a927adccb4ba7f81b08cb6f24e7ec3c48a6aa60d 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -20,6 +20,7 @@ static char local_guest[] = " "; static char all_guests[] = "* "; +static char *all_groups = all_guests; static char *guest_query; struct diag2fc_data { @@ -62,10 +63,11 @@ static int diag2fc(int size, char* query, void *addr) memcpy(parm_list.userid, query, NAME_LEN); ASCEBC(parm_list.userid, NAME_LEN); - parm_list.addr = (unsigned long) addr ; + memcpy(parm_list.aci_grp, all_groups, NAME_LEN); + ASCEBC(parm_list.aci_grp, NAME_LEN); + parm_list.addr = (unsigned long)addr; parm_list.size = size; parm_list.fmt = 0x02; - memset(parm_list.aci_grp, 0x40, NAME_LEN); rc = -1; diag_stat_inc(DIAG_STAT_X2FC); diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h index 3beb294fd553148486014d1a63057807066a20ac..ce0db8172aad1a876f8bf9444c49d6b76cab47e3 100644 --- a/arch/s390/include/asm/extable.h +++ b/arch/s390/include/asm/extable.h @@ -69,8 +69,13 @@ static inline void swap_ex_entry_fixup(struct exception_table_entry *a, { a->fixup = b->fixup + delta; b->fixup = tmp.fixup - delta; - a->handler = b->handler + delta; - b->handler = tmp.handler - delta; + a->handler = b->handler; + if (a->handler) + a->handler += delta; + b->handler = tmp.handler; + if (b->handler) + b->handler -= delta; } +#define swap_ex_entry_fixup swap_ex_entry_fixup #endif diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index ea398a05f6432334154161606771c5b3d52e11a5..7f3c9ac34bd8d11d8bc9b3cf22a58a634f9c90ae 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -74,6 +74,12 @@ void *kexec_file_add_components(struct kimage *image, int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val, unsigned long addr); +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + void *ipl_buf; +}; + extern const struct kexec_file_ops s390_kexec_image_ops; extern const struct kexec_file_ops s390_kexec_elf_ops; diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index a75d94a9bcb2f4265cbacc1d04a2580412666e94..1226971533fe46ce106b714ec43f7b36e582bb8c 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -205,6 +205,9 @@ int zpci_create_device(u32 fid, u32 fh, enum zpci_state state); void zpci_remove_device(struct zpci_dev *zdev, bool set_error); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); +void zpci_device_reserved(struct zpci_dev *zdev); +bool zpci_is_device_configured(struct zpci_dev *zdev); + int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); int zpci_unregister_ioat(struct zpci_dev *, u8); void zpci_remove_reserved_devices(void); diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index e4dc64cc9c555c11abb747e642faf6fecc910d71..287bb88f76986e127388efd03c18d117bf4c417e 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -14,12 +14,13 @@ /* I/O Map */ #define ZPCI_IOMAP_SHIFT 48 -#define ZPCI_IOMAP_ADDR_BASE 0x8000000000000000UL +#define ZPCI_IOMAP_ADDR_SHIFT 62 +#define ZPCI_IOMAP_ADDR_BASE (1UL << ZPCI_IOMAP_ADDR_SHIFT) #define ZPCI_IOMAP_ADDR_OFF_MASK ((1UL << ZPCI_IOMAP_SHIFT) - 1) #define ZPCI_IOMAP_MAX_ENTRIES \ - ((ULONG_MAX - ZPCI_IOMAP_ADDR_BASE + 1) / (1UL << ZPCI_IOMAP_SHIFT)) + (1UL << (ZPCI_IOMAP_ADDR_SHIFT - ZPCI_IOMAP_SHIFT)) #define ZPCI_IOMAP_ADDR_IDX_MASK \ - (~ZPCI_IOMAP_ADDR_OFF_MASK - ZPCI_IOMAP_ADDR_BASE) + ((ZPCI_IOMAP_ADDR_BASE - 1) & ~ZPCI_IOMAP_ADDR_OFF_MASK) struct zpci_iomap_entry { u32 fh; diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index 6ede29907fbf76424ed52de165db93af0e600933..e38480eb58faac5d36f32ca89264b36b447aacaf 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -32,7 +32,7 @@ static inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ + S390_lowcore.preempt_count = PREEMPT_DISABLED; \ } while (0) static inline void set_preempt_need_resched(void) @@ -91,7 +91,7 @@ static inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - S390_lowcore.preempt_count = PREEMPT_ENABLED; \ + S390_lowcore.preempt_count = PREEMPT_DISABLED; \ } while (0) static inline void set_preempt_need_resched(void) diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 98b3aca1de8e1daaef0aa5a99a2f35c5e3cd822d..6da06905ddce524d963ba6c31507591d9c0cd4b4 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2156,7 +2156,7 @@ void *ipl_report_finish(struct ipl_report *report) buf = vzalloc(report->size); if (!buf) - return ERR_PTR(-ENOMEM); + goto out; ptr = buf; memcpy(ptr, report->ipib, report->ipib->hdr.len); @@ -2195,6 +2195,7 @@ void *ipl_report_finish(struct ipl_report *report) } BUG_ON(ptr > buf + report->size); +out: return buf; } diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index f9e4baa64b675caa5ad5e1d9eccd3d01716fe721..76cd09879eaf452314d0d7aae30e5ebd9707d74d 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -170,6 +171,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, struct kexec_buf buf; unsigned long addr; void *ptr, *end; + int ret; buf.image = image; @@ -199,9 +201,13 @@ static int kexec_file_add_ipl_report(struct kimage *image, ptr += len; } + ret = -ENOMEM; buf.buffer = ipl_report_finish(data->report); + if (!buf.buffer) + goto out; buf.bufsz = data->report->size; buf.memsz = buf.bufsz; + image->arch.ipl_buf = buf.buffer; data->memsz += buf.memsz; @@ -209,7 +215,9 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr); *lc_ipl_parmblock_ptr = (__u32)buf.mem; - return kexec_add_buffer(&buf); + ret = kexec_add_buffer(&buf); +out: + return ret; } void *kexec_file_add_components(struct kimage *image, @@ -269,6 +277,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, { Elf_Rela *relas; int i, r_type; + int ret; relas = (void *)pi->ehdr + relsec->sh_offset; @@ -303,7 +312,11 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, addr = section->sh_addr + relas[i].r_offset; r_type = ELF64_R_TYPE(relas[i].r_info); - arch_kexec_do_relocs(r_type, loc, val, addr); + ret = arch_kexec_do_relocs(r_type, loc, val, addr); + if (ret) { + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } } return 0; } @@ -321,3 +334,11 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, return kexec_image_probe_default(image, buf, buf_len); } + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + vfree(image->arch.ipl_buf); + image->arch.ipl_buf = NULL; + + return kexec_image_post_load_cleanup_default(image); +} diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 4055f1c4981472b049f9029e49f53bbddc3cdb6e..d76037a7529158aba1d872089d4ada93be58ffa2 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -30,18 +30,19 @@ #define DEBUGP(fmt , ...) #endif -#define PLT_ENTRY_SIZE 20 +#define PLT_ENTRY_SIZE 22 void *module_alloc(unsigned long size) { + gfp_t gfp_mask = GFP_KERNEL; void *p; if (PAGE_ALIGN(size) > MODULES_LEN) return NULL; p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + gfp_mask, PAGE_KERNEL_EXEC, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_module_alloc(p, size) < 0)) { + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; } @@ -330,27 +331,26 @@ static int apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab, case R_390_PLTOFF32: /* 32 bit offset from GOT to PLT. */ case R_390_PLTOFF64: /* 16 bit offset from GOT to PLT. */ if (info->plt_initialized == 0) { - unsigned int insn[5]; - unsigned int *ip = me->core_layout.base + - me->arch.plt_offset + - info->plt_offset; - - insn[0] = 0x0d10e310; /* basr 1,0 */ - insn[1] = 0x100a0004; /* lg 1,10(1) */ + unsigned char insn[PLT_ENTRY_SIZE]; + char *plt_base; + char *ip; + + plt_base = me->core_layout.base + me->arch.plt_offset; + ip = plt_base + info->plt_offset; + *(int *)insn = 0x0d10e310; /* basr 1,0 */ + *(int *)&insn[4] = 0x100c0004; /* lg 1,12(1) */ if (IS_ENABLED(CONFIG_EXPOLINE) && !nospec_disable) { - unsigned int *ij; - ij = me->core_layout.base + - me->arch.plt_offset + - me->arch.plt_size - PLT_ENTRY_SIZE; - insn[2] = 0xa7f40000 + /* j __jump_r1 */ - (unsigned int)(u16) - (((unsigned long) ij - 8 - - (unsigned long) ip) / 2); + char *jump_r1; + + jump_r1 = plt_base + me->arch.plt_size - + PLT_ENTRY_SIZE; + /* brcl 0xf,__jump_r1 */ + *(short *)&insn[8] = 0xc0f4; + *(int *)&insn[10] = (jump_r1 - (ip + 8)) / 2; } else { - insn[2] = 0x07f10000; /* br %r1 */ + *(int *)&insn[8] = 0x07f10000; /* br %r1 */ } - insn[3] = (unsigned int) (val >> 32); - insn[4] = (unsigned int) val; + *(long *)&insn[14] = val; write(ip, insn, sizeof(insn)); info->plt_initialized = 1; diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1f23c4b5ea159013dcebd5d20f5e08583fe694a1..27b228f5b87a5da93e1bd29fe29f12f84dfce9a9 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -844,9 +844,6 @@ static void __init setup_memory(void) storage_key_init_range(start, end); psw_set_key(PAGE_DEFAULT_KEY); - - /* Only cosmetics */ - memblock_enforce_memory_limit(memblock_end_of_DRAM()); } /* diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 791bc373418bd3d7ad44c8deef8da7f2e715953a..7db5460553b7de5a1bf26b5987369e6e0813cdaf 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -863,7 +863,6 @@ static void smp_init_secondary(void) set_cpu_flag(CIF_ASCE_SECONDARY); cpu_init(); rcu_cpu_starting(cpu); - preempt_disable(); init_cpu_timer(); vtime_init(); pfault_init(); diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 28c16800048344821461943d58f99ff1f50ab4c6..9f0ff9fb01b226de912466c0d11bab463fff1586 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -443,3 +443,5 @@ 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease sys_process_mrelease diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2bb9996ff09b40f0e5fbaaf2fde8a34342a75175..b51ab19eb9721479bb05718895e1a159fa763fef 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2115,6 +2115,13 @@ int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu) return test_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs); } +int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + + return test_bit(IRQ_PEND_RESTART, &li->pending_irqs); +} + void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -3053,13 +3060,14 @@ static void __airqs_kick_single_vcpu(struct kvm *kvm, u8 deliverable_mask) int vcpu_idx, online_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_vcpu *vcpu; + u8 vcpu_isc_mask; for_each_set_bit(vcpu_idx, kvm->arch.idle_mask, online_vcpus) { vcpu = kvm_get_vcpu(kvm, vcpu_idx); if (psw_ioint_disabled(vcpu)) continue; - deliverable_mask &= (u8)(vcpu->arch.sie_block->gcr[6] >> 24); - if (deliverable_mask) { + vcpu_isc_mask = (u8)(vcpu->arch.sie_block->gcr[6] >> 24); + if (deliverable_mask & vcpu_isc_mask) { /* lately kicked but not yet running */ if (test_and_set_bit(vcpu_idx, gi->kicked_mask)) return; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7f719b468b440296783bdf098d9952ddb2112d16..d8e9239c24ffc62c91a0f280fe5bddd89029a664 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3312,6 +3312,7 @@ out_free_sie_block: int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { + clear_bit(vcpu->vcpu_idx, vcpu->kvm->arch.gisa_int.kicked_mask); return kvm_s390_vcpu_has_irq(vcpu, 0); } @@ -4587,10 +4588,15 @@ int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu) } } - /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */ + /* + * Set the VCPU to STOPPED and THEN clear the interrupt flag, + * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders + * have been fully processed. This will ensure that the VCPU + * is kept BUSY if another VCPU is inquiring with SIGP SENSE. + */ + kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); kvm_s390_clear_stop_irq(vcpu); - kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED); __disable_ibs_on_vcpu(vcpu); for (i = 0; i < online_vcpus; i++) { @@ -4648,6 +4654,8 @@ static long kvm_s390_guest_sida_op(struct kvm_vcpu *vcpu, return -EINVAL; if (mop->size + mop->sida_offset > sida_size(vcpu->arch.sie_block)) return -E2BIG; + if (!kvm_s390_pv_cpu_is_protected(vcpu)) + return -EINVAL; switch (mop->op) { case KVM_S390_MEMOP_SIDA_READ: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 2d134833bca6964c0464fdcfe8f748fa636c1858..a3e9b71d426f9313ac3764c034c0e8aa85293480 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -418,6 +418,7 @@ void kvm_s390_destroy_adapters(struct kvm *kvm); int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu); extern struct kvm_device_ops kvm_flic_ops; int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu); +int kvm_s390_is_restart_irq_pending(struct kvm_vcpu *vcpu); void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu); int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *buf, int len); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index cd74989ce0b02fd92a3f7f14ac23c46561abb1f5..3b1a498e58d25d7273e515a8e09e0e1debbac3a5 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -397,6 +397,8 @@ static int handle_sske(struct kvm_vcpu *vcpu) mmap_read_unlock(current->mm); if (rc == -EFAULT) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (rc == -EAGAIN) + continue; if (rc < 0) return rc; start += PAGE_SIZE; diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index f5847f9dec7c9e6bf90c2165b2d5eaefc09ccfe0..8228878872228c4a0fd77f237e91f1b4a8fcabde 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -16,18 +16,17 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) { - int cc = 0; + int cc; - if (kvm_s390_pv_cpu_get_handle(vcpu)) { - cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), - UVC_CMD_DESTROY_SEC_CPU, rc, rrc); + if (!kvm_s390_pv_cpu_get_handle(vcpu)) + return 0; + + cc = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu), UVC_CMD_DESTROY_SEC_CPU, rc, rrc); + + KVM_UV_EVENT(vcpu->kvm, 3, "PROTVIRT DESTROY VCPU %d: rc %x rrc %x", + vcpu->vcpu_id, *rc, *rrc); + WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", *rc, *rrc); - KVM_UV_EVENT(vcpu->kvm, 3, - "PROTVIRT DESTROY VCPU %d: rc %x rrc %x", - vcpu->vcpu_id, *rc, *rrc); - WARN_ONCE(cc, "protvirt destroy cpu failed rc %x rrc %x", - *rc, *rrc); - } /* Intended memory leak for something that should never happen. */ if (!cc) free_pages(vcpu->arch.pv.stor_base, @@ -191,7 +190,7 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base; uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var; - cc = uv_call(0, (u64)&uvcb); + cc = uv_call_sched(0, (u64)&uvcb); *rc = uvcb.header.rc; *rrc = uvcb.header.rrc; KVM_UV_EVENT(kvm, 3, "PROTVIRT CREATE VM: handle %llx len %llx rc %x rrc %x", diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 683036c1c92a8f9428622c8ba5b897b86cd3e6ae..3dc921e853b6e26e8cc35a9d39f29cc655216e7f 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -288,6 +288,34 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; + /* + * SIGP RESTART, SIGP STOP, and SIGP STOP AND STORE STATUS orders + * are processed asynchronously. Until the affected VCPU finishes + * its work and calls back into KVM to clear the (RESTART or STOP) + * interrupt, we need to return any new non-reset orders "busy". + * + * This is important because a single VCPU could issue: + * 1) SIGP STOP $DESTINATION + * 2) SIGP SENSE $DESTINATION + * + * If the SIGP SENSE would not be rejected as "busy", it could + * return an incorrect answer as to whether the VCPU is STOPPED + * or OPERATING. + */ + if (order_code != SIGP_INITIAL_CPU_RESET && + order_code != SIGP_CPU_RESET) { + /* + * Lockless check. Both SIGP STOP and SIGP (RE)START + * properly synchronize everything while processing + * their orders, while the guest cannot observe a + * difference when issuing other orders from two + * different VCPUs. + */ + if (kvm_s390_is_stop_irq_pending(dst_vcpu) || + kvm_s390_is_restart_irq_pending(dst_vcpu)) + return SIGP_CC_BUSY; + } + switch (order_code) { case SIGP_SENSE: vcpu->stat.instruction_sigp_sense++; diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index 93b3209b94a2f8b375d7fdc95d21a3f55c836e39..db4e539d8c7fe5205c91f7c473c1938e3ddcf8c6 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -246,14 +246,13 @@ EXPORT_SYMBOL(strcmp); #ifdef __HAVE_ARCH_STRRCHR char *strrchr(const char *s, int c) { - size_t len = __strend(s) - s; - - if (len) - do { - if (s[len] == (char) c) - return (char *) s + len; - } while (--len > 0); - return NULL; + ssize_t len = __strend(s) - s; + + do { + if (s[len] == (char)c) + return (char *)s + len; + } while (--len >= 0); + return NULL; } EXPORT_SYMBOL(strrchr); #endif diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c index 6bad84c372dcbbeb6b91ef74b8e3a60d8764c0ff..b0b67e6d1f6e2c492c97763e4083efd803197cd5 100644 --- a/arch/s390/lib/test_unwind.c +++ b/arch/s390/lib/test_unwind.c @@ -171,10 +171,11 @@ static noinline int unwindme_func4(struct unwindme *u) } /* - * trigger specification exception + * Trigger operation exception; use insn notation to bypass + * llvm's integrated assembler sanity checks. */ asm volatile( - " mvcl %%r1,%%r1\n" + " .insn e,0x0000\n" /* illegal opcode */ "0: nopr %%r7\n" EX_TABLE(0b, 0b) :); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 64795d03492632b629aaef4b88907ef8adb12677..f2d19d40272cf74d08a777508924935a07b09789 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -684,9 +684,10 @@ void __gmap_zap(struct gmap *gmap, unsigned long gaddr) vmaddr |= gaddr & ~PMD_MASK; /* Get pointer to the page table entry */ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (likely(ptep)) + if (likely(ptep)) { ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); - pte_unmap_unlock(ptep, ptl); + pte_unmap_unlock(ptep, ptl); + } } } EXPORT_SYMBOL_GPL(__gmap_zap); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 11d2c8395e2ae92fdc2ac8c94283da9cada12cfc..6d99b1be0082f933c7749de801634585e782507d 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -253,13 +253,15 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) /* Free 2K page table fragment of a 4K page */ bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); spin_lock_bh(&mm->context.lock); - mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24)); + mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24)); mask >>= 24; if (mask & 3) list_add(&page->lru, &mm->context.pgtable_list); else list_del(&page->lru); spin_unlock_bh(&mm->context.lock); + mask = atomic_xor_bits(&page->_refcount, 0x10U << (bit + 24)); + mask >>= 24; if (mask != 0) return; } else { diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 18205f851c247aaf2e20e79a5cc564fe122c38b7..fabaedddc90cb2892c6b7cd552adc41fc0cefe24 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -988,6 +988,7 @@ EXPORT_SYMBOL(get_guest_storage_key); int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, unsigned long *oldpte, unsigned long *oldpgste) { + struct vm_area_struct *vma; unsigned long pgstev; spinlock_t *ptl; pgste_t pgste; @@ -997,6 +998,10 @@ int pgste_perform_essa(struct mm_struct *mm, unsigned long hva, int orc, WARN_ON_ONCE(orc > ESSA_MAX); if (unlikely(orc > ESSA_MAX)) return -EINVAL; + + vma = find_vma(mm, hva); + if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -1089,10 +1094,14 @@ EXPORT_SYMBOL(pgste_perform_essa); int set_pgste_bits(struct mm_struct *mm, unsigned long hva, unsigned long bits, unsigned long value) { + struct vm_area_struct *vma; spinlock_t *ptl; pgste_t new; pte_t *ptep; + vma = find_vma(mm, hva); + if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; @@ -1117,9 +1126,13 @@ EXPORT_SYMBOL(set_pgste_bits); */ int get_pgste(struct mm_struct *mm, unsigned long hva, unsigned long *pgstep) { + struct vm_area_struct *vma; spinlock_t *ptl; pte_t *ptep; + vma = find_vma(mm, hva); + if (!vma || hva < vma->vm_start || is_vm_hugetlb_page(vma)) + return -EFAULT; ptep = get_locked_pte(mm, hva, &ptl); if (unlikely(!ptep)) return -EFAULT; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 8d9047d2d1e11ba6ef550d537b93674f0000cb61..cd0cbdafedbd29e95ca44fe353630ec113824e4f 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1775,7 +1775,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL); if (jit.addrs == NULL) { fp = orig_fp; - goto out; + goto free_addrs; } /* * Three initial passes: diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index f5ddbc625c1a528a9234b8e17e6ab7f34ffa5e47..e14e4a3a647a2acb219270c034316a563efb299f 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -92,7 +92,7 @@ void zpci_remove_reserved_devices(void) spin_unlock(&zpci_list_lock); list_for_each_entry_safe(zdev, tmp, &remove, entry) - zpci_zdev_put(zdev); + zpci_device_reserved(zdev); } int pci_domain_nr(struct pci_bus *bus) @@ -787,6 +787,39 @@ error: return rc; } +bool zpci_is_device_configured(struct zpci_dev *zdev) +{ + enum zpci_state state = zdev->state; + + return state != ZPCI_FN_STATE_RESERVED && + state != ZPCI_FN_STATE_STANDBY; +} + +/** + * zpci_device_reserved() - Mark device as resverved + * @zdev: the zpci_dev that was reserved + * + * Handle the case that a given zPCI function was reserved by another system. + * After a call to this function the zpci_dev can not be found via + * get_zdev_by_fid() anymore but may still be accessible via existing + * references though it will not be functional anymore. + */ +void zpci_device_reserved(struct zpci_dev *zdev) +{ + if (zdev->has_hp_slot) + zpci_exit_slot(zdev); + /* + * Remove device from zpci_list as it is going away. This also + * makes sure we ignore subsequent zPCI events for this device. + */ + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + zdev->state = ZPCI_FN_STATE_RESERVED; + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + zpci_zdev_put(zdev); +} + void zpci_release_device(struct kref *kref) { struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); @@ -802,6 +835,12 @@ void zpci_release_device(struct kref *kref) case ZPCI_FN_STATE_STANDBY: if (zdev->has_hp_slot) zpci_exit_slot(zdev); + spin_lock(&zpci_list_lock); + list_del(&zdev->entry); + spin_unlock(&zpci_list_lock); + zpci_dbg(3, "rsv fid:%x\n", zdev->fid); + fallthrough; + case ZPCI_FN_STATE_RESERVED: zpci_cleanup_bus_resources(zdev); zpci_bus_device_unregister(zdev); zpci_destroy_iommu(zdev); @@ -809,10 +848,6 @@ void zpci_release_device(struct kref *kref) default: break; } - - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); zpci_dbg(3, "rem fid:%x\n", zdev->fid); kfree(zdev); } diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index ac0c65cdd69d928480aab918ae6026681fdc847f..b7cfde7e80a8aa4ba02b28d81c5a6b49e7b32df8 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -146,7 +146,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) zdev->state = ZPCI_FN_STATE_STANDBY; if (!clp_get_state(ccdf->fid, &state) && state == ZPCI_FN_STATE_RESERVED) { - zpci_zdev_put(zdev); + zpci_device_reserved(zdev); } break; case 0x0306: /* 0x308 or 0x302 for multiple devices */ @@ -156,7 +156,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0308: /* Standby -> Reserved */ if (!zdev) break; - zpci_zdev_put(zdev); + zpci_device_reserved(zdev); break; default: break; diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index 28a43d63bde1f57670303bed5097db20ebd06827..97b0e26cf05a1713c03ac75119cb6e5b4f91577e 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -57,6 +57,7 @@ config DUMP_CODE config DWARF_UNWINDER bool "Enable the DWARF unwinder for stacktraces" + depends on DEBUG_KERNEL select FRAME_POINTER default n help diff --git a/arch/sh/include/asm/sfp-machine.h b/arch/sh/include/asm/sfp-machine.h index cbc7cf8c97ce613fec2c0f3e887cf95c5f1788d1..2d2423478b71d4722132e827e05948d347ff883e 100644 --- a/arch/sh/include/asm/sfp-machine.h +++ b/arch/sh/include/asm/sfp-machine.h @@ -13,6 +13,14 @@ #ifndef _SFP_MACHINE_H #define _SFP_MACHINE_H +#ifdef __BIG_ENDIAN__ +#define __BYTE_ORDER __BIG_ENDIAN +#define __LITTLE_ENDIAN 0 +#else +#define __BYTE_ORDER __LITTLE_ENDIAN +#define __BIG_ENDIAN 0 +#endif + #define _FP_W_TYPE_SIZE 32 #define _FP_W_TYPE unsigned long #define _FP_WS_TYPE signed long diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index ae354a2931e7e17eec6e375c1d5add9ed0dcc54b..fd6db0ab19288f42fcdd6c6055c54a3a80f1d68a 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -62,18 +62,20 @@ void fpu_state_restore(struct pt_regs *regs) } if (!tsk_used_math(tsk)) { - local_irq_enable(); + int ret; /* * does a slab alloc which can sleep */ - if (init_fpu(tsk)) { + local_irq_enable(); + ret = init_fpu(tsk); + local_irq_disable(); + if (ret) { /* * ran out of memory! */ - do_group_exit(SIGKILL); + force_sig(SIGKILL); return; } - local_irq_disable(); } grab_fpu(regs); diff --git a/arch/sh/kernel/cpu/sh4a/smp-shx3.c b/arch/sh/kernel/cpu/sh4a/smp-shx3.c index f8a2bec0f260b373b7e3e6cd3529a34635aa1b11..1261dc7b84e8be2b268f60bf3897f0d1e454c5d4 100644 --- a/arch/sh/kernel/cpu/sh4a/smp-shx3.c +++ b/arch/sh/kernel/cpu/sh4a/smp-shx3.c @@ -73,8 +73,9 @@ static void shx3_prepare_cpus(unsigned int max_cpus) BUILD_BUG_ON(SMP_MSG_NR >= 8); for (i = 0; i < SMP_MSG_NR; i++) - request_irq(104 + i, ipi_interrupt_handler, - IRQF_PERCPU, "IPI", (void *)(long)i); + if (request_irq(104 + i, ipi_interrupt_handler, + IRQF_PERCPU, "IPI", (void *)(long)i)) + pr_err("Failed to request irq %d\n", i); for (i = 0; i < max_cpus; i++) set_cpu_present(i, true); diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 372acdc9033eb35da2b5a610cc6221f53208e573..65924d9ec24598f81e995bee85bf4cb135b1d84b 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -186,8 +186,6 @@ asmlinkage void start_secondary(void) per_cpu_trap_init(); - preempt_disable(); - notify_cpu_starting(cpu); local_irq_enable(); diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 783738448ff555f66baa8d9b559dce0d3e08b325..8db35413732c79587703668a6064d33e63e5a681 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -443,3 +443,5 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index e8be0eca0444a05da6e80e9f683695fa0429b378..615ba932c398e4547cb9657cf68bacd8b0b3bce6 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -467,109 +467,6 @@ static int fpu_emulate(u16 code, struct sh_fpu_soft_struct *fregs, struct pt_reg return id_sys(fregs, regs, code); } -/** - * denormal_to_double - Given denormalized float number, - * store double float - * - * @fpu: Pointer to sh_fpu_soft structure - * @n: Index to FP register - */ -static void denormal_to_double(struct sh_fpu_soft_struct *fpu, int n) -{ - unsigned long du, dl; - unsigned long x = fpu->fpul; - int exp = 1023 - 126; - - if (x != 0 && (x & 0x7f800000) == 0) { - du = (x & 0x80000000); - while ((x & 0x00800000) == 0) { - x <<= 1; - exp--; - } - x &= 0x007fffff; - du |= (exp << 20) | (x >> 3); - dl = x << 29; - - fpu->fp_regs[n] = du; - fpu->fp_regs[n+1] = dl; - } -} - -/** - * ieee_fpe_handler - Handle denormalized number exception - * - * @regs: Pointer to register structure - * - * Returns 1 when it's handled (should not cause exception). - */ -static int ieee_fpe_handler(struct pt_regs *regs) -{ - unsigned short insn = *(unsigned short *)regs->pc; - unsigned short finsn; - unsigned long nextpc; - int nib[4] = { - (insn >> 12) & 0xf, - (insn >> 8) & 0xf, - (insn >> 4) & 0xf, - insn & 0xf}; - - if (nib[0] == 0xb || - (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */ - regs->pr = regs->pc + 4; - - if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */ - nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3); - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */ - if (regs->sr & 1) - nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); - else - nextpc = regs->pc + 4; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */ - if (regs->sr & 1) - nextpc = regs->pc + 4; - else - nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1); - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x4 && nib[3] == 0xb && - (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */ - nextpc = regs->regs[nib[1]]; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (nib[0] == 0x0 && nib[3] == 0x3 && - (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */ - nextpc = regs->pc + 4 + regs->regs[nib[1]]; - finsn = *(unsigned short *) (regs->pc + 2); - } else if (insn == 0x000b) { /* rts */ - nextpc = regs->pr; - finsn = *(unsigned short *) (regs->pc + 2); - } else { - nextpc = regs->pc + 2; - finsn = insn; - } - - if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */ - struct task_struct *tsk = current; - - if ((tsk->thread.xstate->softfpu.fpscr & (1 << 17))) { - /* FPU error */ - denormal_to_double (&tsk->thread.xstate->softfpu, - (finsn >> 8) & 0xf); - tsk->thread.xstate->softfpu.fpscr &= - ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); - task_thread_info(tsk)->status |= TS_USEDFPU; - } else { - force_sig_fault(SIGFPE, FPE_FLTINV, - (void __user *)regs->pc); - } - - regs->pc = nextpc; - return 1; - } - - return 0; -} - /** * fpu_init - Initialize FPU registers * @fpu: Pointer to software emulated FPU registers. diff --git a/arch/sparc/kernel/smp_32.c b/arch/sparc/kernel/smp_32.c index 50c127ab46d5bc810f6e874a106188f8029e6af4..22b148e5a5f88c76c46b17b87c53f7df510940c8 100644 --- a/arch/sparc/kernel/smp_32.c +++ b/arch/sparc/kernel/smp_32.c @@ -348,7 +348,6 @@ static void sparc_start_secondary(void *arg) */ arch_cpu_pre_starting(arg); - preempt_disable(); cpu = smp_processor_id(); notify_cpu_starting(cpu); diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index e38d8bf454e866636e7091d131277e0596baebb3..ae5faa1d989d2ec7e8ef3e0f05f7d91148b25bc3 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -138,9 +138,6 @@ void smp_callin(void) set_cpu_online(cpuid, true); - /* idle thread is expected to have preempt disabled */ - preempt_disable(); - local_irq_enable(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 78160260991be726ff888371314cfd10b82d5b6a..e451795515c8530da3992585ff4b41fdf238c3db 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -486,3 +486,5 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/sparc/lib/iomap.c b/arch/sparc/lib/iomap.c index c9da9f139694dafe211b5bb30e4d36892cabaf77..f3a8cd491ce0de2e5b0de17e65b368d2d86231d9 100644 --- a/arch/sparc/lib/iomap.c +++ b/arch/sparc/lib/iomap.c @@ -19,8 +19,10 @@ void ioport_unmap(void __iomem *addr) EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); +#ifdef CONFIG_PCI void pci_iounmap(struct pci_dev *dev, void __iomem * addr) { /* nothing to do */ } EXPORT_SYMBOL(pci_iounmap); +#endif diff --git a/arch/um/drivers/virtio_uml.c b/arch/um/drivers/virtio_uml.c index d11b3d41c378552b8890c305ad1437096bca838f..d5d768188b3ba3f19edafc091ee1fe8bc672c849 100644 --- a/arch/um/drivers/virtio_uml.c +++ b/arch/um/drivers/virtio_uml.c @@ -1076,6 +1076,8 @@ static void virtio_uml_release_dev(struct device *d) container_of(d, struct virtio_device, dev); struct virtio_uml_device *vu_dev = to_virtio_uml_device(vdev); + time_travel_propagate_time(); + /* might not have been opened due to not negotiating the feature */ if (vu_dev->req_fd >= 0) { um_free_irq(VIRTIO_IRQ, vu_dev); @@ -1109,6 +1111,8 @@ static int virtio_uml_probe(struct platform_device *pdev) vu_dev->pdev = pdev; vu_dev->req_fd = -1; + time_travel_propagate_time(); + do { rc = os_connect_socket(pdata->socket_path); } while (rc == -EINTR); diff --git a/arch/um/include/asm/delay.h b/arch/um/include/asm/delay.h index 56fc2b8f2dd019ec8b75065f2a7f2c791c0c2d7a..e79b2ab6f40c81d6afd54391dbdfabaed82cdaaf 100644 --- a/arch/um/include/asm/delay.h +++ b/arch/um/include/asm/delay.h @@ -14,7 +14,7 @@ static inline void um_ndelay(unsigned long nsecs) ndelay(nsecs); } #undef ndelay -#define ndelay um_ndelay +#define ndelay(n) um_ndelay(n) static inline void um_udelay(unsigned long usecs) { @@ -26,5 +26,5 @@ static inline void um_udelay(unsigned long usecs) udelay(usecs); } #undef udelay -#define udelay um_udelay +#define udelay(n) um_udelay(n) #endif /* __UM_DELAY_H */ diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h index 0c50fa6e8a55b7330f43027c4dea3170e5173d72..fbb709a222839132cec2a2bbf16c4467d88450e6 100644 --- a/arch/um/include/shared/registers.h +++ b/arch/um/include/shared/registers.h @@ -16,8 +16,8 @@ extern int restore_fp_registers(int pid, unsigned long *fp_regs); extern int save_fpx_registers(int pid, unsigned long *fp_regs); extern int restore_fpx_registers(int pid, unsigned long *fp_regs); extern int save_registers(int pid, struct uml_pt_regs *regs); -extern int restore_registers(int pid, struct uml_pt_regs *regs); -extern int init_registers(int pid); +extern int restore_pid_registers(int pid, struct uml_pt_regs *regs); +extern int init_pid_registers(int pid); extern void get_safe_registers(unsigned long *regs, unsigned long *fp_regs); extern unsigned long get_thread_reg(int reg, jmp_buf *buf); extern int get_fp_registers(int pid, unsigned long *regs); diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index 2d9270508e1565620e418f0134bf7573474351eb..b123955be7accf01958684dbbd2fea0ef8130202 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -21,7 +21,7 @@ int save_registers(int pid, struct uml_pt_regs *regs) return 0; } -int restore_registers(int pid, struct uml_pt_regs *regs) +int restore_pid_registers(int pid, struct uml_pt_regs *regs) { int err; @@ -36,7 +36,7 @@ int restore_registers(int pid, struct uml_pt_regs *regs) static unsigned long exec_regs[MAX_REG_NR]; static unsigned long exec_fp_regs[FP_SIZE]; -int init_registers(int pid) +int init_pid_registers(int pid) { int err; diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index f79dc338279e65e09653e23c1ac7169bf26076ff..b28373a2b8d2d035c7f96d374c50f9b479250459 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -336,7 +336,7 @@ void __init os_early_checks(void) check_tmpexec(); pid = start_ptraced_child(); - if (init_registers(pid)) + if (init_pid_registers(pid)) fatal("Failed to initialize default registers"); stop_ptraced_child(pid, 1, 1); } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0673cae912658624528da74aebe922ac6f46d2d5..f6fb0f6277b958cea9828f88848c83f132924e40 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -76,6 +76,7 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_PTE_DEVMAP if X86_64 select ARCH_HAS_PTE_SPECIAL + select ARCH_HAS_NONLEAF_PMD_YOUNG select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 select ARCH_HAS_COPY_MC if X86_64 select ARCH_HAS_SET_MEMORY @@ -1273,7 +1274,8 @@ config TOSHIBA config I8K tristate "Dell i8k legacy laptop support" - select HWMON + depends on HWMON + depends on PROC_FS select SENSORS_DELL_SMM help This option enables legacy /proc/i8k userspace interface in hwmon @@ -1422,7 +1424,7 @@ config HIGHMEM4G config HIGHMEM64G bool "64GB" - depends on !M486SX && !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !WINCHIP3D && !MK6 + depends on !M486SX && !M486 && !M586 && !M586TSC && !M586MMX && !MGEODE_LX && !MGEODEGX1 && !MCYRIXIII && !MELAN && !MWINCHIPC6 && !MWINCHIP3D && !MK6 select X86_PAE help Select this if you have a 32-bit processor and more than 4 @@ -1534,6 +1536,7 @@ config AMD_MEM_ENCRYPT select ARCH_USE_MEMREMAP_PROT select ARCH_HAS_FORCE_DMA_UNENCRYPTED select INSTRUCTION_DECODER + select ARCH_HAS_CC_PLATFORM help Say yes to enable support for the encryption of system memory. This requires an AMD processor that supports Secure Memory @@ -1541,7 +1544,6 @@ config AMD_MEM_ENCRYPT config AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT bool "Activate AMD Secure Memory Encryption (SME) by default" - default y depends on AMD_MEM_ENCRYPT help Say yes to have system memory encrypted by default if running on @@ -1945,6 +1947,7 @@ config EFI depends on ACPI select UCS2_STRING select EFI_RUNTIME_WRAPPERS + select ARCH_USE_MEMREMAP_PROT help This enables the kernel to use EFI runtime services that are available (such as the EFI variable services). diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 6004047d25fdda7135884cb5f85363caab72d8b4..bf91e0a36d77fa1fb3882e247633873ed18a6bc4 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -28,7 +28,11 @@ KCOV_INSTRUMENT := n targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \ vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 vmlinux.bin.zst -KBUILD_CFLAGS := -m$(BITS) -O2 +# CLANG_FLAGS must come before any cc-disable-warning or cc-option calls in +# case of cross compiling, as it has the '--target=' flag, which is needed to +# avoid errors with '-march=i386', and future flags may depend on the target to +# be valid. +KBUILD_CFLAGS := -m$(BITS) -O2 $(CLANG_FLAGS) KBUILD_CFLAGS += -fno-strict-aliasing -fPIE KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING cflags-$(CONFIG_X86_32) := -march=i386 @@ -46,7 +50,6 @@ KBUILD_CFLAGS += -D__DISABLE_EXPORTS # Disable relocation relaxation in case the link is not PIE. KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no) KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h -KBUILD_CFLAGS += $(CLANG_FLAGS) # sev-es.c indirectly inludes inat-table.h which is generated during # compilation and stored in $(objtree). Add the directory to the includes so diff --git a/arch/x86/configs/gki_defconfig b/arch/x86/configs/gki_defconfig index a4b51298fab78266332fd9b79a5c44e4adba9422..103720489453e3fe33cf6c05a70f94e24c147aad 100644 --- a/arch/x86/configs/gki_defconfig +++ b/arch/x86/configs/gki_defconfig @@ -47,6 +47,7 @@ CONFIG_SLAB_FREELIST_RANDOM=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_SHUFFLE_PAGE_ALLOCATOR=y CONFIG_PROFILING=y +# CONFIG_ZONE_DMA is not set CONFIG_SMP=y CONFIG_X86_X2APIC=y CONFIG_HYPERVISOR_GUEST=y @@ -75,7 +76,10 @@ CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SCMVERSION=y +CONFIG_MODULE_SIG=y +CONFIG_MODULE_SIG_PROTECT=y CONFIG_BLK_CGROUP_IOCOST=y +CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BLK_INLINE_ENCRYPTION=y CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y CONFIG_IOSCHED_BFQ=y @@ -94,6 +98,11 @@ CONFIG_CMA_DEBUGFS=y CONFIG_CMA_SYSFS=y CONFIG_CMA_AREAS=16 CONFIG_READ_ONLY_THP_FOR_FS=y +CONFIG_ANON_VMA_NAME=y +CONFIG_LRU_GEN=y +CONFIG_DAMON=y +CONFIG_DAMON_PADDR=y +CONFIG_DAMON_RECLAIM=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -201,6 +210,7 @@ CONFIG_IP6_NF_RAW=y CONFIG_TIPC=y CONFIG_L2TP=y CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=y CONFIG_6LOWPAN=y CONFIG_IEEE802154=y CONFIG_IEEE802154_6LOWPAN=y @@ -235,6 +245,7 @@ CONFIG_NET_ACT_POLICE=y CONFIG_NET_ACT_GACT=y CONFIG_NET_ACT_MIRRED=y CONFIG_NET_ACT_SKBEDIT=y +CONFIG_NET_ACT_BPF=y CONFIG_VSOCKETS=y CONFIG_CGROUP_NET_PRIO=y CONFIG_BPF_JIT=y @@ -295,6 +306,7 @@ CONFIG_NETDEVICES=y CONFIG_DUMMY=y CONFIG_WIREGUARD=y CONFIG_IFB=y +CONFIG_MACSEC=y CONFIG_TUN=y CONFIG_VETH=y CONFIG_PPP=y @@ -306,7 +318,6 @@ CONFIG_PPPOL2TP=y CONFIG_USB_RTL8150=y CONFIG_USB_RTL8152=y CONFIG_USB_USBNET=y -# CONFIG_USB_NET_AX88179_178A is not set CONFIG_USB_NET_CDC_EEM=y # CONFIG_USB_NET_NET1080 is not set # CONFIG_USB_NET_CDC_SUBSET is not set @@ -343,6 +354,7 @@ CONFIG_INPUT_UINPUT=y CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_RUNTIME_UARTS=0 CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_SAMSUNG=y CONFIG_SERIAL_SAMSUNG_CONSOLE=y @@ -351,6 +363,7 @@ CONFIG_HW_RANDOM=y # CONFIG_DEVMEM is not set # CONFIG_DEVPORT is not set CONFIG_HPET=y +CONFIG_RANDOM_TRUST_CPU=y # CONFIG_I2C_COMPAT is not set # CONFIG_I2C_HELPER_AUTO is not set CONFIG_I3C=y @@ -441,7 +454,6 @@ CONFIG_USB_CONFIGFS_SERIAL=y CONFIG_USB_CONFIGFS_ACM=y CONFIG_USB_CONFIGFS_NCM=y CONFIG_USB_CONFIGFS_ECM=y -CONFIG_USB_CONFIGFS_RNDIS=y CONFIG_USB_CONFIGFS_EEM=y CONFIG_USB_CONFIGFS_MASS_STORAGE=y CONFIG_USB_CONFIGFS_F_FS=y @@ -497,6 +509,7 @@ CONFIG_EXT4_FS_SECURITY=y CONFIG_F2FS_FS=y CONFIG_F2FS_FS_SECURITY=y CONFIG_F2FS_FS_COMPRESSION=y +CONFIG_F2FS_UNFAIR_RWSEM=y CONFIG_FS_ENCRYPTION=y CONFIG_FS_ENCRYPTION_INLINE_CRYPT=y CONFIG_FS_VERITY=y @@ -506,6 +519,7 @@ CONFIG_QUOTA=y CONFIG_QFMT_V2=y CONFIG_FUSE_FS=y CONFIG_VIRTIO_FS=y +CONFIG_FUSE_BPF=y CONFIG_OVERLAY_FS=y CONFIG_INCREMENTAL_FS=y CONFIG_MSDOS_FS=y @@ -574,7 +588,6 @@ CONFIG_SECURITYFS=y CONFIG_SECURITY_NETWORK=y CONFIG_HARDENED_USERCOPY=y # CONFIG_HARDENED_USERCOPY_FALLBACK is not set -CONFIG_FORTIFY_SOURCE=y CONFIG_STATIC_USERMODEHELPER=y CONFIG_STATIC_USERMODEHELPER_PATH="" CONFIG_SECURITY_SELINUX=y @@ -599,6 +612,7 @@ CONFIG_PRINTK_TIME=y CONFIG_DYNAMIC_DEBUG_CORE=y CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DEBUG_INFO_BTF=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_HEADERS_INSTALL=y # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set @@ -615,9 +629,10 @@ CONFIG_KFENCE_SAMPLE_INTERVAL=500 CONFIG_KFENCE_NUM_OBJECTS=63 CONFIG_PANIC_ON_OOPS=y CONFIG_PANIC_TIMEOUT=-1 -CONFIG_DETECT_HUNG_TASK=y +CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_WQ_WATCHDOG=y CONFIG_SCHEDSTATS=y CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_TRACEFS_DISABLE_AUTOMOUNT=y +CONFIG_HIST_TRIGGERS=y CONFIG_UNWINDER_FRAME_POINTER=y diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 78210793d357cf027b8a44419ec3b9447bdad98a..38d7acb9610cc46ae24f40c9d59e8ae24e2aac74 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -264,3 +264,4 @@ CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_BOOT_PARAMS=y +CONFIG_KALLSYMS_ALL=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 9936528e19393afad0f87a89196e43dd2c52e171..c6e587a9a6f850b9b4539652565a8b4e70cc9c50 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -260,3 +260,4 @@ CONFIG_BLK_DEV_IO_TRACE=y CONFIG_PROVIDE_OHCI1394_DMA_INIT=y CONFIG_EARLY_PRINTK_DBGP=y CONFIG_DEBUG_BOOT_PARAMS=y +CONFIG_KALLSYMS_ALL=y diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f18f3932e971af15817e62979335aaab01765c0e..a24ce5905ab8291687a81ea798e7e1a1c90ac6c4 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -575,6 +575,10 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL) ud2 1: #endif +#ifdef CONFIG_XEN_PV + ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV +#endif + POP_REGS pop_rdi=0 /* @@ -669,7 +673,7 @@ native_irq_return_ldt: */ pushq %rdi /* Stash user RDI */ - SWAPGS /* to kernel GS */ + swapgs /* to kernel GS */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi /* to kernel CR3 */ movq PER_CPU_VAR(espfix_waddr), %rdi @@ -699,7 +703,7 @@ native_irq_return_ldt: orq PER_CPU_VAR(espfix_stack), %rax SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi - SWAPGS /* to user GS */ + swapgs /* to user GS */ popq %rdi /* Restore user RDI */ movq %rax, %rsp @@ -932,6 +936,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) .Lparanoid_entry_checkgs: /* EBX = 1 -> kernel GSBASE active, no restore required */ movl $1, %ebx + /* * The kernel-enforced convention is a negative GSBASE indicates * a kernel value. No SWAPGS needed on entry and exit. @@ -939,21 +944,14 @@ SYM_CODE_START_LOCAL(paranoid_entry) movl $MSR_GS_BASE, %ecx rdmsr testl %edx, %edx - jns .Lparanoid_entry_swapgs - ret - -.Lparanoid_entry_swapgs: - SWAPGS - - /* - * The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an - * unconditional CR3 write, even in the PTI case. So do an lfence - * to prevent GS speculation, regardless of whether PTI is enabled. - */ - FENCE_SWAPGS_KERNEL_ENTRY + js .Lparanoid_kernel_gsbase /* EBX = 0 -> SWAPGS required on exit */ xorl %ebx, %ebx + swapgs +.Lparanoid_kernel_gsbase: + + FENCE_SWAPGS_KERNEL_ENTRY ret SYM_CODE_END(paranoid_entry) @@ -1001,7 +999,7 @@ SYM_CODE_START_LOCAL(paranoid_exit) jnz restore_regs_and_return_to_kernel /* We are returning to a context with user GSBASE */ - SWAPGS_UNSAFE_STACK + swapgs jmp restore_regs_and_return_to_kernel SYM_CODE_END(paranoid_exit) @@ -1035,11 +1033,6 @@ SYM_CODE_START_LOCAL(error_entry) pushq %r12 ret -.Lerror_entry_done_lfence: - FENCE_SWAPGS_KERNEL_ENTRY -.Lerror_entry_done: - ret - /* * There are two places in the kernel that can potentially fault with * usergs. Handle them here. B stepping K8s sometimes report a @@ -1062,8 +1055,14 @@ SYM_CODE_START_LOCAL(error_entry) * .Lgs_change's error handler with kernel gsbase. */ SWAPGS - FENCE_SWAPGS_USER_ENTRY - jmp .Lerror_entry_done + + /* + * Issue an LFENCE to prevent GS speculation, regardless of whether it is a + * kernel or user gsbase. + */ +.Lerror_entry_done_lfence: + FENCE_SWAPGS_KERNEL_ENTRY + ret .Lbstep_iret: /* Fix truncated RIP */ @@ -1426,7 +1425,7 @@ nmi_no_fsgsbase: jnz nmi_restore nmi_swapgs: - SWAPGS_UNSAFE_STACK + swapgs nmi_restore: POP_REGS diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 0d0667a9fbd70aeb9e297342b9f855a457d45ea4..5a6c85f13d9986abda0364155758c7df05aae16d 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -445,3 +445,4 @@ 438 i386 pidfd_getfd sys_pidfd_getfd 439 i386 faccessat2 sys_faccessat2 440 i386 process_madvise sys_process_madvise +448 i386 process_mrelease sys_process_mrelease diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 379819244b91d275b2c98f7ff38a59049c24b42a..d31c21e3408df778b2c3d1db5decefb7cd5c4dc6 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -362,6 +362,7 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +448 common process_mrelease sys_process_mrelease # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e6db1a1f22d7d60d7e8e81abce3b12d5be7da997..b79b9f21cbb3be91ecd35838c2ef2b704b21a780 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2284,6 +2284,7 @@ static int x86_pmu_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); + event->destroy = NULL; } if (READ_ONCE(x86_pmu.attr_rdpmc) && @@ -2544,10 +2545,11 @@ static bool perf_hw_regs(struct pt_regs *regs) void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct unwind_state state; unsigned long addr; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ return; } @@ -2647,10 +2649,11 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); struct stack_frame frame; const struct stack_frame __user *fp; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (guest_cbs && guest_cbs->is_in_guest()) { /* TODO: We don't support guest os callchain now */ return; } @@ -2727,18 +2730,21 @@ static unsigned long code_segment_base(struct pt_regs *regs) unsigned long perf_instruction_pointer(struct pt_regs *regs) { - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); + + if (guest_cbs && guest_cbs->is_in_guest()) + return guest_cbs->get_guest_ip(); return regs->ip + code_segment_base(regs); } unsigned long perf_misc_flags(struct pt_regs *regs) { + struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs(); int misc = 0; - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) + if (guest_cbs && guest_cbs->is_in_guest()) { + if (guest_cbs->is_user_mode()) misc |= PERF_RECORD_MISC_GUEST_USER; else misc |= PERF_RECORD_MISC_GUEST_KERNEL; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 3b8b8eede1a8aa7862209f3f216fb2aa53cf6d16..5ba13b00e3a714108b8dca51b5ac106d9e7298bc 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -263,6 +263,7 @@ static struct event_constraint intel_icl_event_constraints[] = { INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf), + INTEL_EVENT_CONSTRAINT(0xef, 0xf), INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf), EVENT_CONSTRAINT_END }; @@ -2585,6 +2586,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) { struct perf_sample_data data; struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); + struct perf_guest_info_callbacks *guest_cbs; int bit; int handled = 0; @@ -2650,9 +2652,11 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) */ if (__test_and_clear_bit(GLOBAL_STATUS_TRACE_TOPAPMI_BIT, (unsigned long *)&status)) { handled++; - if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() && - perf_guest_cbs->handle_intel_pt_intr)) - perf_guest_cbs->handle_intel_pt_intr(); + + guest_cbs = perf_get_guest_cbs(); + if (unlikely(guest_cbs && guest_cbs->is_in_guest() && + guest_cbs->handle_intel_pt_intr)) + guest_cbs->handle_intel_pt_intr(); else intel_pt_interrupt(); } @@ -2878,8 +2882,10 @@ intel_vlbr_constraints(struct perf_event *event) { struct event_constraint *c = &vlbr_constraint; - if (unlikely(constraint_match(c, event->hw.config))) + if (unlikely(constraint_match(c, event->hw.config))) { + event->hw.flags |= c->flags; return c; + } return NULL; } @@ -4347,6 +4353,19 @@ static __initconst const struct x86_pmu intel_pmu = { .lbr_read = intel_pmu_lbr_read_64, .lbr_save = intel_pmu_lbr_save, .lbr_restore = intel_pmu_lbr_restore, + + /* + * SMM has access to all 4 rings and while traditionally SMM code only + * ran in CPL0, 2021-era firmware is starting to make use of CPL3 in SMM. + * + * Since the EVENTSEL.{USR,OS} CPL filtering makes no distinction + * between SMM or not, this results in what should be pure userspace + * counters including SMM data. + * + * This is a clear privilege issue, therefore globally disable + * counting SMM by default. + */ + .attr_freeze_on_smi = 1, }; static __init void intel_clovertown_quirk(void) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 9c1a013d5682297885704ee425127db6d464b741..bd8516e6c353c75c524df1b8ed84733c61407890 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1734,6 +1734,9 @@ static bool is_arch_lbr_xsave_available(void) * Check the LBR state with the corresponding software structure. * Disable LBR XSAVES support if the size doesn't match. */ + if (xfeature_size(XFEATURE_LBR) == 0) + return false; + if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size())) return false; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 37129b76135a14b55a9105da3c5cedcb06a20f67..c084899e9582546d96cb49997e936ef1a8d89939 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -897,8 +897,9 @@ static void pt_handle_status(struct pt *pt) * means we are already losing data; need to let the decoder * know. */ - if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || - buf->output_off == pt_buffer_region_size(buf)) { + if (!buf->single && + (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries) || + buf->output_off == pt_buffer_region_size(buf))) { perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_TRUNCATED); advance++; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 2701f87a9a7c612f48df7eeea9a6fc1da4d678a1..03c8047bebb38f83cf83f07ce412f5053e1eba14 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -444,7 +444,7 @@ #define ICX_M3UPI_PCI_PMON_BOX_CTL 0xa0 /* ICX IMC */ -#define ICX_NUMBER_IMC_CHN 2 +#define ICX_NUMBER_IMC_CHN 3 #define ICX_IMC_MEM_STRIDE 0x4 DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7"); @@ -3545,6 +3545,9 @@ static int skx_cha_hw_config(struct intel_uncore_box *box, struct perf_event *ev struct hw_perf_event_extra *reg1 = &event->hw.extra_reg; struct extra_reg *er; int idx = 0; + /* Any of the CHA events may be filtered by Thread/Core-ID.*/ + if (event->hw.config & SNBEP_CBO_PMON_CTL_TID_EN) + idx = SKX_CHA_MSR_PMON_BOX_FILTER_TID; for (er = skx_uncore_cha_extra_regs; er->msr; er++) { if (er->event != (event->hw.config & er->config_mask)) @@ -3612,6 +3615,7 @@ static struct event_constraint skx_uncore_iio_constraints[] = { UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), UNCORE_EVENT_CONSTRAINT(0xd4, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), EVENT_CONSTRAINT_END }; @@ -4898,8 +4902,10 @@ static struct event_constraint icx_uncore_iio_constraints[] = { UNCORE_EVENT_CONSTRAINT(0x02, 0x3), UNCORE_EVENT_CONSTRAINT(0x03, 0x3), UNCORE_EVENT_CONSTRAINT(0x83, 0x3), + UNCORE_EVENT_CONSTRAINT(0x88, 0xc), UNCORE_EVENT_CONSTRAINT(0xc0, 0xc), UNCORE_EVENT_CONSTRAINT(0xc5, 0xc), + UNCORE_EVENT_CONSTRAINT(0xd5, 0xc), EVENT_CONSTRAINT_END }; @@ -5228,12 +5234,12 @@ static struct intel_uncore_ops icx_uncore_mmio_ops = { static struct intel_uncore_type icx_uncore_imc = { .name = "imc", .num_counters = 4, - .num_boxes = 8, + .num_boxes = 12, .perf_ctr_bits = 48, .fixed_ctr_bits = 48, .fixed_ctr = SNR_IMC_MMIO_PMON_FIXED_CTR, .fixed_ctl = SNR_IMC_MMIO_PMON_FIXED_CTL, - .event_descs = hswep_uncore_imc_events, + .event_descs = snr_uncore_imc_events, .perf_ctr = SNR_IMC_MMIO_PMON_CTR0, .event_ctl = SNR_IMC_MMIO_PMON_CTL0, .event_mask = SNBEP_PMON_RAW_EVENT_MASK, diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 4be8f9cabd07055515b7e51e4878bfb1ae0a2df7..ca8ce64df2ceb8000cad5b8c8529ed6c751ee319 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -68,6 +68,7 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_BROADWELL_D: case INTEL_FAM6_BROADWELL_G: case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_ATOM_SILVERMONT: case INTEL_FAM6_ATOM_SILVERMONT_D: diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6375967a8244dc4e6777d8ff95de1be33ea77e06..01860c0d324d75ed99c24ccbf3161a5a685bd9c9 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -168,7 +168,6 @@ void set_hv_tscchange_cb(void (*cb)(void)) struct hv_reenlightenment_control re_ctrl = { .vector = HYPERV_REENLIGHTENMENT_VECTOR, .enabled = 1, - .target_vp = hv_vp_index[smp_processor_id()] }; struct hv_tsc_emulation_control emu_ctrl = {.enabled = 1}; @@ -177,13 +176,20 @@ void set_hv_tscchange_cb(void (*cb)(void)) return; } + if (!hv_vp_index) + return; + hv_reenlightenment_cb = cb; /* Make sure callback is registered before we write to MSRs */ wmb(); + re_ctrl.target_vp = hv_vp_index[get_cpu()]; + wrmsrl(HV_X64_MSR_REENLIGHTENMENT_CONTROL, *((u64 *)&re_ctrl)); wrmsrl(HV_X64_MSR_TSC_EMULATION_CONTROL, *((u64 *)&emu_ctrl)); + + put_cpu(); } EXPORT_SYMBOL_GPL(set_hv_tscchange_cb); diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h index 3d52b094850a90acf22c3b6953998b2db61f9d53..dd5ea1bdf04c5fae8c4fc9af3aa592533055bca5 100644 --- a/arch/x86/include/asm/cpu_entry_area.h +++ b/arch/x86/include/asm/cpu_entry_area.h @@ -10,6 +10,12 @@ #ifdef CONFIG_X86_64 +#ifdef CONFIG_AMD_MEM_ENCRYPT +#define VC_EXCEPTION_STKSZ EXCEPTION_STKSZ +#else +#define VC_EXCEPTION_STKSZ 0 +#endif + /* Macro to enforce the same ordering and stack sizes */ #define ESTACKS_MEMBERS(guardsize, optional_stack_size) \ char DF_stack_guard[guardsize]; \ @@ -28,7 +34,7 @@ /* The exception stacks' physical storage. No guard pages required */ struct exception_stacks { - ESTACKS_MEMBERS(0, 0) + ESTACKS_MEMBERS(0, VC_EXCEPTION_STKSZ) }; /* The effective cpu entry area mapping with guard pages. */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index dad350d42ecfbf3063c64138375e81591729b092..3b407f46f1a0d4d0ba60360dc8081de21294dd50 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 6fe54b2813c13960786595db35b7b1f2b0c0dba1..4a382fb6a9ef88b4e2a7cc3efe2464837d3ec1c6 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -24,7 +24,7 @@ static __always_inline void arch_check_user_regs(struct pt_regs *regs) * For !SMAP hardware we patch out CLAC on entry. */ if (boot_cpu_has(X86_FEATURE_SMAP) || - (IS_ENABLED(CONFIG_64_BIT) && boot_cpu_has(X86_FEATURE_XENPV))) + (IS_ENABLED(CONFIG_64BIT) && boot_cpu_has(X86_FEATURE_XENPV))) mask |= X86_EFLAGS_AC; WARN_ON_ONCE(flags & mask); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 4e5af2b00d89ba9651d3e5a84ef754ce96b68ce1..70b9bc5403c5e175081d3ba7c351af1b7ffa78d5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -531,9 +531,11 @@ static inline void __fpregs_load_activate(void) * The FPU context is only stored/restored for a user task and * PF_KTHREAD is used to distinguish between kernel and user threads. */ -static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) +static inline void switch_fpu_prepare(struct task_struct *prev, int cpu) { - if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) { + struct fpu *old_fpu = &prev->thread.fpu; + + if (static_cpu_has(X86_FEATURE_FPU) && !(prev->flags & PF_KTHREAD)) { if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else @@ -552,10 +554,11 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu) * Load PKRU from the FPU context if available. Delay loading of the * complete FPU state until the return to userland. */ -static inline void switch_fpu_finish(struct fpu *new_fpu) +static inline void switch_fpu_finish(struct task_struct *next) { u32 pkru_val = init_pkru_value; struct pkru_state *pk; + struct fpu *next_fpu = &next->thread.fpu; if (!static_cpu_has(X86_FEATURE_FPU)) return; @@ -569,7 +572,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * PKRU state is switched eagerly because it needs to be valid before we * return to userland e.g. for a copy_to_user() operation. */ - if (!(current->flags & PF_KTHREAD)) { + if (!(next->flags & PF_KTHREAD)) { /* * If the PKRU bit in xsave.header.xfeatures is not set, * then the PKRU component was in init state, which means @@ -578,7 +581,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu) * in memory is not valid. This means pkru_val has to be * set to 0 and not to init_pkru_value. */ - pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU); + pk = get_xsave_addr(&next_fpu->state.xsave, XFEATURE_PKRU); pkru_val = pk ? pk->pkru : 0; } __write_pkru(pkru_val); diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index 98b4dae5e8bc8a93398682080ab180982b33280f..c1438f9239e46ce5f22f1792e24c1014647b99fc 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -21,6 +21,7 @@ int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_reg_off(struct insn *insn, struct pt_regs *regs); unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); int insn_get_code_seg_params(struct pt_regs *regs); +unsigned long insn_get_effective_ip(struct pt_regs *regs); int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]); int insn_fetch_from_user_inatomic(struct pt_regs *regs, diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h index 2dfc8d380dab1143c29c215390ba37b1335e4bd7..8c86edefa11508ab3401ead3bcfd757b3834e015 100644 --- a/arch/x86/include/asm/irqflags.h +++ b/arch/x86/include/asm/irqflags.h @@ -131,18 +131,6 @@ static __always_inline unsigned long arch_local_irq_save(void) #define SAVE_FLAGS(x) pushfq; popq %rax #endif -#define SWAPGS swapgs -/* - * Currently paravirt can't handle swapgs nicely when we - * don't have a stack we can rely on (such as a user space - * stack). So we either find a way around these or just fault - * and emulate if a guest tries to call swapgs directly. - * - * Either way, this is a good way to document that we don't - * have a reliable stack. x86_64 only. - */ -#define SWAPGS_UNSAFE_STACK swapgs - #define INTERRUPT_RETURN jmp native_iret #define USERGS_SYSRET64 \ swapgs; \ @@ -170,6 +158,14 @@ static __always_inline int arch_irqs_disabled(void) return arch_irqs_disabled_flags(flags); } +#else +#ifdef CONFIG_X86_64 +#ifdef CONFIG_XEN_PV +#define SWAPGS ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV +#else +#define SWAPGS swapgs +#endif +#endif #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h index 97bbb4a9083a76cce625d5255d2e25ce86aeb2a5..05b48b33baf0b811b2788750d4539f18c9bd2255 100644 --- a/arch/x86/include/asm/kfence.h +++ b/arch/x86/include/asm/kfence.h @@ -56,8 +56,13 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) else set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); - /* Flush this CPU's TLB. */ + /* + * Flush this CPU's TLB, assuming whoever did the allocation/free is + * likely to continue running on this CPU. + */ + preempt_disable(); flush_tlb_one_kernel(addr); + preempt_enable(); return true; } diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index b1cd8334db11a7789465f31aeb4e4a9d6ee3cdee..0eb41dce55da392b79a168d398943bdbc6614570 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -85,7 +85,7 @@ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) #define KVM_REQ_TLB_FLUSH_GUEST \ - KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP) + KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) @@ -1285,6 +1285,7 @@ struct kvm_x86_ops { }; struct kvm_x86_nested_ops { + void (*leave_nested)(struct kvm_vcpu *vcpu); int (*check_events)(struct kvm_vcpu *vcpu); bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); int (*get_state)(struct kvm_vcpu *vcpu, @@ -1306,6 +1307,7 @@ struct kvm_x86_init_ops { int (*disabled_by_bios)(void); int (*check_processor_compatibility)(void); int (*hardware_setup)(void); + bool (*intel_pt_intr_in_guest)(void); struct kvm_x86_ops *runtime_ops; }; diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index 87bd6025d91d4bb22df73bd821072904b337a5f3..6a5f3acf2b331291be6b5a2a1600361b9b2f2393 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -46,7 +46,7 @@ struct kvm_page_track_notifier_node { struct kvm_page_track_notifier_node *node); }; -void kvm_page_track_init(struct kvm *kvm); +int kvm_page_track_init(struct kvm *kvm); void kvm_page_track_cleanup(struct kvm *kvm); void kvm_page_track_free_memslot(struct kvm_memory_slot *slot); diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h index eceea929909740613471b74567d8e24413b57339..6c576519210280483b93f67d2101aa4f1d26445d 100644 --- a/arch/x86/include/asm/kvmclock.h +++ b/arch/x86/include/asm/kvmclock.h @@ -2,6 +2,20 @@ #ifndef _ASM_X86_KVM_CLOCK_H #define _ASM_X86_KVM_CLOCK_H +#include + extern struct clocksource kvm_clock; +DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); + +static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) +{ + return &this_cpu_read(hv_clock_per_cpu)->pvti; +} + +static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) +{ + return this_cpu_read(hv_clock_per_cpu); +} + #endif /* _ASM_X86_KVM_CLOCK_H */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 31c4df123aa0f4f51ae822c0d8438c2cd63cf79f..4de031ca00105cfdee41f26289dafe6061d53e02 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -13,6 +13,7 @@ #ifndef __ASSEMBLY__ #include +#include #include diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index cb9ad6b739737e4e36f47f3e93e0604f5a4c7713..4d0f5386e637ba09361f36abddc7fb8ac0fe3bde 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -82,7 +82,7 @@ #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_LFENCE #else jmp *%\reg #endif @@ -92,7 +92,7 @@ #ifdef CONFIG_RETPOLINE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_LFENCE #else call *%\reg #endif @@ -134,7 +134,7 @@ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) @@ -164,7 +164,7 @@ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -176,9 +176,11 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_GENERIC, - SPECTRE_V2_RETPOLINE_AMD, - SPECTRE_V2_IBRS_ENHANCED, + SPECTRE_V2_RETPOLINE, + SPECTRE_V2_LFENCE, + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, }; /* The indirect branch speculation control variants */ diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 3f49dac03617df13574dc7757e5dbbc298fa7f72..224d71aef63034118e5e5c8ed6a2f99c966d87c9 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -15,7 +15,7 @@ #define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER) #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) -#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER) +#define EXCEPTION_STACK_ORDER (1 + KASAN_STACK_ORDER) #define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) #define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER) diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index d25cc6830e895b3142f69684e5758fce2ef84a90..5647bcdba776e49f6c3744255a06d8c22c194a22 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -776,26 +776,6 @@ extern void default_banner(void); #ifdef CONFIG_X86_64 #ifdef CONFIG_PARAVIRT_XXL -/* - * If swapgs is used while the userspace stack is still current, - * there's no way to call a pvop. The PV replacement *must* be - * inlined, or the swapgs instruction must be trapped and emulated. - */ -#define SWAPGS_UNSAFE_STACK \ - PARA_SITE(PARA_PATCH(PV_CPU_swapgs), swapgs) - -/* - * Note: swapgs is very special, and in practise is either going to be - * implemented with a single "swapgs" instruction or something very - * special. Either way, we don't need to save any registers for - * it. - */ -#define SWAPGS \ - PARA_SITE(PARA_PATCH(PV_CPU_swapgs), \ - ANNOTATE_RETPOLINE_SAFE; \ - call PARA_INDIRECT(pv_ops+PV_CPU_swapgs); \ - ) - #define USERGS_SYSRET64 \ PARA_SITE(PARA_PATCH(PV_CPU_usergs_sysret64), \ ANNOTATE_RETPOLINE_SAFE; \ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 0fad9f61c76ab8dfb78a022cc119867df40eb254..903d71884fa25dca7bf3fe4ef70d3be278c34abb 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -169,8 +169,6 @@ struct pv_cpu_ops { frame set up. */ void (*iret)(void); - void (*swapgs)(void); - void (*start_context_switch)(struct task_struct *prev); void (*end_context_switch)(struct task_struct *next); #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index a02c67291cfcbdf69048030323e1c4d4f2aee907..ed09aca6203ad27de6d887b385119c07c8436e99 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -846,7 +846,8 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd) static inline int pmd_bad(pmd_t pmd) { - return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; + return (pmd_flags(pmd) & ~(_PAGE_USER | _PAGE_ACCESSED)) != + (_KERNPG_TABLE & ~_PAGE_ACCESSED); } static inline unsigned long pages_to_mb(unsigned long npg) @@ -1360,8 +1361,8 @@ static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) } #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 +#define PKRU_AD_BIT 0x1u +#define PKRU_WD_BIT 0x2u #define PKRU_BITS_PER_PKEY 2 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS @@ -1452,10 +1453,10 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } -#define arch_faults_on_old_pte arch_faults_on_old_pte -static inline bool arch_faults_on_old_pte(void) +#define arch_has_hw_pte_young arch_has_hw_pte_young +static inline bool arch_has_hw_pte_young(void) { - return false; + return true; } #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 69485ca13665f1722a919bbf1c8345cd25e8d8ae..a334dd0d7c42c58dc587da6105b9fa87562e19da 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -43,7 +43,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - per_cpu(__preempt_count, (cpu)) = PREEMPT_ENABLED; \ + per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 50d02db723177c212ab614b9336df751fdfb893b..d428d611a43a97fa4ce48671c8ac0a89b73e7b55 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -534,6 +534,7 @@ struct thread_struct { */ unsigned long iopl_emul; + unsigned int iopl_warn:1; unsigned int sig_on_uaccess_err:1; /* Floating point and extended processor state */ diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 5db5d083c87322ec4913252d1c22752b85d517f4..331474b150f16964e1ba8788e6a623d0214aee62 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -89,6 +89,7 @@ static inline void set_real_mode_mem(phys_addr_t mem) } void reserve_real_mode(void); +void load_trampoline_pgtable(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 5c95d242f38d708e313f39cc781fa8455fdd16ed..bb1430283c726c1fa5791b56ca88bee298200b94 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -314,11 +314,12 @@ do { \ do { \ __chk_user_ptr(ptr); \ switch (size) { \ - unsigned char x_u8__; \ - case 1: \ + case 1: { \ + unsigned char x_u8__; \ __get_user_asm(x_u8__, ptr, "b", "=q", label); \ (x) = x_u8__; \ break; \ + } \ case 2: \ __get_user_asm(x, ptr, "w", "=r", label); \ break; \ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 68608bd892c0d56c6d9b257214268d0a7d519f23..c06f3a961d647f635d63a3c9e92607cf73153f85 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -21,6 +21,7 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg CFLAGS_REMOVE_sev-es.o = -pg +CFLAGS_REMOVE_cc_platform.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n @@ -29,6 +30,7 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n KASAN_SANITIZE_sev-es.o := n +KASAN_SANITIZE_cc_platform.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. @@ -48,6 +50,7 @@ endif KCOV_INSTRUMENT := n CFLAGS_head$(BITS).o += -fno-stack-protector +CFLAGS_cc_platform.o += -fno-stack-protector CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace @@ -151,6 +154,9 @@ obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev-es.o + +obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 828be792231e9d935796b4178289303ce80f9e68..1354bc30614d7e9411167093c6df09d784cdd090 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -15,7 +15,6 @@ int main(void) #ifdef CONFIG_PARAVIRT_XXL OFFSET(PV_CPU_usergs_sysret64, paravirt_patch_template, cpu.usergs_sysret64); - OFFSET(PV_CPU_swapgs, paravirt_patch_template, cpu.swapgs); #ifdef CONFIG_DEBUG_ENTRY OFFSET(PV_IRQ_save_fl, paravirt_patch_template, irq.save_fl); #endif diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/kernel/cc_platform.c new file mode 100644 index 0000000000000000000000000000000000000000..03bb2f343ddb7d36f577dc116fc563535326377f --- /dev/null +++ b/arch/x86/kernel/cc_platform.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Confidential Computing Platform Capability checks + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky + */ + +#include +#include +#include + +#include + +static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr) +{ +#ifdef CONFIG_INTEL_TDX_GUEST + return false; +#else + return false; +#endif +} + +/* + * SME and SEV are very similar but they are not the same, so there are + * times that the kernel will need to distinguish between SME and SEV. The + * cc_platform_has() function is used for this. When a distinction isn't + * needed, the CC_ATTR_MEM_ENCRYPT attribute can be used. + * + * The trampoline code is a good example for this requirement. Before + * paging is activated, SME will access all memory as decrypted, but SEV + * will access all memory as encrypted. So, when APs are being brought + * up under SME the trampoline area cannot be encrypted, whereas under SEV + * the trampoline area must be encrypted. + */ +static bool amd_cc_platform_has(enum cc_attr attr) +{ +#ifdef CONFIG_AMD_MEM_ENCRYPT + switch (attr) { + case CC_ATTR_MEM_ENCRYPT: + return sme_me_mask; + + case CC_ATTR_HOST_MEM_ENCRYPT: + return sme_me_mask && !(sev_status & MSR_AMD64_SEV_ENABLED); + + case CC_ATTR_GUEST_MEM_ENCRYPT: + return sev_status & MSR_AMD64_SEV_ENABLED; + + case CC_ATTR_GUEST_STATE_ENCRYPT: + return sev_status & MSR_AMD64_SEV_ES_ENABLED; + + default: + return false; + } +#else + return false; +#endif +} + + +bool cc_platform_has(enum cc_attr attr) +{ + if (sme_me_mask) + return amd_cc_platform_has(attr); + + return false; +} +EXPORT_SYMBOL_GPL(cc_platform_has); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a2551b10780c674d846d55a0c78922e42b6af70e..acea05eed27d4f35a6762dbf6cd15f4202aeffad 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1017,6 +1017,8 @@ static void init_amd(struct cpuinfo_x86 *c) if (cpu_has(c, X86_FEATURE_IRPERF) && !cpu_has_amd_erratum(c, amd_erratum_1054)) msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT); + + check_null_seg_clears_base(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d41b70fe4918e3f8f9613b87e452d55d01984671..78b9514a3844051916aec16ba69d8a8fe1937417 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -613,6 +614,32 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" +#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" + +#ifdef CONFIG_BPF_SYSCALL +void unpriv_ebpf_notify(int new_state) +{ + if (new_state) + return; + + /* Unprivileged eBPF is enabled */ + + switch (spectre_v2_enabled) { + case SPECTRE_V2_EIBRS: + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + break; + case SPECTRE_V2_EIBRS_LFENCE: + if (sched_smt_active()) + pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + break; + default: + break; + } +} +#endif + static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -627,7 +654,10 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_FORCE, SPECTRE_V2_CMD_RETPOLINE, SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, + SPECTRE_V2_CMD_RETPOLINE_LFENCE, + SPECTRE_V2_CMD_EIBRS, + SPECTRE_V2_CMD_EIBRS_RETPOLINE, + SPECTRE_V2_CMD_EIBRS_LFENCE, }; enum spectre_v2_user_cmd { @@ -700,6 +730,13 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } +static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +{ + return (mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE); +} + static void __init spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { @@ -767,7 +804,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return; /* @@ -787,9 +824,11 @@ set_mode: static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", + [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", }; static const struct { @@ -800,8 +839,12 @@ static const struct { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, + { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "eibrs", SPECTRE_V2_CMD_EIBRS, false }, + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, }; @@ -838,17 +881,30 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || - cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || - cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } - if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON && - boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); + if ((cmd == SPECTRE_V2_CMD_EIBRS || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { + pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) && + !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -857,6 +913,16 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) +{ + if (!IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("Kernel not compiled with retpoline; no mitigation available!"); + return SPECTRE_V2_NONE; + } + + return SPECTRE_V2_RETPOLINE; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -877,49 +943,64 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - mode = SPECTRE_V2_IBRS_ENHANCED; - /* Force it so VMEXIT will restore correctly */ - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - goto specv2_set_mode; + mode = SPECTRE_V2_EIBRS; + break; } - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + + mode = spectre_v2_select_retpoline(); break; - case SPECTRE_V2_CMD_RETPOLINE_AMD: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_amd; + + case SPECTRE_V2_CMD_RETPOLINE_LFENCE: + pr_err(SPECTRE_V2_LFENCE_MSG); + mode = SPECTRE_V2_LFENCE; break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_generic; + mode = SPECTRE_V2_RETPOLINE; break; + case SPECTRE_V2_CMD_RETPOLINE: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + mode = spectre_v2_select_retpoline(); + break; + + case SPECTRE_V2_CMD_EIBRS: + mode = SPECTRE_V2_EIBRS; + break; + + case SPECTRE_V2_CMD_EIBRS_LFENCE: + mode = SPECTRE_V2_EIBRS_LFENCE; + break; + + case SPECTRE_V2_CMD_EIBRS_RETPOLINE: + mode = SPECTRE_V2_EIBRS_RETPOLINE; break; } - pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); - return; -retpoline_auto: - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - retpoline_amd: - if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); - goto retpoline_generic; - } - mode = SPECTRE_V2_RETPOLINE_AMD; - setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); - setup_force_cpu_cap(X86_FEATURE_RETPOLINE); - } else { - retpoline_generic: - mode = SPECTRE_V2_RETPOLINE_GENERIC; + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + } + + switch (mode) { + case SPECTRE_V2_NONE: + case SPECTRE_V2_EIBRS: + break; + + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_EIBRS_LFENCE: + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); + fallthrough; + + case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_EIBRS_RETPOLINE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + break; } -specv2_set_mode: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -945,7 +1026,7 @@ specv2_set_mode: * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. */ - if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } @@ -1015,6 +1096,10 @@ void cpu_bugs_smt_update(void) { mutex_lock(&spec_ctrl_mutex); + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; @@ -1621,7 +1706,7 @@ static ssize_t tsx_async_abort_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { @@ -1651,6 +1736,27 @@ static char *ibpb_state(void) return ""; } +static ssize_t spectre_v2_show_state(char *buf) +{ + if (spectre_v2_enabled == SPECTRE_V2_LFENCE) + return sprintf(buf, "Vulnerable: LFENCE\n"); + + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); + + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); + + return sprintf(buf, "%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + spectre_v2_module_string()); +} + static ssize_t srbds_show_state(char *buf) { return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); @@ -1676,12 +1782,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", - spectre_v2_module_string()); + return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 25148ebd36341df8b0256029a9215867371ae1b3..9c8fc6f513ed3f9b9142a1c253c65f3760345814 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -320,6 +320,7 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_SMAP cr4_set_bits(X86_CR4_SMAP); #else + clear_cpu_cap(c, X86_FEATURE_SMAP); cr4_clear_bits(X86_CR4_SMAP); #endif } @@ -1390,9 +1391,8 @@ void __init early_cpu_init(void) early_identify_cpu(&boot_cpu_data); } -static void detect_null_seg_behavior(struct cpuinfo_x86 *c) +static bool detect_null_seg_behavior(void) { -#ifdef CONFIG_X86_64 /* * Empirically, writing zero to a segment selector on AMD does * not clear the base, whereas writing zero to a segment @@ -1413,10 +1413,43 @@ static void detect_null_seg_behavior(struct cpuinfo_x86 *c) wrmsrl(MSR_FS_BASE, 1); loadsegment(fs, 0); rdmsrl(MSR_FS_BASE, tmp); - if (tmp != 0) - set_cpu_bug(c, X86_BUG_NULL_SEG); wrmsrl(MSR_FS_BASE, old_base); -#endif + return tmp == 0; +} + +void check_null_seg_clears_base(struct cpuinfo_x86 *c) +{ + /* BUG_NULL_SEG is only relevant with 64bit userspace */ + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* Zen3 CPUs advertise Null Selector Clears Base in CPUID. */ + if (c->extended_cpuid_level >= 0x80000021 && + cpuid_eax(0x80000021) & BIT(6)) + return; + + /* + * CPUID bit above wasn't set. If this kernel is still running + * as a HV guest, then the HV has decided not to advertize + * that CPUID bit for whatever reason. For example, one + * member of the migration pool might be vulnerable. Which + * means, the bug is present: set the BUG flag and return. + */ + if (cpu_has(c, X86_FEATURE_HYPERVISOR)) { + set_cpu_bug(c, X86_BUG_NULL_SEG); + return; + } + + /* + * Zen2 CPUs also have this behaviour, but no CPUID bit. + * 0x18 is the respective family for Hygon. + */ + if ((c->x86 == 0x17 || c->x86 == 0x18) && + detect_null_seg_behavior()) + return; + + /* All the remaining ones are affected */ + set_cpu_bug(c, X86_BUG_NULL_SEG); } static void generic_identify(struct cpuinfo_x86 *c) @@ -1452,8 +1485,6 @@ static void generic_identify(struct cpuinfo_x86 *c) get_model_name(c); /* Default name */ - detect_null_seg_behavior(c); - /* * ESPFIX is a strange bug. All real CPUs have it. Paravirt * systems that run Linux at CPL > 0 may or may not have the diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 67944128876d7300e29545c7d5336a52a81daa75..093f5fc860e3fafa864c9d1414867105402c9461 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -73,6 +73,7 @@ extern int detect_extended_topology_early(struct cpuinfo_x86 *c); extern int detect_extended_topology(struct cpuinfo_x86 *c); extern int detect_ht_early(struct cpuinfo_x86 *c); extern void detect_ht(struct cpuinfo_x86 *c); +extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index dc0840aae26c14e8c8b6fb9ae6a7b4996a9edc5e..b78c471ec344b282ef81fc35728d77163fe89b6a 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -351,6 +351,8 @@ static void init_hygon(struct cpuinfo_x86 *c) /* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */ if (!cpu_has(c, X86_FEATURE_XENPV)) set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS); + + check_null_seg_clears_base(c); } static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 0c6b02dd744c1424c3235035aab439d2f1d0d1b8..f73f1184b1c13d9b2ef62fecda27b85138f10869 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -387,7 +387,7 @@ static void threshold_restart_bank(void *_tr) u32 hi, lo; /* sysfs write might race against an offline operation */ - if (this_cpu_read(threshold_banks)) + if (!this_cpu_read(threshold_banks) && !tr->set_lvt_off) return; rdmsr(tr->b->address, lo, hi); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 14b34963eb1f7f165ee59337f6f6f6a44e4ea0e4..5cf1a024408bf948a3671424f96a9945945970db 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -295,11 +295,17 @@ static void wait_for_panic(void) panic("Panicing machine check CPU died"); } -static void mce_panic(const char *msg, struct mce *final, char *exp) +static noinstr void mce_panic(const char *msg, struct mce *final, char *exp) { - int apei_err = 0; struct llist_node *pending; struct mce_evt_llist *l; + int apei_err = 0; + + /* + * Allow instrumentation around external facilities usage. Not that it + * matters a whole lot since the machine is going to panic anyway. + */ + instrumentation_begin(); if (!fake_panic) { /* @@ -314,7 +320,7 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) } else { /* Don't log too much for fake panic */ if (atomic_inc_return(&mce_fake_panicked) > 1) - return; + goto out; } pending = mce_gen_pool_prepare_records(); /* First print corrected ones that are still unlogged */ @@ -352,6 +358,9 @@ static void mce_panic(const char *msg, struct mce *final, char *exp) panic(msg); } else pr_emerg(HW_ERR "Fake kernel panic: %s\n", msg); + +out: + instrumentation_end(); } /* Support code for software error injection */ @@ -682,7 +691,7 @@ static struct notifier_block mce_default_nb = { /* * Read ADDR and MISC registers. */ -static void mce_read_aux(struct mce *m, int i) +static noinstr void mce_read_aux(struct mce *m, int i) { if (m->status & MCI_STATUS_MISCV) m->misc = mce_rdmsrl(msr_ops.misc(i)); @@ -1061,10 +1070,13 @@ static int mce_start(int *no_way_out) * Synchronize between CPUs after main scanning loop. * This invokes the bulk of the Monarch processing. */ -static int mce_end(int order) +static noinstr int mce_end(int order) { - int ret = -1; u64 timeout = (u64)mca_cfg.monarch_timeout * NSEC_PER_USEC; + int ret = -1; + + /* Allow instrumentation around external facilities. */ + instrumentation_begin(); if (!timeout) goto reset; @@ -1108,7 +1120,8 @@ static int mce_end(int order) /* * Don't reset anything. That's done by the Monarch. */ - return 0; + ret = 0; + goto out; } /* @@ -1123,6 +1136,10 @@ reset: * Let others run again. */ atomic_set(&mce_executing, 0); + +out: + instrumentation_end(); + return ret; } @@ -1443,6 +1460,14 @@ noinstr void do_machine_check(struct pt_regs *regs) if (worst != MCE_AR_SEVERITY && !kill_it) goto out; + /* + * Enable instrumentation around the external facilities like + * task_work_add() (via queue_task_work()), fixup_exception() etc. + * For now, that is. Fixing this properly would need a lot more involved + * reorganization. + */ + instrumentation_begin(); + /* Fault was in user mode and we need to take some action */ if ((m.cs & 3) == 3) { /* If this triggers there is no way to recover. Die hard. */ @@ -1468,6 +1493,9 @@ noinstr void do_machine_check(struct pt_regs *regs) if (m.kflags & MCE_IN_KERNEL_COPYIN) queue_task_work(&m, msg, kill_it); } + + instrumentation_end(); + out: mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); } diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 3a44346f2276601a06d9aa5a5aae4b78c48b2418..e7808309d471073d9e3b29dbec5291a01376e451 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -347,7 +347,7 @@ static ssize_t flags_write(struct file *filp, const char __user *ubuf, char buf[MAX_FLAG_OPT_SIZE], *__buf; int err; - if (cnt > MAX_FLAG_OPT_SIZE) + if (!cnt || cnt > MAX_FLAG_OPT_SIZE) return -EINVAL; if (copy_from_user(&buf, ubuf, cnt)) diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index abe9fe0fb8517ce9b1af73740684708a83f07829..886d4648c9dd429ac6dcd06ae57f93b90035ab9e 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -486,6 +486,8 @@ static void intel_ppin_init(struct cpuinfo_x86 *c) case INTEL_FAM6_BROADWELL_X: case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_D: + case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_XEON_PHI_KNL: case INTEL_FAM6_XEON_PHI_KNM: @@ -526,12 +528,13 @@ bool intel_filter_mce(struct mce *m) { struct cpuinfo_x86 *c = &boot_cpu_data; - /* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */ + /* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */ if ((c->x86 == 6) && ((c->x86_model == INTEL_FAM6_HASWELL) || (c->x86_model == INTEL_FAM6_HASWELL_L) || (c->x86_model == INTEL_FAM6_BROADWELL) || - (c->x86_model == INTEL_FAM6_HASWELL_G)) && + (c->x86_model == INTEL_FAM6_HASWELL_G) || + (c->x86_model == INTEL_FAM6_SKYLAKE_X)) && (m->bank == 0) && ((m->status & 0xa0000000ffffffff) == 0x80000000000f0005)) return true; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index e8b5f1cf1ae8ceb13f263793c2489589e9627e5d..4ccb9039f59509a23a12d938bf9760fbba11121e 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -590,6 +590,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r) } if (r->mon_capable && domain_setup_mon_state(r, d)) { + kfree(d->ctrl_val); + kfree(d->mbps_val); kfree(d); return; } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 629c4994f1654cd059daa7116b511a3578e22b66..7f57110f958e1cc6e853a697b619117528d7e05b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -995,8 +995,10 @@ early_param("memmap", parse_memmap_opt); */ void __init e820__reserve_setup_data(void) { + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; + u32 len; pa_data = boot_params.hdr.setup_data; if (!pa_data) @@ -1004,6 +1006,14 @@ void __init e820__reserve_setup_data(void) while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); + if (!data) { + pr_warn("e820: failed to memremap setup_data entry\n"); + return; + } + + len = sizeof(*data); + pa_next = data->next; + e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); /* @@ -1015,18 +1025,27 @@ void __init e820__reserve_setup_data(void) sizeof(*data) + data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - e820__range_update(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - e820__range_update_kexec(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + if (data->type == SETUP_INDIRECT) { + len += data->len; + early_memunmap(data, sizeof(*data)); + data = early_memremap(pa_data, len); + if (!data) { + pr_warn("e820: failed to memremap indirect setup_data\n"); + return; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + e820__range_update(indirect->addr, indirect->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + e820__range_update_kexec(indirect->addr, indirect->len, + E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); + } } - pa_data = data->next; - early_memunmap(data, sizeof(*data)); + pa_data = pa_next; + early_memunmap(data, len); } e820__update_table(e820_table); diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a4b5af03dcc1bc1ddde2d4468d8e5bf538dc2294..8e27cbefaa4bf1e33420601adebf9815cc701584 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -515,6 +515,7 @@ static const struct intel_early_ops gen11_early_ops __initconst = { .stolen_size = gen9_stolen_size, }; +/* Intel integrated GPUs for which we need to reserve "stolen memory" */ static const struct pci_device_id intel_early_ids[] __initconst = { INTEL_I830_IDS(&i830_early_ops), INTEL_I845G_IDS(&i845_early_ops), @@ -588,6 +589,13 @@ static void __init intel_graphics_quirks(int num, int slot, int func) u16 device; int i; + /* + * Reserve "stolen memory" for an integrated GPU. If we've already + * found one, there's nothing to do for other (discrete) GPUs. + */ + if (resource_size(&intel_graphics_stolen_res)) + return; + device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID); for (i = 0; i < ARRAY_SIZE(intel_early_ids); i++) { @@ -700,7 +708,7 @@ static struct chipset early_qrk[] __initdata = { { PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST, PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check }, { PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID, - QFLAG_APPLY_ONCE, intel_graphics_quirks }, + 0, intel_graphics_quirks }, /* * HPET on the current version of the Baytrail platform has accuracy * problems: it will halt in deep idle state - so we disable it. @@ -711,12 +719,6 @@ static struct chipset early_qrk[] __initdata = { */ { PCI_VENDOR_ID_INTEL, 0x0f00, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, - { PCI_VENDOR_ID_INTEL, 0x3e20, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, - { PCI_VENDOR_ID_INTEL, 0x3ec4, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, - { PCI_VENDOR_ID_INTEL, 0x8a12, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, {} diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 7a50f0b62a709091dfd17619252efbdf4eb1ad7f..4ab7a9757e521ee703fdcbb7e1b13402372b020a 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -9,6 +9,7 @@ #include #include +#include #undef pr_fmt #define pr_fmt(fmt) "hpet: " fmt @@ -806,6 +807,83 @@ static bool __init hpet_counting(void) return false; } +static bool __init mwait_pc10_supported(void) +{ + unsigned int eax, ebx, ecx, mwait_substates; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) + return false; + + if (!cpu_feature_enabled(X86_FEATURE_MWAIT)) + return false; + + if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF) + return false; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &mwait_substates); + + return (ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) && + (ecx & CPUID5_ECX_INTERRUPT_BREAK) && + (mwait_substates & (0xF << 28)); +} + +/* + * Check whether the system supports PC10. If so force disable HPET as that + * stops counting in PC10. This check is overbroad as it does not take any + * of the following into account: + * + * - ACPI tables + * - Enablement of intel_idle + * - Command line arguments which limit intel_idle C-state support + * + * That's perfectly fine. HPET is a piece of hardware designed by committee + * and the only reasons why it is still in use on modern systems is the + * fact that it is impossible to reliably query TSC and CPU frequency via + * CPUID or firmware. + * + * If HPET is functional it is useful for calibrating TSC, but this can be + * done via PMTIMER as well which seems to be the last remaining timer on + * X86/INTEL platforms that has not been completely wreckaged by feature + * creep. + * + * In theory HPET support should be removed altogether, but there are older + * systems out there which depend on it because TSC and APIC timer are + * dysfunctional in deeper C-states. + * + * It's only 20 years now that hardware people have been asked to provide + * reliable and discoverable facilities which can be used for timekeeping + * and per CPU timer interrupts. + * + * The probability that this problem is going to be solved in the + * forseeable future is close to zero, so the kernel has to be cluttered + * with heuristics to keep up with the ever growing amount of hardware and + * firmware trainwrecks. Hopefully some day hardware people will understand + * that the approach of "This can be fixed in software" is not sustainable. + * Hope dies last... + */ +static bool __init hpet_is_pc10_damaged(void) +{ + unsigned long long pcfg; + + /* Check whether PC10 substates are supported */ + if (!mwait_pc10_supported()) + return false; + + /* Check whether PC10 is enabled in PKG C-state limit */ + rdmsrl(MSR_PKG_CST_CONFIG_CONTROL, pcfg); + if ((pcfg & 0xF) < 8) + return false; + + if (hpet_force_user) { + pr_warn("HPET force enabled via command line, but dysfunctional in PC10.\n"); + return false; + } + + pr_info("HPET dysfunctional in PC10. Force disabled.\n"); + boot_hpet_disable = true; + return true; +} + /** * hpet_enable - Try to setup the HPET timer. Returns 1 on success. */ @@ -819,6 +897,9 @@ int __init hpet_enable(void) if (!is_hpet_capable()) return 0; + if (hpet_is_pc10_damaged()) + return 0; + hpet_set_mapping(); if (!hpet_virt_address) return 0; diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c5dd50369e2f3394bc483b8744ace94eaaea552b..ce904c89c6c7068df8fb20bcf29103a5b5ea3c77 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -290,8 +290,10 @@ void kvm_set_posted_intr_wakeup_handler(void (*handler)(void)) { if (handler) kvm_posted_intr_wakeup_handler = handler; - else + else { kvm_posted_intr_wakeup_handler = dummy_handler; + synchronize_rcu(); + } } EXPORT_SYMBOL_GPL(kvm_set_posted_intr_wakeup_handler); diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index 64b6da95af984868962777bb4978b3fa8a4eff16..e2e89bebcbc32840357788cbd803805e5f652c62 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -88,11 +88,13 @@ create_setup_data_node(struct dentry *parent, int no, static int __init create_setup_data_nodes(struct dentry *parent) { + struct setup_indirect *indirect; struct setup_data_node *node; struct setup_data *data; - int error; + u64 pa_data, pa_next; struct dentry *d; - u64 pa_data; + int error; + u32 len; int no = 0; d = debugfs_create_dir("setup_data", parent); @@ -112,12 +114,29 @@ static int __init create_setup_data_nodes(struct dentry *parent) error = -ENOMEM; goto err_dir; } - - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - node->paddr = ((struct setup_indirect *)data->data)->addr; - node->type = ((struct setup_indirect *)data->data)->type; - node->len = ((struct setup_indirect *)data->data)->len; + pa_next = data->next; + + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(pa_data, len, MEMREMAP_WB); + if (!data) { + kfree(node); + error = -ENOMEM; + goto err_dir; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + node->paddr = indirect->addr; + node->type = indirect->type; + node->len = indirect->len; + } else { + node->paddr = pa_data; + node->type = data->type; + node->len = data->len; + } } else { node->paddr = pa_data; node->type = data->type; @@ -125,7 +144,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) } create_setup_data_node(d, no, node); - pa_data = data->next; + pa_data = pa_next; memunmap(data); no++; diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index d0a19121c6a4f1f4bb1f62e05f12d8350d4710a1..257892fcefa794803d8eaf2d3d1810ebb278957b 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -91,26 +91,41 @@ static int get_setup_data_paddr(int nr, u64 *paddr) static int __init get_setup_data_size(int nr, size_t *size) { - int i = 0; + u64 pa_data = boot_params.hdr.setup_data, pa_next; + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data = boot_params.hdr.setup_data; + int i = 0; + u32 len; while (pa_data) { data = memremap(pa_data, sizeof(*data), MEMREMAP_WB); if (!data) return -ENOMEM; + pa_next = data->next; + if (nr == i) { - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) - *size = ((struct setup_indirect *)data->data)->len; - else + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(pa_data, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) + *size = indirect->len; + else + *size = data->len; + } else { *size = data->len; + } memunmap(data); return 0; } - pa_data = data->next; + pa_data = pa_next; memunmap(data); i++; } @@ -120,9 +135,11 @@ static int __init get_setup_data_size(int nr, size_t *size) static ssize_t type_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { + struct setup_indirect *indirect; + struct setup_data *data; int nr, ret; u64 paddr; - struct setup_data *data; + u32 len; ret = kobj_to_setup_data_nr(kobj, &nr); if (ret) @@ -135,10 +152,20 @@ static ssize_t type_show(struct kobject *kobj, if (!data) return -ENOMEM; - if (data->type == SETUP_INDIRECT) - ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type); - else + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(paddr, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + ret = sprintf(buf, "0x%x\n", indirect->type); + } else { ret = sprintf(buf, "0x%x\n", data->type); + } + memunmap(data); return ret; } @@ -149,9 +176,10 @@ static ssize_t setup_data_data_read(struct file *fp, char *buf, loff_t off, size_t count) { + struct setup_indirect *indirect; + struct setup_data *data; int nr, ret = 0; u64 paddr, len; - struct setup_data *data; void *p; ret = kobj_to_setup_data_nr(kobj, &nr); @@ -165,10 +193,27 @@ static ssize_t setup_data_data_read(struct file *fp, if (!data) return -ENOMEM; - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - paddr = ((struct setup_indirect *)data->data)->addr; - len = ((struct setup_indirect *)data->data)->len; + if (data->type == SETUP_INDIRECT) { + len = sizeof(*data) + data->len; + memunmap(data); + data = memremap(paddr, len, MEMREMAP_WB); + if (!data) + return -ENOMEM; + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } else { + /* + * Even though this is technically undefined, return + * the data as though it is a normal setup_data struct. + * This will at least allow it to be inspected. + */ + paddr += sizeof(*data); + len = data->len; + } } else { paddr += sizeof(*data); len = data->len; diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index c4ac26333bc418c9d0b9928b2644cb1fb710a9a6..bb657e2e6c6875c9c851d8176624972d4c2d6c84 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -50,18 +50,9 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); static struct pvclock_vsyscall_time_info hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE); static struct pvclock_wall_clock wall_clock __bss_decrypted; -static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); static struct pvclock_vsyscall_time_info *hvclock_mem; - -static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void) -{ - return &this_cpu_read(hv_clock_per_cpu)->pvti; -} - -static inline struct pvclock_vsyscall_time_info *this_cpu_hvclock(void) -{ - return this_cpu_read(hv_clock_per_cpu); -} +DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu); +EXPORT_PER_CPU_SYMBOL_GPL(hv_clock_per_cpu); /* * The wallclock is the time of day when we booted. Since then, some time may diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index c98c220d39a964c0ae8f9451d6cb33f2ebd3d0cd..2fe7dcec1819c13705f19c96f9ddd2420464c1e7 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -67,6 +67,7 @@ static unsigned long int get_module_load_offset(void) void *module_alloc(unsigned long size) { + gfp_t gfp_mask = GFP_KERNEL; void *p; if (PAGE_ALIGN(size) > MODULES_LEN) @@ -74,10 +75,10 @@ void *module_alloc(unsigned long size) p = __vmalloc_node_range(size, MODULE_ALIGN, MODULES_VADDR + get_module_load_offset(), - MODULES_END, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, + MODULES_END, gfp_mask, + PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, __builtin_return_address(0)); - if (p && (kasan_module_alloc(p, size) < 0)) { + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 6c3407ba6ee9851e5f0913a10d5a53926f3afdcb..5e5fcf5c376de76bd29a45bd5c452909351bfde2 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -312,7 +312,6 @@ struct paravirt_patch_template pv_ops = { .cpu.usergs_sysret64 = native_usergs_sysret64, .cpu.iret = native_iret, - .cpu.swapgs = native_swapgs, #ifdef CONFIG_X86_IOPL_IOPERM .cpu.invalidate_io_bitmap = native_tss_invalidate_io_bitmap, diff --git a/arch/x86/kernel/paravirt_patch.c b/arch/x86/kernel/paravirt_patch.c index ace6e334cb39384dc8f76384d0b10724fd30e924..7c518b08aa3c5c0fcd49fe89f1259e65c18cfd71 100644 --- a/arch/x86/kernel/paravirt_patch.c +++ b/arch/x86/kernel/paravirt_patch.c @@ -28,7 +28,6 @@ struct patch_xxl { const unsigned char irq_restore_fl[2]; const unsigned char cpu_wbinvd[2]; const unsigned char cpu_usergs_sysret64[6]; - const unsigned char cpu_swapgs[3]; const unsigned char mov64[3]; }; @@ -43,7 +42,6 @@ static const struct patch_xxl patch_data_xxl = { .cpu_wbinvd = { 0x0f, 0x09 }, // wbinvd .cpu_usergs_sysret64 = { 0x0f, 0x01, 0xf8, 0x48, 0x0f, 0x07 }, // swapgs; sysretq - .cpu_swapgs = { 0x0f, 0x01, 0xf8 }, // swapgs .mov64 = { 0x48, 0x89, 0xf8 }, // mov %rdi, %rax }; @@ -86,7 +84,6 @@ unsigned int native_patch(u8 type, void *insn_buff, unsigned long addr, PATCH_CASE(mmu, write_cr3, xxl, insn_buff, len); PATCH_CASE(cpu, usergs_sysret64, xxl, insn_buff, len); - PATCH_CASE(cpu, swapgs, xxl, insn_buff, len); PATCH_CASE(cpu, wbinvd, xxl, insn_buff, len); #endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 145a7ac0c19aa1f9a6e72d47d65a257029ec434a..0aa1baf9a3afc144eb0385a19bdb677bbb296a11 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -138,6 +138,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg, frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; p->thread.io_bitmap = NULL; + p->thread.iopl_warn = 0; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 4f2f54e1281c3f1d35d4166c8835f71bf7954f99..98bf8fd189025d02168436669b28f11cfad81b73 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -159,14 +159,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, *next = &next_p->thread; - struct fpu *prev_fpu = &prev->fpu; - struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_fpu, cpu); + switch_fpu_prepare(prev_p, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -213,7 +211,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); - switch_fpu_finish(next_fpu); + switch_fpu_finish(next_p); /* Load the Intel cache allocation PQR MSR. */ resctrl_sched_in(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index df342bedea88afc1e8571d6e20024bf477569c82..ad3f82a18de9df1e151e41ea3c5f184a493b9cac 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -535,15 +535,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; - struct fpu *prev_fpu = &prev->fpu; - struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) - switch_fpu_prepare(prev_fpu, cpu); + switch_fpu_prepare(prev_p, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -595,7 +593,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); - switch_fpu_finish(next_fpu); + switch_fpu_finish(next_p); /* Reload sp0. */ update_task_stack(next_p); diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 798a6f73f89460559fd3b81e249dd5b306a18cea..df3514835b3563522fdf93a2e9e36aabb2234b69 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -113,17 +113,9 @@ void __noreturn machine_real_restart(unsigned int type) spin_unlock(&rtc_lock); /* - * Switch back to the initial page table. + * Switch to the trampoline page table. */ -#ifdef CONFIG_X86_32 - load_cr3(initial_page_table); -#else - write_cr3(real_mode_header->trampoline_pgd); - - /* Exiting long mode will fail if CR4.PCIDE is set. */ - if (boot_cpu_has(X86_FEATURE_PCID)) - cr4_clear_bits(X86_CR4_PCIDE); -#endif + load_trampoline_pgtable(); /* Jump to the identity-mapped low memory code */ #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9497bebb9b2ef0f4b6c71496c9916d3c3dc7bdd6..9257a8f0f9306c60c1196398fe08ff432c5652f2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -371,21 +371,41 @@ static void __init parse_setup_data(void) static void __init memblock_x86_reserve_range_setup_data(void) { + struct setup_indirect *indirect; struct setup_data *data; - u64 pa_data; + u64 pa_data, pa_next; + u32 len; pa_data = boot_params.hdr.setup_data; while (pa_data) { data = early_memremap(pa_data, sizeof(*data)); + if (!data) { + pr_warn("setup: failed to memremap setup_data entry\n"); + return; + } + + len = sizeof(*data); + pa_next = data->next; + memblock_reserve(pa_data, sizeof(*data) + data->len); - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) - memblock_reserve(((struct setup_indirect *)data->data)->addr, - ((struct setup_indirect *)data->data)->len); + if (data->type == SETUP_INDIRECT) { + len += data->len; + early_memunmap(data, sizeof(*data)); + data = early_memremap(pa_data, len); + if (!data) { + pr_warn("setup: failed to memremap indirect setup_data\n"); + return; + } - pa_data = data->next; - early_memunmap(data, sizeof(*data)); + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) + memblock_reserve(indirect->addr, indirect->len); + } + + pa_data = pa_next; + early_memunmap(data, len); } } diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c index ecb20b17b7df611984d7254210512234cbfe44d2..82db4014deb212abd361ec6f663fd8a50f731130 100644 --- a/arch/x86/kernel/sev-es-shared.c +++ b/arch/x86/kernel/sev-es-shared.c @@ -130,6 +130,8 @@ static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, } else { ret = ES_VMM_ERROR; } + } else if (ghcb->save.sw_exit_info_1 & 0xffffffff) { + ret = ES_VMM_ERROR; } else { ret = ES_OK; } diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index f3202b2e3c1579f2371361dd3065e7afc6cc8592..c222fab112cbdab56cc18a5df75d12985e807294 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -46,16 +46,6 @@ static struct ghcb __initdata *boot_ghcb; struct sev_es_runtime_data { struct ghcb ghcb_page; - /* Physical storage for the per-CPU IST stack of the #VC handler */ - char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE); - - /* - * Physical storage for the per-CPU fall-back stack of the #VC handler. - * The fall-back stack is used when it is not safe to switch back to the - * interrupted stack in the #VC entry code. - */ - char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE); - /* * Reserve one page per CPU as backup storage for the unencrypted GHCB. * It is needed when an NMI happens while the #VC handler uses the real @@ -99,27 +89,6 @@ DEFINE_STATIC_KEY_FALSE(sev_es_enable_key); /* Needed in vc_early_forward_exception */ void do_early_exception(struct pt_regs *regs, int trapnr); -static void __init setup_vc_stacks(int cpu) -{ - struct sev_es_runtime_data *data; - struct cpu_entry_area *cea; - unsigned long vaddr; - phys_addr_t pa; - - data = per_cpu(runtime_data, cpu); - cea = get_cpu_entry_area(cpu); - - /* Map #VC IST stack */ - vaddr = CEA_ESTACK_BOT(&cea->estacks, VC); - pa = __pa(data->ist_stack); - cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); - - /* Map VC fall-back stack */ - vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2); - pa = __pa(data->fallback_stack); - cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); -} - static __always_inline bool on_vc_stack(struct pt_regs *regs) { unsigned long sp = regs->sp; @@ -291,11 +260,6 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, char *dst, char *buf, size_t size) { unsigned long error_code = X86_PF_PROT | X86_PF_WRITE; - char __user *target = (char __user *)dst; - u64 d8; - u32 d4; - u16 d2; - u8 d1; /* * This function uses __put_user() independent of whether kernel or user @@ -317,26 +281,42 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt, * instructions here would cause infinite nesting. */ switch (size) { - case 1: + case 1: { + u8 d1; + u8 __user *target = (u8 __user *)dst; + memcpy(&d1, buf, 1); if (__put_user(d1, target)) goto fault; break; - case 2: + } + case 2: { + u16 d2; + u16 __user *target = (u16 __user *)dst; + memcpy(&d2, buf, 2); if (__put_user(d2, target)) goto fault; break; - case 4: + } + case 4: { + u32 d4; + u32 __user *target = (u32 __user *)dst; + memcpy(&d4, buf, 4); if (__put_user(d4, target)) goto fault; break; - case 8: + } + case 8: { + u64 d8; + u64 __user *target = (u64 __user *)dst; + memcpy(&d8, buf, 8); if (__put_user(d8, target)) goto fault; break; + } default: WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); return ES_UNSUPPORTED; @@ -359,11 +339,6 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, char *src, char *buf, size_t size) { unsigned long error_code = X86_PF_PROT; - char __user *s = (char __user *)src; - u64 d8; - u32 d4; - u16 d2; - u8 d1; /* * This function uses __get_user() independent of whether kernel or user @@ -385,26 +360,41 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt, * instructions here would cause infinite nesting. */ switch (size) { - case 1: + case 1: { + u8 d1; + u8 __user *s = (u8 __user *)src; + if (__get_user(d1, s)) goto fault; memcpy(buf, &d1, 1); break; - case 2: + } + case 2: { + u16 d2; + u16 __user *s = (u16 __user *)src; + if (__get_user(d2, s)) goto fault; memcpy(buf, &d2, 2); break; - case 4: + } + case 4: { + u32 d4; + u32 __user *s = (u32 __user *)src; + if (__get_user(d4, s)) goto fault; memcpy(buf, &d4, 4); break; - case 8: + } + case 8: { + u64 d8; + u64 __user *s = (u64 __user *)src; if (__get_user(d8, s)) goto fault; memcpy(buf, &d8, 8); break; + } default: WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size); return ES_UNSUPPORTED; @@ -753,7 +743,6 @@ void __init sev_es_init_vc_handling(void) for_each_possible_cpu(cpu) { alloc_runtime_data(cpu); init_ghcb(cpu); - setup_vc_stacks(cpu); } sev_es_setup_play_dead(); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 582387fc939f409ffa78c48a1f2c7b73fdf9986e..8baff500914ea12f0e0f0310171a9d2f257edb83 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -230,7 +230,6 @@ static void notrace start_secondary(void *unused) cpu_init_exception_handling(); cpu_init(); x86_cpuinit.early_percpu_clock_init(); - preempt_disable(); smp_callin(); enable_start_cpu0 = 0; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7692bf7908e6c57765871d884b49d5ba4d840502..2a39a2df6f43e5e6294469eb9ff76ed71d8246d5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -523,6 +523,37 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs, #define GPFSTR "general protection fault" +static bool fixup_iopl_exception(struct pt_regs *regs) +{ + struct thread_struct *t = ¤t->thread; + unsigned char byte; + unsigned long ip; + + if (!IS_ENABLED(CONFIG_X86_IOPL_IOPERM) || t->iopl_emul != 3) + return false; + + ip = insn_get_effective_ip(regs); + if (!ip) + return false; + + if (get_user(byte, (const char __user *)ip)) + return false; + + if (byte != 0xfa && byte != 0xfb) + return false; + + if (!t->iopl_warn && printk_ratelimit()) { + pr_err("%s[%d] attempts to use CLI/STI, pretending it's a NOP, ip:%lx", + current->comm, task_pid_nr(current), ip); + print_vma_addr(KERN_CONT " in ", ip); + pr_cont("\n"); + t->iopl_warn = 1; + } + + regs->ip += 1; + return true; +} + DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; @@ -548,6 +579,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) tsk = current; if (user_mode(regs)) { + if (fixup_iopl_exception(regs)) + goto exit; + tsk->thread.error_code = error_code; tsk->thread.trap_nr = X86_TRAP_GP; @@ -617,6 +651,7 @@ static bool do_int3(struct pt_regs *regs) return res == NOTIFY_STOP; } +NOKPROBE_SYMBOL(do_int3); static void do_int3_user(struct pt_regs *regs) { @@ -701,7 +736,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r stack = (unsigned long *)sp; if (!get_stack_info_noinstr(stack, current, &info) || info.type == STACK_TYPE_ENTRY || - info.type >= STACK_TYPE_EXCEPTION_LAST) + info.type > STACK_TYPE_EXCEPTION_LAST) sp = __this_cpu_ist_top_va(VC2); sync: diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 56289170753c580c59f96fe1030b0c3a68ebbc8c..13d1a0ac8916a86b2931fe721fd533b294929362 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1127,6 +1127,7 @@ static int tsc_cs_enable(struct clocksource *cs) static struct clocksource clocksource_tsc_early = { .name = "tsc-early", .rating = 299, + .uncertainty_margin = 32 * NSEC_PER_MSEC, .read = read_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | @@ -1178,6 +1179,12 @@ void mark_tsc_unstable(char *reason) EXPORT_SYMBOL_GPL(mark_tsc_unstable); +static void __init tsc_disable_clocksource_watchdog(void) +{ + clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; +} + static void __init check_system_tsc_reliable(void) { #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) @@ -1194,6 +1201,23 @@ static void __init check_system_tsc_reliable(void) #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; + + /* + * Disable the clocksource watchdog when the system has: + * - TSC running at constant frequency + * - TSC which does not stop in C-States + * - the TSC_ADJUST register which allows to detect even minimal + * modifications + * - not more than two sockets. As the number of sockets cannot be + * evaluated at the early boot stage where this has to be + * invoked, check the number of online memory nodes as a + * fallback solution which is an reasonable estimate. + */ + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && + boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && + boot_cpu_has(X86_FEATURE_TSC_ADJUST) && + nr_online_nodes <= 2) + tsc_disable_clocksource_watchdog(); } /* @@ -1385,9 +1409,6 @@ static int __init init_tsc_clocksource(void) if (tsc_unstable) goto unreg; - if (tsc_clocksource_reliable || no_tsc_watchdog) - clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; - if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; @@ -1525,7 +1546,7 @@ void __init tsc_init(void) } if (tsc_clocksource_reliable || no_tsc_watchdog) - clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY; + tsc_disable_clocksource_watchdog(); clocksource_register_khz(&clocksource_tsc_early, tsc_khz); detect_art(); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 3d3c761eb74a64a535f0d0c516d275e48a0a36b8..923660057f363f6844a365e8ebbf13b46b56e72a 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -30,6 +30,7 @@ struct tsc_adjust { }; static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust); +static struct timer_list tsc_sync_check_timer; /* * TSC's on different sockets may be reset asynchronously. @@ -77,6 +78,46 @@ void tsc_verify_tsc_adjust(bool resume) } } +/* + * Normally the tsc_sync will be checked every time system enters idle + * state, but there is still caveat that a system won't enter idle, + * either because it's too busy or configured purposely to not enter + * idle. + * + * So setup a periodic timer (every 10 minutes) to make sure the check + * is always on. + */ + +#define SYNC_CHECK_INTERVAL (HZ * 600) + +static void tsc_sync_check_timer_fn(struct timer_list *unused) +{ + int next_cpu; + + tsc_verify_tsc_adjust(false); + + /* Run the check for all onlined CPUs in turn */ + next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask); + if (next_cpu >= nr_cpu_ids) + next_cpu = cpumask_first(cpu_online_mask); + + tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL; + add_timer_on(&tsc_sync_check_timer, next_cpu); +} + +static int __init start_sync_check_timer(void) +{ + if (!cpu_feature_enabled(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable) + return 0; + + timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0); + tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL; + add_timer(&tsc_sync_check_timer); + + return 0; +} +late_initcall(start_sync_check_timer); + static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval, unsigned int cpu, bool bootcpu) { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index bb39f493447cf8c1aa96bd2185e81b0b2f7aab72..328f37e4fd3a723a48b198d56141aa2d99be1dbc 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1641,11 +1641,13 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa, all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL; + if (all_cpus) + goto check_and_send_ipi; + if (!sparse_banks_len) goto ret_success; - if (!all_cpus && - kvm_read_guest(kvm, + if (kvm_read_guest(kvm, ingpa + offsetof(struct hv_send_ipi_ex, vp_set.bank_contents), sparse_banks, @@ -1653,6 +1655,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *current_vcpu, u64 ingpa, u64 outgpa, return HV_STATUS_INVALID_HYPERCALL_INPUT; } +check_and_send_ipi: if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return HV_STATUS_INVALID_HYPERCALL_INPUT; diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index ff005fe738a4c1dae5e2b6b4648e1d5f34cfd24c..4e0f52660842b906340816677624d42cf59e50cb 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -96,7 +96,7 @@ static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic, static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic) { ioapic->rtc_status.pending_eoi = 0; - bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID + 1); + bitmap_zero(ioapic->rtc_status.dest_map.map, KVM_MAX_VCPU_ID); } static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic); @@ -319,8 +319,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) unsigned index; bool mask_before, mask_after; union kvm_ioapic_redirect_entry *e; - unsigned long vcpu_bitmap; int old_remote_irr, old_delivery_status, old_dest_id, old_dest_mode; + DECLARE_BITMAP(vcpu_bitmap, KVM_MAX_VCPUS); switch (ioapic->ioregsel) { case IOAPIC_REG_VERSION: @@ -384,9 +384,9 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) irq.shorthand = APIC_DEST_NOSHORT; irq.dest_id = e->fields.dest_id; irq.msi_redir_hint = false; - bitmap_zero(&vcpu_bitmap, 16); + bitmap_zero(vcpu_bitmap, KVM_MAX_VCPUS); kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, - &vcpu_bitmap); + vcpu_bitmap); if (old_dest_mode != e->fields.dest_mode || old_dest_id != e->fields.dest_id) { /* @@ -399,10 +399,10 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) kvm_lapic_irq_dest_mode( !!e->fields.dest_mode); kvm_bitmap_or_dest_vcpus(ioapic->kvm, &irq, - &vcpu_bitmap); + vcpu_bitmap); } kvm_make_scan_ioapic_request_mask(ioapic->kvm, - &vcpu_bitmap); + vcpu_bitmap); } else { kvm_make_scan_ioapic_request(ioapic->kvm); } diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 11e4065e16176b6fe7cafebc0b78f61e98d1238b..660401700075dbc29012a9f7e3dd29388440e850 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -43,13 +43,13 @@ struct kvm_vcpu; struct dest_map { /* vcpu bitmap where IRQ has been sent */ - DECLARE_BITMAP(map, KVM_MAX_VCPU_ID + 1); + DECLARE_BITMAP(map, KVM_MAX_VCPU_ID); /* * Vector sent to a given vcpu, only valid when * the vcpu's bit in map is set */ - u8 vectors[KVM_MAX_VCPU_ID + 1]; + u8 vectors[KVM_MAX_VCPU_ID]; }; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 060d9a906535ca47ad399d0bccad19cb475e4dc0..20d29ae8ed702f21d56e0c4fa43fc18b8a377a9f 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -3545,7 +3545,7 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) * reserved bit and EPT's invalid memtype/XWR checks to avoid * adding a Jcc in the loop. */ - reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) | + reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) || __is_rsvd_bits_set(rsvd_check, sptes[level - 1], level); } @@ -3631,12 +3631,23 @@ static void shadow_page_table_clear_flood(struct kvm_vcpu *vcpu, gva_t addr) walk_shadow_page_lockless_end(vcpu); } +static u32 alloc_apf_token(struct kvm_vcpu *vcpu) +{ + /* make sure the token value is not 0 */ + u32 id = vcpu->arch.apf.id; + + if (id << 12 == 0) + vcpu->arch.apf.id = 1; + + return (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; +} + static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, gfn_t gfn) { struct kvm_arch_async_pf arch; - arch.token = (vcpu->arch.apf.id++ << 12) | vcpu->vcpu_id; + arch.token = alloc_apf_token(vcpu); arch.gfn = gfn; arch.direct_map = vcpu->arch.mmu->direct_map; arch.cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); @@ -5152,7 +5163,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) { - kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE); + kvm_mmu_invalidate_gva(vcpu, vcpu->arch.walk_mmu, gva, INVALID_PAGE); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index 8443a675715b0f6f4fc5ad45c768a718dc9ca5d6..81cf4babbd0b475cddc204267b9ea87503b79e3f 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -163,13 +163,13 @@ void kvm_page_track_cleanup(struct kvm *kvm) cleanup_srcu_struct(&head->track_srcu); } -void kvm_page_track_init(struct kvm *kvm) +int kvm_page_track_init(struct kvm *kvm) { struct kvm_page_track_notifier_head *head; head = &kvm->arch.track_notifier_head; - init_srcu_struct(&head->track_srcu); INIT_HLIST_HEAD(&head->track_notifier_list); + return init_srcu_struct(&head->track_srcu); } /* diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index f2ddf663e72e92a0f871522dc97e776ac283fc29..7e08efb06839379328323af53ad7b887dbbbc284 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1130,12 +1130,12 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root, bool spte_set = false; tdp_root_for_each_leaf_pte(iter, root, gfn, gfn + 1) { - if (!is_writable_pte(iter.old_spte)) - break; - new_spte = iter.old_spte & ~(PT_WRITABLE_MASK | SPTE_MMU_WRITEABLE); + if (new_spte == iter.old_spte) + break; + tdp_mmu_set_spte(kvm, &iter, new_spte); spte_set = true; } diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 67741d2a030851b9424406921755cdad8d30cfc7..2f83b5d948b33c4129dfdc011ba8c9dd7be5f992 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -95,7 +95,7 @@ static void kvm_perf_overflow_intr(struct perf_event *perf_event, } static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, - unsigned config, bool exclude_user, + u64 config, bool exclude_user, bool exclude_kernel, bool intr, bool in_tx, bool in_tx_cp) { @@ -170,8 +170,8 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) { - unsigned config, type = PERF_TYPE_RAW; - u8 event_select, unit_mask; + u64 config; + u32 type = PERF_TYPE_RAW; struct kvm *kvm = pmc->vcpu->kvm; struct kvm_pmu_event_filter *filter; int i; @@ -203,23 +203,18 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) if (!allow_event) return; - event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; - unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; - if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | ARCH_PERFMON_EVENTSEL_INV | ARCH_PERFMON_EVENTSEL_CMASK | HSW_IN_TX | HSW_IN_TX_CHECKPOINTED))) { - config = kvm_x86_ops.pmu_ops->find_arch_event(pmc_to_pmu(pmc), - event_select, - unit_mask); + config = kvm_x86_ops.pmu_ops->pmc_perf_hw_id(pmc); if (config != PERF_COUNT_HW_MAX) type = PERF_TYPE_HARDWARE; } if (type == PERF_TYPE_RAW) - config = eventsel & X86_RAW_EVENT_MASK; + config = eventsel & AMD64_RAW_EVENT_MASK; if (pmc->current_config == eventsel && pmc_resume_counter(pmc)) return; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 067fef51760c4702c8d0ec5c7ff1d51ae00b5d93..1a44e29e733309d2ba8b2c59f6644076d30e1d53 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -24,8 +24,7 @@ struct kvm_event_hw_type_mapping { }; struct kvm_pmu_ops { - unsigned (*find_arch_event)(struct kvm_pmu *pmu, u8 event_select, - u8 unit_mask); + unsigned int (*pmc_perf_hw_id)(struct kvm_pmc *pmc); unsigned (*find_fixed_event)(int idx); bool (*pmc_is_enabled)(struct kvm_pmc *pmc); struct kvm_pmc *(*pmc_idx_to_pmc)(struct kvm_pmu *pmu, int pmc_idx); diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index 8c550999ace0c26066824c59fe3fb8b616659ace..a8b5533cf601d686b21bd8e11dd874ae70f8c1a6 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -344,8 +344,6 @@ int avic_incomplete_ipi_interception(struct vcpu_svm *svm) break; } case AVIC_IPI_FAILURE_INVALID_TARGET: - WARN_ONCE(1, "Invalid IPI target: index=%u, vcpu=%d, icr=%#0x:%#0x\n", - index, svm->vcpu.vcpu_id, icrh, icrl); break; case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE: WARN_ONCE(1, "Invalid backing page\n"); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index df17146e841fba1a36ff039ccdd41fea1b6456d8..23910e6a3f011ccdf30b90954fbf253061cc65ec 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -447,7 +447,6 @@ static void nested_prepare_vmcb_control(struct vcpu_svm *svm) (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) | (svm->nested.hsave->control.int_ctl & int_ctl_vmcb01_bits); - svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext; svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; svm->vmcb->control.int_state = svm->nested.ctl.int_state; svm->vmcb->control.event_inj = svm->nested.ctl.event_inj; @@ -784,8 +783,10 @@ void svm_free_nested(struct vcpu_svm *svm) /* * Forcibly leave nested mode in order to be able to reset the VCPU later on. */ -void svm_leave_nested(struct vcpu_svm *svm) +void svm_leave_nested(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + if (is_guest_mode(&svm->vcpu)) { struct vmcb *hsave = svm->nested.hsave; struct vmcb *vmcb = svm->vmcb; @@ -1186,7 +1187,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, return -EINVAL; if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { - svm_leave_nested(svm); + svm_leave_nested(vcpu); svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); return 0; } @@ -1239,6 +1240,9 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, copy_vmcb_control_area(&hsave->control, &svm->vmcb->control); hsave->save = *save; + if (is_guest_mode(vcpu)) + svm_leave_nested(vcpu); + svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; load_nested_vmcb_control(svm, ctl); nested_prepare_vmcb_control(svm); @@ -1253,6 +1257,7 @@ out_free: } struct kvm_x86_nested_ops svm_nested_ops = { + .leave_nested = svm_leave_nested, .check_events = svm_check_nested_events, .get_nested_state_pages = svm_get_nested_state_pages, .get_state = svm_get_nested_state, diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 035da07500e8bf033ad1fd78675739961b9b8423..4e7093bcb64b62ef9c853dba9013db543b357e45 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -126,10 +126,10 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, return &pmu->gp_counters[msr_to_index(msr)]; } -static unsigned amd_find_arch_event(struct kvm_pmu *pmu, - u8 event_select, - u8 unit_mask) +static unsigned int amd_pmc_perf_hw_id(struct kvm_pmc *pmc) { + u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT; + u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; int i; for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++) @@ -274,7 +274,7 @@ static void amd_pmu_refresh(struct kvm_vcpu *vcpu) pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS; pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1; - pmu->reserved_bits = 0xffffffff00200000ull; + pmu->reserved_bits = 0xfffffff000280000ull; pmu->version = 1; /* not applicable to AMD; but clean them to prevent any fall out */ pmu->counter_bitmask[KVM_PMC_FIXED] = 0; @@ -312,7 +312,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) } struct kvm_pmu_ops amd_pmu_ops = { - .find_arch_event = amd_find_arch_event, + .pmc_perf_hw_id = amd_pmc_perf_hw_id, .find_fixed_event = amd_find_fixed_event, .pmc_is_enabled = amd_pmc_is_enabled, .pmc_idx_to_pmc = amd_pmc_idx_to_pmc, diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1c23aee3778c3fef317e7408a43744bdd3df314c..7773a765f548923888359b18aa92a96abc6cc35b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -279,7 +279,7 @@ int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) if ((old_efer & EFER_SVME) != (efer & EFER_SVME)) { if (!(efer & EFER_SVME)) { - svm_leave_nested(svm); + svm_leave_nested(vcpu); svm_set_gif(svm, true); /* @@ -1497,6 +1497,8 @@ static void svm_clear_vintr(struct vcpu_svm *svm) (svm->nested.ctl.int_ctl & V_TPR_MASK)); svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & V_IRQ_INJECTION_BITS_MASK; + + svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; } vmcb_mark_dirty(svm->vmcb, VMCB_INTR); @@ -4101,6 +4103,10 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i bool smep, smap, is_user; unsigned long cr4; + /* Emulation is always possible when KVM has access to all guest state. */ + if (!sev_guest(vcpu->kvm)) + return true; + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. * @@ -4144,23 +4150,27 @@ static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, void *insn, int i if (likely(!insn || insn_len)) return true; - /* - * If RIP is invalid, go ahead with emulation which will cause an - * internal error exit. - */ - if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT)) - return true; - cr4 = kvm_read_cr4(vcpu); smep = cr4 & X86_CR4_SMEP; smap = cr4 & X86_CR4_SMAP; is_user = svm_get_cpl(vcpu) == 3; if (smap && (!smep || is_user)) { - if (!sev_guest(vcpu->kvm)) - return true; - pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n"); - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + + /* + * If the fault occurred in userspace, arbitrarily inject #GP + * to avoid killing the guest and to hopefully avoid confusing + * the guest kernel too much, e.g. injecting #PF would not be + * coherent with respect to the guest's page tables. Request + * triple fault if the fault occurred in the kernel as there's + * no fault that KVM can inject without confusing the guest. + * In practice, the triple fault is moot as no sane SEV kernel + * will execute from user memory while also running with SMAP=1. + */ + if (is_user) + kvm_inject_gp(vcpu, 0); + else + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); } return false; diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index be74e22b82ea7332d80670f355c94cd8b1a7ccc5..2c007241fbf5397054c00aa9f17c5da79f4cf0fc 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -393,7 +393,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, struct vmcb *nested_vmcb); -void svm_leave_nested(struct vcpu_svm *svm); +void svm_leave_nested(struct kvm_vcpu *vcpu); void svm_free_nested(struct vcpu_svm *svm); int svm_allocate_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct vcpu_svm *svm); diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index f3199bb02f22dfd9b8338d7d3465842fb3f7718e..5b68034ec5f9c336d628ad9036b6589ed5970550 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -352,14 +352,21 @@ void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata) switch (msr_index) { case MSR_IA32_VMX_EXIT_CTLS: case MSR_IA32_VMX_TRUE_EXIT_CTLS: - ctl_high &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL; + ctl_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; break; case MSR_IA32_VMX_ENTRY_CTLS: case MSR_IA32_VMX_TRUE_ENTRY_CTLS: - ctl_high &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL; + ctl_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; break; case MSR_IA32_VMX_PROCBASED_CTLS2: - ctl_high &= ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + ctl_high &= ~EVMCS1_UNSUPPORTED_2NDEXEC; + break; + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: + case MSR_IA32_VMX_PINBASED_CTLS: + ctl_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; + break; + case MSR_IA32_VMX_VMFUNC: + ctl_low &= ~EVMCS1_UNSUPPORTED_VMFUNC; break; } diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/evmcs.h index bd41d9462355fd65909554b41b3ce49654ef3e4d..011929a63823084e21e2dc5575e1567bbc478ee7 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/evmcs.h @@ -59,7 +59,9 @@ DECLARE_STATIC_KEY_FALSE(enable_evmcs); SECONDARY_EXEC_SHADOW_VMCS | \ SECONDARY_EXEC_TSC_SCALING | \ SECONDARY_EXEC_PAUSE_LOOP_EXITING) -#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) +#define EVMCS1_UNSUPPORTED_VMEXIT_CTRL \ + (VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | \ + VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) #define EVMCS1_UNSUPPORTED_VMENTRY_CTRL (VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) #define EVMCS1_UNSUPPORTED_VMFUNC (VMX_VMFUNC_EPTP_SWITCHING) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index d5f24a2f3e9163511fea1892aeb2fbfb23abceac..0c2389d0fdafe23c9e90cf4e05e893e100739f8e 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2619,8 +2619,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, - vmcs12->guest_ia32_perf_global_ctrl))) + vmcs12->guest_ia32_perf_global_ctrl))) { + *entry_failure_code = ENTRY_FAIL_DEFAULT; return -EINVAL; + } kvm_rsp_write(vcpu, vmcs12->guest_rsp); kvm_rip_write(vcpu, vmcs12->guest_rip); @@ -2851,6 +2853,17 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu, return 0; } +static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12) +{ +#ifdef CONFIG_X86_64 + if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) != + !!(vcpu->arch.efer & EFER_LMA))) + return -EINVAL; +#endif + return 0; +} + static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -2875,18 +2888,16 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu, return -EINVAL; #ifdef CONFIG_X86_64 - ia32e = !!(vcpu->arch.efer & EFER_LMA); + ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE); #else ia32e = false; #endif if (ia32e) { - if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) || - CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) + if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE))) return -EINVAL; } else { - if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) || - CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || + if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) || CC(vmcs12->host_cr4 & X86_CR4_PCIDE) || CC((vmcs12->host_rip) >> 32)) return -EINVAL; @@ -3555,6 +3566,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (nested_vmx_check_controls(vcpu, vmcs12)) return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); + if (nested_vmx_check_address_space_size(vcpu, vmcs12)) + return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); + if (nested_vmx_check_host_state(vcpu, vmcs12)) return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); @@ -6614,6 +6628,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) } struct kvm_x86_nested_ops vmx_nested_ops = { + .leave_nested = vmx_leave_nested, .check_events = vmx_check_nested_events, .hv_timer_pending = nested_vmx_preemption_timer_pending, .get_state = vmx_get_nested_state, diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index cdf5f34518f43b4c2e46bf123a489cf6cf002461..bd70c1d7f3458594e8496ebd1330b666fb7c9d62 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -68,10 +68,11 @@ static void global_ctrl_changed(struct kvm_pmu *pmu, u64 data) reprogram_counter(pmu, bit); } -static unsigned intel_find_arch_event(struct kvm_pmu *pmu, - u8 event_select, - u8 unit_mask) +static unsigned int intel_pmc_perf_hw_id(struct kvm_pmc *pmc) { + struct kvm_pmu *pmu = pmc_to_pmu(pmc); + u8 event_select = pmc->eventsel & ARCH_PERFMON_EVENTSEL_EVENT; + u8 unit_mask = (pmc->eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; int i; for (i = 0; i < ARRAY_SIZE(intel_arch_events); i++) @@ -432,7 +433,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) } struct kvm_pmu_ops intel_pmu_ops = { - .find_arch_event = intel_find_arch_event, + .pmc_perf_hw_id = intel_pmc_perf_hw_id, .find_fixed_event = intel_find_fixed_event, .pmc_is_enabled = intel_pmc_is_enabled, .pmc_idx_to_pmc = intel_pmc_idx_to_pmc, diff --git a/arch/x86/kvm/vmx/posted_intr.c b/arch/x86/kvm/vmx/posted_intr.c index f02962dcc72cb34ac274030d1aaeafe11dcffca6..5f8acd2faa7c1ea4848b257a5564e54376fe25ec 100644 --- a/arch/x86/kvm/vmx/posted_intr.c +++ b/arch/x86/kvm/vmx/posted_intr.c @@ -5,6 +5,7 @@ #include #include "lapic.h" +#include "irq.h" #include "posted_intr.h" #include "trace.h" #include "vmx.h" @@ -14,7 +15,7 @@ * can find which vCPU should be waken up. */ static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); -static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); +static DEFINE_PER_CPU(raw_spinlock_t, blocked_vcpu_on_cpu_lock); static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) { @@ -77,13 +78,18 @@ after_clear_sn: pi_set_on(pi_desc); } +static bool vmx_can_use_vtd_pi(struct kvm *kvm) +{ + return irqchip_in_kernel(kvm) && enable_apicv && + kvm_arch_has_assigned_device(kvm) && + irq_remapping_cap(IRQ_POSTING_CAP); +} + void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + if (!vmx_can_use_vtd_pi(vcpu->kvm)) return; /* Set SN when the vCPU is preempted */ @@ -115,9 +121,9 @@ static void __pi_post_block(struct kvm_vcpu *vcpu) new.control) != old.control); if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) { - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); list_del(&vcpu->blocked_vcpu_list); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); vcpu->pre_pcpu = -1; } } @@ -141,20 +147,18 @@ int pi_pre_block(struct kvm_vcpu *vcpu) struct pi_desc old, new; struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); - if (!kvm_arch_has_assigned_device(vcpu->kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(vcpu)) + if (!vmx_can_use_vtd_pi(vcpu->kvm)) return 0; WARN_ON(irqs_disabled()); local_irq_disable(); if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) { vcpu->pre_pcpu = vcpu->cpu; - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); list_add_tail(&vcpu->blocked_vcpu_list, &per_cpu(blocked_vcpu_on_cpu, vcpu->pre_pcpu)); - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); + raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu)); } do { @@ -211,7 +215,7 @@ void pi_wakeup_handler(void) struct kvm_vcpu *vcpu; int cpu = smp_processor_id(); - spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + raw_spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), blocked_vcpu_list) { struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); @@ -219,13 +223,13 @@ void pi_wakeup_handler(void) if (pi_test_on(pi_desc) == 1) kvm_vcpu_kick(vcpu); } - spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + raw_spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); } void __init pi_init_cpu(int cpu) { INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); - spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + raw_spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); } bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu) @@ -256,9 +260,7 @@ int pi_update_irte(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, struct vcpu_data vcpu_info; int idx, ret = 0; - if (!kvm_arch_has_assigned_device(kvm) || - !irq_remapping_cap(IRQ_POSTING_CAP) || - !kvm_vcpu_apicv_active(kvm->vcpus[0])) + if (!vmx_can_use_vtd_pi(kvm)) return 0; idx = srcu_read_lock(&kvm->irq_srcu); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index fcd8bcb7e0ea9b74a9a9dbd55b73942dc89613fc..94f5f2129e3b4d0314f4b94955650a70e332d321 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -859,15 +859,15 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu) /* * Check if MSR is intercepted for currently loaded MSR bitmap. */ -static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) +static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr) { unsigned long *msr_bitmap; int f = sizeof(unsigned long); - if (!cpu_has_vmx_msr_bitmap()) + if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS)) return true; - msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap; + msr_bitmap = vmx->loaded_vmcs->msr_bitmap; if (msr <= 0x1fff) { return !!test_bit(msr, msr_bitmap + 0x800 / f); @@ -1867,10 +1867,11 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) &msr_info->data)) return 1; /* - * Enlightened VMCS v1 doesn't have certain fields, but buggy - * Hyper-V versions are still trying to use corresponding - * features when they are exposed. Filter out the essential - * minimum. + * Enlightened VMCS v1 doesn't have certain VMCS fields but + * instead of just ignoring the features, different Hyper-V + * versions are either trying to use them and fail or do some + * sanity checking and refuse to boot. Filter all unsupported + * features out. */ if (!msr_info->host_initiated && vmx->nested.enlightened_vmcs_enabled) @@ -2907,6 +2908,13 @@ static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) } } +static inline int vmx_get_current_vpid(struct kvm_vcpu *vcpu) +{ + if (is_guest_mode(vcpu)) + return nested_get_vpid02(vcpu); + return to_vmx(vcpu)->vpid; +} + static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.mmu; @@ -2919,31 +2927,29 @@ static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) if (enable_ept) ept_sync_context(construct_eptp(vcpu, root_hpa, mmu->shadow_root_level)); - else if (!is_guest_mode(vcpu)) - vpid_sync_context(to_vmx(vcpu)->vpid); else - vpid_sync_context(nested_get_vpid02(vcpu)); + vpid_sync_context(vmx_get_current_vpid(vcpu)); } static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) { /* - * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in + * vpid_sync_vcpu_addr() is a nop if vpid==0, see the comment in * vmx_flush_tlb_guest() for an explanation of why this is ok. */ - vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr); + vpid_sync_vcpu_addr(vmx_get_current_vpid(vcpu), addr); } static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) { /* - * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0 - * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit - * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is + * vpid_sync_context() is a nop if vpid==0, e.g. if enable_vpid==0 or a + * vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit are + * required to flush GVA->{G,H}PA mappings from the TLB if vpid is * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed), * i.e. no explicit INVVPID is necessary. */ - vpid_sync_context(to_vmx(vcpu)->vpid); + vpid_sync_context(vmx_get_current_vpid(vcpu)); } void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu) @@ -4001,8 +4007,7 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) if (pi_test_and_set_on(&vmx->pi_desc)) return 0; - if (vcpu != kvm_get_running_vcpu() && - !kvm_vcpu_trigger_posted_interrupt(vcpu, false)) + if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); return 0; @@ -4841,8 +4846,33 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) dr6 = vmx_get_exit_qual(vcpu); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { + /* + * If the #DB was due to ICEBP, a.k.a. INT1, skip the + * instruction. ICEBP generates a trap-like #DB, but + * despite its interception control being tied to #DB, + * is an instruction intercept, i.e. the VM-Exit occurs + * on the ICEBP itself. Note, skipping ICEBP also + * clears STI and MOVSS blocking. + * + * For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS + * if single-step is enabled in RFLAGS and STI or MOVSS + * blocking is active, as the CPU doesn't set the bit + * on VM-Exit due to #DB interception. VM-Entry has a + * consistency check that a single-step #DB is pending + * in this scenario as the previous instruction cannot + * have toggled RFLAGS.TF 0=>1 (because STI and POP/MOV + * don't modify RFLAGS), therefore the one instruction + * delay when activating single-step breakpoints must + * have already expired. Note, the CPU sets/clears BS + * as appropriate for all other VM-Exits types. + */ if (is_icebp(intr_info)) WARN_ON(!skip_emulated_instruction(vcpu)); + else if ((vmx_get_rflags(vcpu) & X86_EFLAGS_TF) && + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS))) + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, + vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS) | DR6_BS); kvm_queue_exception_p(vcpu, DB_VECTOR, dr6); return 1; @@ -6315,18 +6345,13 @@ static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) /* * If we are running L2 and L1 has a new pending interrupt - * which can be injected, we should re-evaluate - * what should be done with this new L1 interrupt. - * If L1 intercepts external-interrupts, we should - * exit from L2 to L1. Otherwise, interrupt should be - * delivered directly to L2. + * which can be injected, this may cause a vmexit or it may + * be injected into L2. Either way, this interrupt will be + * processed via KVM_REQ_EVENT, not RVI, because we do not use + * virtual interrupt delivery to inject L1 interrupts into L2. */ - if (is_guest_mode(vcpu) && max_irr_updated) { - if (nested_exit_on_intr(vcpu)) - kvm_vcpu_exiting_guest_mode(vcpu); - else - kvm_make_request(KVM_REQ_EVENT, vcpu); - } + if (is_guest_mode(vcpu) && max_irr_updated) + kvm_make_request(KVM_REQ_EVENT, vcpu); } else { max_irr = kvm_lapic_find_highest_irr(vcpu); } @@ -6748,7 +6773,7 @@ reenter_guest: * If the L02 MSR bitmap does not intercept the MSR, then we need to * save it. */ - if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) + if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); @@ -7590,6 +7615,8 @@ static void vmx_migrate_timers(struct kvm_vcpu *vcpu) static void hardware_unsetup(void) { + kvm_set_posted_intr_wakeup_handler(NULL); + if (nested) nested_vmx_hardware_unsetup(); @@ -7881,8 +7908,6 @@ static __init int hardware_setup(void) vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit; } - kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler); - kvm_mce_cap_supported |= MCG_LMCE_P; if (pt_mode != PT_MODE_SYSTEM && pt_mode != PT_MODE_HOST_GUEST) @@ -7904,6 +7929,9 @@ static __init int hardware_setup(void) r = alloc_kvm_area(); if (r) nested_vmx_hardware_unsetup(); + + kvm_set_posted_intr_wakeup_handler(pi_wakeup_handler); + return r; } @@ -7912,6 +7940,7 @@ static struct kvm_x86_init_ops vmx_init_ops __initdata = { .disabled_by_bios = vmx_disabled_by_bios, .check_processor_compatibility = vmx_check_processor_compat, .hardware_setup = hardware_setup, + .intel_pt_intr_in_guest = vmx_pt_mode_is_host_guest, .runtime_ops = &vmx_x86_ops, }; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 75c59ad27e9fdb1fe5f23e331455c6690b466d0b..a5d6d79b023bc1a1be066f86a1944ed0c709ed2b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1229,7 +1229,7 @@ static const u32 msrs_to_save_all[] = { MSR_IA32_UMWAIT_CONTROL, MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1, - MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3, + MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS, MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL, MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1, @@ -1250,6 +1250,13 @@ static const u32 msrs_to_save_all[] = { MSR_ARCH_PERFMON_EVENTSEL0 + 12, MSR_ARCH_PERFMON_EVENTSEL0 + 13, MSR_ARCH_PERFMON_EVENTSEL0 + 14, MSR_ARCH_PERFMON_EVENTSEL0 + 15, MSR_ARCH_PERFMON_EVENTSEL0 + 16, MSR_ARCH_PERFMON_EVENTSEL0 + 17, + + MSR_K7_EVNTSEL0, MSR_K7_EVNTSEL1, MSR_K7_EVNTSEL2, MSR_K7_EVNTSEL3, + MSR_K7_PERFCTR0, MSR_K7_PERFCTR1, MSR_K7_PERFCTR2, MSR_K7_PERFCTR3, + MSR_F15H_PERF_CTL0, MSR_F15H_PERF_CTL1, MSR_F15H_PERF_CTL2, + MSR_F15H_PERF_CTL3, MSR_F15H_PERF_CTL4, MSR_F15H_PERF_CTL5, + MSR_F15H_PERF_CTR0, MSR_F15H_PERF_CTR1, MSR_F15H_PERF_CTR2, + MSR_F15H_PERF_CTR3, MSR_F15H_PERF_CTR4, MSR_F15H_PERF_CTR5, }; static u32 msrs_to_save[ARRAY_SIZE(msrs_to_save_all)]; @@ -3058,7 +3065,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated) return 1; - if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent)) + if (kvm_get_msr_feature(&msr_ent)) return 1; if (data & ~msr_ent.data) return 1; @@ -3164,6 +3171,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & ~supported_xss) return 1; vcpu->arch.ia32_xss = data; + kvm_update_cpuid_runtime(vcpu); break; case MSR_SMI_COUNT: if (!msr_info->host_initiated) @@ -4383,6 +4391,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.hflags |= HF_SMM_MASK; else vcpu->arch.hflags &= ~HF_SMM_MASK; + + kvm_x86_ops.nested_ops->leave_nested(vcpu); kvm_smm_changed(vcpu); } @@ -7875,7 +7885,7 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { .is_in_guest = kvm_is_in_guest, .is_user_mode = kvm_is_user_mode, .get_guest_ip = kvm_get_guest_ip, - .handle_intel_pt_intr = kvm_handle_intel_pt_intr, + .handle_intel_pt_intr = NULL, }; #ifdef CONFIG_X86_64 @@ -7998,6 +8008,8 @@ int kvm_arch_init(void *opaque) PT_PRESENT_MASK, 0, sme_me_mask); kvm_timer_init(); + if (ops->intel_pt_intr_in_guest && ops->intel_pt_intr_in_guest()) + kvm_guest_cbs.handle_intel_pt_intr = kvm_handle_intel_pt_intr; perf_register_guest_info_callbacks(&kvm_guest_cbs); if (boot_cpu_has(X86_FEATURE_XSAVE)) { @@ -8035,6 +8047,7 @@ void kvm_arch_exit(void) #endif kvm_lapic_exit(); perf_unregister_guest_info_callbacks(&kvm_guest_cbs); + kvm_guest_cbs.handle_intel_pt_intr = NULL; if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, @@ -10392,9 +10405,15 @@ void kvm_arch_free_vm(struct kvm *kvm) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { + int ret; + if (type) return -EINVAL; + ret = kvm_page_track_init(kvm); + if (ret) + return ret; + INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); @@ -10421,7 +10440,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); kvm_hv_init_vm(kvm); - kvm_page_track_init(kvm); kvm_mmu_init_vm(kvm); return kvm_x86_ops.vm_init(kvm); diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index bb0b3fe1e0a0251c716ec2568576d8e0ceb13900..c6a19c88af547637f312c0abad8afe3b4dbd32b9 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -1415,7 +1415,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) } } -static unsigned long insn_get_effective_ip(struct pt_regs *regs) +unsigned long insn_get_effective_ip(struct pt_regs *regs) { unsigned long seg_base = 0; diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index f5e1e60c9095fffb8210e39aefc58a5807252026..6c2f1b76a0b61028548e33f215b1eb548ad8bf01 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -110,6 +110,13 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu) cea_map_stack(NMI); cea_map_stack(DB); cea_map_stack(MCE); + + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { + if (cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) { + cea_map_stack(VC); + cea_map_stack(VC2); + } + } } #else static inline void percpu_setup_exception_stacks(unsigned int cpu) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index c7a47603537f27e85f634122b756015f17f2444e..155c95dc17725676b664cf71d84f32c6f697615b 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -102,6 +102,12 @@ static unsigned long min_pfn_mapped; static bool __initdata can_use_brk_pgt = true; +/* + * Provide a run-time mean of disabling ZONE_DMA32 if it is enabled via + * CONFIG_ZONE_DMA32. + */ +static bool disable_dma32 __ro_after_init; + /* * Pages returned are already directly mapped. * @@ -996,7 +1002,7 @@ void __init zone_sizes_init(void) max_zone_pfns[ZONE_DMA] = min(MAX_DMA_PFN, max_low_pfn); #endif #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = min(MAX_DMA32_PFN, max_low_pfn); + max_zone_pfns[ZONE_DMA32] = disable_dma32 ? 0 : min(MAX_DMA32_PFN, max_low_pfn); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM @@ -1006,6 +1012,18 @@ void __init zone_sizes_init(void) free_area_init(max_zone_pfns); } +static int __init early_disable_dma32(char *buf) +{ + if (!buf) + return -EINVAL; + + if (!strcmp(buf, "on")) + disable_dma32 = true; + + return 0; +} +early_param("disable_dma32", early_disable_dma32); + __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { .loaded_mm = &init_mm, .next_asid = 1, diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 356b746dfbe7adb3b654784ebce4240a2587cdcc..91e61dbba3e0c571da67779b40cb9415e78093d9 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -633,6 +633,7 @@ static bool memremap_is_efi_data(resource_size_t phys_addr, static bool memremap_is_setup_data(resource_size_t phys_addr, unsigned long size) { + struct setup_indirect *indirect; struct setup_data *data; u64 paddr, paddr_next; @@ -645,6 +646,10 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, data = memremap(paddr, sizeof(*data), MEMREMAP_WB | MEMREMAP_DEC); + if (!data) { + pr_warn("failed to memremap setup_data entry\n"); + return false; + } paddr_next = data->next; len = data->len; @@ -654,10 +659,21 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, return true; } - if (data->type == SETUP_INDIRECT && - ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { - paddr = ((struct setup_indirect *)data->data)->addr; - len = ((struct setup_indirect *)data->data)->len; + if (data->type == SETUP_INDIRECT) { + memunmap(data); + data = memremap(paddr, sizeof(*data) + len, + MEMREMAP_WB | MEMREMAP_DEC); + if (!data) { + pr_warn("failed to memremap indirect setup_data\n"); + return false; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } } memunmap(data); @@ -678,22 +694,51 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, static bool __init early_memremap_is_setup_data(resource_size_t phys_addr, unsigned long size) { + struct setup_indirect *indirect; struct setup_data *data; u64 paddr, paddr_next; paddr = boot_params.hdr.setup_data; while (paddr) { - unsigned int len; + unsigned int len, size; if (phys_addr == paddr) return true; data = early_memremap_decrypted(paddr, sizeof(*data)); + if (!data) { + pr_warn("failed to early memremap setup_data entry\n"); + return false; + } + + size = sizeof(*data); paddr_next = data->next; len = data->len; - early_memunmap(data, sizeof(*data)); + if ((phys_addr > paddr) && (phys_addr < (paddr + len))) { + early_memunmap(data, sizeof(*data)); + return true; + } + + if (data->type == SETUP_INDIRECT) { + size += len; + early_memunmap(data, sizeof(*data)); + data = early_memremap_decrypted(paddr, size); + if (!data) { + pr_warn("failed to early memremap indirect setup_data\n"); + return false; + } + + indirect = (struct setup_indirect *)data->data; + + if (indirect->type != SETUP_INDIRECT) { + paddr = indirect->addr; + len = indirect->len; + } + } + + early_memunmap(data, size); if ((phys_addr > paddr) && (phys_addr < (paddr + len))) return true; diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 868fd69814bfff1b51a3cee07fe116749085f7b3..daffe92aaedbeeed4c6bff60410b943e9911e736 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 65f599e9075bc358dfbba84bfb96dddd045d01a5..011e042b47ba7daecf56c5360cf1ef69b0698a36 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -27,6 +27,15 @@ #undef CONFIG_PARAVIRT_XXL #undef CONFIG_PARAVIRT_SPINLOCKS +/* + * This code runs before CPU feature bits are set. By default, the + * pgtable_l5_enabled() function uses bit X86_FEATURE_LA57 to determine if + * 5-level paging is active, so that won't work here. USE_EARLY_PGTABLE_L5 + * is provided to handle this situation and, instead, use a variable that + * has been set by the early boot code. + */ +#define USE_EARLY_PGTABLE_L5 + #include #include #include diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index f6a9e2e3664259e56844c54385e3f97409900b07..fd0f1cb6fc6c974a11675fd6c6d87ff764641859 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -550,7 +550,7 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, return ret; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { @@ -562,6 +562,9 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, return ret; } +#endif + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE int pudp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pud_t *pudp) { diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 0a962cd6bac18f4cac092c8b2d667bba90b2b5b6..a0a7ead52698caaf4e2e654ead81ef23e06dad16 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1547,7 +1547,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, - struct bpf_prog *p, int stack_size, bool mod_ret) + struct bpf_prog *p, int stack_size, bool save_ret) { u8 *prog = *pprog; int cnt = 0; @@ -1573,11 +1573,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (emit_call(&prog, p->bpf_func, prog)) return -EINVAL; - /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return + /* + * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return * of the previous call which is then passed on the stack to * the next BPF program. + * + * BPF_TRAMP_FENTRY trampoline may need to return the return + * value of BPF_PROG_TYPE_STRUCT_OPS prog. */ - if (mod_ret) + if (save_ret) emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); if (p->aux->sleepable) { @@ -1645,13 +1649,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) } static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, - struct bpf_tramp_progs *tp, int stack_size) + struct bpf_tramp_progs *tp, int stack_size, + bool save_ret) { int i; u8 *prog = *pprog; for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false)) + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, + save_ret)) return -EINVAL; } *pprog = prog; @@ -1694,6 +1700,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, return 0; } +static bool is_valid_bpf_tramp_flags(unsigned int flags) +{ + if ((flags & BPF_TRAMP_F_RESTORE_REGS) && + (flags & BPF_TRAMP_F_SKIP_FRAME)) + return false; + + /* + * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops, + * and it must be used alone. + */ + if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) && + (flags & ~BPF_TRAMP_F_RET_FENTRY_RET)) + return false; + + return true; +} + /* Example: * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); * its 'struct btf_func_model' will be nr_args=2 @@ -1766,17 +1789,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; u8 **branches = NULL; u8 *prog; + bool save_ret; /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ if (nr_args > 6) return -ENOTSUPP; - if ((flags & BPF_TRAMP_F_RESTORE_REGS) && - (flags & BPF_TRAMP_F_SKIP_FRAME)) + if (!is_valid_bpf_tramp_flags(flags)) return -EINVAL; - if (flags & BPF_TRAMP_F_CALL_ORIG) - stack_size += 8; /* room for return value of orig_call */ + /* room for return value of orig_call or fentry prog */ + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + stack_size += 8; if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip patched call instruction and point orig_call to actual @@ -1803,7 +1828,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } if (fentry->nr_progs) - if (invoke_bpf(m, &prog, fentry, stack_size)) + if (invoke_bpf(m, &prog, fentry, stack_size, + flags & BPF_TRAMP_F_RET_FENTRY_RET)) return -EINVAL; if (fmod_ret->nr_progs) { @@ -1850,7 +1876,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } if (fexit->nr_progs) - if (invoke_bpf(m, &prog, fexit, stack_size)) { + if (invoke_bpf(m, &prog, fexit, stack_size, false)) { ret = -EINVAL; goto cleanup; } @@ -1870,9 +1896,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ret = -EINVAL; goto cleanup; } - /* restore original return value back into RAX */ - emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); } + /* restore return value of orig_call or fentry prog back into RAX */ + if (save_ret) + emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); EMIT1(0x5B); /* pop rbx */ EMIT1(0xC9); /* leave */ diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 5a40fe411ebda0fedbf65d9de0f9c1f0ba92abee..c1eec019dcee690521d8a7bf30d992c9503c714e 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -277,7 +277,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size) return; } - new = early_memremap(data.phys_map, data.size); + new = early_memremap_prot(data.phys_map, data.size, + pgprot_val(pgprot_encrypted(FIXMAP_PAGE_NORMAL))); if (!new) { pr_err("Failed to map new boot services memmap\n"); return; diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c index ee2beda590d0d0156e2dd0f0febf45b71452cdd3..1d4a00e767ece00edc2a0ed8b3fcdfe160ee7b54 100644 --- a/arch/x86/platform/olpc/olpc.c +++ b/arch/x86/platform/olpc/olpc.c @@ -274,7 +274,7 @@ static struct olpc_ec_driver ec_xo1_driver = { static struct olpc_ec_driver ec_xo1_5_driver = { .ec_cmd = olpc_xo1_ec_cmd, -#ifdef CONFIG_OLPC_XO1_5_SCI +#ifdef CONFIG_OLPC_XO15_SCI /* * XO-1.5 EC wakeups are available when olpc-xo15-sci driver is * compiled in diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 22fda7d991590504f956f943d4d4d7bfdcea67ee..1a702c6a226ec2daff44df71e742c0cc126bc370 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -17,6 +17,32 @@ u32 *trampoline_cr4_features; /* Hold the pgd entry used on booting additional CPUs */ pgd_t trampoline_pgd_entry; +void load_trampoline_pgtable(void) +{ +#ifdef CONFIG_X86_32 + load_cr3(initial_page_table); +#else + /* + * This function is called before exiting to real-mode and that will + * fail with CR4.PCIDE still set. + */ + if (boot_cpu_has(X86_FEATURE_PCID)) + cr4_clear_bits(X86_CR4_PCIDE); + + write_cr3(real_mode_header->trampoline_pgd); +#endif + + /* + * The CR3 write above will not flush global TLB entries. + * Stale, global entries from previous page tables may still be + * present. Flush those stale entries. + * + * This ensures that memory accessed while running with + * trampoline_pgd is *actually* mapped into trampoline_pgd. + */ + __flush_tlb_all(); +} + void __init reserve_real_mode(void) { phys_addr_t mem; @@ -70,6 +96,7 @@ static void __init setup_real_mode(void) #ifdef CONFIG_X86_64 u64 *trampoline_pgd; u64 efer; + int i; #endif base = (unsigned char *)real_mode_header; @@ -126,8 +153,17 @@ static void __init setup_real_mode(void) trampoline_header->flags = 0; trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd); + + /* Map the real mode stub as virtual == physical */ trampoline_pgd[0] = trampoline_pgd_entry.pgd; - trampoline_pgd[511] = init_top_pgt[511].pgd; + + /* + * Include the entirety of the kernel mapping into the trampoline + * PGD. This way, all mappings present in the normal kernel page + * tables are usable while running on trampoline_pgd. + */ + for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++) + trampoline_pgd[i] = init_top_pgt[i].pgd; #endif sme_sev_setup_real_mode(trampoline_header); diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index 58f51667e2e4beb82ec794cf618bf7026cfcb66c..8249685b409605a486d19621c2a78a9713856c58 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -11,6 +11,7 @@ #include #include /* XXX This should get the constants from libc */ #include +#include long arch_prctl(struct task_struct *task, int option, unsigned long __user *arg2) @@ -35,7 +36,7 @@ long arch_prctl(struct task_struct *task, int option, switch (option) { case ARCH_SET_FS: case ARCH_SET_GS: - ret = restore_registers(pid, ¤t->thread.regs.regs); + ret = restore_pid_registers(pid, ¤t->thread.regs.regs); if (ret) return ret; break; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index aa9f50fccc5da08b1088dc5240eabfa50fbc09c5..0f68c6da7382bca2cf4b037cbbe15a96ff9718ed 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -51,9 +51,6 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); DEFINE_PER_CPU(uint32_t, xen_vcpu_id); EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); -enum xen_domain_type xen_domain_type = XEN_NATIVE; -EXPORT_SYMBOL_GPL(xen_domain_type); - unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START; EXPORT_SYMBOL(machine_to_phys_mapping); unsigned long machine_to_phys_nr; @@ -68,9 +65,11 @@ __read_mostly int xen_have_vector_callback; EXPORT_SYMBOL_GPL(xen_have_vector_callback); /* - * NB: needs to live in .data because it's used by xen_prepare_pvh which runs - * before clearing the bss. + * NB: These need to live in .data or alike because they're used by + * xen_prepare_pvh() which runs before clearing the bss. */ +enum xen_domain_type __ro_after_init xen_domain_type = XEN_NATIVE; +EXPORT_SYMBOL_GPL(xen_domain_type); uint32_t xen_start_flags __section(".data") = 0; EXPORT_SYMBOL(xen_start_flags); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5af0421ef74ba44df87f4845c288b7d5ba55064a..804c65d2b95f3c066f9040d5d5580e1b6256f0ce 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1083,9 +1083,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { #endif .io_delay = xen_io_delay, - /* Xen takes care of %gs when switching to usermode for us */ - .swapgs = paravirt_nop, - .start_context_switch = paravirt_start_context_switch, .end_context_switch = xen_end_context_switch, }; @@ -1390,10 +1387,6 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_acpi_sleep_register(); - /* Avoid searching for BIOS MP tables */ - x86_init.mpparse.find_smp_config = x86_init_noop; - x86_init.mpparse.get_smp_config = x86_init_uint_noop; - xen_boot_params_init_edd(); #ifdef CONFIG_ACPI diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index c2ac319f11a4b785e07ac52041f54d8adc2c4e85..8f9e7e2407c873621f50c6954e6cff712437c6bf 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -149,28 +149,12 @@ int xen_smp_intr_init_pv(unsigned int cpu) return rc; } -static void __init xen_fill_possible_map(void) -{ - int i, rc; - - if (xen_initial_domain()) - return; - - for (i = 0; i < nr_cpu_ids; i++) { - rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); - if (rc >= 0) { - num_processors++; - set_cpu_possible(i, true); - } - } -} - -static void __init xen_filter_cpu_maps(void) +static void __init _get_smp_config(unsigned int early) { int i, rc; unsigned int subtract = 0; - if (!xen_initial_domain()) + if (early) return; num_processors = 0; @@ -211,7 +195,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void) * sure the old memory can be recycled. */ make_lowmem_page_readwrite(xen_initial_gdt); - xen_filter_cpu_maps(); xen_setup_vcpu_info_placement(); /* @@ -491,5 +474,8 @@ static const struct smp_ops xen_smp_ops __initconst = { void __init xen_smp_init(void) { smp_ops = xen_smp_ops; - xen_fill_possible_map(); + + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = _get_smp_config; } diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 53cf8aa35032d69948093f97c1f0836a3bc4075c..011ec649f38869328c48c079cfe47af15eeadccc 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -19,6 +19,7 @@ #include #include +#include <../entry/calling.h> /* * Enable events. This clears the event mask and tests the pending @@ -235,6 +236,25 @@ SYM_CODE_START(xen_sysret64) jmp hypercall_iret SYM_CODE_END(xen_sysret64) +/* + * XEN pv doesn't use trampoline stack, PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is + * also the kernel stack. Reusing swapgs_restore_regs_and_return_to_usermode() + * in XEN pv would cause %rsp to move up to the top of the kernel stack and + * leave the IRET frame below %rsp, which is dangerous to be corrupted if #NMI + * interrupts. And swapgs_restore_regs_and_return_to_usermode() pushing the IRET + * frame at the same address is useless. + */ +SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode) + UNWIND_HINT_REGS + POP_REGS + + /* stackleak_erase() can work safely on the kernel stack. */ + STACKLEAK_ERASE_NOCLOBBER + + addq $8, %rsp /* skip regs->orig_ax */ + jmp xen_iret +SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode) + /* * Xen handles syscall callbacks much like ordinary exceptions, which * means we have: diff --git a/arch/xtensa/include/asm/kmem_layout.h b/arch/xtensa/include/asm/kmem_layout.h index 7cbf68ca71060dec42583ac60cd2e5f47b68ed09..6fc05cba61a27563273cde66d2f99ffe56fd1ab1 100644 --- a/arch/xtensa/include/asm/kmem_layout.h +++ b/arch/xtensa/include/asm/kmem_layout.h @@ -78,7 +78,7 @@ #endif #define XCHAL_KIO_SIZE 0x10000000 -#if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_OF) +#if (!XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY) && defined(CONFIG_USE_OF) #define XCHAL_KIO_PADDR xtensa_get_kio_paddr() #ifndef __ASSEMBLY__ extern unsigned long xtensa_kio_paddr; diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index a48bf2d10ac2d9fc71f677d1e516ebcb0aef2e65..80cc9770a8d2d633c7857e4b33607afc5be1d1bb 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -145,7 +145,7 @@ unsigned xtensa_get_ext_irq_no(unsigned irq) void __init init_IRQ(void) { -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF irqchip_init(); #else #ifdef CONFIG_HAVE_SMP diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index ed184106e4cf945a0631f7f43d72a700b568507a..ee9082a142feb49cf187c014df9d6df8b605a939 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -63,7 +63,7 @@ extern unsigned long initrd_end; extern int initrd_below_start_ok; #endif -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF void *dtb_start = __dtb_start; #endif @@ -125,7 +125,7 @@ __tagtable(BP_TAG_INITRD, parse_tag_initrd); #endif /* CONFIG_BLK_DEV_INITRD */ -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF static int __init parse_tag_fdt(const bp_tag_t *tag) { @@ -135,7 +135,7 @@ static int __init parse_tag_fdt(const bp_tag_t *tag) __tagtable(BP_TAG_FDT, parse_tag_fdt); -#endif /* CONFIG_OF */ +#endif /* CONFIG_USE_OF */ static int __init parse_tag_cmdline(const bp_tag_t* tag) { @@ -183,7 +183,7 @@ static int __init parse_bootparam(const bp_tag_t *tag) } #endif -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF #if !XCHAL_HAVE_PTP_MMU || XCHAL_HAVE_SPANNING_WAY unsigned long xtensa_kio_paddr = XCHAL_KIO_DEFAULT_PADDR; @@ -232,7 +232,7 @@ void __init early_init_devtree(void *params) strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); } -#endif /* CONFIG_OF */ +#endif /* CONFIG_USE_OF */ /* * Initialize architecture. (Early stage) @@ -253,7 +253,7 @@ void __init init_arch(bp_tag_t *bp_start) if (bp_start) parse_bootparam(bp_start); -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF early_init_devtree(dtb_start); #endif diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index cd85a7a2722ba4f4f101957c8cbc7665deda12ac..1254da07ead1f42712918b90c54b45ebf83cb5bb 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -145,7 +145,6 @@ void secondary_start_kernel(void) cpumask_set_cpu(cpu, mm_cpumask(mm)); enter_lazy_tlb(mm, current); - preempt_disable(); trace_hardirqs_off(); calibrate_delay(); diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index b070f272995d6447895287704a0c76237619c2cc..4fd1365f6aa2c77dcd0305dbc326cae9ef6905a7 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -411,3 +411,5 @@ 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/arch/xtensa/mm/mmu.c b/arch/xtensa/mm/mmu.c index fd2193df8a14520c105018021d438e75f1140353..511bb92518f2893c5e225660984c3ad586d100fe 100644 --- a/arch/xtensa/mm/mmu.c +++ b/arch/xtensa/mm/mmu.c @@ -100,7 +100,7 @@ void init_mmu(void) void init_kio(void) { -#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_OF) +#if XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY && defined(CONFIG_USE_OF) /* * Update the IO area mapping in case xtensa_kio_paddr has changed */ diff --git a/arch/xtensa/platforms/xtfpga/setup.c b/arch/xtensa/platforms/xtfpga/setup.c index 4f7d6142d41fa4c0847b8a68bf82e7067a95fce8..538e6748e85a7dbb912bd7df2e04860ff51dbb03 100644 --- a/arch/xtensa/platforms/xtfpga/setup.c +++ b/arch/xtensa/platforms/xtfpga/setup.c @@ -51,8 +51,12 @@ void platform_power_off(void) void platform_restart(void) { - /* Flush and reset the mmu, simulate a processor reset, and - * jump to the reset vector. */ + /* Try software reset first. */ + WRITE_ONCE(*(u32 *)XTFPGA_SWRST_VADDR, 0xdead); + + /* If software reset did not work, flush and reset the mmu, + * simulate a processor reset, and jump to the reset vector. + */ cpu_reset(); /* control never gets here */ } @@ -66,7 +70,7 @@ void __init platform_calibrate_ccount(void) #endif -#ifdef CONFIG_OF +#ifdef CONFIG_USE_OF static void __init xtfpga_clk_setup(struct device_node *np) { @@ -284,4 +288,4 @@ static int __init xtavnet_init(void) */ arch_initcall(xtavnet_init); -#endif /* CONFIG_OF */ +#endif /* CONFIG_USE_OF */ diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index e9167904f270ac0874918156433255ef52962019..481b80a609942fc13b02551e987a010d3e7e8660 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -2526,15 +2526,6 @@ bfq_setup_merge(struct bfq_queue *bfqq, struct bfq_queue *new_bfqq) * are likely to increase the throughput. */ bfqq->new_bfqq = new_bfqq; - /* - * The above assignment schedules the following redirections: - * each time some I/O for bfqq arrives, the process that - * generated that I/O is disassociated from bfqq and - * associated with new_bfqq. Here we increases new_bfqq->ref - * in advance, adding the number of processes that are - * expected to be associated with new_bfqq as they happen to - * issue I/O. - */ new_bfqq->ref += process_refs; return new_bfqq; } @@ -2594,10 +2585,6 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, { struct bfq_queue *in_service_bfqq, *new_bfqq; - /* if a merge has already been setup, then proceed with that first */ - if (bfqq->new_bfqq) - return bfqq->new_bfqq; - /* * Do not perform queue merging if the device is non * rotational and performs internal queueing. In fact, such a @@ -2652,6 +2639,9 @@ bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfq_too_late_for_merging(bfqq)) return NULL; + if (bfqq->new_bfqq) + return bfqq->new_bfqq; + if (!io_struct || unlikely(bfqq == &bfqd->oom_bfqq)) return NULL; @@ -6409,6 +6399,8 @@ static void bfq_exit_queue(struct elevator_queue *e) spin_unlock_irq(&bfqd->lock); #endif + wbt_enable_default(bfqd->queue); + kfree(bfqd); } diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 9ffd7e2895547676ed801c099306f5a728918840..4f6f140a44e064f18d9c6b3726e3fdcb8572ed40 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -384,7 +384,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); - bip->bip_iter.bi_sector += bytes_done >> 9; + bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9); bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); } diff --git a/block/bio.c b/block/bio.c index 0703a208ca248cfaf41a779ad434db50d89ca7b2..f8d26ce7b61b0059197d1f1b40fba3a36d9da0dd 100644 --- a/block/bio.c +++ b/block/bio.c @@ -575,7 +575,8 @@ void bio_truncate(struct bio *bio, unsigned new_size) offset = new_size - done; else offset = 0; - zero_user(bv.bv_page, offset, bv.bv_len - offset); + zero_user(bv.bv_page, bv.bv_offset + offset, + bv.bv_len - offset); truncated = true; } done += bv.bv_len; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index cca79c06d33ddebcdbe5648802c0d61c4f9dcd61..22fb1beed49aa286d8652defff3615f352d90cb1 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1182,10 +1182,6 @@ int blkcg_init_queue(struct request_queue *q) if (preloaded) radix_tree_preload_end(); - ret = blk_iolatency_init(q); - if (ret) - goto err_destroy_all; - ret = blk_ioprio_init(q); if (ret) goto err_destroy_all; @@ -1194,6 +1190,12 @@ int blkcg_init_queue(struct request_queue *q) if (ret) goto err_destroy_all; + ret = blk_iolatency_init(q); + if (ret) { + blk_throtl_exit(q); + goto err_destroy_all; + } + return 0; err_destroy_all: diff --git a/block/blk-core.c b/block/blk-core.c index 0e66579c299f06a8aab6c71a18ddbe42f738aa03..26664f2a139eb1ae2946ed35919dac2bd3d66f09 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -439,7 +440,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) * responsible for ensuring that that counter is * globally visible before the queue is unfrozen. */ - if (pm || !blk_queue_pm_only(q)) { + if ((pm && queue_rpm_status(q) != RPM_SUSPENDED) || + !blk_queue_pm_only(q)) { success = true; } else { percpu_ref_put(&q->q_usage_counter); @@ -464,8 +466,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags) wait_event(q->mq_freeze_wq, (!q->mq_freeze_depth && - (pm || (blk_pm_request_resume(q), - !blk_queue_pm_only(q)))) || + blk_pm_resume_queue(pm, q)) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; @@ -896,10 +897,8 @@ static noinline_for_stack bool submit_bio_checks(struct bio *bio) if (unlikely(!current->io_context)) create_task_io_context(current, GFP_ATOMIC, q->node); - if (blk_throtl_bio(bio)) { - blkcg_bio_issue_init(bio); + if (blk_throtl_bio(bio)) return false; - } blk_cgroup_bio_start(bio); blkcg_bio_issue_init(bio); diff --git a/block/blk-flush.c b/block/blk-flush.c index 70f1d02135ed6419b1e81da7f03a85e99fc4dc19..33b487b5cbf78226b1b5ff431a59ff2baa796d51 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -236,8 +236,10 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) * avoiding use-after-free. */ WRITE_ONCE(flush_rq->state, MQ_RQ_IDLE); - if (fq->rq_status != BLK_STS_OK) + if (fq->rq_status != BLK_STS_OK) { error = fq->rq_status; + fq->rq_status = BLK_STS_OK; + } if (!q->elevator) { flush_rq->tag = BLK_MQ_NO_TAG; diff --git a/block/blk-iocost.c b/block/blk-iocost.c index e95b93f72bd5c8082b16f54d9896a6a35bb547cb..9af32b44b7173e73ea627f8aef6a840b2ad84549 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2246,7 +2246,14 @@ static void ioc_timer_fn(struct timer_list *timer) hwm = current_hweight_max(iocg); new_hwi = hweight_after_donation(iocg, old_hwi, hwm, usage, &now); - if (new_hwi < hwm) { + /* + * Donation calculation assumes hweight_after_donation + * to be positive, a condition that a donor w/ hwa < 2 + * can't meet. Don't bother with donation if hwa is + * below 2. It's not gonna make a meaningful difference + * anyway. + */ + if (new_hwi < hwm && hwa >= 2) { iocg->hweight_donating = hwa; iocg->hweight_after_donation = new_hwi; list_add(&iocg->surplus_list, &surpluses); diff --git a/block/blk-map.c b/block/blk-map.c index 21630dccac628c3fac3131c5d900fc144ea962e1..369e204d14d013f66e3422444d19361d09a10863 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -150,9 +150,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, bmd->is_our_pages = !map_data; bmd->is_null_mapped = (map_data && map_data->null_mapped); - nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE); - if (nr_pages > BIO_MAX_PAGES) - nr_pages = BIO_MAX_PAGES; + nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE)); ret = -ENOMEM; bio = bio_kmalloc(gfp_mask, nr_pages); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 34a5eff632bf7c66901f181caa5acfada59c95e0..43d4e9df161b83af5a6d2648c086fcfa7ec179c6 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -129,6 +129,7 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(PCI_P2PDMA), QUEUE_FLAG_NAME(ZONE_RESETALL), QUEUE_FLAG_NAME(RQ_ALLOC_TIME), + QUEUE_FLAG_NAME(HCTX_ACTIVE), QUEUE_FLAG_NAME(NOWAIT), }; #undef QUEUE_FLAG_NAME diff --git a/block/blk-mq.c b/block/blk-mq.c index b4c7aac6fd757082ca311b95e8b178ddd8aeb1a1..86adacfd719cb126ffff0ad27353314ccd899b99 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -766,7 +766,6 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list) /* this request will be re-inserted to io scheduler queue */ blk_mq_sched_requeue_request(rq); - BUG_ON(!list_empty(&rq->queuelist)); blk_mq_add_to_requeue_list(rq, true, kick_requeue_list); } EXPORT_SYMBOL(blk_mq_requeue_request); @@ -1319,6 +1318,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, int errors, queued; blk_status_t ret = BLK_STS_OK; LIST_HEAD(zone_list); + bool needs_resource = false; if (list_empty(list)) return false; @@ -1364,6 +1364,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, queued++; break; case BLK_STS_RESOURCE: + needs_resource = true; + fallthrough; case BLK_STS_DEV_RESOURCE: blk_mq_handle_dev_resource(rq, list); goto out; @@ -1374,6 +1376,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, * accept. */ blk_mq_handle_zone_resource(rq, &zone_list); + needs_resource = true; break; default: errors++; @@ -1400,7 +1403,6 @@ out: /* For non-shared tags, the RESTART check will suffice */ bool no_tag = prep == PREP_DISPATCH_NO_TAG && (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); - bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET; blk_mq_release_budgets(q, nr_budgets); @@ -1440,14 +1442,16 @@ out: * If driver returns BLK_STS_RESOURCE and SCHED_RESTART * bit is set, run queue after a delay to avoid IO stalls * that could otherwise occur if the queue is idle. We'll do - * similar if we couldn't get budget and SCHED_RESTART is set. + * similar if we couldn't get budget or couldn't lock a zone + * and SCHED_RESTART is set. */ needs_restart = blk_mq_sched_needs_restart(hctx); + if (prep == PREP_DISPATCH_NO_BUDGET) + needs_resource = true; if (!needs_restart || (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) blk_mq_run_hw_queue(hctx, true); - else if (needs_restart && (ret == BLK_STS_RESOURCE || - no_budget_avail)) + else if (needs_restart && needs_resource) blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); blk_mq_update_dispatch_busy(hctx, true); @@ -2157,14 +2161,14 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq) } /* - * Allow 4x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple + * Allow 2x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple * queues. This is important for md arrays to benefit from merging * requests. */ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug) { if (plug->multiple_queues) - return BLK_MAX_REQUEST_COUNT * 4; + return BLK_MAX_REQUEST_COUNT * 2; return BLK_MAX_REQUEST_COUNT; } diff --git a/block/blk-pm.c b/block/blk-pm.c index 17bd020268d421434a3724ad63552f2df49da0f4..2dad62cc157272e114eb31255026d2d49b30d550 100644 --- a/block/blk-pm.c +++ b/block/blk-pm.c @@ -163,27 +163,19 @@ EXPORT_SYMBOL(blk_pre_runtime_resume); /** * blk_post_runtime_resume - Post runtime resume processing * @q: the queue of the device - * @err: return value of the device's runtime_resume function * * Description: - * Update the queue's runtime status according to the return value of the - * device's runtime_resume function. If the resume was successful, call - * blk_set_runtime_active() to do the real work of restarting the queue. + * For historical reasons, this routine merely calls blk_set_runtime_active() + * to do the real work of restarting the queue. It does this regardless of + * whether the device's runtime-resume succeeded; even if it failed the + * driver or error handler will need to communicate with the device. * * This function should be called near the end of the device's * runtime_resume callback. */ -void blk_post_runtime_resume(struct request_queue *q, int err) +void blk_post_runtime_resume(struct request_queue *q) { - if (!q->dev) - return; - if (!err) { - blk_set_runtime_active(q); - } else { - spin_lock_irq(&q->queue_lock); - q->rpm_status = RPM_SUSPENDED; - spin_unlock_irq(&q->queue_lock); - } + blk_set_runtime_active(q); } EXPORT_SYMBOL(blk_post_runtime_resume); @@ -201,7 +193,7 @@ EXPORT_SYMBOL(blk_post_runtime_resume); * runtime PM status and re-enable peeking requests from the queue. It * should be called before first request is added to the queue. * - * This function is also called by blk_post_runtime_resume() for successful + * This function is also called by blk_post_runtime_resume() for * runtime resumes. It does everything necessary to restart the queue. */ void blk_set_runtime_active(struct request_queue *q) diff --git a/block/blk-pm.h b/block/blk-pm.h index ea5507d23e75976e0c234c1a4951fd443e86811c..a2283cc9f716dc89622a5966fd6e894d8ddefee9 100644 --- a/block/blk-pm.h +++ b/block/blk-pm.h @@ -6,11 +6,14 @@ #include #ifdef CONFIG_PM -static inline void blk_pm_request_resume(struct request_queue *q) +static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q) { - if (q->dev && (q->rpm_status == RPM_SUSPENDED || - q->rpm_status == RPM_SUSPENDING)) - pm_request_resume(q->dev); + if (!q->dev || !blk_queue_pm_only(q)) + return 1; /* Nothing to do */ + if (pm && q->rpm_status != RPM_SUSPENDED) + return 1; /* Request allowed */ + pm_request_resume(q->dev); + return 0; } static inline void blk_pm_mark_last_busy(struct request *rq) @@ -44,8 +47,9 @@ static inline void blk_pm_put_request(struct request *rq) --rq->q->nr_pending; } #else -static inline void blk_pm_request_resume(struct request_queue *q) +static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q) { + return 1; } static inline void blk_pm_mark_last_busy(struct request *rq) diff --git a/block/blk.h b/block/blk.h index f84c83300f6faa1055d79c8cadf2440db1fcc5f9..997941cd999f6d801b8fd4eda40722c639ff4a2f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -188,6 +188,12 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list, void blk_account_io_start(struct request *req); void blk_account_io_done(struct request *req, u64 now); +/* + * Plug flush limits + */ +#define BLK_MAX_REQUEST_COUNT 32 +#define BLK_PLUG_FLUSH_SIZE (128 * 1024) + /* * Internal elevator interface */ diff --git a/block/elevator.c b/block/elevator.c index 2a525863d4e92ab9b19bcb1b40d923365e5bbdef..f762b2af1d2acbbd334d6cd1b1afb40e3bcda7cd 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -518,8 +518,6 @@ void elv_unregister_queue(struct request_queue *q) kobject_del(&e->kobj); e->registered = 0; - /* Re-enable throttling in case elevator disabled it */ - wbt_enable_default(q); } } @@ -624,6 +622,9 @@ static inline bool elv_support_iosched(struct request_queue *q) */ static struct elevator_type *elevator_get_default(struct request_queue *q) { + if (q->tag_set && q->tag_set->flags & BLK_MQ_F_NO_SCHED_BY_DEFAULT) + return NULL; + if (q->nr_hw_queues != 1) return NULL; diff --git a/block/ioprio.c b/block/ioprio.c index 04ebd37966f181eee55282e67bc96c696de2fdc2..c8878647de40094ee7c3ab7f2e49c20edffa0048 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -69,7 +69,14 @@ int ioprio_check_cap(int ioprio) switch (class) { case IOPRIO_CLASS_RT: - if (!capable(CAP_SYS_ADMIN)) + /* + * Originally this only checked for CAP_SYS_ADMIN, + * which was implicitly allowed for pid 0 by security + * modules such as SELinux. Make sure we check + * CAP_SYS_ADMIN first to avoid a denial/avc for + * possibly missing CAP_SYS_NICE permission. + */ + if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE)) return -EPERM; fallthrough; /* rt has prio field too */ @@ -207,6 +214,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) pgrp = task_pgrp(current); else pgrp = find_vpid(who); + read_lock(&tasklist_lock); do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { tmpio = get_task_ioprio(p); if (tmpio < 0) @@ -216,6 +224,8 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who) else ret = ioprio_best(ret, tmpio); } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); + read_unlock(&tasklist_lock); + break; case IOPRIO_WHO_USER: uid = make_kuid(current_user_ns(), who); diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 0c6e2b3cf98ebf32eb1789d9daf1da4b3526462a..6556aa2182a53ea1703d6f45412419435bb4bbcf 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -860,7 +860,7 @@ SHOW_JIFFIES(deadline_write_expire_show, dd->fifo_expire[DD_WRITE]); SHOW_JIFFIES(deadline_prio_aging_expire_show, dd->prio_aging_expire); SHOW_INT(deadline_writes_starved_show, dd->writes_starved); SHOW_INT(deadline_front_merges_show, dd->front_merges); -SHOW_INT(deadline_async_depth_show, dd->front_merges); +SHOW_INT(deadline_async_depth_show, dd->async_depth); SHOW_INT(deadline_fifo_batch_show, dd->fifo_batch); #undef SHOW_INT #undef SHOW_JIFFIES @@ -890,7 +890,7 @@ STORE_JIFFIES(deadline_write_expire_store, &dd->fifo_expire[DD_WRITE], 0, INT_MA STORE_JIFFIES(deadline_prio_aging_expire_store, &dd->prio_aging_expire, 0, INT_MAX); STORE_INT(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX); STORE_INT(deadline_front_merges_store, &dd->front_merges, 0, 1); -STORE_INT(deadline_async_depth_store, &dd->front_merges, 1, INT_MAX); +STORE_INT(deadline_async_depth_store, &dd->async_depth, 1, INT_MAX); STORE_INT(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX); #undef STORE_FUNCTION #undef STORE_INT diff --git a/build.config.aarch64 b/build.config.aarch64 index 0346722447a5c47f2f9091a73d6bb6ddb0d34f03..2bf2c158fcdb7ea7cec39c220d3081f85293154a 100644 --- a/build.config.aarch64 +++ b/build.config.aarch64 @@ -1,10 +1,4 @@ ARCH=arm64 - -LLVM_IAS=1 -CROSS_COMPILE=aarch64-linux-gnu- -CROSS_COMPILE_COMPAT=arm-linux-gnueabi- -LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gas/linux-x86 - MAKE_GOALS=" Image modules diff --git a/build.config.allmodconfig b/build.config.allmodconfig index 945c4bac54c61fa155a4e8b62b2cd0968ce91c9c..fe3aefa9051823b94300e3d133f3b687125a54f2 100644 --- a/build.config.allmodconfig +++ b/build.config.allmodconfig @@ -1,7 +1,5 @@ DEFCONFIG=allmodconfig -HERMETIC_TOOLCHAIN=0 - POST_DEFCONFIG_CMDS="update_config" function update_config() { ${KERNEL_DIR}/scripts/config --file ${OUT_DIR}/.config \ diff --git a/build.config.arm b/build.config.arm index d944676a87e0776340909de876ece96cc7a862b6..832c3c07173a6e8462488f75c90a3f94112cae70 100644 --- a/build.config.arm +++ b/build.config.arm @@ -1,9 +1,4 @@ ARCH=arm - -LLVM_IAS=1 -CROSS_COMPILE=arm-linux-gnueabi- -LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gas/linux-x86 - MAKE_GOALS=" zImage modules diff --git a/build.config.common b/build.config.common index 2d2f281865b01376bf0d0ea4cb09ddf0840bb045..a07fb7f8c01202db2a915038f656388358a0dbc9 100644 --- a/build.config.common +++ b/build.config.common @@ -1,15 +1,19 @@ -BRANCH=android13-5.10 -KMI_GENERATION=0 +. ${ROOT_DIR}/${KERNEL_DIR}/build.config.constants + +KMI_GENERATION=4 LLVM=1 DEPMOD=depmod -CLANG_PREBUILT_BIN=prebuilts/clang/host/linux-x86/clang-r433403/bin -BUILDTOOLS_PREBUILT_BIN=build/build-tools/path/linux-x86 +CLANG_PREBUILT_BIN=prebuilts/clang/host/linux-x86/clang-${CLANG_VERSION}/bin +BUILDTOOLS_PREBUILT_BIN=build/kernel/build-tools/path/linux-x86 DTC=${ROOT_DIR}/${BUILDTOOLS_PREBUILT_BIN}/dtc +KCFLAGS="${KCFLAGS} -D__ANDROID_COMMON_KERNEL__" EXTRA_CMDS='' STOP_SHIP_TRACEPRINTK=1 IN_KERNEL_MODULES=1 DO_NOT_STRIP_MODULES=1 HERMETIC_TOOLCHAIN=${HERMETIC_TOOLCHAIN:-1} + +KLEAF_SUPPRESS_BUILD_SH_DEPRECATION_WARNING=${KLEAF_SUPPRESS_BUILD_SH_DEPRECATION_WARNING:-1} diff --git a/build.config.constants b/build.config.constants new file mode 100644 index 0000000000000000000000000000000000000000..028ad6a8474972ffddf191e4d1d3fe85f59018af --- /dev/null +++ b/build.config.constants @@ -0,0 +1,2 @@ +BRANCH=android13-5.10 +CLANG_VERSION=r450784e diff --git a/build.config.gki.aarch64 b/build.config.gki.aarch64 index a20dc711978c5f81d2f1912205f40e7ad2cea374..2e7f202446bf37e21809875acc0b1ccea39d9a4e 100644 --- a/build.config.gki.aarch64 +++ b/build.config.gki.aarch64 @@ -6,7 +6,9 @@ MAKE_GOALS="${MAKE_GOALS} Image.lz4 " -TIDY_ABI=1 +ABI_DEFINITION=android/abi_gki_aarch64.xml +# Update BUILD.bazel, define_common_kernels() if the list differs from +# android/abi_gki_aarch64* in the filesystem. KMI_SYMBOL_LIST=android/abi_gki_aarch64 ADDITIONAL_KMI_SYMBOL_LISTS=" android/abi_gki_aarch64_core @@ -21,10 +23,17 @@ FILES="${FILES} arch/arm64/boot/Image.lz4 " +# Update BUILD.bazel, define_common_kernels() if the value is not 1. TRIM_NONLISTED_KMI=${TRIM_NONLISTED_KMI:-1} KMI_SYMBOL_LIST_STRICT_MODE=${KMI_SYMBOL_LIST_STRICT_MODE:-1} +KMI_ENFORCED=1 + +BUILD_SYSTEM_DLKM=1 +MODULES_LIST=${ROOT_DIR}/${KERNEL_DIR}/android/gki_system_dlkm_modules MODULES_ORDER=android/gki_aarch64_modules if [ -n "${GKI_DEFCONFIG_FRAGMENT}" ]; then source ${GKI_DEFCONFIG_FRAGMENT} fi + +BUILD_GKI_CERTIFICATION_TOOLS=1 diff --git a/build.config.gki.x86_64 b/build.config.gki.x86_64 index 0e04fc692df29c8a02e9ee09bc92ee68a3598aac..739ae0b86e0d2cd6cfa32dd30288116fac8b99fb 100644 --- a/build.config.gki.x86_64 +++ b/build.config.gki.x86_64 @@ -2,3 +2,7 @@ . ${ROOT_DIR}/${KERNEL_DIR}/build.config.x86_64 . ${ROOT_DIR}/${KERNEL_DIR}/build.config.gki +BUILD_SYSTEM_DLKM=1 +MODULES_LIST=${ROOT_DIR}/${KERNEL_DIR}/android/gki_system_dlkm_modules + +BUILD_GKI_CERTIFICATION_TOOLS=1 diff --git a/build.config.x86_64 b/build.config.x86_64 index ae4d41a33a295a8510e4844cc40e56d3e4d7550a..db29dd3e371fb54199a03353adac2e16804685c9 100644 --- a/build.config.x86_64 +++ b/build.config.x86_64 @@ -1,9 +1,4 @@ ARCH=x86_64 - -LLVM_IAS=1 -CROSS_COMPILE=x86_64-linux-gnu- -LINUX_GCC_CROSS_COMPILE_PREBUILTS_BIN=prebuilts/gas/linux-x86 - MAKE_GOALS=" bzImage modules diff --git a/crypto/Kconfig b/crypto/Kconfig index d726b1c3a7f856a76659c7ab6301535c021acff4..a86d17ebe6fafe6b1dec5b1c3aed92967195927b 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -53,14 +53,14 @@ config CRYPTO_FIPS140_MOD meet FIPS 140 and NIAP FPT_TST_EXT.1 requirements. It shouldn't be used if you don't need to meet these requirements. -config CRYPTO_FIPS140_MOD_ERROR_INJECTION - bool "Support injecting failures into the FIPS 140 self-tests" +config CRYPTO_FIPS140_MOD_EVAL_TESTING + bool "Enable evaluation testing features in FIPS 140 module" depends on CRYPTO_FIPS140_MOD help - This option adds a module parameter "broken_alg" to the fips140 module - which can be used to fail the self-tests for a particular algorithm, - causing a kernel panic. This option is for FIPS lab testing only, and - it shouldn't be enabled on production systems. + This option adds some features to the FIPS 140 module which are needed + for lab evaluation testing of the module, e.g. support for injecting + errors and support for a userspace interface to some of the module's + services. This option should not be enabled in production builds. config CRYPTO_ALGAPI tristate @@ -268,12 +268,12 @@ config CRYPTO_DH config CRYPTO_ECC tristate + select CRYPTO_RNG_DEFAULT config CRYPTO_ECDH tristate "ECDH algorithm" select CRYPTO_ECC select CRYPTO_KPP - select CRYPTO_RNG_DEFAULT help Generic implementation of the ECDH algorithm diff --git a/crypto/Makefile b/crypto/Makefile index f353273de018156a7986ab3d3a0ef4b4970248fd..f8a3677d2eecf5817e66a37a23f2e2dc0e894804 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -200,7 +200,7 @@ obj-$(CONFIG_CRYPTO_SIMD) += crypto_simd.o ifneq ($(CONFIG_CRYPTO_FIPS140_MOD),) -FIPS140_CFLAGS := -D__DISABLE_EXPORTS -DBUILD_FIPS140_KO +FIPS140_CFLAGS := -D__DISABLE_EXPORTS -DBUILD_FIPS140_KO -include fips140-defs.h CFLAGS_jitterentropy-fips.o := -O0 KASAN_SANITIZE_jitterentropy-fips.o = n @@ -233,10 +233,20 @@ $(obj)/lib-crypto-%-fips.o: $(srctree)/lib/crypto/%.c FORCE $(obj)/crypto-fips.a: $(addprefix $(obj)/,$(crypto-fips-objs)) FORCE $(call if_changed,ar_and_symver) -fips140-objs := fips140-module.o fips140-selftests.o crypto-fips.a -obj-m += fips140.o - +fips140-objs := \ + fips140-alg-registration.o \ + fips140-module.o \ + fips140-refs.o \ + fips140-selftests.o \ + crypto-fips.a +fips140-$(CONFIG_CRYPTO_FIPS140_MOD_EVAL_TESTING) += \ + fips140-eval-testing.o +obj-m += fips140.o + +CFLAGS_fips140-alg-registration.o += $(FIPS140_CFLAGS) CFLAGS_fips140-module.o += $(FIPS140_CFLAGS) +CFLAGS_fips140-selftests.o += $(FIPS140_CFLAGS) +CFLAGS_fips140-eval-testing.o += $(FIPS140_CFLAGS) hostprogs-always-y := fips140_gen_hmac HOSTLDLIBS_fips140_gen_hmac := -lcrypto -lelf diff --git a/crypto/algapi.c b/crypto/algapi.c index fdabf2675b63fefed7e93ad994e35e7f7b3844d5..9de27daa98b47b7c8503c5615dd5f4c2fea9fb60 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -1295,3 +1295,4 @@ module_exit(crypto_algapi_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cryptographic algorithms API"); +MODULE_SOFTDEP("pre: cryptomgr"); diff --git a/crypto/api.c b/crypto/api.c index c4eda56cff8917e08c2de3b0ada13df291c785ab..5ffcd3ab4a75309edcd605122d9cf9b1c7b5b978 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -603,4 +603,3 @@ EXPORT_SYMBOL_GPL(crypto_req_done); MODULE_DESCRIPTION("Cryptographic core API"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: cryptomgr"); diff --git a/crypto/fips140-alg-registration.c b/crypto/fips140-alg-registration.c new file mode 100644 index 0000000000000000000000000000000000000000..03757f88890b3c5e15ea652198584660c87b4817 --- /dev/null +++ b/crypto/fips140-alg-registration.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Block crypto operations until tests complete + * + * Copyright 2021 Google LLC + * + * This file defines the fips140_crypto_register_*() functions, to which all + * calls to crypto_register_*() in the module are redirected. These functions + * override the tfm initialization function of each algorithm to insert a wait + * for the module having completed its self-tests and integrity check. + * + * The exact field that we override depends on the algorithm type. For + * algorithm types that have a strongly-typed initialization function pointer + * (e.g. skcipher), we must override that, since cra_init isn't guaranteed to be + * called for those despite the field being present in the base struct. For the + * other algorithm types (e.g. "cipher") we must override cra_init. + * + * All of this applies to both normal algorithms and template instances. + * + * The purpose of all of this is to meet a FIPS requirement where the module + * must not produce any output from cryptographic algorithms until it completes + * its tests. Technically this is impossible, but this solution meets the + * intent of the requirement, assuming the user makes a supported sequence of + * API calls. Note that we can't simply run the tests before registering the + * algorithms, as the algorithms must be registered in order to run the tests. + * + * It would be much easier to handle this in the kernel's crypto API framework. + * Unfortunately, that was deemed insufficient because the module itself is + * required to do the enforcement. What is *actually* required is still very + * vague, but the approach implemented here should meet the requirement. + */ + +/* + * This file is the one place in fips140.ko that needs to call the kernel's real + * algorithm registration functions, so #undefine all the macros from + * fips140-defs.h so that the "fips140_" prefix doesn't automatically get added. + */ +#undef aead_register_instance +#undef ahash_register_instance +#undef crypto_register_aead +#undef crypto_register_aeads +#undef crypto_register_ahash +#undef crypto_register_ahashes +#undef crypto_register_alg +#undef crypto_register_algs +#undef crypto_register_rng +#undef crypto_register_rngs +#undef crypto_register_shash +#undef crypto_register_shashes +#undef crypto_register_skcipher +#undef crypto_register_skciphers +#undef shash_register_instance +#undef skcipher_register_instance + +#include +#include +#include +#include +#include +#include + +#include "fips140-module.h" + +/* Indicates whether the self-tests and integrity check have completed */ +DECLARE_COMPLETION(fips140_tests_done); + +/* The thread running the self-tests and integrity check */ +struct task_struct *fips140_init_thread; + +/* + * Map from crypto_alg to original initialization function (possibly NULL) + * + * Note: unregistering an algorithm will leak its map entry, as we don't bother + * to remove it. This should be fine since fips140.ko can't be unloaded. The + * proper solution would be to store the original function pointer in a new + * field in 'struct crypto_alg', but that would require kernel support. + */ +static DEFINE_XARRAY(fips140_init_func_map); + +static bool fips140_ready(void) +{ + return completion_done(&fips140_tests_done); +} + +/* + * Wait until crypto operations are allowed to proceed. Return true if the + * tests are done, or false if the caller is the thread running the tests so it + * is allowed to proceed anyway. + */ +static bool fips140_wait_until_ready(struct crypto_alg *alg) +{ + if (fips140_ready()) + return true; + /* + * The thread running the tests must not wait. Since tfms can only be + * allocated in task context, we can reliably determine whether the + * invocation is from that thread or not by checking 'current'. + */ + if (current == fips140_init_thread) + return false; + + pr_info("blocking user of %s until tests complete\n", + alg->cra_driver_name); + wait_for_completion(&fips140_tests_done); + pr_info("tests done, allowing %s to proceed\n", alg->cra_driver_name); + return true; +} + +static int fips140_store_init_function(struct crypto_alg *alg, void *func) +{ + void *ret; + + /* + * The XArray API requires 4-byte aligned values. Although function + * pointers in general aren't guaranteed to be 4-byte aligned, it should + * be the case for the platforms this module is used on. + */ + if (WARN_ON((unsigned long)func & 3)) + return -EINVAL; + + ret = xa_store(&fips140_init_func_map, (unsigned long)alg, func, + GFP_KERNEL); + return xa_err(ret); +} + +/* Get the algorithm's original initialization function (possibly NULL) */ +static void *fips140_load_init_function(struct crypto_alg *alg) +{ + return xa_load(&fips140_init_func_map, (unsigned long)alg); +} + +/* tfm initialization function overrides */ + +static int fips140_alg_init_tfm(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + int (*cra_init)(struct crypto_tfm *tfm) = + fips140_load_init_function(alg); + + if (fips140_wait_until_ready(alg)) + WRITE_ONCE(alg->cra_init, cra_init); + return cra_init ? cra_init(tfm) : 0; +} + +static int fips140_aead_init_tfm(struct crypto_aead *tfm) +{ + struct aead_alg *alg = crypto_aead_alg(tfm); + int (*init)(struct crypto_aead *tfm) = + fips140_load_init_function(&alg->base); + + if (fips140_wait_until_ready(&alg->base)) + WRITE_ONCE(alg->init, init); + return init ? init(tfm) : 0; +} + +static int fips140_ahash_init_tfm(struct crypto_ahash *tfm) +{ + struct hash_alg_common *halg = crypto_hash_alg_common(tfm); + struct ahash_alg *alg = container_of(halg, struct ahash_alg, halg); + int (*init_tfm)(struct crypto_ahash *tfm) = + fips140_load_init_function(&halg->base); + + if (fips140_wait_until_ready(&halg->base)) + WRITE_ONCE(alg->init_tfm, init_tfm); + return init_tfm ? init_tfm(tfm) : 0; +} + +static int fips140_shash_init_tfm(struct crypto_shash *tfm) +{ + struct shash_alg *alg = crypto_shash_alg(tfm); + int (*init_tfm)(struct crypto_shash *tfm) = + fips140_load_init_function(&alg->base); + + if (fips140_wait_until_ready(&alg->base)) + WRITE_ONCE(alg->init_tfm, init_tfm); + return init_tfm ? init_tfm(tfm) : 0; +} + +static int fips140_skcipher_init_tfm(struct crypto_skcipher *tfm) +{ + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + int (*init)(struct crypto_skcipher *tfm) = + fips140_load_init_function(&alg->base); + + if (fips140_wait_until_ready(&alg->base)) + WRITE_ONCE(alg->init, init); + return init ? init(tfm) : 0; +} + +/* Single algorithm registration */ + +#define prepare_alg(alg, base_alg, field, wrapper_func) \ +({ \ + int err = 0; \ + \ + if (!fips140_ready() && alg->field != wrapper_func) { \ + err = fips140_store_init_function(base_alg, alg->field);\ + if (err == 0) \ + alg->field = wrapper_func; \ + } \ + err; \ +}) + +static int fips140_prepare_alg(struct crypto_alg *alg) +{ + /* + * Override cra_init. This is only for algorithm types like cipher and + * rng that don't have a strongly-typed initialization function. + */ + return prepare_alg(alg, alg, cra_init, fips140_alg_init_tfm); +} + +static int fips140_prepare_aead_alg(struct aead_alg *alg) +{ + return prepare_alg(alg, &alg->base, init, fips140_aead_init_tfm); +} + +static int fips140_prepare_ahash_alg(struct ahash_alg *alg) +{ + return prepare_alg(alg, &alg->halg.base, init_tfm, + fips140_ahash_init_tfm); +} + +static int fips140_prepare_rng_alg(struct rng_alg *alg) +{ + /* + * rng doesn't have a strongly-typed initialization function, so we must + * treat rng algorithms as "generic" algorithms. + */ + return fips140_prepare_alg(&alg->base); +} + +static int fips140_prepare_shash_alg(struct shash_alg *alg) +{ + return prepare_alg(alg, &alg->base, init_tfm, fips140_shash_init_tfm); +} + +static int fips140_prepare_skcipher_alg(struct skcipher_alg *alg) +{ + return prepare_alg(alg, &alg->base, init, fips140_skcipher_init_tfm); +} + +int fips140_crypto_register_alg(struct crypto_alg *alg) +{ + return fips140_prepare_alg(alg) ?: crypto_register_alg(alg); +} + +int fips140_crypto_register_aead(struct aead_alg *alg) +{ + return fips140_prepare_aead_alg(alg) ?: crypto_register_aead(alg); +} + +int fips140_crypto_register_ahash(struct ahash_alg *alg) +{ + return fips140_prepare_ahash_alg(alg) ?: crypto_register_ahash(alg); +} + +int fips140_crypto_register_rng(struct rng_alg *alg) +{ + return fips140_prepare_rng_alg(alg) ?: crypto_register_rng(alg); +} + +int fips140_crypto_register_shash(struct shash_alg *alg) +{ + return fips140_prepare_shash_alg(alg) ?: crypto_register_shash(alg); +} + +int fips140_crypto_register_skcipher(struct skcipher_alg *alg) +{ + return fips140_prepare_skcipher_alg(alg) ?: + crypto_register_skcipher(alg); +} + +/* Instance registration */ + +int fips140_aead_register_instance(struct crypto_template *tmpl, + struct aead_instance *inst) +{ + return fips140_prepare_aead_alg(&inst->alg) ?: + aead_register_instance(tmpl, inst); +} + +int fips140_ahash_register_instance(struct crypto_template *tmpl, + struct ahash_instance *inst) +{ + return fips140_prepare_ahash_alg(&inst->alg) ?: + ahash_register_instance(tmpl, inst); +} + +int fips140_shash_register_instance(struct crypto_template *tmpl, + struct shash_instance *inst) +{ + return fips140_prepare_shash_alg(&inst->alg) ?: + shash_register_instance(tmpl, inst); +} + +int fips140_skcipher_register_instance(struct crypto_template *tmpl, + struct skcipher_instance *inst) +{ + return fips140_prepare_skcipher_alg(&inst->alg) ?: + skcipher_register_instance(tmpl, inst); +} + +/* Bulk algorithm registration */ + +int fips140_crypto_register_algs(struct crypto_alg *algs, int count) +{ + int i; + int err; + + for (i = 0; i < count; i++) { + err = fips140_prepare_alg(&algs[i]); + if (err) + return err; + } + + return crypto_register_algs(algs, count); +} + +int fips140_crypto_register_aeads(struct aead_alg *algs, int count) +{ + int i; + int err; + + for (i = 0; i < count; i++) { + err = fips140_prepare_aead_alg(&algs[i]); + if (err) + return err; + } + + return crypto_register_aeads(algs, count); +} + +int fips140_crypto_register_ahashes(struct ahash_alg *algs, int count) +{ + int i; + int err; + + for (i = 0; i < count; i++) { + err = fips140_prepare_ahash_alg(&algs[i]); + if (err) + return err; + } + + return crypto_register_ahashes(algs, count); +} + +int fips140_crypto_register_rngs(struct rng_alg *algs, int count) +{ + int i; + int err; + + for (i = 0; i < count; i++) { + err = fips140_prepare_rng_alg(&algs[i]); + if (err) + return err; + } + + return crypto_register_rngs(algs, count); +} + +int fips140_crypto_register_shashes(struct shash_alg *algs, int count) +{ + int i; + int err; + + for (i = 0; i < count; i++) { + err = fips140_prepare_shash_alg(&algs[i]); + if (err) + return err; + } + + return crypto_register_shashes(algs, count); +} + +int fips140_crypto_register_skciphers(struct skcipher_alg *algs, int count) +{ + int i; + int err; + + for (i = 0; i < count; i++) { + err = fips140_prepare_skcipher_alg(&algs[i]); + if (err) + return err; + } + + return crypto_register_skciphers(algs, count); +} diff --git a/crypto/fips140-defs.h b/crypto/fips140-defs.h new file mode 100644 index 0000000000000000000000000000000000000000..e64a2f739aa91bc51962895259358020ea957ab0 --- /dev/null +++ b/crypto/fips140-defs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2021 Google LLC + * + * This file is automatically included by all files built into fips140.ko, via + * the "-include" compiler flag. It redirects all calls to algorithm + * registration functions to the wrapper functions defined within the module. + */ + +#define aead_register_instance fips140_aead_register_instance +#define ahash_register_instance fips140_ahash_register_instance +#define crypto_register_aead fips140_crypto_register_aead +#define crypto_register_aeads fips140_crypto_register_aeads +#define crypto_register_ahash fips140_crypto_register_ahash +#define crypto_register_ahashes fips140_crypto_register_ahashes +#define crypto_register_alg fips140_crypto_register_alg +#define crypto_register_algs fips140_crypto_register_algs +#define crypto_register_rng fips140_crypto_register_rng +#define crypto_register_rngs fips140_crypto_register_rngs +#define crypto_register_shash fips140_crypto_register_shash +#define crypto_register_shashes fips140_crypto_register_shashes +#define crypto_register_skcipher fips140_crypto_register_skcipher +#define crypto_register_skciphers fips140_crypto_register_skciphers +#define shash_register_instance fips140_shash_register_instance +#define skcipher_register_instance fips140_skcipher_register_instance diff --git a/crypto/fips140-eval-testing-uapi.h b/crypto/fips140-eval-testing-uapi.h new file mode 100644 index 0000000000000000000000000000000000000000..04e6cf633594a7505e1acfc4cab1b91ffab601e8 --- /dev/null +++ b/crypto/fips140-eval-testing-uapi.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _CRYPTO_FIPS140_EVAL_TESTING_H +#define _CRYPTO_FIPS140_EVAL_TESTING_H + +#include + +/* + * This header defines the ioctls that are available on the fips140 character + * device. These ioctls expose some of the module's services to userspace so + * that they can be tested by the FIPS certification lab; this is a required + * part of getting a FIPS 140 certification. These ioctls do not have any other + * purpose, and they do not need to be present in production builds. + */ + +/* + * Call the fips140_is_approved_service() function. The argument must be the + * service name as a NUL-terminated string. The return value will be 1 if + * fips140_is_approved_service() returned true, or 0 if it returned false. + */ +#define FIPS140_IOCTL_IS_APPROVED_SERVICE _IO('F', 0) + +/* + * Call the fips140_module_version() function. The argument must be a pointer + * to a buffer of size >= 256 chars. The NUL-terminated string returned by + * fips140_module_version() will be written to this buffer. + */ +#define FIPS140_IOCTL_MODULE_VERSION _IOR('F', 1, char[256]) + +#endif /* _CRYPTO_FIPS140_EVAL_TESTING_H */ diff --git a/crypto/fips140-eval-testing.c b/crypto/fips140-eval-testing.c new file mode 100644 index 0000000000000000000000000000000000000000..ea3cd653983a0d78223cef87811f6291f5f67497 --- /dev/null +++ b/crypto/fips140-eval-testing.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021 Google LLC + * + * This file can optionally be built into fips140.ko in order to support certain + * types of testing that the FIPS lab has to do to evaluate the module. It + * should not be included in production builds of the module. + */ + +/* + * We have to redefine inline to mean always_inline, so that _copy_to_user() + * gets inlined. This is needed for it to be placed into the correct section. + * See fips140_copy_to_user(). + * + * We also need to undefine BUILD_FIPS140_KO to allow the use of the code + * patching which copy_to_user() requires. + */ +#undef inline +#define inline inline __attribute__((__always_inline__)) __gnu_inline \ + __inline_maybe_unused notrace +#undef BUILD_FIPS140_KO + +#include +#include +#include +#include + +#include "fips140-module.h" +#include "fips140-eval-testing-uapi.h" + +/* + * This option allows deliberately failing the self-tests for a particular + * algorithm. + */ +static char *fips140_fail_selftest; +module_param_named(fail_selftest, fips140_fail_selftest, charp, 0); + +/* This option allows deliberately failing the integrity check. */ +static bool fips140_fail_integrity_check; +module_param_named(fail_integrity_check, fips140_fail_integrity_check, bool, 0); + +static dev_t fips140_devnum; +static struct cdev fips140_cdev; + +/* Inject a self-test failure (via corrupting the result) if requested. */ +void fips140_inject_selftest_failure(const char *impl, u8 *result) +{ + if (fips140_fail_selftest && strcmp(impl, fips140_fail_selftest) == 0) + result[0] ^= 0xff; +} + +/* Inject an integrity check failure (via corrupting the text) if requested. */ +void fips140_inject_integrity_failure(u8 *textcopy) +{ + if (fips140_fail_integrity_check) + textcopy[0] ^= 0xff; +} + +static long fips140_ioctl_is_approved_service(unsigned long arg) +{ + const char *service_name = strndup_user((const char __user *)arg, 256); + long ret; + + if (IS_ERR(service_name)) + return PTR_ERR(service_name); + + ret = fips140_is_approved_service(service_name); + + kfree(service_name); + return ret; +} + +/* + * Code in fips140.ko is covered by an integrity check by default, and this + * check breaks if copy_to_user() is called. This is because copy_to_user() is + * an inline function that relies on code patching. However, since this is + * "evaluation testing" code which isn't included in the production builds of + * fips140.ko, it's acceptable to just exclude it from the integrity check. + */ +static noinline unsigned long __section("text.._fips140_unchecked") +fips140_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return copy_to_user(to, from, n); +} + +static long fips140_ioctl_module_version(unsigned long arg) +{ + const char *version = fips140_module_version(); + size_t len = strlen(version) + 1; + + if (len > 256) + return -EOVERFLOW; + + if (fips140_copy_to_user((void __user *)arg, version, len)) + return -EFAULT; + + return 0; +} + +static long fips140_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + switch (cmd) { + case FIPS140_IOCTL_IS_APPROVED_SERVICE: + return fips140_ioctl_is_approved_service(arg); + case FIPS140_IOCTL_MODULE_VERSION: + return fips140_ioctl_module_version(arg); + default: + return -ENOTTY; + } +} + +static const struct file_operations fips140_fops = { + .unlocked_ioctl = fips140_ioctl, +}; + +bool fips140_eval_testing_init(void) +{ + if (alloc_chrdev_region(&fips140_devnum, 1, 1, "fips140") != 0) { + pr_err("failed to allocate device number\n"); + return false; + } + cdev_init(&fips140_cdev, &fips140_fops); + if (cdev_add(&fips140_cdev, fips140_devnum, 1) != 0) { + pr_err("failed to add fips140 character device\n"); + return false; + } + return true; +} diff --git a/crypto/fips140-module.c b/crypto/fips140-module.c index 0be7642773e285b3afd57543d5b48808ddfd9f5d..cd854fe28845b4ced97ac5ccdfc1148084985f13 100644 --- a/crypto/fips140-module.c +++ b/crypto/fips140-module.c @@ -14,6 +14,8 @@ * don't need to meet these requirements. */ +#undef __DISABLE_EXPORTS + #include #include #include @@ -27,15 +29,6 @@ #include "fips140-module.h" #include "internal.h" -/* - * This option allows deliberately failing the self-tests for a particular - * algorithm. This is for FIPS lab testing only. - */ -#ifdef CONFIG_CRYPTO_FIPS140_MOD_ERROR_INJECTION -char *fips140_broken_alg; -module_param_named(broken_alg, fips140_broken_alg, charp, 0); -#endif - /* * FIPS 140-2 prefers the use of HMAC with a public key over a plain hash. */ @@ -86,34 +79,37 @@ const u8 *__rodata_start = &__fips140_rodata_start; * When adding a new algorithm here, make sure to consider whether it needs a * self-test added to fips140_selftests[] as well. */ -static const char * const fips140_algorithms[] __initconst = { - "aes", - - "cmac(aes)", - "ecb(aes)", - - "cbc(aes)", - "cts(cbc(aes))", - "ctr(aes)", - "xts(aes)", - "gcm(aes)", - - "hmac(sha1)", - "hmac(sha224)", - "hmac(sha256)", - "hmac(sha384)", - "hmac(sha512)", - "sha1", - "sha224", - "sha256", - "sha384", - "sha512", - - "stdrng", - "jitterentropy_rng", +static const struct { + const char *name; + bool approved; +} fips140_algs_to_replace[] = { + {"aes", true}, + + {"cmac(aes)", true}, + {"ecb(aes)", true}, + + {"cbc(aes)", true}, + {"cts(cbc(aes))", true}, + {"ctr(aes)", true}, + {"xts(aes)", true}, + {"gcm(aes)", false}, + + {"hmac(sha1)", true}, + {"hmac(sha224)", true}, + {"hmac(sha256)", true}, + {"hmac(sha384)", true}, + {"hmac(sha512)", true}, + {"sha1", true}, + {"sha224", true}, + {"sha256", true}, + {"sha384", true}, + {"sha512", true}, + + {"stdrng", true}, + {"jitterentropy_rng", false}, }; -static bool __init is_fips140_algo(struct crypto_alg *alg) +static bool __init fips140_should_unregister_alg(struct crypto_alg *alg) { int i; @@ -124,13 +120,70 @@ static bool __init is_fips140_algo(struct crypto_alg *alg) if (alg->cra_flags & CRYPTO_ALG_ASYNC) return false; - for (i = 0; i < ARRAY_SIZE(fips140_algorithms); i++) - if (!strcmp(alg->cra_name, fips140_algorithms[i])) + for (i = 0; i < ARRAY_SIZE(fips140_algs_to_replace); i++) { + if (!strcmp(alg->cra_name, fips140_algs_to_replace[i].name)) return true; + } + return false; +} + +/* + * FIPS 140-3 service indicators. FIPS 140-3 requires that all services + * "provide an indicator when the service utilises an approved cryptographic + * algorithm, security function or process in an approved manner". What this + * means is very debatable, even with the help of the FIPS 140-3 Implementation + * Guidance document. However, it was decided that a function that takes in an + * algorithm name and returns whether that algorithm is approved or not will + * meet this requirement. Note, this relies on some properties of the module: + * + * - The module doesn't distinguish between "services" and "algorithms"; its + * services are simply its algorithms. + * + * - The status of an approved algorithm is never non-approved, since (a) the + * module doesn't support operating in a non-approved mode, such as a mode + * where the self-tests are skipped; (b) there are no cases where the module + * supports non-approved settings for approved algorithms, e.g. + * non-approved key sizes; and (c) this function isn't available to be + * called until the module_init function has completed, so it's guaranteed + * that the self-tests and integrity check have already passed. + * + * - The module does support some non-approved algorithms, so a single static + * indicator ("return true;") would not be acceptable. + */ +bool fips140_is_approved_service(const char *name) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(fips140_algs_to_replace); i++) { + if (!strcmp(name, fips140_algs_to_replace[i].name)) + return fips140_algs_to_replace[i].approved; + } return false; } +EXPORT_SYMBOL_GPL(fips140_is_approved_service); + +/* + * FIPS 140-3 requires that modules provide a "service" that outputs "the name + * or module identifier and the versioning information that can be correlated + * with a validation record". This function meets that requirement. + * + * Note: the module also prints this same information to the kernel log when it + * is loaded. That might meet the requirement by itself. However, given the + * vagueness of what counts as a "service", we provide this function too, just + * in case the certification lab or CMVP is happier with an explicit function. + * + * Note: /sys/modules/fips140/scmversion also provides versioning information + * about the module. However that file just shows the bare git commit ID, so it + * probably isn't sufficient to meet the FIPS requirement, which seems to want + * the "official" module name and version number used in the FIPS certificate. + */ +const char *fips140_module_version(void) +{ + return FIPS140_MODULE_NAME " " FIPS140_MODULE_VERSION; +} +EXPORT_SYMBOL_GPL(fips140_module_version); -static LIST_HEAD(unchecked_fips140_algos); +static LIST_HEAD(existing_live_algos); /* * Release a list of algorithms which have been removed from crypto_alg_list. @@ -173,38 +226,53 @@ static void __init unregister_existing_fips140_algos(void) down_write(&crypto_alg_sem); /* - * Find all registered algorithms that we care about, and move them to - * a private list so that they are no longer exposed via the algo - * lookup API. Subsequently, we will unregister them if they are not in - * active use. If they are, we cannot simply remove them but we can - * adapt them later to use our integrity checked backing code. + * Find all registered algorithms that we care about, and move them to a + * private list so that they are no longer exposed via the algo lookup + * API. Subsequently, we will unregister them if they are not in active + * use. If they are, we can't fully unregister them but we can ensure + * that new users won't use them. */ list_for_each_entry_safe(alg, tmp, &crypto_alg_list, cra_list) { - if (is_fips140_algo(alg)) { - if (refcount_read(&alg->cra_refcnt) == 1) { - /* - * This algorithm is not currently in use, but - * there may be template instances holding - * references to it via spawns. So let's tear - * it down like crypto_unregister_alg() would, - * but without releasing the lock, to prevent - * races with concurrent TFM allocations. - */ - alg->cra_flags |= CRYPTO_ALG_DEAD; - list_move(&alg->cra_list, &remove_list); - crypto_remove_spawns(alg, &spawns, NULL); - } else { - /* - * This algorithm is live, i.e., there are TFMs - * allocated that rely on it for its crypto - * transformations. We will swap these out - * later with integrity checked versions. - */ - pr_info("found already-live algorithm '%s' ('%s')\n", - alg->cra_name, alg->cra_driver_name); - list_move(&alg->cra_list, - &unchecked_fips140_algos); - } + if (!fips140_should_unregister_alg(alg)) + continue; + if (refcount_read(&alg->cra_refcnt) == 1) { + /* + * This algorithm is not currently in use, but there may + * be template instances holding references to it via + * spawns. So let's tear it down like + * crypto_unregister_alg() would, but without releasing + * the lock, to prevent races with concurrent TFM + * allocations. + */ + alg->cra_flags |= CRYPTO_ALG_DEAD; + list_move(&alg->cra_list, &remove_list); + crypto_remove_spawns(alg, &spawns, NULL); + } else { + /* + * This algorithm is live, i.e. it has TFMs allocated, + * so we can't fully unregister it. It's not necessary + * to dynamically redirect existing users to the FIPS + * code, given that they can't be relying on FIPS + * certified crypto in the first place. However, we do + * need to ensure that new users will get the FIPS code. + * + * In most cases, setting alg->cra_priority to 0 + * achieves this. However, that isn't enough for + * algorithms like "hmac(sha256)" that need to be + * instantiated from a template, since existing + * algorithms always take priority over a template being + * instantiated. Therefore, we move the algorithm to + * a private list so that algorithm lookups won't find + * it anymore. To further distinguish it from the FIPS + * algorithms, we also append "+orig" to its name. + */ + pr_info("found already-live algorithm '%s' ('%s')\n", + alg->cra_name, alg->cra_driver_name); + alg->cra_priority = 0; + strlcat(alg->cra_name, "+orig", CRYPTO_MAX_ALG_NAME); + strlcat(alg->cra_driver_name, "+orig", + CRYPTO_MAX_ALG_NAME); + list_move(&alg->cra_list, &existing_live_algos); } } up_write(&crypto_alg_sem); @@ -279,12 +347,19 @@ static void __init unapply_rodata_relocations(void *section, int section_size, } } +extern struct { + u32 offset; + u32 count; +} fips140_rela_text, fips140_rela_rodata; + static bool __init check_fips140_module_hmac(void) { + struct crypto_shash *tfm = NULL; SHASH_DESC_ON_STACK(desc, dontcare); u8 digest[SHA256_DIGEST_SIZE]; void *textcopy, *rodatacopy; int textsize, rodatasize; + bool ok = false; int err; textsize = &__fips140_text_end - &__fips140_text_start; @@ -296,7 +371,7 @@ static bool __init check_fips140_module_hmac(void) textcopy = kmalloc(textsize + rodatasize, GFP_KERNEL); if (!textcopy) { pr_err("Failed to allocate memory for copy of .text\n"); - return false; + goto out; } rodatacopy = textcopy + textsize; @@ -306,38 +381,38 @@ static bool __init check_fips140_module_hmac(void) // apply the relocations in reverse on the copies of .text and .rodata unapply_text_relocations(textcopy, textsize, - __this_module.arch.text_relocations, - __this_module.arch.num_text_relocations); + offset_to_ptr(&fips140_rela_text.offset), + fips140_rela_text.count); unapply_rodata_relocations(rodatacopy, rodatasize, - __this_module.arch.rodata_relocations, - __this_module.arch.num_rodata_relocations); + offset_to_ptr(&fips140_rela_rodata.offset), + fips140_rela_rodata.count); - kfree(__this_module.arch.text_relocations); - kfree(__this_module.arch.rodata_relocations); + fips140_inject_integrity_failure(textcopy); - desc->tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); - if (IS_ERR(desc->tfm)) { - pr_err("failed to allocate hmac tfm (%ld)\n", PTR_ERR(desc->tfm)); - kfree(textcopy); - return false; + tfm = crypto_alloc_shash("hmac(sha256)", 0, 0); + if (IS_ERR(tfm)) { + pr_err("failed to allocate hmac tfm (%ld)\n", PTR_ERR(tfm)); + tfm = NULL; + goto out; } + desc->tfm = tfm; pr_info("using '%s' for integrity check\n", - crypto_shash_driver_name(desc->tfm)); + crypto_shash_driver_name(tfm)); - err = crypto_shash_setkey(desc->tfm, fips140_integ_hmac_key, + err = crypto_shash_setkey(tfm, fips140_integ_hmac_key, strlen(fips140_integ_hmac_key)) ?: crypto_shash_init(desc) ?: crypto_shash_update(desc, textcopy, textsize) ?: crypto_shash_finup(desc, rodatacopy, rodatasize, digest); - crypto_free_shash(desc->tfm); - kfree(textcopy); + /* Zeroizing this is important; see the comment below. */ + shash_desc_zero(desc); if (err) { pr_err("failed to calculate hmac shash (%d)\n", err); - return false; + goto out; } if (memcmp(digest, fips140_integ_hmac_digest, sizeof(digest))) { @@ -346,171 +421,20 @@ static bool __init check_fips140_module_hmac(void) pr_err("calculated digest: %*phN\n", (int)sizeof(digest), digest); - - return false; + goto out; } - - return true; -} - -static bool __init update_live_fips140_algos(void) -{ - struct crypto_alg *alg, *new_alg, *tmp; - + ok = true; +out: /* - * Find all algorithms that we could not unregister the last time - * around, due to the fact that they were already in use. + * FIPS 140-3 requires that all "temporary value(s) generated during the + * integrity test" be zeroized (ref: FIPS 140-3 IG 9.7.B). There is no + * technical reason to do this given that these values are public + * information, but this is the requirement so we follow it. */ - down_write(&crypto_alg_sem); - list_for_each_entry_safe(alg, tmp, &unchecked_fips140_algos, cra_list) { - - /* - * Take this algo off the list before releasing the lock. This - * ensures that a concurrent invocation of - * crypto_unregister_alg() observes a consistent state, i.e., - * the algo is still on the list, and crypto_unregister_alg() - * will release it, or it is not, and crypto_unregister_alg() - * will issue a warning but ignore this condition otherwise. - */ - list_del_init(&alg->cra_list); - up_write(&crypto_alg_sem); - - /* - * Grab the algo that will replace the live one. - * Note that this will instantiate template based instances as - * well, as long as their driver name uses the conventional - * pattern of "template(algo)". In this case, we are relying on - * the fact that the templates carried by this module will - * supersede the builtin ones, due to the fact that they were - * registered later, and therefore appear first in the linked - * list. For example, "hmac(sha1-ce)" constructed using the - * builtin hmac template and the builtin SHA1 driver will be - * superseded by the integrity checked versions of HMAC and - * SHA1-ce carried in this module. - * - * Note that this takes a reference to the new algorithm which - * will never get released. This is intentional: once we copy - * the function pointers from the new algo into the old one, we - * cannot drop the new algo unless we are sure that the old one - * has been released, and this is someting we don't keep track - * of at the moment. - */ - new_alg = crypto_alg_mod_lookup(alg->cra_driver_name, - alg->cra_flags & CRYPTO_ALG_TYPE_MASK, - CRYPTO_ALG_TYPE_MASK | CRYPTO_NOLOAD); - - if (IS_ERR(new_alg)) { - pr_crit("Failed to allocate '%s' for updating live algo (%ld)\n", - alg->cra_driver_name, PTR_ERR(new_alg)); - return false; - } - - /* - * The FIPS module's algorithms are expected to be built from - * the same source code as the in-kernel ones so that they are - * fully compatible. In general, there's no way to verify full - * compatibility at runtime, but we can at least verify that - * the algorithm properties match. - */ - if (alg->cra_ctxsize != new_alg->cra_ctxsize || - alg->cra_alignmask != new_alg->cra_alignmask) { - pr_crit("Failed to update live algo '%s' due to mismatch:\n" - "cra_ctxsize : %u vs %u\n" - "cra_alignmask : 0x%x vs 0x%x\n", - alg->cra_driver_name, - alg->cra_ctxsize, new_alg->cra_ctxsize, - alg->cra_alignmask, new_alg->cra_alignmask); - return false; - } - - /* - * Update the name and priority so the algorithm stands out as - * one that was updated in order to comply with FIPS140, and - * that it is not the preferred version for further use. - */ - strlcat(alg->cra_name, "+orig", CRYPTO_MAX_ALG_NAME); - alg->cra_priority = 0; - - switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { - struct aead_alg *old_aead, *new_aead; - struct skcipher_alg *old_skcipher, *new_skcipher; - struct shash_alg *old_shash, *new_shash; - struct rng_alg *old_rng, *new_rng; - - case CRYPTO_ALG_TYPE_CIPHER: - alg->cra_u.cipher = new_alg->cra_u.cipher; - break; - - case CRYPTO_ALG_TYPE_AEAD: - old_aead = container_of(alg, struct aead_alg, base); - new_aead = container_of(new_alg, struct aead_alg, base); - - old_aead->setkey = new_aead->setkey; - old_aead->setauthsize = new_aead->setauthsize; - old_aead->encrypt = new_aead->encrypt; - old_aead->decrypt = new_aead->decrypt; - old_aead->init = new_aead->init; - old_aead->exit = new_aead->exit; - break; - - case CRYPTO_ALG_TYPE_SKCIPHER: - old_skcipher = container_of(alg, struct skcipher_alg, base); - new_skcipher = container_of(new_alg, struct skcipher_alg, base); - - old_skcipher->setkey = new_skcipher->setkey; - old_skcipher->encrypt = new_skcipher->encrypt; - old_skcipher->decrypt = new_skcipher->decrypt; - old_skcipher->init = new_skcipher->init; - old_skcipher->exit = new_skcipher->exit; - break; - - case CRYPTO_ALG_TYPE_SHASH: - old_shash = container_of(alg, struct shash_alg, base); - new_shash = container_of(new_alg, struct shash_alg, base); - - old_shash->init = new_shash->init; - old_shash->update = new_shash->update; - old_shash->final = new_shash->final; - old_shash->finup = new_shash->finup; - old_shash->digest = new_shash->digest; - old_shash->export = new_shash->export; - old_shash->import = new_shash->import; - old_shash->setkey = new_shash->setkey; - old_shash->init_tfm = new_shash->init_tfm; - old_shash->exit_tfm = new_shash->exit_tfm; - break; - - case CRYPTO_ALG_TYPE_RNG: - old_rng = container_of(alg, struct rng_alg, base); - new_rng = container_of(new_alg, struct rng_alg, base); - - old_rng->generate = new_rng->generate; - old_rng->seed = new_rng->seed; - old_rng->set_ent = new_rng->set_ent; - break; - default: - /* - * This should never happen: every item on the - * fips140_algorithms list should match one of the - * cases above, so if we end up here, something is - * definitely wrong. - */ - pr_crit("Unexpected type %u for algo %s, giving up ...\n", - alg->cra_flags & CRYPTO_ALG_TYPE_MASK, - alg->cra_driver_name); - return false; - } - - /* - * Move the algorithm back to the algorithm list, so it is - * visible in /proc/crypto et al. - */ - down_write(&crypto_alg_sem); - list_add_tail(&alg->cra_list, &crypto_alg_list); - } - up_write(&crypto_alg_sem); - - return true; + crypto_free_shash(tfm); + memzero_explicit(digest, sizeof(digest)); + kfree_sensitive(textcopy); + return ok; } static void fips140_sha256(void *p, const u8 *data, unsigned int len, u8 *out, @@ -568,7 +492,8 @@ fips140_init(void) { const u32 *initcall; - pr_info("loading module\n"); + pr_info("loading " FIPS140_MODULE_NAME " " FIPS140_MODULE_VERSION "\n"); + fips140_init_thread = current; unregister_existing_fips140_algos(); @@ -590,19 +515,6 @@ fips140_init(void) } } - if (!update_live_fips140_algos()) - goto panic; - - if (!update_fips140_library_routines()) - goto panic; - - /* - * Wait until all tasks have at least been scheduled once and preempted - * voluntarily. This ensures that none of the superseded algorithms that - * were already in use will still be live. - */ - synchronize_rcu_tasks(); - if (!fips140_run_selftests()) goto panic; @@ -621,6 +533,14 @@ fips140_init(void) } pr_info("integrity check passed\n"); + complete_all(&fips140_tests_done); + + if (!update_fips140_library_routines()) + goto panic; + + if (!fips140_eval_testing_init()) + goto panic; + pr_info("module successfully loaded\n"); return 0; diff --git a/crypto/fips140-module.h b/crypto/fips140-module.h index b30a2e2928a650319a6e80f99be58f0e55093820..a2a63194eb6481d7fdcf0b9eb7932d2ffe3b67b6 100644 --- a/crypto/fips140-module.h +++ b/crypto/fips140-module.h @@ -6,15 +6,45 @@ #ifndef _CRYPTO_FIPS140_MODULE_H #define _CRYPTO_FIPS140_MODULE_H +#include #include +#include #undef pr_fmt #define pr_fmt(fmt) "fips140: " fmt -#ifdef CONFIG_CRYPTO_FIPS140_MOD_ERROR_INJECTION -extern char *fips140_broken_alg; -#endif +/* + * This is the name and version number of the module that are shown on the FIPS + * certificate. + */ +#define FIPS140_MODULE_NAME "Android Kernel Cryptographic Module" +#define FIPS140_MODULE_VERSION UTS_RELEASE + +/* fips140-eval-testing.c */ +#ifdef CONFIG_CRYPTO_FIPS140_MOD_EVAL_TESTING +void fips140_inject_selftest_failure(const char *impl, u8 *result); +void fips140_inject_integrity_failure(u8 *textcopy); +bool fips140_eval_testing_init(void); +#else +static inline void fips140_inject_selftest_failure(const char *impl, u8 *result) +{ +} +static inline void fips140_inject_integrity_failure(u8 *textcopy) +{ +} +static inline bool fips140_eval_testing_init(void) +{ + return true; +} +#endif /* !CONFIG_CRYPTO_FIPS140_MOD_EVAL_TESTING */ + +/* fips140-module.c */ +extern struct completion fips140_tests_done; +extern struct task_struct *fips140_init_thread; +bool fips140_is_approved_service(const char *name); +const char *fips140_module_version(void); +/* fips140-selftests.c */ bool __init __must_check fips140_run_selftests(void); #endif /* _CRYPTO_FIPS140_MODULE_H */ diff --git a/crypto/fips140-refs.S b/crypto/fips140-refs.S new file mode 100644 index 0000000000000000000000000000000000000000..fcbd527763231ca161b8ab05e3bdaadd01a9ccfa --- /dev/null +++ b/crypto/fips140-refs.S @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2021 Google LLC + * Author: Ard Biesheuvel + * + * This file contains the variable definitions that will be used by the FIPS140 + * s/w module to access the RELA sections in the ELF image. These are used to + * apply the relocations applied by the module loader in reverse, so that we + * can reconstruct the image that was used to derive the HMAC used by the + * integrity check. + * + * The first .long of each entry will be populated by the module loader based + * on the actual placement of the respective RELA section in memory. The second + * .long carries the RELA entry count, and is populated by the host tool that + * also generates the HMAC of the contents of .text and .rodata. + */ + +#include +#include + + .section ".init.rodata", "a" + + .align 2 + .globl fips140_rela_text +fips140_rela_text: + .weak __sec_rela_text + .long __sec_rela_text - . + .long 0 + + .globl fips140_rela_rodata +fips140_rela_rodata: + .weak __sec_rela_rodata + .long __sec_rela_rodata - . + .long 0 diff --git a/crypto/fips140-selftests.c b/crypto/fips140-selftests.c index 0ab388a9f213ea5ea6e3d4694501bd5886fbbb05..56c503cacc414fdab406e73a73bcf2c47b513286 100644 --- a/crypto/fips140-selftests.c +++ b/crypto/fips140-selftests.c @@ -146,11 +146,7 @@ static int __init __must_check fips_check_result(u8 *result, const u8 *expected_result, size_t result_size, const char *impl, const char *operation) { -#ifdef CONFIG_CRYPTO_FIPS140_MOD_ERROR_INJECTION - /* Inject a failure (via corrupting the result) if requested. */ - if (fips140_broken_alg && strcmp(impl, fips140_broken_alg) == 0) - result[0] ^= 0xff; -#endif + fips140_inject_selftest_failure(impl, result); if (memcmp(result, expected_result, result_size) != 0) { pr_err("wrong result from %s %s\n", impl, operation); return -EBADMSG; diff --git a/crypto/fips140_gen_hmac.c b/crypto/fips140_gen_hmac.c index eebdc729ca994220939af9fb9804a97bde864659..69f754d38a1d68ecff2760568f5a36b054b046c4 100644 --- a/crypto/fips140_gen_hmac.c +++ b/crypto/fips140_gen_hmac.c @@ -28,7 +28,7 @@ static Elf64_Ehdr *ehdr; static Elf64_Shdr *shdr; static int num_shdr; -static const char *strtab; +static const char *strtab, *shstrtab; static Elf64_Sym *syms; static int num_syms; @@ -42,17 +42,78 @@ static Elf64_Shdr *find_symtab_section(void) return NULL; } -static void *get_sym_addr(const char *sym_name) +static int get_section_idx(const char *name) +{ + int i; + + for (i = 0; i < num_shdr; i++) + if (!strcmp(shstrtab + shdr[i].sh_name, name)) + return i; + return -1; +} + +static int get_sym_idx(const char *sym_name) { int i; for (i = 0; i < num_syms; i++) if (!strcmp(strtab + syms[i].st_name, sym_name)) - return (void *)ehdr + shdr[syms[i].st_shndx].sh_offset + - syms[i].st_value; + return i; + return -1; +} + +static void *get_sym_addr(const char *sym_name) +{ + int i = get_sym_idx(sym_name); + + if (i >= 0) + return (void *)ehdr + shdr[syms[i].st_shndx].sh_offset + + syms[i].st_value; return NULL; } +static int update_rela_ref(const char *name) +{ + /* + * We need to do a couple of things to ensure that the copied RELA data + * is accessible to the module itself at module init time: + * - the associated entry in the symbol table needs to refer to the + * correct section index, and have SECTION type and GLOBAL linkage. + * - the 'count' global variable in the module need to be set to the + * right value based on the size of the RELA section. + */ + unsigned int *size_var; + int sec_idx, sym_idx; + char str[32]; + + sprintf(str, "fips140_rela_%s", name); + size_var = get_sym_addr(str); + if (!size_var) { + printf("variable '%s' not found, disregarding .%s section\n", + str, name); + return 1; + } + + sprintf(str, "__sec_rela_%s", name); + sym_idx = get_sym_idx(str); + + sprintf(str, ".init.rela.%s", name); + sec_idx = get_section_idx(str); + + if (sec_idx < 0 || sym_idx < 0) { + fprintf(stderr, "failed to locate metadata for .%s section in binary\n", + name); + return 0; + } + + syms[sym_idx].st_shndx = sec_idx; + syms[sym_idx].st_info = (STB_GLOBAL << 4) | STT_SECTION; + + size_var[1] = shdr[sec_idx].sh_size / sizeof(Elf64_Rela); + + return 1; +} + static void hmac_section(HMAC_CTX *hmac, const char *start, const char *end) { void *start_addr = get_sym_addr(start); @@ -103,6 +164,10 @@ int main(int argc, char **argv) num_syms = symtab_shdr->sh_size / sizeof(Elf64_Sym); strtab = (void *)ehdr + shdr[symtab_shdr->sh_link].sh_offset; + shstrtab = (void *)ehdr + shdr[ehdr->e_shstrndx].sh_offset; + + if (!update_rela_ref("text") || !update_rela_ref("rodata")) + exit(EXIT_FAILURE); hmac_key = get_sym_addr("fips140_integ_hmac_key"); if (!hmac_key) { diff --git a/crypto/jitterentropy.c b/crypto/jitterentropy.c index 6e147c43fc186c2c540c6a23c8b995c1da413c2d..37c4c308339e4f225ce8b224c7058edd543f3bfe 100644 --- a/crypto/jitterentropy.c +++ b/crypto/jitterentropy.c @@ -265,7 +265,6 @@ static int jent_stuck(struct rand_data *ec, __u64 current_delta) { __u64 delta2 = jent_delta(ec->last_delta, current_delta); __u64 delta3 = jent_delta(ec->last_delta2, delta2); - unsigned int delta_masked = current_delta & JENT_APT_WORD_MASK; ec->last_delta = current_delta; ec->last_delta2 = delta2; @@ -274,7 +273,7 @@ static int jent_stuck(struct rand_data *ec, __u64 current_delta) * Insert the result of the comparison of two back-to-back time * deltas. */ - jent_apt_insert(ec, delta_masked); + jent_apt_insert(ec, current_delta); if (!current_delta || !delta2 || !delta3) { /* RCT with a stuck bit */ diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index d569c7ed6c8005db8c250d1e40eaa1c7b79e7bc3..9d10b846ccf730ffbcf1f6164b63c711641a2fcc 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -78,12 +78,14 @@ static void pcrypt_aead_enc(struct padata_priv *padata) { struct pcrypt_request *preq = pcrypt_padata_request(padata); struct aead_request *req = pcrypt_request_ctx(preq); + int ret; - padata->info = crypto_aead_encrypt(req); + ret = crypto_aead_encrypt(req); - if (padata->info == -EINPROGRESS) + if (ret == -EINPROGRESS) return; + padata->info = ret; padata_do_serial(padata); } @@ -123,12 +125,14 @@ static void pcrypt_aead_dec(struct padata_priv *padata) { struct pcrypt_request *preq = pcrypt_padata_request(padata); struct aead_request *req = pcrypt_request_ctx(preq); + int ret; - padata->info = crypto_aead_decrypt(req); + ret = crypto_aead_decrypt(req); - if (padata->info == -EINPROGRESS) + if (ret == -EINPROGRESS) return; + padata->info = ret; padata_do_serial(padata); } diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c index ab6d61e80b1cbac8d66951190a89d5f6c99ee7f7..d689ec5e276f12aa8fb8630bf7cdddcf9aad86a7 100644 --- a/drivers/accessibility/speakup/speakup_dectlk.c +++ b/drivers/accessibility/speakup/speakup_dectlk.c @@ -44,6 +44,7 @@ static struct var_t vars[] = { { CAPS_START, .u.s = {"[:dv ap 160] " } }, { CAPS_STOP, .u.s = {"[:dv ap 100 ] " } }, { RATE, .u.n = {"[:ra %d] ", 180, 75, 650, 0, 0, NULL } }, + { PITCH, .u.n = {"[:dv ap %d] ", 122, 50, 350, 0, 0, NULL } }, { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } }, { VOL, .u.n = {"[:dv g5 %d] ", 86, 60, 86, 0, 0, NULL } }, { PUNCT, .u.n = {"[:pu %c] ", 0, 0, 2, 0, 0, "nsa" } }, diff --git a/drivers/acpi/ac.c b/drivers/acpi/ac.c index 46a64e9fa7165bb5361ac0815987fefe78767842..23ca1a1c67b75c9b4775e838c5618c4a883108b6 100644 --- a/drivers/acpi/ac.c +++ b/drivers/acpi/ac.c @@ -64,6 +64,7 @@ static SIMPLE_DEV_PM_OPS(acpi_ac_pm, NULL, acpi_ac_resume); static int ac_sleep_before_get_state_ms; static int ac_check_pmic = 1; +static int ac_only; static struct acpi_driver acpi_ac_driver = { .name = "ac", @@ -99,6 +100,11 @@ static int acpi_ac_get_state(struct acpi_ac *ac) if (!ac) return -EINVAL; + if (ac_only) { + ac->state = 1; + return 0; + } + status = acpi_evaluate_integer(ac->device->handle, "_PSR", NULL, &ac->state); if (ACPI_FAILURE(status)) { @@ -212,6 +218,12 @@ static int __init ac_do_not_check_pmic_quirk(const struct dmi_system_id *d) return 0; } +static int __init ac_only_quirk(const struct dmi_system_id *d) +{ + ac_only = 1; + return 0; +} + /* Please keep this list alphabetically sorted */ static const struct dmi_system_id ac_dmi_table[] __initconst = { { @@ -221,6 +233,13 @@ static const struct dmi_system_id ac_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "EF20EA"), }, }, + { + /* Kodlix GK45 returning incorrect state */ + .callback = ac_only_quirk, + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "GK45"), + }, + }, { /* Lenovo Ideapad Miix 320, AXP288 PMIC, separate fuel-gauge */ .callback = ac_do_not_check_pmic_quirk, diff --git a/drivers/acpi/acpica/acglobal.h b/drivers/acpi/acpica/acglobal.h index 2fee91f57b213823b0d27fecef4916e4752e6500..bd84d7f95e5f99282aa326332427aaa708dd99e9 100644 --- a/drivers/acpi/acpica/acglobal.h +++ b/drivers/acpi/acpica/acglobal.h @@ -226,6 +226,8 @@ extern struct acpi_bit_register_info acpi_gbl_bit_register_info[ACPI_NUM_BITREG]; ACPI_GLOBAL(u8, acpi_gbl_sleep_type_a); ACPI_GLOBAL(u8, acpi_gbl_sleep_type_b); +ACPI_GLOBAL(u8, acpi_gbl_sleep_type_a_s0); +ACPI_GLOBAL(u8, acpi_gbl_sleep_type_b_s0); /***************************************************************************** * diff --git a/drivers/acpi/acpica/exfield.c b/drivers/acpi/acpica/exfield.c index 3323a2ba6a3138472403a0a4b700a77825dbe839..b3230e511870aad7ec54e49c0cad6cb901ca9aa1 100644 --- a/drivers/acpi/acpica/exfield.c +++ b/drivers/acpi/acpica/exfield.c @@ -326,12 +326,7 @@ acpi_ex_write_data_to_field(union acpi_operand_object *source_desc, obj_desc->field.base_byte_offset, source_desc->buffer.pointer, data_length); - if ((obj_desc->field.region_obj->region.address == - PCC_MASTER_SUBSPACE - && MASTER_SUBSPACE_COMMAND(obj_desc->field. - base_byte_offset)) - || GENERIC_SUBSPACE_COMMAND(obj_desc->field. - base_byte_offset)) { + if (MASTER_SUBSPACE_COMMAND(obj_desc->field.base_byte_offset)) { /* Perform the write */ diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c index a46d685a3ffcf0888571e89cbdde6e0263197adc..9d67dfd93d5b652af943cd8e3317c699413d1f07 100644 --- a/drivers/acpi/acpica/exoparg1.c +++ b/drivers/acpi/acpica/exoparg1.c @@ -1007,7 +1007,8 @@ acpi_status acpi_ex_opcode_1A_0T_1R(struct acpi_walk_state *walk_state) (walk_state, return_desc, &temp_desc); if (ACPI_FAILURE(status)) { - goto cleanup; + return_ACPI_STATUS + (status); } return_desc = temp_desc; diff --git a/drivers/acpi/acpica/hwesleep.c b/drivers/acpi/acpica/hwesleep.c index d9be5d0545d4c854caf0ff78f2e9eb46b853220e..142a755be68811445ebef9b90660c5ac8924c421 100644 --- a/drivers/acpi/acpica/hwesleep.c +++ b/drivers/acpi/acpica/hwesleep.c @@ -104,7 +104,9 @@ acpi_status acpi_hw_extended_sleep(u8 sleep_state) /* Flush caches, as per ACPI specification */ - ACPI_FLUSH_CPU_CACHE(); + if (sleep_state < ACPI_STATE_S4) { + ACPI_FLUSH_CPU_CACHE(); + } status = acpi_os_enter_sleep(sleep_state, sleep_control, 0); if (status == AE_CTRL_TERMINATE) { @@ -147,17 +149,13 @@ acpi_status acpi_hw_extended_sleep(u8 sleep_state) acpi_status acpi_hw_extended_wake_prep(u8 sleep_state) { - acpi_status status; u8 sleep_type_value; ACPI_FUNCTION_TRACE(hw_extended_wake_prep); - status = acpi_get_sleep_type_data(ACPI_STATE_S0, - &acpi_gbl_sleep_type_a, - &acpi_gbl_sleep_type_b); - if (ACPI_SUCCESS(status)) { + if (acpi_gbl_sleep_type_a_s0 != ACPI_SLEEP_TYPE_INVALID) { sleep_type_value = - ((acpi_gbl_sleep_type_a << ACPI_X_SLEEP_TYPE_POSITION) & + ((acpi_gbl_sleep_type_a_s0 << ACPI_X_SLEEP_TYPE_POSITION) & ACPI_X_SLEEP_TYPE_MASK); (void)acpi_write((u64)(sleep_type_value | ACPI_X_SLEEP_ENABLE), diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c index 317ae870336b7edfffacdea56c6a029d82714dbc..6a20bb5059c1d815a33ab83149dca348065e5a44 100644 --- a/drivers/acpi/acpica/hwsleep.c +++ b/drivers/acpi/acpica/hwsleep.c @@ -110,7 +110,9 @@ acpi_status acpi_hw_legacy_sleep(u8 sleep_state) /* Flush caches, as per ACPI specification */ - ACPI_FLUSH_CPU_CACHE(); + if (sleep_state < ACPI_STATE_S4) { + ACPI_FLUSH_CPU_CACHE(); + } status = acpi_os_enter_sleep(sleep_state, pm1a_control, pm1b_control); if (status == AE_CTRL_TERMINATE) { @@ -179,7 +181,7 @@ acpi_status acpi_hw_legacy_sleep(u8 sleep_state) acpi_status acpi_hw_legacy_wake_prep(u8 sleep_state) { - acpi_status status; + acpi_status status = AE_OK; struct acpi_bit_register_info *sleep_type_reg_info; struct acpi_bit_register_info *sleep_enable_reg_info; u32 pm1a_control; @@ -192,10 +194,7 @@ acpi_status acpi_hw_legacy_wake_prep(u8 sleep_state) * This is unclear from the ACPI Spec, but it is required * by some machines. */ - status = acpi_get_sleep_type_data(ACPI_STATE_S0, - &acpi_gbl_sleep_type_a, - &acpi_gbl_sleep_type_b); - if (ACPI_SUCCESS(status)) { + if (acpi_gbl_sleep_type_a_s0 != ACPI_SLEEP_TYPE_INVALID) { sleep_type_reg_info = acpi_hw_get_bit_register_info(ACPI_BITREG_SLEEP_TYPE); sleep_enable_reg_info = @@ -216,9 +215,9 @@ acpi_status acpi_hw_legacy_wake_prep(u8 sleep_state) /* Insert the SLP_TYP bits */ - pm1a_control |= (acpi_gbl_sleep_type_a << + pm1a_control |= (acpi_gbl_sleep_type_a_s0 << sleep_type_reg_info->bit_position); - pm1b_control |= (acpi_gbl_sleep_type_b << + pm1b_control |= (acpi_gbl_sleep_type_b_s0 << sleep_type_reg_info->bit_position); /* Write the control registers and ignore any errors */ diff --git a/drivers/acpi/acpica/hwxfsleep.c b/drivers/acpi/acpica/hwxfsleep.c index a4b66f4b2714123f5b30fac0c94a490321125b18..3948c34d85830e0e671ff4fd2008431d21ffe5e0 100644 --- a/drivers/acpi/acpica/hwxfsleep.c +++ b/drivers/acpi/acpica/hwxfsleep.c @@ -162,8 +162,6 @@ acpi_status acpi_enter_sleep_state_s4bios(void) return_ACPI_STATUS(status); } - ACPI_FLUSH_CPU_CACHE(); - status = acpi_hw_write_port(acpi_gbl_FADT.smi_command, (u32)acpi_gbl_FADT.s4_bios_request, 8); if (ACPI_FAILURE(status)) { @@ -217,6 +215,13 @@ acpi_status acpi_enter_sleep_state_prep(u8 sleep_state) return_ACPI_STATUS(status); } + status = acpi_get_sleep_type_data(ACPI_STATE_S0, + &acpi_gbl_sleep_type_a_s0, + &acpi_gbl_sleep_type_b_s0); + if (ACPI_FAILURE(status)) { + acpi_gbl_sleep_type_a_s0 = ACPI_SLEEP_TYPE_INVALID; + } + /* Execute the _PTS method (Prepare To Sleep) */ arg_list.count = 1; diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c index 72d2c0b656339c1f06473b252765713c2053039f..cb1750e7a6281ae9a91bed0d586dffc6b605db6d 100644 --- a/drivers/acpi/acpica/utdelete.c +++ b/drivers/acpi/acpica/utdelete.c @@ -422,6 +422,7 @@ acpi_ut_update_ref_count(union acpi_operand_object *object, u32 action) ACPI_WARNING((AE_INFO, "Obj %p, Reference Count is already zero, cannot decrement\n", object)); + return; } ACPI_DEBUG_PRINT_RAW((ACPI_DB_ALLOCATIONS, diff --git a/drivers/acpi/arm64/gtdt.c b/drivers/acpi/arm64/gtdt.c index 0a0a982f9c28d44e5cf48b170662c0df40852272..c0e77c1c8e09d6fa15094b2fff28d1c7bef0aed4 100644 --- a/drivers/acpi/arm64/gtdt.c +++ b/drivers/acpi/arm64/gtdt.c @@ -36,7 +36,7 @@ struct acpi_gtdt_descriptor { static struct acpi_gtdt_descriptor acpi_gtdt_desc __initdata; -static inline void *next_platform_timer(void *platform_timer) +static inline __init void *next_platform_timer(void *platform_timer) { struct acpi_gtdt_header *gh = platform_timer; diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 2494138a6905ed105009894a8eb49ed2ee4ac4e6..50ed949dc14494538084767a5050d296e0e0463e 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1454,9 +1454,17 @@ static void __init arm_smmu_v3_pmcg_init_resources(struct resource *res, res[0].start = pmcg->page0_base_address; res[0].end = pmcg->page0_base_address + SZ_4K - 1; res[0].flags = IORESOURCE_MEM; - res[1].start = pmcg->page1_base_address; - res[1].end = pmcg->page1_base_address + SZ_4K - 1; - res[1].flags = IORESOURCE_MEM; + /* + * The initial version in DEN0049C lacked a way to describe register + * page 1, which makes it broken for most PMCG implementations; in + * that case, just let the driver fail gracefully if it expects to + * find a second memory resource. + */ + if (node->revision > 0) { + res[1].start = pmcg->page1_base_address; + res[1].end = pmcg->page1_base_address + SZ_4K - 1; + res[1].flags = IORESOURCE_MEM; + } if (pmcg->overflow_gsiv) acpi_iort_register_irq(pmcg->overflow_gsiv, "overflow", diff --git a/drivers/acpi/battery.c b/drivers/acpi/battery.c index 08ee1c7b12e00004b05f01c5cd9a7fbf64cfb1f1..2376f57b3617aa2f4ff47d4100a85d8ba792effa 100644 --- a/drivers/acpi/battery.c +++ b/drivers/acpi/battery.c @@ -59,6 +59,7 @@ static int battery_bix_broken_package; static int battery_notification_delay_ms; static int battery_ac_is_broken; static int battery_check_pmic = 1; +static int battery_quirk_notcharging; static unsigned int cache_time = 1000; module_param(cache_time, uint, 0644); MODULE_PARM_DESC(cache_time, "cache time in milliseconds"); @@ -174,7 +175,7 @@ static int acpi_battery_is_charged(struct acpi_battery *battery) return 1; /* fallback to using design values for broken batteries */ - if (battery->design_capacity == battery->capacity_now) + if (battery->design_capacity <= battery->capacity_now) return 1; /* we don't do any sort of metric based on percentages */ @@ -222,6 +223,8 @@ static int acpi_battery_get_property(struct power_supply *psy, val->intval = POWER_SUPPLY_STATUS_CHARGING; else if (acpi_battery_is_charged(battery)) val->intval = POWER_SUPPLY_STATUS_FULL; + else if (battery_quirk_notcharging) + val->intval = POWER_SUPPLY_STATUS_NOT_CHARGING; else val->intval = POWER_SUPPLY_STATUS_UNKNOWN; break; @@ -1105,6 +1108,12 @@ battery_do_not_check_pmic_quirk(const struct dmi_system_id *d) return 0; } +static int __init battery_quirk_not_charging(const struct dmi_system_id *d) +{ + battery_quirk_notcharging = 1; + return 0; +} + static const struct dmi_system_id bat_dmi_table[] __initconst = { { /* NEC LZ750/LS */ @@ -1149,6 +1158,19 @@ static const struct dmi_system_id bat_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"), }, }, + { + /* + * On Lenovo ThinkPads the BIOS specification defines + * a state when the bits for charging and discharging + * are both set to 0. That state is "Not Charging". + */ + .callback = battery_quirk_not_charging, + .ident = "Lenovo ThinkPad", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad"), + }, + }, {}, }; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index e317214aabec5570fcc1b5bbe1a89211fabe0198..5e14288fcabe904e493e7b68af3968cccddc5f80 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -98,8 +98,8 @@ int acpi_bus_get_status(struct acpi_device *device) acpi_status status; unsigned long long sta; - if (acpi_device_always_present(device)) { - acpi_set_device_status(device, ACPI_STA_DEFAULT); + if (acpi_device_override_status(device, &sta)) { + acpi_set_device_status(device, sta); return 0; } diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index be3e0921a6c006f820d2cfa1fea47e995e3c8fa7..3f2e5ea9ab6b7a62b99c225c68cf1b9ae9458b79 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -166,6 +166,7 @@ struct acpi_ec_query { struct transaction transaction; struct work_struct work; struct acpi_ec_query_handler *handler; + struct acpi_ec *ec; }; static int acpi_ec_query(struct acpi_ec *ec, u8 *data); @@ -469,6 +470,7 @@ static void acpi_ec_submit_query(struct acpi_ec *ec) ec_dbg_evt("Command(%s) submitted/blocked", acpi_ec_cmd_string(ACPI_EC_COMMAND_QUERY)); ec->nr_pending_queries++; + ec->events_in_progress++; queue_work(ec_wq, &ec->work); } } @@ -535,7 +537,7 @@ static void acpi_ec_enable_event(struct acpi_ec *ec) #ifdef CONFIG_PM_SLEEP static void __acpi_ec_flush_work(void) { - drain_workqueue(ec_wq); /* flush ec->work */ + flush_workqueue(ec_wq); /* flush ec->work */ flush_workqueue(ec_query_wq); /* flush queries */ } @@ -1116,7 +1118,7 @@ void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit) } EXPORT_SYMBOL_GPL(acpi_ec_remove_query_handler); -static struct acpi_ec_query *acpi_ec_create_query(u8 *pval) +static struct acpi_ec_query *acpi_ec_create_query(struct acpi_ec *ec, u8 *pval) { struct acpi_ec_query *q; struct transaction *t; @@ -1124,11 +1126,13 @@ static struct acpi_ec_query *acpi_ec_create_query(u8 *pval) q = kzalloc(sizeof (struct acpi_ec_query), GFP_KERNEL); if (!q) return NULL; + INIT_WORK(&q->work, acpi_ec_event_processor); t = &q->transaction; t->command = ACPI_EC_COMMAND_QUERY; t->rdata = pval; t->rlen = 1; + q->ec = ec; return q; } @@ -1145,13 +1149,21 @@ static void acpi_ec_event_processor(struct work_struct *work) { struct acpi_ec_query *q = container_of(work, struct acpi_ec_query, work); struct acpi_ec_query_handler *handler = q->handler; + struct acpi_ec *ec = q->ec; ec_dbg_evt("Query(0x%02x) started", handler->query_bit); + if (handler->func) handler->func(handler->data); else if (handler->handle) acpi_evaluate_object(handler->handle, NULL, NULL, NULL); + ec_dbg_evt("Query(0x%02x) stopped", handler->query_bit); + + spin_lock_irq(&ec->lock); + ec->queries_in_progress--; + spin_unlock_irq(&ec->lock); + acpi_ec_delete_query(q); } @@ -1161,7 +1173,7 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data) int result; struct acpi_ec_query *q; - q = acpi_ec_create_query(&value); + q = acpi_ec_create_query(ec, &value); if (!q) return -ENOMEM; @@ -1183,19 +1195,20 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data) } /* - * It is reported that _Qxx are evaluated in a parallel way on - * Windows: + * It is reported that _Qxx are evaluated in a parallel way on Windows: * https://bugzilla.kernel.org/show_bug.cgi?id=94411 * - * Put this log entry before schedule_work() in order to make - * it appearing before any other log entries occurred during the - * work queue execution. + * Put this log entry before queue_work() to make it appear in the log + * before any other messages emitted during workqueue handling. */ ec_dbg_evt("Query(0x%02x) scheduled", value); - if (!queue_work(ec_query_wq, &q->work)) { - ec_dbg_evt("Query(0x%02x) overlapped", value); - result = -EBUSY; - } + + spin_lock_irq(&ec->lock); + + ec->queries_in_progress++; + queue_work(ec_query_wq, &q->work); + + spin_unlock_irq(&ec->lock); err_exit: if (result) @@ -1253,6 +1266,10 @@ static void acpi_ec_event_handler(struct work_struct *work) ec_dbg_evt("Event stopped"); acpi_ec_check_event(ec); + + spin_lock_irqsave(&ec->lock, flags); + ec->events_in_progress--; + spin_unlock_irqrestore(&ec->lock, flags); } static void acpi_ec_handle_interrupt(struct acpi_ec *ec) @@ -2034,6 +2051,7 @@ void acpi_ec_set_gpe_wake_mask(u8 action) bool acpi_ec_dispatch_gpe(void) { + bool work_in_progress; u32 ret; if (!first_ec) @@ -2054,8 +2072,19 @@ bool acpi_ec_dispatch_gpe(void) if (ret == ACPI_INTERRUPT_HANDLED) pm_pr_dbg("ACPI EC GPE dispatched\n"); - /* Flush the event and query workqueues. */ - acpi_ec_flush_work(); + /* Drain EC work. */ + do { + acpi_ec_flush_work(); + + pm_pr_dbg("ACPI EC work flushed\n"); + + spin_lock_irq(&first_ec->lock); + + work_in_progress = first_ec->events_in_progress + + first_ec->queries_in_progress > 0; + + spin_unlock_irq(&first_ec->lock); + } while (work_in_progress && !pm_wakeup_pending()); return false; } diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index a958ad60a3394d4398ed774c6b42187329ec13b7..125e4901c9b47834238778e61a7315108ab976a9 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -184,6 +184,8 @@ struct acpi_ec { struct work_struct work; unsigned long timestamp; unsigned long nr_pending_queries; + unsigned int events_in_progress; + unsigned int queries_in_progress; bool busy_polling; unsigned int polling_guard; }; diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index d061bff5cc96c838711a9c0527b238a88134dd9f..99e23a5df0267447f9b4eed07eacdfb724f8b04a 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -3018,6 +3018,18 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, ndr_desc->target_node = NUMA_NO_NODE; } + /* Fallback to address based numa information if node lookup failed */ + if (ndr_desc->numa_node == NUMA_NO_NODE) { + ndr_desc->numa_node = memory_add_physaddr_to_nid(spa->address); + dev_info(acpi_desc->dev, "changing numa node from %d to %d for nfit region [%pa-%pa]", + NUMA_NO_NODE, ndr_desc->numa_node, &res.start, &res.end); + } + if (ndr_desc->target_node == NUMA_NO_NODE) { + ndr_desc->target_node = phys_to_target_node(spa->address); + dev_info(acpi_desc->dev, "changing target node from %d to %d for nfit region [%pa-%pa]", + NUMA_NO_NODE, ndr_desc->numa_node, &res.start, &res.end); + } + /* * Persistence domain bits are hierarchical, if * ACPI_NFIT_CAPABILITY_CACHE_FLUSH is set then diff --git a/drivers/acpi/pmic/intel_pmic.c b/drivers/acpi/pmic/intel_pmic.c index a371f273f99dd59de3a4d13eaa3e9d888def59bc..9cde299eba88096cc3871b3e5ec9a8d7ab3bdb45 100644 --- a/drivers/acpi/pmic/intel_pmic.c +++ b/drivers/acpi/pmic/intel_pmic.c @@ -211,31 +211,36 @@ static acpi_status intel_pmic_regs_handler(u32 function, void *handler_context, void *region_context) { struct intel_pmic_opregion *opregion = region_context; - int result = 0; + int result = -EINVAL; + + if (function == ACPI_WRITE) { + switch (address) { + case 0: + return AE_OK; + case 1: + opregion->ctx.addr |= (*value64 & 0xff) << 8; + return AE_OK; + case 2: + opregion->ctx.addr |= *value64 & 0xff; + return AE_OK; + case 3: + opregion->ctx.val = *value64 & 0xff; + return AE_OK; + case 4: + if (*value64) { + result = regmap_write(opregion->regmap, opregion->ctx.addr, + opregion->ctx.val); + } else { + result = regmap_read(opregion->regmap, opregion->ctx.addr, + &opregion->ctx.val); + } + opregion->ctx.addr = 0; + } + } - switch (address) { - case 0: - return AE_OK; - case 1: - opregion->ctx.addr |= (*value64 & 0xff) << 8; - return AE_OK; - case 2: - opregion->ctx.addr |= *value64 & 0xff; + if (function == ACPI_READ && address == 3) { + *value64 = opregion->ctx.val; return AE_OK; - case 3: - opregion->ctx.val = *value64 & 0xff; - return AE_OK; - case 4: - if (*value64) { - result = regmap_write(opregion->regmap, opregion->ctx.addr, - opregion->ctx.val); - } else { - result = regmap_read(opregion->regmap, opregion->ctx.addr, - &opregion->ctx.val); - if (result == 0) - *value64 = opregion->ctx.val; - } - memset(&opregion->ctx, 0x00, sizeof(opregion->ctx)); } if (result < 0) { diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 16b28084c1ca67513b6f82343fa751633013b7c3..592e1ee9b338dae4c94bf28fe21728222e1bf51f 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -1110,15 +1110,10 @@ struct fwnode_handle *acpi_node_get_parent(const struct fwnode_handle *fwnode) /* All data nodes have parent pointer so just return that */ return to_acpi_data_node(fwnode)->parent; } else if (is_acpi_device_node(fwnode)) { - acpi_handle handle, parent_handle; + struct device *dev = to_acpi_device_node(fwnode)->dev.parent; - handle = to_acpi_device_node(fwnode)->handle; - if (ACPI_SUCCESS(acpi_get_parent(handle, &parent_handle))) { - struct acpi_device *adev; - - if (!acpi_bus_get_device(parent_handle, &adev)) - return acpi_fwnode_handle(adev); - } + if (dev) + return acpi_fwnode_handle(to_acpi_device(dev)); } return NULL; diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 0040dd5c76069c4a2b72ca379691cd78b3b37a59..1bf7a82478fb3dd810b2f43bef5b6954addc9819 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1577,6 +1577,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) { struct list_head resource_list; bool is_serial_bus_slave = false; + static const struct acpi_device_id ignore_serial_bus_ids[] = { /* * These devices have multiple I2cSerialBus resources and an i2c-client * must be instantiated for each, each with its own i2c_device_id. @@ -1585,11 +1586,18 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) * drivers/platform/x86/i2c-multi-instantiate.c driver, which knows * which i2c_device_id to use for each resource. */ - static const struct acpi_device_id i2c_multi_instantiate_ids[] = { {"BSG1160", }, {"BSG2150", }, {"INT33FE", }, {"INT3515", }, + /* + * HIDs of device with an UartSerialBusV2 resource for which userspace + * expects a regular tty cdev to be created (instead of the in kernel + * serdev) and which have a kernel driver which expects a platform_dev + * such as the rfkill-gpio driver. + */ + {"BCM4752", }, + {"LNV4752", }, {} }; @@ -1603,8 +1611,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) fwnode_property_present(&device->fwnode, "baud"))) return true; - /* Instantiate a pdev for the i2c-multi-instantiate drv to bind to */ - if (!acpi_match_device_ids(device, i2c_multi_instantiate_ids)) + if (!acpi_match_device_ids(device, ignore_serial_bus_ids)) return false; INIT_LIST_HEAD(&resource_list); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 31c9d0c8ae11f67092e9fef9ed9a953cee0a6e28..503935b1deeb1efba7ec001123b6db402d446425 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -1040,6 +1040,7 @@ static bool acpi_s2idle_wake(void) return true; } + pm_wakeup_clear(acpi_sci_irq); rearm_wake_irq(acpi_sci_irq); } diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index bdc1ba00aee9f7e54f8b3c20e04577135cdbb430..3f9a162be84e373648ac0b01ff5b54a21feeeb3e 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -22,58 +22,71 @@ * Some BIOS-es (temporarily) hide specific APCI devices to work around Windows * driver bugs. We use DMI matching to match known cases of this. * - * We work around this by always reporting ACPI_STA_DEFAULT for these - * devices. Note this MUST only be done for devices where this is safe. + * Likewise sometimes some not-actually present devices are sometimes + * reported as present, which may cause issues. * - * This forcing of devices to be present is limited to specific CPU (SoC) - * models both to avoid potentially causing trouble on other models and - * because some HIDs are re-used on different SoCs for completely - * different devices. + * We work around this by using the below quirk list to override the status + * reported by the _STA method with a fixed value (ACPI_STA_DEFAULT or 0). + * Note this MUST only be done for devices where this is safe. + * + * This status overriding is limited to specific CPU (SoC) models both to + * avoid potentially causing trouble on other models and because some HIDs + * are re-used on different SoCs for completely different devices. */ -struct always_present_id { +struct override_status_id { struct acpi_device_id hid[2]; struct x86_cpu_id cpu_ids[2]; struct dmi_system_id dmi_ids[2]; /* Optional */ const char *uid; + const char *path; + unsigned long long status; }; -#define X86_MATCH(model) X86_MATCH_INTEL_FAM6_MODEL(model, NULL) - -#define ENTRY(hid, uid, cpu_models, dmi...) { \ +#define ENTRY(status, hid, uid, path, cpu_model, dmi...) { \ { { hid, }, {} }, \ - { cpu_models, {} }, \ + { X86_MATCH_INTEL_FAM6_MODEL(cpu_model, NULL), {} }, \ { { .matches = dmi }, {} }, \ uid, \ + path, \ + status, \ } -static const struct always_present_id always_present_ids[] = { +#define PRESENT_ENTRY_HID(hid, uid, cpu_model, dmi...) \ + ENTRY(ACPI_STA_DEFAULT, hid, uid, NULL, cpu_model, dmi) + +#define NOT_PRESENT_ENTRY_HID(hid, uid, cpu_model, dmi...) \ + ENTRY(0, hid, uid, NULL, cpu_model, dmi) + +#define PRESENT_ENTRY_PATH(path, cpu_model, dmi...) \ + ENTRY(ACPI_STA_DEFAULT, "", NULL, path, cpu_model, dmi) + +#define NOT_PRESENT_ENTRY_PATH(path, cpu_model, dmi...) \ + ENTRY(0, "", NULL, path, cpu_model, dmi) + +static const struct override_status_id override_status_ids[] = { /* * Bay / Cherry Trail PWM directly poked by GPU driver in win10, * but Linux uses a separate PWM driver, harmless if not used. */ - ENTRY("80860F09", "1", X86_MATCH(ATOM_SILVERMONT), {}), - ENTRY("80862288", "1", X86_MATCH(ATOM_AIRMONT), {}), + PRESENT_ENTRY_HID("80860F09", "1", ATOM_SILVERMONT, {}), + PRESENT_ENTRY_HID("80862288", "1", ATOM_AIRMONT, {}), - /* Lenovo Yoga Book uses PWM2 for keyboard backlight control */ - ENTRY("80862289", "2", X86_MATCH(ATOM_AIRMONT), { - DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"), - }), /* * The INT0002 device is necessary to clear wakeup interrupt sources * on Cherry Trail devices, without it we get nobody cared IRQ msgs. */ - ENTRY("INT0002", "1", X86_MATCH(ATOM_AIRMONT), {}), + PRESENT_ENTRY_HID("INT0002", "1", ATOM_AIRMONT, {}), /* * On the Dell Venue 11 Pro 7130 and 7139, the DSDT hides * the touchscreen ACPI device until a certain time * after _SB.PCI0.GFX0.LCD.LCD1._ON gets called has passed * *and* _STA has been called at least 3 times since. */ - ENTRY("SYNA7500", "1", X86_MATCH(HASWELL_L), { + PRESENT_ENTRY_HID("SYNA7500", "1", HASWELL_L, { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"), }), - ENTRY("SYNA7500", "1", X86_MATCH(HASWELL_L), { + PRESENT_ENTRY_HID("SYNA7500", "1", HASWELL_L, { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7139"), }), @@ -81,54 +94,83 @@ static const struct always_present_id always_present_ids[] = { /* * The GPD win BIOS dated 20170221 has disabled the accelerometer, the * drivers sometimes cause crashes under Windows and this is how the - * manufacturer has solved this :| Note that the the DMI data is less - * generic then it seems, a board_vendor of "AMI Corporation" is quite - * rare and a board_name of "Default String" also is rare. + * manufacturer has solved this :| The DMI match may not seem unique, + * but it is. In the 67000+ DMI decode dumps from linux-hardware.org + * only 116 have board_vendor set to "AMI Corporation" and of those 116 + * only the GPD win and pocket entries' board_name is "Default string". * * Unfortunately the GPD pocket also uses these strings and its BIOS * was copy-pasted from the GPD win, so it has a disabled KIOX000A * node which we should not enable, thus we also check the BIOS date. */ - ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), { + PRESENT_ENTRY_HID("KIOX000A", "1", ATOM_AIRMONT, { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Default string"), DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), DMI_MATCH(DMI_BIOS_DATE, "02/21/2017") }), - ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), { + PRESENT_ENTRY_HID("KIOX000A", "1", ATOM_AIRMONT, { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Default string"), DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), DMI_MATCH(DMI_BIOS_DATE, "03/20/2017") }), - ENTRY("KIOX000A", "1", X86_MATCH(ATOM_AIRMONT), { + PRESENT_ENTRY_HID("KIOX000A", "1", ATOM_AIRMONT, { DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), DMI_MATCH(DMI_BOARD_NAME, "Default string"), DMI_MATCH(DMI_PRODUCT_NAME, "Default string"), DMI_MATCH(DMI_BIOS_DATE, "05/25/2017") }), + + /* + * The GPD win/pocket have a PCI wifi card, but its DSDT has the SDIO + * mmc controller enabled and that has a child-device which _PS3 + * method sets a GPIO causing the PCI wifi card to turn off. + * See above remark about uniqueness of the DMI match. + */ + NOT_PRESENT_ENTRY_PATH("\\_SB_.PCI0.SDHB.BRC1", ATOM_AIRMONT, { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"), + DMI_EXACT_MATCH(DMI_BOARD_SERIAL, "Default string"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Default string"), + }), }; -bool acpi_device_always_present(struct acpi_device *adev) +bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status) { bool ret = false; unsigned int i; - for (i = 0; i < ARRAY_SIZE(always_present_ids); i++) { - if (acpi_match_device_ids(adev, always_present_ids[i].hid)) + for (i = 0; i < ARRAY_SIZE(override_status_ids); i++) { + if (!x86_match_cpu(override_status_ids[i].cpu_ids)) continue; - if (!adev->pnp.unique_id || - strcmp(adev->pnp.unique_id, always_present_ids[i].uid)) + if (override_status_ids[i].dmi_ids[0].matches[0].slot && + !dmi_check_system(override_status_ids[i].dmi_ids)) continue; - if (!x86_match_cpu(always_present_ids[i].cpu_ids)) - continue; + if (override_status_ids[i].path) { + struct acpi_buffer path = { ACPI_ALLOCATE_BUFFER, NULL }; + bool match; - if (always_present_ids[i].dmi_ids[0].matches[0].slot && - !dmi_check_system(always_present_ids[i].dmi_ids)) - continue; + if (acpi_get_name(adev->handle, ACPI_FULL_PATHNAME, &path)) + continue; + + match = strcmp((char *)path.pointer, override_status_ids[i].path) == 0; + kfree(path.pointer); + + if (!match) + continue; + } else { + if (acpi_match_device_ids(adev, override_status_ids[i].hid)) + continue; + + if (!adev->pnp.unique_id || + strcmp(adev->pnp.unique_id, override_status_ids[i].uid)) + continue; + } + *status = override_status_ids[i].status; ret = true; break; } diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 0c0462e13ef98bf967ee0ef0f6c90e3fa2e6970a..47c72447ccd5948280b511db47faaa039921ff58 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -374,9 +374,6 @@ static int amba_device_try_add(struct amba_device *dev, struct resource *parent) void __iomem *tmp; int i, ret; - WARN_ON(dev->irq[0] == (unsigned int)-1); - WARN_ON(dev->irq[1] == (unsigned int)-1); - ret = request_resource(parent, &dev->res); if (ret) goto err_out; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index d565081d2f3d94d01c178615a20f050d5fef4b06..a367ebf160be5a07365bf3d93e18a324e6784c11 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1995,7 +1995,7 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, binder_dec_node(buffer->target_node, 1, 0); off_start_offset = ALIGN(buffer->data_size, sizeof(void *)); - off_end_offset = is_failure ? failed_at : + off_end_offset = is_failure && failed_at ? failed_at : off_start_offset + buffer->offsets_size; for (buffer_offset = off_start_offset; buffer_offset < off_end_offset; buffer_offset += sizeof(binder_size_t)) { @@ -2081,9 +2081,8 @@ static void binder_transaction_buffer_release(struct binder_proc *proc, binder_size_t fd_buf_size; binder_size_t num_valid; - if (proc->tsk != current->group_leader) { + if (is_failure) { /* - * Nothing to do if running in sender context * The fd fixups have not been applied so no * fds need to be closed. */ @@ -2181,7 +2180,7 @@ static int binder_translate_binder(struct flat_binder_object *fp, ret = -EINVAL; goto done; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { ret = -EPERM; goto done; } @@ -2227,7 +2226,7 @@ static int binder_translate_handle(struct flat_binder_object *fp, proc->pid, thread->pid, fp->handle); return -EINVAL; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { ret = -EPERM; goto done; } @@ -2315,7 +2314,7 @@ static int binder_translate_fd(u32 fd, binder_size_t fd_offset, ret = -EBADF; goto err_fget; } - ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file); + ret = security_binder_transfer_file(proc->cred, target_proc->cred, file); if (ret < 0) { ret = -EPERM; goto err_security; @@ -2395,8 +2394,8 @@ static int binder_translate_fd_array(struct binder_fd_array_object *fda, if (!ret) ret = binder_translate_fd(fd, offset, t, thread, in_reply_to); - if (ret < 0) - return ret; + if (ret) + return ret > 0 ? -EINVAL : ret; } return 0; } @@ -2691,8 +2690,8 @@ static void binder_transaction(struct binder_proc *proc, ref->node, &target_proc, &return_error); } else { - binder_user_error("%d:%d got transaction to invalid handle\n", - proc->pid, thread->pid); + binder_user_error("%d:%d got transaction to invalid handle, %u\n", + proc->pid, thread->pid, tr->target.handle); return_error = BR_FAILED_REPLY; } binder_proc_unlock(proc); @@ -2725,8 +2724,8 @@ static void binder_transaction(struct binder_proc *proc, } e->to_node = target_node->debug_id; trace_android_vh_binder_trans(target_proc, proc, thread, tr); - if (security_binder_transaction(proc->tsk, - target_proc->tsk) < 0) { + if (security_binder_transaction(proc->cred, + target_proc->cred) < 0) { return_error = BR_FAILED_REPLY; return_error_param = -EPERM; return_error_line = __LINE__; @@ -2862,7 +2861,7 @@ static void binder_transaction(struct binder_proc *proc, size_t added_size; int max_retries = 100; - security_task_getsecid(proc->tsk, &secid); + security_cred_getsecid(proc->cred, &secid); retry_alloc: ret = security_secid_to_secctx(secid, &secctx, &secctx_sz); if (ret == -ENOMEM && max_retries-- > 0) { @@ -2893,7 +2892,7 @@ static void binder_transaction(struct binder_proc *proc, if (extra_buffers_size < added_size) { /* integer overflow of extra_buffers_size */ return_error = BR_FAILED_REPLY; - return_error_param = EINVAL; + return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_extra_size; } @@ -3196,9 +3195,8 @@ static void binder_transaction(struct binder_proc *proc, if (reply) { binder_enqueue_thread_work(thread, tcomplete); binder_inner_proc_lock(target_proc); - if (target_thread->is_dead || target_proc->is_frozen) { - return_error = target_thread->is_dead ? - BR_DEAD_REPLY : BR_FROZEN_REPLY; + if (target_thread->is_dead) { + return_error = BR_DEAD_REPLY; binder_inner_proc_unlock(target_proc); goto err_dead_proc_or_thread; } @@ -3337,6 +3335,7 @@ err_invalid_target_handle: * binder_free_buf() - free the specified buffer * @proc: binder proc that owns buffer * @buffer: buffer to be freed + * @is_failure: failed to send transaction * * If buffer for an async transaction, enqueue the next async * transaction from the node. @@ -3346,7 +3345,7 @@ err_invalid_target_handle: static void binder_free_buf(struct binder_proc *proc, struct binder_thread *thread, - struct binder_buffer *buffer) + struct binder_buffer *buffer, bool is_failure) { binder_inner_proc_lock(proc); if (buffer->transaction) { @@ -3374,7 +3373,7 @@ binder_free_buf(struct binder_proc *proc, binder_node_inner_unlock(buf_node); } trace_binder_transaction_buffer_release(buffer); - binder_transaction_buffer_release(proc, thread, buffer, 0, false); + binder_transaction_buffer_release(proc, thread, buffer, 0, is_failure); binder_alloc_free_buf(&proc->alloc, buffer); } @@ -3568,7 +3567,7 @@ static int binder_thread_write(struct binder_proc *proc, proc->pid, thread->pid, (u64)data_ptr, buffer->debug_id, buffer->transaction ? "active" : "finished"); - binder_free_buf(proc, thread, buffer); + binder_free_buf(proc, thread, buffer, false); break; } @@ -4262,7 +4261,7 @@ retry: buffer->transaction = NULL; binder_cleanup_transaction(t, "fd fixups failed", BR_FAILED_REPLY); - binder_free_buf(proc, thread, buffer); + binder_free_buf(proc, thread, buffer, true); binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d %stransaction %d fd fixups failed %d/%d, line %d\n", proc->pid, thread->pid, @@ -4500,6 +4499,7 @@ static void binder_free_proc(struct binder_proc *proc) } binder_alloc_deferred_release(&proc->alloc); put_task_struct(proc->tsk); + put_cred(proc->cred); binder_stats_deleted(BINDER_STAT_PROC); kfree(proc); } @@ -4578,23 +4578,20 @@ static int binder_thread_release(struct binder_proc *proc, __release(&t->lock); /* - * If this thread used poll, make sure we remove the waitqueue - * from any epoll data structures holding it with POLLFREE. - * waitqueue_active() is safe to use here because we're holding - * the inner lock. + * If this thread used poll, make sure we remove the waitqueue from any + * poll data structures holding it. */ - if ((thread->looper & BINDER_LOOPER_STATE_POLL) && - waitqueue_active(&thread->wait)) { - wake_up_poll(&thread->wait, EPOLLHUP | POLLFREE); - } + if (thread->looper & BINDER_LOOPER_STATE_POLL) + wake_up_pollfree(&thread->wait); binder_inner_proc_unlock(thread->proc); /* - * This is needed to avoid races between wake_up_poll() above and - * and ep_remove_waitqueue() called for other reasons (eg the epoll file - * descriptor being closed); ep_remove_waitqueue() holds an RCU read - * lock, so we can be sure it's done after calling synchronize_rcu(). + * This is needed to avoid races between wake_up_pollfree() above and + * someone else removing the last entry from the queue for other reasons + * (e.g. ep_remove_wait_queue() being called due to an epoll file + * descriptor being closed). Such other users hold an RCU read lock, so + * we can be sure they're done after we call synchronize_rcu(). */ if (thread->looper & BINDER_LOOPER_STATE_POLL) synchronize_rcu(); @@ -4712,7 +4709,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp, ret = -EBUSY; goto out; } - ret = security_binder_set_context_mgr(proc->tsk); + ret = security_binder_set_context_mgr(proc->cred); if (ret < 0) goto out; if (uid_valid(context->binder_context_mgr_uid)) { @@ -4806,6 +4803,22 @@ static int binder_ioctl_get_node_debug_info(struct binder_proc *proc, return 0; } +static bool binder_txns_pending_ilocked(struct binder_proc *proc) +{ + struct rb_node *n; + struct binder_thread *thread; + + if (proc->outstanding_txns > 0) + return true; + + for (n = rb_first(&proc->threads); n; n = rb_next(n)) { + thread = rb_entry(n, struct binder_thread, rb_node); + if (thread->transaction_stack) + return true; + } + return false; +} + static int binder_ioctl_freeze(struct binder_freeze_info *info, struct binder_proc *target_proc) { @@ -4837,8 +4850,13 @@ static int binder_ioctl_freeze(struct binder_freeze_info *info, (!target_proc->outstanding_txns), msecs_to_jiffies(info->timeout_ms)); - if (!ret && target_proc->outstanding_txns) - ret = -EAGAIN; + /* Check pending transactions that wait for reply */ + if (ret >= 0) { + binder_inner_proc_lock(target_proc); + if (binder_txns_pending_ilocked(target_proc)) + ret = -EAGAIN; + binder_inner_proc_unlock(target_proc); + } if (ret < 0) { binder_inner_proc_lock(target_proc); @@ -4854,6 +4872,7 @@ static int binder_ioctl_get_freezer_info( { struct binder_proc *target_proc; bool found = false; + __u32 txns_pending; info->sync_recv = 0; info->async_recv = 0; @@ -4863,7 +4882,9 @@ static int binder_ioctl_get_freezer_info( if (target_proc->pid == info->pid) { found = true; binder_inner_proc_lock(target_proc); - info->sync_recv |= target_proc->sync_recv; + txns_pending = binder_txns_pending_ilocked(target_proc); + info->sync_recv |= target_proc->sync_recv | + (txns_pending << 1); info->async_recv |= target_proc->async_recv; binder_inner_proc_unlock(target_proc); } @@ -5179,6 +5200,7 @@ static int binder_open(struct inode *nodp, struct file *filp) spin_lock_init(&proc->outer_lock); get_task_struct(current->group_leader); proc->tsk = current->group_leader; + proc->cred = get_cred(filp->f_cred); INIT_LIST_HEAD(&proc->todo); init_waitqueue_head(&proc->freeze_wait); if (binder_supported_policy(current->policy)) { diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 34d8fe2f17b8b89402645a40cb07f2f5a810438d..d30267e085363d0d373cfd98fd8ed6e444e626aa 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -673,7 +673,7 @@ static void binder_free_buf_locked(struct binder_alloc *alloc, BUG_ON(buffer->user_data > alloc->buffer + alloc->buffer_size); if (buffer->async_transaction) { - alloc->free_async_space += size + sizeof(struct binder_buffer); + alloc->free_async_space += buffer_size + sizeof(struct binder_buffer); binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC, "%d: binder_free_buf size %zd async free %zd\n", diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 2ddbec09d2f902310053e2e84bd107298f8265f5..c2bb53e26d196d8b89b9b501687f66fff95e7eaf 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -385,6 +385,9 @@ struct binder_priority { * (invariant after initialized) * @tsk task_struct for group_leader of process * (invariant after initialized) + * @cred struct cred associated with the `struct file` + * in binder_open() + * (invariant after initialized) * @deferred_work_node: element for binder_deferred_list * (protected by binder_deferred_lock) * @deferred_work: bitmap of deferred work to perform @@ -399,6 +402,8 @@ struct binder_priority { * binder transactions * (protected by @inner_lock) * @sync_recv: process received sync transactions since last frozen + * bit 0: received sync transaction after being frozen + * bit 1: new pending sync transaction during freezing * (protected by @inner_lock) * @async_recv: process received async transactions since last frozen * (protected by @inner_lock) @@ -445,6 +450,7 @@ struct binder_proc { struct list_head waiting_threads; int pid; struct task_struct *tsk; + const struct cred *cred; struct hlist_node deferred_work_node; int deferred_work; int outstanding_txns; diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 7b4f154f07e6c96eb8f5e6416f7a6127976be2ee..346f315e8bd7e45fbf9f10cc954ece897c88c926 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -58,6 +58,10 @@ enum binderfs_stats_mode { binderfs_stats_mode_global, }; +struct binder_features { + bool oneway_spam_detection; +}; + static const struct constant_table binderfs_param_stats[] = { { "global", binderfs_stats_mode_global }, {} @@ -69,6 +73,10 @@ static const struct fs_parameter_spec binderfs_fs_parameters[] = { {} }; +static struct binder_features binder_features = { + .oneway_spam_detection = true, +}; + static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb) { return sb->s_fs_info; @@ -581,6 +589,33 @@ out: return dentry; } +static int binder_features_show(struct seq_file *m, void *unused) +{ + bool *feature = m->private; + + seq_printf(m, "%d\n", *feature); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(binder_features); + +static int init_binder_features(struct super_block *sb) +{ + struct dentry *dentry, *dir; + + dir = binderfs_create_dir(sb->s_root, "features"); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + dentry = binderfs_create_file(dir, "oneway_spam_detection", + &binder_features_fops, + &binder_features.oneway_spam_detection); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + return 0; +} + static int init_binder_logs(struct super_block *sb) { struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir; @@ -721,6 +756,10 @@ static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc) name++; } + ret = init_binder_features(sb); + if (ret) + return ret; + if (info->mount_opts.stats_mode == binderfs_stats_mode_global) return init_binder_logs(sb); diff --git a/drivers/android/debug_symbols.c b/drivers/android/debug_symbols.c index 6a4a6f8992f2a505953a018cce85c3790b8dbc74..f2a5a0614ccc520d63a2f39413def83f7e365d2a 100644 --- a/drivers/android/debug_symbols.c +++ b/drivers/android/debug_symbols.c @@ -68,6 +68,7 @@ static const struct ads_entry ads_entries[ADS_END] = { #ifdef CONFIG_SYSCTL ADS_ENTRY(ADS_SYSCTL_LEGACY_VA_LAYOUT, &sysctl_legacy_va_layout), #endif + ADS_ENTRY(ADS_SHOW_MEM, show_mem), }; /* diff --git a/drivers/android/vendor_hooks.c b/drivers/android/vendor_hooks.c index 3bba6cf5d1bb38255661c5ea81f1d2af3d7566f4..e8773213ed4c5853bf24cf0654ca3dfba0c29b40 100644 --- a/drivers/android/vendor_hooks.c +++ b/drivers/android/vendor_hooks.c @@ -8,6 +8,7 @@ #define CREATE_TRACE_POINTS #include +#include #include #include #include @@ -72,6 +73,8 @@ #include #include #include +#include +#include /* * Export tracepoints that act as a bare tracehook (ie: have no trace event @@ -79,6 +82,7 @@ */ EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_select_task_rq_fair); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_select_task_rq_rt); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_select_task_rq_dl); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_select_fallback_rq); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_refrigerator); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_scheduler_tick); @@ -136,6 +140,9 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_show_suspend_epoch_val); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_show_resume_epoch_val); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_show_max_freq); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_freq_table_limits); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cpufreq_resolve_freq); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cpufreq_fast_switch); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cpufreq_target); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_sched_newidle_balance); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_sched_nohz_balancer_kick); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_sched_rebalance_domains); @@ -184,6 +191,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ftrace_format_check); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ftrace_dump_buffer); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_allow_domain_state); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_map_util_freq); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_update_next_freq); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sugov_get_util); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_report_bug); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_em_cpu_energy); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cpu_up); @@ -198,7 +207,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_die_kernel_fault); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_sea); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_mem_abort); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_do_sp_pc_abort); -EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_iommu_setup_dma_ops); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_iommu_setup_dma_ops); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_iommu_alloc_iova); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_iommu_free_iova); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_pick_next_entity); @@ -212,6 +221,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_filemap_fault_cache_page); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_enable_thermal_genl_check); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_thermal_pm_notify_suspend); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ufs_fill_prdt); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_ufs_complete_init); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_ufs_reprogram_all_keys); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ufs_prepare_command); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ufs_update_sysfs); @@ -225,6 +235,8 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ufs_clock_scaling); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_do_wake_up_sync); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_set_wake_flags); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_uclamp_eff_get); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_uclamp_task_util); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_uclamp_rq_util_with); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_cpufreq_transition); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_cgroup_set_task); EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_cgroup_force_kthread_migration); @@ -367,3 +379,21 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_usb_dev_resume); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_ipv6_gen_linklocal_addr); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_sound_usb_support_cpu_suspend); EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_snd_compr_use_pause_in_drain); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_modify_thermal_request_freq); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_modify_thermal_target_freq); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_enable_thermal_power_throttle); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_thermal_power_cap); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_get_thermal_zone_device); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_typec_tcpm_modify_src_caps); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_try_grab_compound_head); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh___get_user_pages_remote); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_get_user_pages); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_internal_get_user_pages_fast); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_vh_pin_user_pages); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_attach_entity_load_avg); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_detach_entity_load_avg); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_update_load_avg); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_remove_entity_load_avg); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_update_blocked_fair); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_update_rt_rq_load_avg); +EXPORT_TRACEPOINT_SYMBOL_GPL(android_rvh_pci_d3_sleep); diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 33192a8f687d664e3b62ff5e4cd2118e1bc324f5..ff2add0101fe56e5d02149f51088ac29f906d316 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -442,6 +442,7 @@ static const struct pci_device_id ahci_pci_tbl[] = { /* AMD */ { PCI_VDEVICE(AMD, 0x7800), board_ahci }, /* AMD Hudson-2 */ { PCI_VDEVICE(AMD, 0x7900), board_ahci }, /* AMD CZ */ + { PCI_VDEVICE(AMD, 0x7901), board_ahci_mobile }, /* AMD Green Sardine */ /* AMD is using RAID class only for ahci controllers */ { PCI_VENDOR_ID_AMD, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_RAID << 8, 0xffffff, board_ahci }, diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index b2f55208829187d5234b505e1bdc28c8c0c5fdd8..0910441321f729bc9ea449c326e83c776c3906df 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -440,10 +440,7 @@ struct ahci_host_priv *ahci_platform_get_resources(struct platform_device *pdev, hpriv->phy_regulator = devm_regulator_get(dev, "phy"); if (IS_ERR(hpriv->phy_regulator)) { rc = PTR_ERR(hpriv->phy_regulator); - if (rc == -EPROBE_DEFER) - goto err_out; - rc = 0; - hpriv->phy_regulator = NULL; + goto err_out; } if (flags & AHCI_PLATFORM_GET_RESETS) { diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 0e6e73b8023fcd96962545b7c7edbc7620047ad0..d2b544bdc7b5e735289d7c71395bce92b34e0f8a 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -2004,7 +2004,7 @@ unsigned int ata_read_log_page(struct ata_device *dev, u8 log, retry: ata_tf_init(dev, &tf); - if (dev->dma_mode && ata_id_has_read_log_dma_ext(dev->id) && + if (ata_dma_enabled(dev) && ata_id_has_read_log_dma_ext(dev->id) && !(dev->horkage & ATA_HORKAGE_NO_DMA_LOG)) { tf.command = ATA_CMD_READ_LOG_DMA_EXT; tf.protocol = ATA_PROT_DMA; @@ -2199,6 +2199,25 @@ static void ata_dev_config_ncq_prio(struct ata_device *dev) } +static bool ata_dev_check_adapter(struct ata_device *dev, + unsigned short vendor_id) +{ + struct pci_dev *pcidev = NULL; + struct device *parent_dev = NULL; + + for (parent_dev = dev->tdev.parent; parent_dev != NULL; + parent_dev = parent_dev->parent) { + if (dev_is_pci(parent_dev)) { + pcidev = to_pci_dev(parent_dev); + if (pcidev->vendor == vendor_id) + return true; + break; + } + } + + return false; +} + static int ata_dev_config_ncq(struct ata_device *dev, char *desc, size_t desc_sz) { @@ -2217,6 +2236,13 @@ static int ata_dev_config_ncq(struct ata_device *dev, snprintf(desc, desc_sz, "NCQ (not used)"); return 0; } + + if (dev->horkage & ATA_HORKAGE_NO_NCQ_ON_ATI && + ata_dev_check_adapter(dev, PCI_VENDOR_ID_ATI)) { + snprintf(desc, desc_sz, "NCQ (not used)"); + return 0; + } + if (ap->flags & ATA_FLAG_NCQ) { hdepth = min(ap->scsi_host->can_queue, ATA_MAX_QUEUE); dev->flags |= ATA_DFLAG_NCQ; @@ -3805,6 +3831,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "VRFDFC22048UCHC-TE*", NULL, ATA_HORKAGE_NODMA }, /* Odd clown on sil3726/4726 PMPs */ { "Config Disk", NULL, ATA_HORKAGE_DISABLE }, + /* Similar story with ASMedia 1092 */ + { "ASMT109x- Config", NULL, ATA_HORKAGE_DISABLE }, /* Weird ATAPI devices */ { "TORiSAN DVD-ROM DRD-N216", NULL, ATA_HORKAGE_MAX_SEC_128 }, @@ -3951,14 +3979,17 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "Samsung SSD 850*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, { "Samsung SSD 860*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NO_NCQ_ON_ATI, }, { "Samsung SSD 870*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | - ATA_HORKAGE_ZERO_AFTER_TRIM, }, + ATA_HORKAGE_ZERO_AFTER_TRIM | + ATA_HORKAGE_NO_NCQ_ON_ATI, }, { "FCCT*M500*", NULL, ATA_HORKAGE_NO_NCQ_TRIM | ATA_HORKAGE_ZERO_AFTER_TRIM, }, /* devices that don't properly handle TRIM commands */ { "SuperSSpeed S238*", NULL, ATA_HORKAGE_NOTRIM, }, + { "M88V29*", NULL, ATA_HORKAGE_NOTRIM, }, /* * As defined, the DRAT (Deterministic Read After Trim) and RZAT @@ -6108,6 +6139,8 @@ static int __init ata_parse_force_one(char **cur, { "ncq", .horkage_off = ATA_HORKAGE_NONCQ }, { "noncqtrim", .horkage_on = ATA_HORKAGE_NO_NCQ_TRIM }, { "ncqtrim", .horkage_off = ATA_HORKAGE_NO_NCQ_TRIM }, + { "noncqati", .horkage_on = ATA_HORKAGE_NO_NCQ_ON_ATI }, + { "ncqati", .horkage_off = ATA_HORKAGE_NO_NCQ_ON_ATI }, { "dump_id", .horkage_on = ATA_HORKAGE_DUMP_ID }, { "pio0", .xfer_mask = 1 << (ATA_SHIFT_PIO + 0) }, { "pio1", .xfer_mask = 1 << (ATA_SHIFT_PIO + 1) }, diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index b6f92050e60cb639768fdddf3be3d77a1e1c3c08..018ed8736a64d1f67b3657435b63841265ca35fe 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -93,6 +93,12 @@ static const unsigned long ata_eh_identify_timeouts[] = { ULONG_MAX, }; +static const unsigned long ata_eh_revalidate_timeouts[] = { + 15000, /* Some drives are slow to read log pages when waking-up */ + 15000, /* combined time till here is enough even for media access */ + ULONG_MAX, +}; + static const unsigned long ata_eh_flush_timeouts[] = { 15000, /* be generous with flush */ 15000, /* ditto */ @@ -129,6 +135,8 @@ static const struct ata_eh_cmd_timeout_ent ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI), .timeouts = ata_eh_identify_timeouts, }, + { .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT), + .timeouts = ata_eh_revalidate_timeouts, }, { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), .timeouts = ata_eh_other_timeouts, }, { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 48b8934970f36a580ddafae0c26c3f2a43ad6da6..a0e788b6482148aaf474d1a5950ebd95857bd45d 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -2870,8 +2870,19 @@ static unsigned int ata_scsi_pass_thru(struct ata_queued_cmd *qc) goto invalid_fld; } - if (ata_is_ncq(tf->protocol) && (cdb[2 + cdb_offset] & 0x3) == 0) - tf->protocol = ATA_PROT_NCQ_NODATA; + if ((cdb[2 + cdb_offset] & 0x3) == 0) { + /* + * When T_LENGTH is zero (No data is transferred), dir should + * be DMA_NONE. + */ + if (scmd->sc_data_direction != DMA_NONE) { + fp = 2 + cdb_offset; + goto invalid_fld; + } + + if (ata_is_ncq(tf->protocol)) + tf->protocol = ATA_PROT_NCQ_NODATA; + } /* enable LBA */ tf->flags |= ATA_TFLAG_LBA; diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c index fad6c6a873130a5b4af26e117a36ba94ec9fe42f..fef46de2f6b23c29f2c8aba3d26850c1222d542e 100644 --- a/drivers/ata/pata_hpt37x.c +++ b/drivers/ata/pata_hpt37x.c @@ -917,6 +917,20 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) irqmask &= ~0x10; pci_write_config_byte(dev, 0x5a, irqmask); + /* + * HPT371 chips physically have only one channel, the secondary one, + * but the primary channel registers do exist! Go figure... + * So, we manually disable the non-existing channel here + * (if the BIOS hasn't done this already). + */ + if (dev->device == PCI_DEVICE_ID_TTI_HPT371) { + u8 mcr1; + + pci_read_config_byte(dev, 0x50, &mcr1); + mcr1 &= ~0x04; + pci_write_config_byte(dev, 0x50, mcr1); + } + /* * default to pci clock. make sure MA15/16 are set to output * to prevent drives having problems with 40-pin cables. Needed @@ -948,14 +962,14 @@ static int hpt37x_init_one(struct pci_dev *dev, const struct pci_device_id *id) if ((freq >> 12) != 0xABCDE) { int i; - u8 sr; + u16 sr; u32 total = 0; pr_warn("BIOS has not set timing clocks\n"); /* This is the process the HPT371 BIOS is reported to use */ for (i = 0; i < 128; i++) { - pci_read_config_byte(dev, 0x78, &sr); + pci_read_config_word(dev, 0x78, &sr); total += sr & 0x1FF; udelay(15); } diff --git a/drivers/ata/pata_legacy.c b/drivers/ata/pata_legacy.c index 4fd12b20df239595e3e49ed98db68a1ac88d72c5..d91ba47f2fc44db4c23ba48d9cd3a00ae00269bc 100644 --- a/drivers/ata/pata_legacy.c +++ b/drivers/ata/pata_legacy.c @@ -315,7 +315,8 @@ static unsigned int pdc_data_xfer_vlb(struct ata_queued_cmd *qc, iowrite32_rep(ap->ioaddr.data_addr, buf, buflen >> 2); if (unlikely(slop)) { - __le32 pad; + __le32 pad = 0; + if (rw == READ) { pad = cpu_to_le32(ioread32(ap->ioaddr.data_addr)); memcpy(buf + buflen - slop, &pad, slop); @@ -705,7 +706,8 @@ static unsigned int vlb32_data_xfer(struct ata_queued_cmd *qc, ioread32_rep(ap->ioaddr.data_addr, buf, buflen >> 2); if (unlikely(slop)) { - __le32 pad; + __le32 pad = 0; + if (rw == WRITE) { memcpy(&pad, buf + buflen - slop, slop); iowrite32(le32_to_cpu(pad), ap->ioaddr.data_addr); diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index d55ee244d6931fcae2089dce4e3a07409685276d..0ba231e80b1911dd2a780440b15f944ca2ce04a5 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1394,6 +1394,14 @@ static int sata_fsl_init_controller(struct ata_host *host) return 0; } +static void sata_fsl_host_stop(struct ata_host *host) +{ + struct sata_fsl_host_priv *host_priv = host->private_data; + + iounmap(host_priv->hcr_base); + kfree(host_priv); +} + /* * scsi mid-layer and libata interface structures */ @@ -1426,6 +1434,8 @@ static struct ata_port_operations sata_fsl_ops = { .port_start = sata_fsl_port_start, .port_stop = sata_fsl_port_stop, + .host_stop = sata_fsl_host_stop, + .pmp_attach = sata_fsl_pmp_attach, .pmp_detach = sata_fsl_pmp_detach, }; @@ -1480,9 +1490,9 @@ static int sata_fsl_probe(struct platform_device *ofdev) host_priv->ssr_base = ssr_base; host_priv->csr_base = csr_base; - irq = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (!irq) { - dev_err(&ofdev->dev, "invalid irq from platform\n"); + irq = platform_get_irq(ofdev, 0); + if (irq < 0) { + retval = irq; goto error_exit_with_cleanup; } host_priv->irq = irq; @@ -1557,10 +1567,6 @@ static int sata_fsl_remove(struct platform_device *ofdev) ata_host_detach(host); - irq_dispose_mapping(host_priv->irq); - iounmap(host_priv->hcr_base); - kfree(host_priv); - return 0; } diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index b62446ea5f408a5c4fda3992197314022cdcc62c..11be88f70690ef1a86b84a360f4c067fa5fc0098 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -3892,8 +3892,8 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx) break; default: - dev_err(host->dev, "BUG: invalid board index %u\n", board_idx); - return 1; + dev_alert(host->dev, "BUG: invalid board index %u\n", board_idx); + return -EINVAL; } hpriv->hp_flags = hp_flags; diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index 0ddd611b427766f4f4d01eb17334a1e7ff13124b..43a34aee33b82059983637eb259b200ea46f3e64 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -1675,6 +1675,8 @@ static int fs_init(struct fs_dev *dev) dev->hw_base = pci_resource_start(pci_dev, 0); dev->base = ioremap(dev->hw_base, 0x1000); + if (!dev->base) + return 1; reset_chip (dev); diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index d8602843e8a53eeefde03802eb3114a154af252e..7e3858c4e030f97a7a6c04ccba5643361f7db22b 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -219,6 +219,15 @@ static const struct backlight_ops ht16k33_bl_ops = { .check_fb = ht16k33_bl_check_fb, }; +/* + * Blank events will be passed to the actual device handling the backlight when + * we return zero here. + */ +static int ht16k33_blank(int blank, struct fb_info *info) +{ + return 0; +} + static int ht16k33_mmap(struct fb_info *info, struct vm_area_struct *vma) { struct ht16k33_priv *priv = info->par; @@ -231,6 +240,7 @@ static const struct fb_ops ht16k33_fb_ops = { .owner = THIS_MODULE, .fb_read = fb_sys_read, .fb_write = fb_sys_write, + .fb_blank = ht16k33_blank, .fb_fillrect = sys_fillrect, .fb_copyarea = sys_copyarea, .fb_imageblit = sys_imageblit, @@ -418,6 +428,33 @@ static int ht16k33_probe(struct i2c_client *client, if (err) return err; + /* Backlight */ + memset(&bl_props, 0, sizeof(struct backlight_properties)); + bl_props.type = BACKLIGHT_RAW; + bl_props.max_brightness = MAX_BRIGHTNESS; + + bl = devm_backlight_device_register(&client->dev, DRIVER_NAME"-bl", + &client->dev, priv, + &ht16k33_bl_ops, &bl_props); + if (IS_ERR(bl)) { + dev_err(&client->dev, "failed to register backlight\n"); + return PTR_ERR(bl); + } + + err = of_property_read_u32(node, "default-brightness-level", + &dft_brightness); + if (err) { + dft_brightness = MAX_BRIGHTNESS; + } else if (dft_brightness > MAX_BRIGHTNESS) { + dev_warn(&client->dev, + "invalid default brightness level: %u, using %u\n", + dft_brightness, MAX_BRIGHTNESS); + dft_brightness = MAX_BRIGHTNESS; + } + + bl->props.brightness = dft_brightness; + ht16k33_bl_update_status(bl); + /* Framebuffer (2 bytes per column) */ BUILD_BUG_ON(PAGE_SIZE < HT16K33_FB_SIZE); fbdev->buffer = (unsigned char *) get_zeroed_page(GFP_KERNEL); @@ -450,6 +487,7 @@ static int ht16k33_probe(struct i2c_client *client, fbdev->info->screen_size = HT16K33_FB_SIZE; fbdev->info->fix = ht16k33_fb_fix; fbdev->info->var = ht16k33_fb_var; + fbdev->info->bl_dev = bl; fbdev->info->pseudo_palette = NULL; fbdev->info->flags = FBINFO_FLAG_DEFAULT; fbdev->info->par = priv; @@ -462,34 +500,6 @@ static int ht16k33_probe(struct i2c_client *client, if (err) goto err_fbdev_unregister; - /* Backlight */ - memset(&bl_props, 0, sizeof(struct backlight_properties)); - bl_props.type = BACKLIGHT_RAW; - bl_props.max_brightness = MAX_BRIGHTNESS; - - bl = devm_backlight_device_register(&client->dev, DRIVER_NAME"-bl", - &client->dev, priv, - &ht16k33_bl_ops, &bl_props); - if (IS_ERR(bl)) { - dev_err(&client->dev, "failed to register backlight\n"); - err = PTR_ERR(bl); - goto err_fbdev_unregister; - } - - err = of_property_read_u32(node, "default-brightness-level", - &dft_brightness); - if (err) { - dft_brightness = MAX_BRIGHTNESS; - } else if (dft_brightness > MAX_BRIGHTNESS) { - dev_warn(&client->dev, - "invalid default brightness level: %u, using %u\n", - dft_brightness, MAX_BRIGHTNESS); - dft_brightness = MAX_BRIGHTNESS; - } - - bl->props.brightness = dft_brightness; - ht16k33_bl_update_status(bl); - ht16k33_fb_queue(priv); return 0; diff --git a/drivers/auxdisplay/img-ascii-lcd.c b/drivers/auxdisplay/img-ascii-lcd.c index 1cce409ce5cacbc8a9a7ae271233c981b0ef2dcd..e33ce0151cdfd150b95f79dce22cf02d2228f1da 100644 --- a/drivers/auxdisplay/img-ascii-lcd.c +++ b/drivers/auxdisplay/img-ascii-lcd.c @@ -280,6 +280,16 @@ static int img_ascii_lcd_display(struct img_ascii_lcd_ctx *ctx, if (msg[count - 1] == '\n') count--; + if (!count) { + /* clear the LCD */ + devm_kfree(&ctx->pdev->dev, ctx->message); + ctx->message = NULL; + ctx->message_len = 0; + memset(ctx->curr, ' ', ctx->cfg->num_chars); + ctx->cfg->update(ctx); + return 0; + } + new_msg = devm_kmalloc(&ctx->pdev->dev, count + 1, GFP_KERNEL); if (!new_msg) return -ENOMEM; diff --git a/drivers/base/core.c b/drivers/base/core.c index 6baad6445fa9f41291026b2c8b98b95d48dea85c..106896b4ccafed302d02b232d4a3e44462285a47 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -471,8 +471,7 @@ static void device_link_release_fn(struct work_struct *work) /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put(link->supplier); + pm_runtime_release_supplier(link, true); put_device(link->consumer); put_device(link->supplier); @@ -673,7 +672,8 @@ struct device_link *device_link_add(struct device *consumer, { struct device_link *link; - if (!consumer || !supplier || flags & ~DL_ADD_VALID_FLAGS || + if (!consumer || !supplier || consumer == supplier || + flags & ~DL_ADD_VALID_FLAGS || (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || (flags & DL_FLAG_SYNC_STATE_ONLY && (flags & ~DL_FLAG_INFERRED) != DL_FLAG_SYNC_STATE_ONLY) || @@ -806,9 +806,7 @@ struct device_link *device_link_add(struct device *consumer, dev_bus_name(supplier), dev_name(supplier), dev_bus_name(consumer), dev_name(consumer)); if (device_register(&link->link_dev)) { - put_device(consumer); - put_device(supplier); - kfree(link); + put_device(&link->link_dev); link = NULL; goto out; } @@ -884,6 +882,8 @@ static void device_link_put_kref(struct device_link *link) { if (link->flags & DL_FLAG_STATELESS) kref_put(&link->kref, __device_link_del); + else if (!device_is_registered(link->consumer)) + __device_link_del(&link->kref); else WARN(1, "Unable to drop a managed device link reference\n"); } @@ -1691,14 +1691,21 @@ static int fw_devlink_create_devlink(struct device *con, * be broken by applying logic. Check for these types of cycles and * break them so that devices in the cycle probe properly. * - * If the supplier's parent is dependent on the consumer, then - * the consumer-supplier dependency is a false dependency. So, - * treat it as an invalid link. + * If the supplier's parent is dependent on the consumer, then the + * consumer and supplier have a cyclic dependency. Since fw_devlink + * can't tell which of the inferred dependencies are incorrect, don't + * enforce probe ordering between any of the devices in this cyclic + * dependency. Do this by relaxing all the fw_devlink device links in + * this cycle and by treating the fwnode link between the consumer and + * the supplier as an invalid dependency. */ sup_dev = fwnode_get_next_parent_dev(sup_handle); if (sup_dev && device_is_dependent(con, sup_dev)) { - dev_dbg(con, "Not linking to %pfwP - False link\n", - sup_handle); + dev_info(con, "Fixing up cyclic dependency with %pfwP (%s)\n", + sup_handle, dev_name(sup_dev)); + device_links_write_lock(); + fw_devlink_relax_cycle(con, sup_dev); + device_links_write_unlock(); ret = -EINVAL; } else { /* diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 5fcb97bf2ba30baad0e73d8f6248bc4bf6b95198..d3b33edc40ece242a8942452810b49009f03d8b5 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -592,6 +592,9 @@ re_probe: drv->remove(dev); devres_release_all(dev); + arch_teardown_dma_ops(dev); + kfree(dev->dma_range_map); + dev->dma_range_map = NULL; driver_sysfs_remove(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); @@ -1168,6 +1171,8 @@ static void __device_release_driver(struct device *dev, struct device *parent) devres_release_all(dev); arch_teardown_dma_ops(dev); + kfree(dev->dma_range_map); + dev->dma_range_map = NULL; dev->driver = NULL; dev_set_drvdata(dev, NULL); if (dev->pm_domain && dev->pm_domain->dismiss) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index a71d1411794394d0781ec22412c5bf87109950ea..b5cbaa61cbea7ad51c85df214b546a70f5b07b76 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -59,8 +59,15 @@ static struct dentry *public_dev_mount(struct file_system_type *fs_type, int fla const char *dev_name, void *data) { struct super_block *s = mnt->mnt_sb; + int err; + atomic_inc(&s->s_active); down_write(&s->s_umount); + err = reconfigure_single(s, flags, data); + if (err < 0) { + deactivate_locked_super(s); + return ERR_PTR(err); + } return dget(s->s_root); } diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index f41e4e4993d372139a64daf3f4990e7e5e439967..1372f40d0371f5e9d733fbbf430c8f8b2a23f772 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -99,12 +99,15 @@ static struct firmware_cache fw_cache; extern struct builtin_fw __start_builtin_fw[]; extern struct builtin_fw __end_builtin_fw[]; -static void fw_copy_to_prealloc_buf(struct firmware *fw, +static bool fw_copy_to_prealloc_buf(struct firmware *fw, void *buf, size_t size) { - if (!buf || size < fw->size) - return; + if (!buf) + return true; + if (size < fw->size) + return false; memcpy(buf, fw->data, fw->size); + return true; } static bool fw_get_builtin_firmware(struct firmware *fw, const char *name, @@ -116,9 +119,7 @@ static bool fw_get_builtin_firmware(struct firmware *fw, const char *name, if (strcmp(name, b_fw->name) == 0) { fw->size = b_fw->size; fw->data = b_fw->data; - fw_copy_to_prealloc_buf(fw, buf, size); - - return true; + return fw_copy_to_prealloc_buf(fw, buf, size); } } diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 521fa086b6c832df295332028ed855e01949ac5a..1ec4520d77a93dc185e21dd8fe6caf0dcb57ab18 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -1054,7 +1054,7 @@ static void device_complete(struct device *dev, pm_message_t state) const char *info = NULL; if (dev->power.syscore) - return; + goto out; device_lock(dev); @@ -1084,6 +1084,7 @@ static void device_complete(struct device *dev, pm_message_t state) device_unlock(dev); +out: pm_runtime_put(dev); } @@ -1804,9 +1805,6 @@ static int device_prepare(struct device *dev, pm_message_t state) int (*callback)(struct device *) = NULL; int ret = 0; - if (dev->power.syscore) - return 0; - /* * If a device's parent goes into runtime suspend at the wrong time, * it won't be possible to resume the device. To prevent this we @@ -1815,6 +1813,9 @@ static int device_prepare(struct device *dev, pm_message_t state) */ pm_runtime_get_noresume(dev); + if (dev->power.syscore) + return 0; + device_lock(dev); dev->power.wakeup_path = false; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index bc649da4899a0b873884226c40af571c6cfed8a2..1573319404888ad8b07bffdb4f0620d4dc553a4b 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -305,19 +305,40 @@ static int rpm_get_suppliers(struct device *dev) return 0; } +/** + * pm_runtime_release_supplier - Drop references to device link's supplier. + * @link: Target device link. + * @check_idle: Whether or not to check if the supplier device is idle. + * + * Drop all runtime PM references associated with @link to its supplier device + * and if @check_idle is set, check if that device is idle (and so it can be + * suspended). + */ +void pm_runtime_release_supplier(struct device_link *link, bool check_idle) +{ + struct device *supplier = link->supplier; + + /* + * The additional power.usage_count check is a safety net in case + * the rpm_active refcount becomes saturated, in which case + * refcount_dec_not_one() would return true forever, but it is not + * strictly necessary. + */ + while (refcount_dec_not_one(&link->rpm_active) && + atomic_read(&supplier->power.usage_count) > 0) + pm_runtime_put_noidle(supplier); + + if (check_idle) + pm_request_idle(supplier); +} + static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) { struct device_link *link; list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, - device_links_read_lock_held()) { - - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put_noidle(link->supplier); - - if (try_to_suspend) - pm_request_idle(link->supplier); - } + device_links_read_lock_held()) + pm_runtime_release_supplier(link, try_to_suspend); } static void rpm_put_suppliers(struct device *dev) @@ -1755,9 +1776,7 @@ void pm_runtime_drop_link(struct device_link *link) return; pm_runtime_drop_link_count(link->consumer); - - while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put(link->supplier); + pm_runtime_release_supplier(link, true); } static bool pm_runtime_need_not_resume(struct device *dev) diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 01057f640233075f61844f9c5b2b0efe29690b2c..f29d4563e6871fd53e35feaa24b6cac71d316cb3 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -37,7 +37,8 @@ suspend_state_t pm_suspend_target_state; bool events_check_enabled __read_mostly; /* First wakeup IRQ seen by the kernel in the last cycle. */ -unsigned int pm_wakeup_irq __read_mostly; +static unsigned int wakeup_irq[2] __read_mostly; +static DEFINE_RAW_SPINLOCK(wakeup_irq_lock); /* If greater than 0 and the system is suspending, terminate the suspend. */ static atomic_t pm_abort_suspend __read_mostly; @@ -980,16 +981,39 @@ void pm_system_cancel_wakeup(void) atomic_dec_if_positive(&pm_abort_suspend); } -void pm_wakeup_clear(bool reset) +void pm_wakeup_clear(unsigned int irq_number) { - pm_wakeup_irq = 0; - if (reset) + raw_spin_lock_irq(&wakeup_irq_lock); + + if (irq_number && wakeup_irq[0] == irq_number) + wakeup_irq[0] = wakeup_irq[1]; + else + wakeup_irq[0] = 0; + + wakeup_irq[1] = 0; + + raw_spin_unlock_irq(&wakeup_irq_lock); + + if (!irq_number) atomic_set(&pm_abort_suspend, 0); } void pm_system_irq_wakeup(unsigned int irq_number) { - if (pm_wakeup_irq == 0) { + unsigned long flags; + + raw_spin_lock_irqsave(&wakeup_irq_lock, flags); + + if (wakeup_irq[0] == 0) + wakeup_irq[0] = irq_number; + else if (wakeup_irq[1] == 0) + wakeup_irq[1] = irq_number; + else + irq_number = 0; + + raw_spin_unlock_irqrestore(&wakeup_irq_lock, flags); + + if (irq_number) { struct irq_desc *desc; const char *name = "null"; @@ -1001,12 +1025,15 @@ void pm_system_irq_wakeup(unsigned int irq_number) log_irq_wakeup_reason(irq_number); pr_warn("%s: %d triggered %s\n", __func__, irq_number, name); - - pm_wakeup_irq = irq_number; pm_system_wakeup(); } } +unsigned int pm_wakeup_irq(void) +{ + return wakeup_irq[0]; +} + /** * pm_get_wakeup_count - Read the number of registered wakeup events. * @count: Address to store the value at. diff --git a/drivers/base/property.c b/drivers/base/property.c index 35b95c6ac0c6aa133aaeffd1870b7e66afc5e48d..55c4a9270b137df3bc969d285e2a7617a16c8fdb 100644 --- a/drivers/base/property.c +++ b/drivers/base/property.c @@ -1247,8 +1247,10 @@ fwnode_graph_devcon_match(struct fwnode_handle *fwnode, const char *con_id, fwnode_graph_for_each_endpoint(fwnode, ep) { node = fwnode_graph_get_remote_port_parent(ep); - if (!fwnode_device_is_available(node)) + if (!fwnode_device_is_available(node)) { + fwnode_handle_put(node); continue; + } ret = match(node, con_id, data); fwnode_handle_put(node); diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c index cfa29dc89bbff456a366ff6057a57a22e46dcf19..fabf87058d80bc0610f2e7cff25b29c91821cb56 100644 --- a/drivers/base/regmap/regcache-rbtree.c +++ b/drivers/base/regmap/regcache-rbtree.c @@ -281,14 +281,14 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, if (!blk) return -ENOMEM; + rbnode->block = blk; + if (BITS_TO_LONGS(blklen) > BITS_TO_LONGS(rbnode->blklen)) { present = krealloc(rbnode->cache_present, BITS_TO_LONGS(blklen) * sizeof(*present), GFP_KERNEL); - if (!present) { - kfree(blk); + if (!present) return -ENOMEM; - } memset(present + BITS_TO_LONGS(rbnode->blklen), 0, (BITS_TO_LONGS(blklen) - BITS_TO_LONGS(rbnode->blklen)) @@ -305,7 +305,6 @@ static int regcache_rbtree_insert_to_block(struct regmap *map, } /* update the rbnode block, its size and the base register */ - rbnode->block = blk; rbnode->blklen = blklen; rbnode->base_reg = base_reg; rbnode->cache_present = present; diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index ad5c2de395d1f11690e4b6dd03fc435565a51a85..87c5c421e0f461a307c484821243f82aef75a1b0 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -170,11 +170,9 @@ static void regmap_irq_sync_unlock(struct irq_data *data) ret = regmap_write(map, reg, d->mask_buf[i]); if (d->chip->clear_ack) { if (d->chip->ack_invert && !ret) - ret = regmap_write(map, reg, - d->mask_buf[i]); + ret = regmap_write(map, reg, UINT_MAX); else if (!ret) - ret = regmap_write(map, reg, - ~d->mask_buf[i]); + ret = regmap_write(map, reg, 0); } if (ret != 0) dev_err(d->map->dev, "Failed to ack 0x%x: %d\n", @@ -509,11 +507,9 @@ static irqreturn_t regmap_irq_thread(int irq, void *d) data->status_buf[i]); if (chip->clear_ack) { if (chip->ack_invert && !ret) - ret = regmap_write(map, reg, - data->status_buf[i]); + ret = regmap_write(map, reg, UINT_MAX); else if (!ret) - ret = regmap_write(map, reg, - ~data->status_buf[i]); + ret = regmap_write(map, reg, 0); } if (ret != 0) dev_err(map->dev, "Failed to ack 0x%x: %d\n", @@ -745,13 +741,9 @@ int regmap_add_irq_chip_fwnode(struct fwnode_handle *fwnode, d->status_buf[i] & d->mask_buf[i]); if (chip->clear_ack) { if (chip->ack_invert && !ret) - ret = regmap_write(map, reg, - (d->status_buf[i] & - d->mask_buf[i])); + ret = regmap_write(map, reg, UINT_MAX); else if (!ret) - ret = regmap_write(map, reg, - ~(d->status_buf[i] & - d->mask_buf[i])); + ret = regmap_write(map, reg, 0); } if (ret != 0) { dev_err(map->dev, "Failed to ack 0x%x: %d\n", diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 456a1787e18d06b5ad5eec33a46a65eae7cb93bd..55a30afc14a00cc3dd3bba15281f623561442b62 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -620,6 +620,7 @@ int regmap_attach_dev(struct device *dev, struct regmap *map, if (ret) return ret; + regmap_debugfs_exit(map); regmap_debugfs_init(map); /* Add a devres resource for dev_get_regmap() */ diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index c9867ced801177fe37c3526780988c98c71fb657..7c2171723f90e1d2d96626f8fc0200782d828d6b 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -532,7 +532,7 @@ software_node_get_reference_args(const struct fwnode_handle *fwnode, return -ENOENT; if (nargs_prop) { - error = property_entry_read_int_array(swnode->node->properties, + error = property_entry_read_int_array(ref->node->properties, nargs_prop, sizeof(u32), &nargs_prop_val, 1); if (error) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 7df79ae6b0a1e194834b938844697520bc7e8852..aaee15058d181409ffeda855dc9b284bcc6c5e70 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -1015,7 +1015,7 @@ static DECLARE_DELAYED_WORK(fd_timer, fd_timer_workfn); static void cancel_activity(void) { do_floppy = NULL; - cancel_delayed_work_sync(&fd_timer); + cancel_delayed_work(&fd_timer); cancel_work_sync(&floppy_work); } @@ -3169,6 +3169,8 @@ static void raw_cmd_free(struct floppy_raw_cmd **ptr) } } +#define MAX_LEN (1UL << MAX_ORDER << PAGE_SHIFT) + static int raw_cmd_copyin(int cmd, void __user *param, struct floppy_raw_cmd **rcmd) { @@ -3198,7 +3200,7 @@ loop: ptr->resultcode = 0; if (ptr->flags & (FD_RAW_READ | FD_RAW_WRITE)) { - if (ptr->length <= 0) + if (ptr->length <= 0 || ptr->length >= MAX_LEN) return -EINVAL; ptr->kernel_data = (char *)fd_dma_mem_alloc(ptr->length); fallback_on_nodma_alloc(&ptr->kernel_data, ptr->length); diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f0fa0c8e7ec60c151c0562bba147e8553dcd11f2..dea24990d270ba0894a500a3287550e924206190 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -228,19 +228,6 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_mq_unfreeze_queue(lo->lo_queue); } -/** - * loop_validate_block_size() - validates the passed in block size - * @bsize: size to validate - */ -static int -loop_validate_block_size(unsigned short bsize) -{ - if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) - return -EINVAL; - - return 0; -} - /** * loop_set_size() - sets device size and notifies userspace * @lo: struct loop_device to set the size for @@ -1121,7 +1108,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode, } if (config->block_size) { - error = loop_validate_block_size(config->block_size); + error = blk_validate_block_size(config->block_size); if (error) goto out_unlock; } @@ -1617,7 +1604,7 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (lo->lo_state != Lo_bound) return -ENXIO; - err = loop_validate_block_size(arg); + err = blk_validate_block_size(arg); if (err) return err; @@ -2117,7 +2104,8 @@ static int loop_add(struct loop_device **l, int i) lo->tag_set.queue_depth = 128; lo->tag_set.numa_node = NUMA_NO_NODE; lo->tag_set.cmd_size = sizeof(struct loop_cmd); - lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING; + lo->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING | + BLK_MQ_F_NO_SCHED_BY_DEFAULT; lo->tag_set.driver_data = lo; err = blk_mq_alloc_tag_set(&lo->tag_set); diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 018fb172a0987a34559bde6dbb5321c972093635..73f30b5c2f5232ffba3f2cae9e9ebe1bc09a4acb 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -21,6 +21,9 @@ #define VQ_NAME_LEN 16 #define MAX_DISCARD_SEGMENTS 256u +/* The maximum number of sg elements that fit into a virtqueue */ +#define VIRTIO_BLK_MAX_SG_ELEMS 32768 + static int major; static DEFINE_IDA(vd_index_ida); @@ -506,6 +509,10 @@ static int init_vq(struct virtio_blk *vblk) &num_vqs); if (err) num_vqs = 1; + if (!err && !num_vqs) { + dev_err(&vdev->dev, "MQ advertisted but zero queues reported\n"); + return -EINVAL; + } num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); @@ -730,7 +737,10 @@ static int virtblk_probe(struct virtio_device *vdev) if (err || !sg_elems) sg_elems = 1; - /* We need an extra sg elements at head and tail. */ + /* Prevent integer overflows and honor max vq size */ + sg_elems = min_t(u32, sg_elems, VIRTIO_BLK_MAX_SG_ELEMS - 2); + + /* We need extra sg elements at head and tail. */ sg_elems += 2; vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL); if (!vblk) { @@ -827,9 +837,17 @@ static int virtblk_probe(struct virtio_device *vdev) err = virtio_cread_feature(vdev, VIRTIO_BLK_F_BLK_SIZE, struct virtio_blk_config, blk_size, &blk_size); - if (!err) + if (!err) { + err = blk_validate_block_size(blk_size); + if (err) { + dev_err(&vdev->dev, + "virtio_blk: invalid block size: 0x%x\n", + blk_size); + goto out_cleanup_disk; + } + blk_queue_logical_block_size(q, blk_size); - else + } else blk_size = queue_logical_block_size(q); /* Use topology information if available */ @@ -871,9 +889,15 @@ static int virtblk_probe(struct virtio_device *vdev) virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &v); + + /* + * max_discard_seg == 0 is out of spec but we always + * handled it. + */ + if (!v) + v = sg_elems - 2; blk_queue_max_discard_segments(q, - min_not_zero(v, - MAX_DISCARD_SEGMENTS)); + min(v, MAX_DISCARD_SEGMENTS)); blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); } @@ -890,6 +914,8 @@ static int virtblk_probe(struct virtio_device *vdev) device_add_disk(&vdev->dev, vblk->disk, virtblk_attr_groups); return 0; +out_cleanup_disk: + blk_cleanup_queue(vblk->disk->queue); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_put_disk: diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 3874233f7194d9a2c2a9774ea8281052198575b6..14e452896d04c6293bb4041fb1e6e6430792ef87 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1326,9 +1326,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, pages[i]->page, seg[i].nsec << 9, seg[i].offset) == 0)) { - - int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES); - bio = bio_alloc(GFP_KERNEL, nr_iovecs); + bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i)); if (unlikely(bio == NULL)) goto fail_put_bio; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 10078a743564415d6aeabd184b316b65dd831c0d..47d4bb23d6f31440e8eaed12e3b626ce52143706 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -80,6 +80,7 @@ enum blkif_state { BLKIF_STATE_DISCONNECTED, BLKIF_STATE_CONNECTED, BLKIF_STATE_SUSPENDED, + BLKIF_STATE_ERROR, }; struct grant { @@ -89,6 +90,7 @@ struct grant { }; enum blk_req_status { + REQ_PROCESSING, REQ_WAITING, REQ_DONE, REQ_ERROR, @@ -543,10 +545,10 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, id = get_id_from_freelist(rinfo); rinfo->shadow[id].request = req; - rinfo->shadow[id].status = REQ_WAITING; + rinfo->shadow[id].status = REQ_PROCESSING; rinfo->shadow[id].associated_id = NO_ASSOCIATED_ID; - (*ring_req)->u.rw.id = id; + rinfo->shadow[id].req.u.rw.id = id; return id; } @@ -554,11 +556,12 @@ static unsigned long blkif_ring_get_request(struct blkfront_ring_info *rinfo, static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_info *rinfo) { struct blkfront_info *info = rinfo->dev_info; - struct blkif_request *ring_req; + struct blkif_request *ring_req, *final_ring_req; unsigned long id; /* Fill out a communications ring structure. */ - id = blkif_ring_get_request(rinfo, req, &ring_req); + id = blkif_ring_get_request(rinfo, req, &final_ring_req); + ring_req = &rinfo->shadow[id].req; ring_req->operation = BLKIF_OP_DISCARD; ring_req->u.discard.nr_sectors = blk_rq_sectors(req); @@ -569,8 +572,9 @@ static int blkif_queue_discard_req(struct request *req, struct blkfront_ring_inf else ring_req->u.discard.flag = 0; - /* Keep a private copy so we can reissue requests when recovering. */ - rinfo->shadow[id].req = *ring_req; + /* Copy the request to the ring page. */ + *final_ring_req = *ring_req; + rinfo->shadow[id].status = REQ_WAITING; return 0; } @@ -703,6 +707,7 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri { struct blkfront_info *info = rinfo->dev_info; struct blkif_request *ring_req, *extra_ring_req = NULL; + struct blkif_request *final_ring_req, *final_extra_ring_req = NULL; unsigned long id, extra_id = NO_ASSOCIATED_ID; bool require_extra_req = false; int i; @@ -747,7 +752,8 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri } /* Fill out a communications ring structure. */ - id = blkif_ring_get_request(rinfo, req, &ring_req); + id = blkif_ring_get_request(rinfo, req, &final_ring_req); + ring_req = &rinfo->shadow[id].req; num_sg = blk_rq_map_sg(req->q, req, rinfo->shadow[id].sg); num_grant = 0; @@ -798,7 +804,9 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri ring_req->u.rw.nr_segments = num_grant; if (unlikely(require_extra_req)) { extra_id = blkif_ring_get_request(rinfo, req, - &extra_ring_req); + &final_extra_ring_req); + extra_ring_req = &rinfo->shadow[extra_id].req; + /* * Only the first request contains the scatter-gather * list. @@ -840,10 +848,13 @@ static int blkif_queue_rw_req(struct request *req, struct blkfront_ring_info *ri if (setup.segments) kunmap_atomic(setup.segments); - /* Keep a private copy so we can reissue requests when recovering. */ - rinfo->shadow[id].req = *ring_req; - if (unlikely(require_extra_req)) - rinfo->shadow[extra_id].req = *extra_ring_req; + /* Copy request(s) to the ring page. */ + *final_ring_req = *ring_req; + rinfo->shadow[id].status = REQ_WAITING; + if (unlikely(require_extra_req)) { + *final_extra_ring_req = *extra_ring_req; + rinfo->shadow[extra_id].status = REQ_WAITING; + } if (new_persistent_gnts) gnttab_free_grant_references(setup.gref_head); @@ -1341,7 +1352,8 @@ free_shadow: rinfo->ring_ref[i] = GRANT_INVALID_REF; } } - free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE)); + free_pages_exact(rinfo->ring.sring, + info->nr_ring_pages * XEN_PAGE_SIZE); rinfo->ring.sring = NULL; if (rinfo->irq) @@ -1415,8 +1427,8 @@ static enum blk_req_status blkif_rsp_to_req_status(int rsp) static int blkif_get_final_status(enum blk_req_status s1, enum blk_req_status s2) { - BUG_ON(s1 == REQ_WAITING); - BUG_ON(s2 == REQ_WAITING); + BUG_ON(s1 < REQ_DONE); + BUG_ON(s2 < REQ_DONE); if (s1 == REQ_ERROR || s2 == REQ_ERROR) return BLKIF_RSP_ERROR; @@ -1425,9 +1437,15 @@ static int blkif_get_final_status(enum blk_req_status s1, return BLKIF_RSP_OKAY; } -static bool blkif_completion(unsigned long *id, - struct blkfront_ring_info *rinfo, - struct blkif_response *bret) +/* + * Return values: + * 1 response processed. + * 0 missing further responses. + * -1 error while processing. + */ +static int blkif_completion(unsigned long *id, + struct blkfront_ring_info *rinfo, + struct blkif_response *bret) { int i = 0; struct scatterlist *sg; @@ -1449,8 +1467,8 @@ static bool blkif_completion(unsigned long *id, s->status = blkif_rsp_to_req_status(bret->status); /* Wait the second response if not yet here. */ - if (s2->status == REQ_WAITING) - return false; + if (s2->status < REQ_DONE) + return 0; bret->status = blkif_get_final_status(s->status, s2->status); @@ -1501,42 +1519,43 @@ static bool blkif_completion(unsigned long *id, } /* Add the persistent grant into the list of free grants */ for (i = 0; i < num_grant; i++) { - if (gnttab_query_foreign_access(s->grants_used[i]->gref)) { + if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) { /* * If the grant is still mapped by the backend (the * backend has chosen to make this grant persistent) * we add it at the head of the list, so it will be * reused first. */ - if (!info->feature_persistent) - pr_alert_ratelimited("backed has not unmapped grant: %u\n", - s->grants_used[i]->gref); + if (!info->feature_persistent) { + pr_alert("backed has not unmapped grant: %u\n", + s->grants_used[i]->gref); + return -1; + } list_add(&s->grants_used[i]->node, &rinfo->grants); rinfo->persistent_gnts_c++; } else { /* - * If the grant is not mapped by the backend we end the - * foreign access and add it to the tail of the list, - * so it will not be picked again unless we run out of - * persistent grants. + * If the grant is not mapped by the backend we add it + * to the tail of the list, so it will not be picked + * again unless we run out of persistent grants. */ - gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL); s->grants_used[i]->gref = GRANT_INVALID_REF; list_add_tail(&s->grants_used[i]->node, &rinfo->grants); } } if (s->req.operation == BLKIF_OP_INDIRECT) { for (i = 0; i < INDIRECT_GREFS(num_grant); i++) { - if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) { - if (!info->feature_persistent) - pr_alert_ratelimited("backed has not unmapped grant: %u\n", - s->indirect_grants[i]->gref); + if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) { + if (!info->feature_persistent) { + pr_alert("backed has not unmapped grant: %u\n", + s->indirect_grants[i]->gref); + return -1; + } list_add(&s->indirect_grants[i]->node, &rinfo->grants); rinfo->persistent_gnts_c++; } else { struct page *indirect_page; - gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL); /* * Add the used indirect page back to the list of * available pages for indirect grefs. @@ -1551,71 +1570,103 @@ static bool blkif_completion(unsigned long *id, } } - return true; + return 1; } static irqreturn_t blkif_interrupt(int irq, void *dev_id) { struct request *req; - struct blkif_response *bret; + struct blkif_response bret; RING_IDX i, rp; unsigned long flags; struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)dev_id; struct blkfront_info *info = rinfo->dev_info; + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; - if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); return IRQ_HANDLED; + } spin_lock_irqsave(&rinfo->ring_lock, flags); again: - rp = rinfo->ring.sring->rsp_prod; - rmb(); /* Ensure we see queued responses up to 'rp'. */ + rp = READ_ONCE(rinfo->ring.sring->rsp_prod); + virt_rmb(); /* Ensure we see queued responses up to 'rp'. */ + if (RING_RESPONSE_PROD_OVERFLOW(&rinfo->ring, rp)) { + pr_alert("%s: illegal number of responses %u\n", + info->gd->disk_name, rp - rinfo->ring.rsp_cons); + goto err; + } for (i = rinfo->ring.rsp_cons; i != rp; i++) { unsigned long id; + unsigned int op; + + eoiflag = 0; + + RING_COPY_RESPONSE(&rinfo->ring, i, &bret); + id = bret.id; - bret = RING_GET_RESPONSE(&rinfo->ring, i); - id = bret->id; /* * The backend has messed up and given us an id that we would * never have given to it (we stamp it up to BLK_RING_SIZE - * look in get_id_from_freelist. */ if (id >= BLK_RING_SIZE(info)) { - WARN(1, "%s: response to %s has incorrect id (%ld)\n", - info->gd->disk_name, op_name(bret->operation), id); - /* We can't safely get the 'struct request' as - * the id is busted. */ - continue; + pr_alert("%s: response has incorrect id (%ld)\n", + info->gd->disk_name, id); + goto err; } + if (rinfo->shadow[id].status != REQ_WAITING) { + pr_alert("%s: response references no pending request\n", + info->gd->disk_name); + goto err; + } + + rinfo->shadow[id].status = REQ_PROCESSING; req = rinfo->shadow[id].request; - if (bret->operation != BLKIF_OP_DISCARD) { + op = rinfo->shadow[id].req.operation; + if (op == BLKIF_OP_INDIRECT) + op = rinfo->shadow[id].req.u.indirect.indirect_op; + if (bret.operation != op) { + pr_alert("%s: response has wrong operation (%u instead of %u)\n", + info->gd->disk_name, bret.operation, op); + goto err; + } + + if (bret.operation != BLKIF_OP_DISCARD) { + int ret; + /* * We may need to wait for an extra response if the * I/O request is split in 2 */ - if (!blkif_completion(&id, rinfo, bret)) + ret = blkif_completion(&id, rinfo, &bret); + if (!ret) continue; + if (unlikely(ret < 0)) + goto err; } if (add_id_to_freelist(rinfo, id)) { WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", - info->gd->disk_name, op_name(bret->operation), id); + info->gd->disk_name, op_name(bret.operation), id); continue; } - if (bret->status == BLKIF_RSP_OKAY) + if (bret.status == BLKIF_RSP_OKAY) blkif_req(req)->error = BLK_STS_OK; else blkif_req(req)->error = BLK_STS_IOERR; - switch (bret->operation) { + switch (bret.operation) { case BLKIF_OP_DISCARD: - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { struct request_queue *rq = info->rq; - printk(KERN_WARNING "blkfront: %s: %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + + pr_warn_ratelimited("blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; info->feature_discard = 0; info->feature_secdiscard = 0; @@ -1625,15 +1676,15 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) break; case BLKIF_OP_FLUSH_DISKCACHE: case BLKIF_OP_WRITE_BARRIER: - if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { - printk(KERN_WARNING "blkfront: %s: %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + if (unlikely(bret.status == BLKIF_RSP_EOPNOTSUPP)) { + pr_warn_ratelimited("blkfront: %s: %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; } - if (unlikely(bret->status == BLKIF_RSP_ERROR && + if (unlikely(bret.status == BLKIF_RSP_ERROR && rinfo->shadow[id].req.u.rw.nr_segments == 0)) { - printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", - info->gd->disk_name, op_name(bret->operation)); + pr_warn_ratelimited("blkfront: %s: empty %s op failed\n", + info->gd->disk_name, op_name(bret.operation)); blkif_req(req)->error = BLK_STS_NOTSUPP; } if (unlikely(blkif_req(req)->error)) { @@ -1646,9 +1697,10 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) fallthrough; case BLKIF_OP_READ: case BLKIF_OP_WRITE: - if (unlikely(bret->status != BLKIF_RSP_OKAY)) - dev_dbg(&info->xbdev->dev, "Bad return from blkdev data " - "request: %x\n", bret->status); + if (unlikely(bret.status != BLKIF_RSP_OKAY)) + dev_dbg_ratelimited(&info->xbdev->dev, + "Bad return from blkdev data request: %#x\n", + bret.status); break; default: @@ -1673,6 +1725,18 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) spin_unlock_irqrestore(&rinfo->ring_lock, flags); + xen_irq_lateeoi(irq, eoiflag); + + return IRQ_HANDLED; + + err: + info->connected = BLKIF_STATE_ERROR; + + spin_unlock_irqrestore(&rinfo->ring_lock, flags); + + /* No EOI in order to avoid further interrupts. */ + + pr_alert("%s disabled for further use\n", info->gd->disk_name); return IRQ_HANDLED; } @@ -1689,8 +1753,7 @@ static int setup_blkring(struct xenbus_device *dev, for (i = 0; i < info->nr_ring_pages; i++) rinfo->ring_ref[i] = GRANT_INVALID_REF; - sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, - get_order(ring_size)); + sring = alloc_pages_exact(ring_size, GFP_NOIO); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; @@ -1700,7 +1763,7 @@ static int setup_blkring(struct xenbus_device *dev, err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref); if (err < 0) { - free_pages((unsigned long)sring, get_order(ring_size)); + free_pages_exact(sring, ring_size); rinfo->ring.sring = NULL; goto fail; } @@ -1711,8 +1774,8 @@ static int setup_blkring(struct xenbus_device *dev, if (err) goto fail; - err = bind_evtchn_to_irqhandler(rinfo->evtchn, blkif_interrupt, 0, - "blkif", rinfo); + err = bind_evtchn_to_irqhandler_lateeoi(rinfo->evtchn, blkif_interrupt, + 0, "blkif", rinfo); if (err <= 0) { xenbus_dev_fatal(dev, err, "bind_evtchn_to_irqhandler failed"); @@ -2678,11 +2741,10 @@ static void purge_persistent_grants(struct blkfront_info *info) list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants, node) { if (gnt_list_entry->gref == GRANT_INVALID_REF || - gnttab_query_foreign_access(gnt_list_entry->gref)) + !gnttab_try_end_foreign_access(gnt_list_entry->gref)) continue; list_del(&gnt_list_entry->node); - gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); rinfo->persistent_gnts_c--; gnt_list_entry->gref = GRANT_INVALID_REF; list_add_tail(&gnt_list_entry->node, &rinfo->grants); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index afef1a6a1e52ab93e291df5c7fd5c5fedd3edfdd..a8b8538803683a3202c82dce9a483f2a9fa39f8d 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -920,7 +920,7 @@ static ssize_t read_block_state(struct file *file, char __user *buf, zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.', zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.'); - if (count < copied) { + if (count <= copied) { zram_slot_unlock(zram, index); break; } diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index 5a321b4076aababf90bdaac7b6bdb1cae173e4f6..cab93935cc7f16aabb2688dad98fe2065940b622 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -628,6 +628,9 @@ static int bfusb_probe(struct usb_interface *intf, const struct usb_device_id *i data->bulk_out_ep = bulk_out_ep->desc.bEndpointAddress; data->bulk_pkt_size = le16_to_cpu(bulk_out_ep->desc.wMaxPacketSize); + if (!data->bulk_pkt_size) + goto done; + rwlock_init(&data->lock); data->reassembly = NULL; diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 5f9f027956317ef3a5bb71f6753f39d7522023cf..74856a58621622dae4582f05a4914dd7bdfe3f05 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1042,6 +1042,8 @@ static int btmtksdio_runtime_suspend(struct device *dev) if (!bdev) return 0; + sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER); + sdio_claim_host(bdev->func); sdio_writel(bdev->func, C_FW_OWN_REQ_SET, MTK_REG_CHLPCR, &err); diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c index 6c40bc75fb5b8d5c9798f4bbbcb6a1d82b087d71..719d4685a2dddbf79e8e47a4973eb25daefa5bf0 100644 --- a/drivers/bluetooth/btmtkuart.c +++ b/drivers/bluetooth/btmtkuart.c @@ -158,8 +158,10 @@ static int mtk_hci_wmt_sync(struct hci_dev *hdev, int err; hlen = sizeof(*hdr) + wmt_params->dlen; - if (hlen > 255) - return -EINVAL; + if (hlen > 255) { + err = -EINVAL; + goto err_free_skb; + } hdr = (struct mtk_wmt_hdr *)&wc; hdr->dir = 1; @@ -173,7 +175,7 @@ static int mtk_hci_wmt_sync(struct hci_dev *hdev, err = __hci_cmd_send(hdev, 0xfc6f, hlen, &wc); if (err < 0) { clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state); - return err; + goto err_free_skb; } /* The vendor specific WMT commands are all answered by a vendor @@ -190,13 +192,14 @@ static int mtk_hci_wmt_sync(struct hci_dev *hdev, if (err == -EINTR) { bt_dev_err(hdev, "Execution of wmt command interrupted"); clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state); - return err; + goto err_free_skb; } if (err) { bt_dev_err(hdev, "Execution of wmt command timed out"); clear_bit(BTMTKUART_TX_WAIT_VND_EVT, &bdev->tx_state); - return -ETIMEDOUT; + err = -ETIMEDOUT; + goto err_free_skb; } /* Parse and handle the return WMT event */ diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index e0859f4e2807351dc8d6cf4900e1c1c7ebce44b6..538232b4c42ac165de8d5c4b1086097a4be80cdb 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -379,6 +379,15 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_NEW | BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x10ab, 0x9309), .driver_info = BTUSB_QCA_WCN6855 | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x10ab, 0x9409), .driver_info = BTUSB_QCA_WCN6855 | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x0489, 0xe0d0), .driver_info = BTUSB_QCA_WCN6855 | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, /* Other Intel Bluetooth devices */ { USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01), @@ -400,6 +409,14 @@ static const struct usb_device_id blacklist_table[] = { BTUSB_WIDEBAND_SPEECH | BTUSB_VALID_LE_STATES }, + /* MediaTek MT7922A Bluetooth devices */ + { USB_DEVICE(0x0489, 0xe0d8), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, + { USB_DEVICE(0x0489, 0xe0d9), .driver_info = BTUSB_MEDIATEK | + BTUSB_WIDEBAND_SPEECH | + BTUSB_VALID_LE_STATES }, + /* Additional Realtek 8723AE Bluetooth devices */ { USB_DEVICE(0x0930, 0x021d), .driver_info = BTUSB_REALTEK }, { USB_DEVICE(0x13d3, 0x3394), .driver_info = BTUSB_REALTEK }, @@ -2845,6 +2862,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) skb = bt_skb_alloc(HCI_WMT_MAX_EVENT_SIZE, GFP_ATOMIC); if (!skb) { hdev->stat.err_rx++; + kfree(urb->setup_packet); return; } @@ -2865,6 +2883,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) data->evt_skb = skb_clone(skb, GFP_ATOMIC); if (!data->evt_skb) { kfree_skb(skb); + kfree(urb->setup_packet); return; } } @@ -2873,6 +2892,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) if (err < 0) { kfree_skb(data->evt_skb); data->evt_skb = NULL; + kfree(urb->setup_packet); return; } @@ -2883,6 +2903,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) wake_up_bit(&data->flags, BTUSB_TX_WAIT_VND_EVT); } + kfree(urb->setup_packet); return; } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ @@ -2903,6 +2924,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) usb_anchor_urb(urb, &data->ctrl_anchor); err = usb_submit_urb(urb, GFP_ATOMIC); if (err < 0) { + kfree(urb->setup_packet); /* -EPERM: urb is being killed; * -ENODEV: device got disconnected */ diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 3764ceb6fa0d52a454bd2954f8c19c4e10ba060a..3de0ce5386acc6e9774fa9663f879599489b63f2 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -1165,7 +1165,12 @@ static int bcm_probe(struct platform_device *pdev) return -ENOMEM; dev->dev = &pdev->dev; - dev->irq = platform_get_irq(pdev, 0); + + ret = platform_get_irq(pdev, 0); + if (ret < 0) + return ret; + + dev->irq = ret; /* Initialize routing field to an unused value */ dev->pcm_int_params[0] = 0xff; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 4184faef9f169b9defdfede96734f9486487a40d..dc7ee5dd2eeca3f15685ea97ba85e8396a3f77c4 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -1844,6 +1844,9 @@ static int qca_power_off(struct hci_dev *hdev) hu->hdev->hw_error = NULL; hu->hdev->cmd_timeout = NULL; + del_timer_sync(&qca->wake_retrans_timer); + del_timer_sync(&qca->tx_idle_timer); + /* Stop sending shutdown command if soc crashes. */ if (soc_type != QCA_ROME && qca->memdump_state == QCA_MEMDUMP_IDLE) { @@ -1987,7 +1990,7 @@ static int qca_serdev_probe(struct serdev_device *serdev) qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", GPIOD_OUT_LOW); - if (!qcadev->bt_en) { + if (IS_ERR_OR_NULL(qcadev->bt_en)) { dev_warn(&serdev->dev, "failed to acquire enable gpio\n"); power_ctrl_enabled = false; } diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index 8ab26dec5f6e8ccdd1a9c796f4c66ebdd13a63c7..8469f9876dd2674f86d4f02705013140c1c1f060 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -121,6 +121,8 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) if (opcode & 0x80) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); + set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); hci_free_dev(hdev); diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c index 159b57c6dc4df47520ba65267a60cfd04b80a7be..18f0650c5d40577fe672238640c960205351837f 100644 --- a/drivers/bus/ti-sysc.c +++ b/drivers/bus/ti-sysc.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include @@ -51,11 +53,18 @@ struct sysc_address { struct list_head node; }; +struct sysc_module { + struct sysc *ddata; + struct list_head node; +}; + struct sysc_soc_info { unsigned long general_purpose:1; enum sysc_soc soc; - struct mutex list_lock; /* disabled modules list lock */ + struct mutex list_lock; /* disabled and restored modules list lock */ struct list_head disabled_modules; + struct list_head restored_modules; + struct notifier_block nb; }; enum sysc_clocks { @@ -223,37 +232,77 @@ static u32 sysc_read_sysstatus(struct sysc *ddata) return sysc_read(ddata, offset); } -/* Poll on reset status */ -static int sysc_wait_softreset(struct sysc *ddata) +static int sysc_poll_reset_sysstatus(struct sysc *ddata) { - u32 sysc_mask, syss_done, rstval; - int syss_offset, error = 0; - - if (ddata->cap->regbits->srst_shift < 0) - return 0; - - syss_offset = ddata->offsets[SYSC_SYSSTATUS]; - sysc_mask = BIT(ddata->cap->regbits->srst_shift); + int error, retries; + u32 syss_done, rstval; if (ddata->cfg.quirks & SYSS_QUIRK_RESETDONE_INVERTED) syss_done = 0; else syss_done = ddata->cfg.syss_mask; - if (syss_offset >= 0) { + if (likely(!timekeeping_suspended)) { error = readx_poll_timeout_atomic(sysc_read_sysstatus, ddata, rstval, (rstval & ddata->cfg.syss_mask) == syss_done, 100, MAX_MODULE_SOFTRESET_WAIT); + } else { + retries = MAX_MODULE_SOFTRESET_WAIT; + while (retries--) { + rstval = sysc_read_sysstatus(ddata); + if ((rstval & ddata->cfg.syss_mask) == syss_done) + return 0; + udelay(2); /* Account for udelay flakeyness */ + } + error = -ETIMEDOUT; + } + + return error; +} + +static int sysc_poll_reset_sysconfig(struct sysc *ddata) +{ + int error, retries; + u32 sysc_mask, rstval; - } else if (ddata->cfg.quirks & SYSC_QUIRK_RESET_STATUS) { + sysc_mask = BIT(ddata->cap->regbits->srst_shift); + + if (likely(!timekeeping_suspended)) { error = readx_poll_timeout_atomic(sysc_read_sysconfig, ddata, rstval, !(rstval & sysc_mask), 100, MAX_MODULE_SOFTRESET_WAIT); + } else { + retries = MAX_MODULE_SOFTRESET_WAIT; + while (retries--) { + rstval = sysc_read_sysconfig(ddata); + if (!(rstval & sysc_mask)) + return 0; + udelay(2); /* Account for udelay flakeyness */ + } + error = -ETIMEDOUT; } return error; } +/* Poll on reset status */ +static int sysc_wait_softreset(struct sysc *ddata) +{ + int syss_offset, error = 0; + + if (ddata->cap->regbits->srst_shift < 0) + return 0; + + syss_offset = ddata->offsets[SYSC_SYSSTATUS]; + + if (syss_offset >= 0) + error = sysc_poll_reset_sysstatus(ddata); + else if (ddata->cfg.quirks & SYSC_QUIRK_RESET_STATUS) + error = sysc_poll_reset_sysconfig(ddata); + + return error; +} + static int sysc_add_named_clock_from_child(struct sysc *ddata, const char *name, const char *optfck_name) @@ -1464,6 +1513,9 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { /* Quirks that need to be set based on detected module */ SYSC_QUIRK("aess", 0, 0, 0x10, -ENODEV, 0x40000000, 0xffffffff, SYSC_MODULE_QUIRK_AESS), + /* Errata i893 handling for dra7 dcan1 and 2 */ + SYSC_QUIRK("dcan", 0x4ae3c000, 0x20, -ENODEV, -ENODEV, 0xa3170504, 0xffffffff, + SYSC_QUIRK_CLKDM_NOAUTO), SYSC_QUIRK("dcan", 0x48480000, 0x20, -ENODEV, -ENODEV, 0xa3170504, 0xffffffff, SYSC_QUIRK_CLKDM_NOAUTO), SYSC_QUIRK("dss", 0x4832a000, 0, 0x10, 0x14, 0x00000020, 0xffffffff, @@ -1511,7 +1563,7 @@ static const struct sysc_revision_quirk sysc_revision_quirks[] = { 0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY), SYSC_QUIRK("usb_otg_hs", 0, 0, 0x10, -ENODEV, 0x4ea2080d, 0xffffffff, SYSC_QUIRK_SWSUP_SIDLE | SYSC_QUIRK_SWSUP_MSTANDBY | - SYSC_QUIRK_REINIT_ON_RESUME), + SYSC_QUIRK_REINIT_ON_CTX_LOST), SYSC_QUIRK("wdt", 0, 0, 0x10, 0x14, 0x502a0500, 0xfffff0f0, SYSC_MODULE_QUIRK_WDT), /* PRUSS on am3, am4 and am5 */ @@ -2385,6 +2437,78 @@ static struct dev_pm_domain sysc_child_pm_domain = { } }; +/* Caller needs to take list_lock if ever used outside of cpu_pm */ +static void sysc_reinit_modules(struct sysc_soc_info *soc) +{ + struct sysc_module *module; + struct list_head *pos; + struct sysc *ddata; + + list_for_each(pos, &sysc_soc->restored_modules) { + module = list_entry(pos, struct sysc_module, node); + ddata = module->ddata; + sysc_reinit_module(ddata, ddata->enabled); + } +} + +/** + * sysc_context_notifier - optionally reset and restore module after idle + * @nb: notifier block + * @cmd: unused + * @v: unused + * + * Some interconnect target modules need to be restored, or reset and restored + * on CPU_PM CPU_PM_CLUSTER_EXIT notifier. This is needed at least for am335x + * OTG and GPMC target modules even if the modules are unused. + */ +static int sysc_context_notifier(struct notifier_block *nb, unsigned long cmd, + void *v) +{ + struct sysc_soc_info *soc; + + soc = container_of(nb, struct sysc_soc_info, nb); + + switch (cmd) { + case CPU_CLUSTER_PM_ENTER: + break; + case CPU_CLUSTER_PM_ENTER_FAILED: /* No need to restore context */ + break; + case CPU_CLUSTER_PM_EXIT: + sysc_reinit_modules(soc); + break; + } + + return NOTIFY_OK; +} + +/** + * sysc_add_restored - optionally add reset and restore quirk hanlling + * @ddata: device data + */ +static void sysc_add_restored(struct sysc *ddata) +{ + struct sysc_module *restored_module; + + restored_module = kzalloc(sizeof(*restored_module), GFP_KERNEL); + if (!restored_module) + return; + + restored_module->ddata = ddata; + + mutex_lock(&sysc_soc->list_lock); + + list_add(&restored_module->node, &sysc_soc->restored_modules); + + if (sysc_soc->nb.notifier_call) + goto out_unlock; + + sysc_soc->nb.notifier_call = sysc_context_notifier; + cpu_pm_register_notifier(&sysc_soc->nb); + +out_unlock: + mutex_unlock(&sysc_soc->list_lock); +} + /** * sysc_legacy_idle_quirk - handle children in omap_device compatible way * @ddata: device driver data @@ -2884,12 +3008,14 @@ static int sysc_add_disabled(unsigned long base) } /* - * One time init to detect the booted SoC and disable unavailable features. + * One time init to detect the booted SoC, disable unavailable features + * and initialize list for optional cpu_pm notifier. + * * Note that we initialize static data shared across all ti-sysc instances * so ddata is only used for SoC type. This can be called from module_init * once we no longer need to rely on platform data. */ -static int sysc_init_soc(struct sysc *ddata) +static int sysc_init_static_data(struct sysc *ddata) { const struct soc_device_attribute *match; struct ti_sysc_platform_data *pdata; @@ -2904,6 +3030,7 @@ static int sysc_init_soc(struct sysc *ddata) mutex_init(&sysc_soc->list_lock); INIT_LIST_HEAD(&sysc_soc->disabled_modules); + INIT_LIST_HEAD(&sysc_soc->restored_modules); sysc_soc->general_purpose = true; pdata = dev_get_platdata(ddata->dev); @@ -2922,6 +3049,7 @@ static int sysc_init_soc(struct sysc *ddata) break; case SOC_AM3: sysc_add_disabled(0x48310000); /* rng */ + break; default: break; }; @@ -2949,15 +3077,24 @@ static int sysc_init_soc(struct sysc *ddata) return 0; } -static void sysc_cleanup_soc(void) +static void sysc_cleanup_static_data(void) { + struct sysc_module *restored_module; struct sysc_address *disabled_module; struct list_head *pos, *tmp; if (!sysc_soc) return; + if (sysc_soc->nb.notifier_call) + cpu_pm_unregister_notifier(&sysc_soc->nb); + mutex_lock(&sysc_soc->list_lock); + list_for_each_safe(pos, tmp, &sysc_soc->restored_modules) { + restored_module = list_entry(pos, struct sysc_module, node); + list_del(pos); + kfree(restored_module); + } list_for_each_safe(pos, tmp, &sysc_soc->disabled_modules) { disabled_module = list_entry(pos, struct sysc_address, node); list_del(pos); @@ -3022,7 +3159,7 @@ static int sysc_probe(struct platform_device *pdev) ddata->dev = &pdev->dev; platform_set_drvdata(pdev, ddata); - error = sysc_init_soc(ddata); + error = sysc_init_static_data(ddata); if (error) return error; @@ -3121,6 +3258,9 @@ static int sysc_probe(struct platform_device *pdev) pm_runtime_put(&pdev->dev); } + if (ddata->cfg.quirks & SYSC_QUIRK_REINIT_ON_CTX_LOST) + sysc_add_restored(ddata); + return 0; err: @@ -3203,7 +3343,7 @@ static void __exit sysc_exit(void) { bus_unregister_notifier(&platform_bus_type, &sysc_nb); platform_driver_unregister(&sysc_driver); - sysc_cleanup_soc(); + sysc_cleanup_static_data(); } module_exit(sysc_exit); diff --git a/drivers/char/agp/parisc-agp.c b/drivers/char/agp/parisc-agp.c index ed3c4c42fc23b7f8280394060a44006b9d5adcf4..d68d05d5d38388523b0fecfdd45cb6e32488c784 100644 --- a/drivers/char/agp/parisc-agp.c +++ b/drivers/char/agp/parisc-agp.c @@ -281,7 +281,7 @@ agp_ioc_init(void __iomem *ioc_regs) return 0; } -static int +static int __init lba_find_capability(int cap) { struct _parisc_agp_info *info = &parisc_agp_info; @@ -366,7 +366,7 @@ fail: return error; } -static int +static int __init find_quicksilver(struct device *dev, void *data) { struct parisc_device **lba = data; @@ -378,7 +378,7 @@ find_quicksilver(struct device *dev, void *data) return 0; } -static int +static int __init parisc_agp_init(void) { extern struct sba_device *sba_list; diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c index 8ad7b515a51b84689fb6326e5af8bfd105024ece..6c00ea0085553a60c246837f626e3a69c5ae01cc 100644 --- a/drivers/char/hw_random/mtk-rng.c +++ b/drivers/char/hw_random/mtk-rng.c @@ -166,8 +166,13 @@ static int mtk_rng_runtime_resume(struct device *dev) return mtk_rng_init(&priv->rng); } -static UNIVERSAL_DEV_PM_OPS(mtk_rng_pm_ops, mtk_rng_runtime_suspend, - mtk_rng_runtime_resume, NULL); +static const struct dev_pm_ops mtk_rng_pm_ops = { + SET_RUNTIME_PM_OPS(mtk_rng_runtime_suspend, + mtk_rng_runtime_resume, NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) +}; + #define MTK_RNG_PM_OPS (&mtk_rng_pm_ops) #else /* CONFIG_PM */ #define MTK_RNG_PM_OPS NULL diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 8774a3b8ff95955615e81bbaf8308b8b11b36e44..8f147274f826a5b26ed817a85094830edb7607e9 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -203,6 +203,8 @@ struct ipmi_user { struct work_struct remove_work; }; +static struct workqueue_struct *remove_work_wq; + static struct ipmi_user *acquire_ipmi_user(struct ipmi_user *user, int *index) __acquires(user->release_barrier) { @@ -1272,7 +1274,7 @@ static void free_user(struct kref *ref) struct ipmi_user *user = container_of(ref, struct ipmi_user, refcount); /* SRCU cleanup must happen in task context. */ - schedule_work(&user->remove_work); + queue_work(remove_work_wq, &user->remove_work); } static void _ipmi_destroy_user(struct ipmi_user *user) @@ -2943,7 +2945,7 @@ cleanup_bmc_device(struct kref *ref) * with removing the device attributes while reading a device * attribute. */ - schedule_work(&bmc->remove_work); + queue_work(remove_work_wq, &bmc->remove_work); } /* @@ -4802,7 +4804,9 @@ static atomic_t recv_msg_inuse_count = ATOMIC_INIT(0); static void free_smi_msg(struct ipmi_smi_msg *msg) { atomic_dec(&smi_msg_inuse_count); - kfree(msg); + /* Try to keep as much stuff out of the panic path as possible. */ + if (!oops_in_progress) + kfree(msg); } struct ipmi_smi_msg *ipmi_alloc_smi_msg(void) @@ -4821,7 +4825,9 @@ EXPORT_SYMBOL(ipmi_alloc_smi_msg); static void free_recv_msg(struct ipmi_recv_msg *msg) { atomic_dec(&recv_msg_inuse_count); - kfree(msg); + /* Try to keep as much stuff out of the panic path as possible. */ + if (!oops_in_progress) + kfree(msg); } static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) @@ -4839,7 +4845,7 @@ static struct ipmi_recv_msg *ipmi_alloc_recv_msg(void) void ipmi_free_recv_msg(struct ipmi_recv_msg *msg) { - if (msg->user) + if (msg->user && !oops_in_progress) kref_put(&msg->user->refcount, free_user); msg->done(msg); } @@ -5155,7 +5161,16 @@ static int ipmi_init_msghandler(void) if (initialized) goto out; - init_srcu_struct(&ipmi_interfaces_srcu); + rv = init_srcu_struct(&ipmi_interfaces_srcu); + if (rv) + goto out; + + remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq"); + if (!remove_work_wq) { + pr_err("unable to create ipmi-msghandler-remove-wq workqueue"); + rv = -ENOMEM; + goto out_wq; + } timer_setup(&ipmi_timer, ipmi_timeout, 0); mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); @@ -5164,6 +5179,9 @@ static int ipmi_init_msghandler(void) initialized = true; +out_wq: + if (rv) + cleanup_srcu_struct(&ipmi_interfaces_srcu); out: mutex_unlock(&ipmi_interfaces_mutex); return rv; @@ -5187,6 +5205,8 @@ static void __exit cleanup_ipmi(void) int count; if (initialized) { + destroy_workqueue(remove_work_wq); + atomic_notifier_chain_unregister(&panic_notifier_list, &panic_block); diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c index 0416b9c9d410541b21a1902f001f99613075f79e..3de679723648b1e732607bc433a47e7a41627eca 100644 --- a/drivers/char/ipmi/ipmi_ssif.c +++ b/drivers/char/ipmi/ipmi_ssif.c @@ -1700,6 +1700,9 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) } } + ssif_info->client = client; + i2c_set_clientdata(client, ssif_info); + rv = ssif_check_and_remove(client, ssif_info); /* If rv is 0 and addr source is not SI_ACPI, continue probing */ if (!rv && ssif_info->addr_source == SI_ACPI) { @@ -1720,9 +1723,6 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) ipmi_addr_src_to_str(ssif_info->addr_source), client->addr, client->adapter->name, slave_addr); - ssif_info->client = client; - i2c_set_clientdata(client, ssif_info); - /* Now check for system interface capabilities */ msg[0] = IPMI_NETFN_APP_REQUEST << 2; msg[1] = IPMI_GET_SYSTEM_INTERFACE_CAPABILITIES_CMD; @@ -1922,6 +1922,7 @@ static int ssif_probe(struct i2c_client *client, const struct i2c_device_id *id) dev_err(&ssif_info->client->dev, "Unable to start IPMI SSIF: %d\n", rv); + i2c_set_clientdata(client, NULL); kfree(ssif_info); } kfree(resp); diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 6384510c48d6b167aa9978a2473a7e9fc4555381..92eda5b2f1341ba46053a67c715b6ecc67be7516 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -342,13 +342,17 @@ static atomic_t msg_tofree = ATOMIC_INIT(0); static DECLARE_COMPLETION(msg_wait); static void msg_free_smi(struct ipmi_smi_msg *msg) { - if (atomic_dec_and_test(&msg_tofree)) - complete(&msg_wait); + if (atomic_dec_and_test(&msg_tofree)) { + if (!oops_in_progress) + complete(&msg_wait); + } } static void msg_free_recv(struct ipmi_recv_msg *msg) { - if (atomic_dec_and_test(&msg_tofree)) - complete(&msg_wait); + if (atomic_dec_and_test(&msg_tofree)) { + if (!oops_in_progress) + complete(&msg_wait); + } } static struct ipmi_smi_msg smi_msg = { .done = msg_free_smi @@ -434,8 +438,10 @@ static int _ipmi_set_timeout(int do_heartbeat) rv = __ipmi_set_timeout(&smi_msg, &recv_msg, &send_heartbeat_now); - if (rv) + if (rv) { + atomic_set(&msg_tofree, 0); return rv; + } wait_for_completion(&msg_wait); @@ -580,6 +586,7 @@ restart: &recv_msg, 1); if (rv) { + atomic_set(&msg_tofree, 0); pr_warn("heartbeat send failure: %d\n", rv); return rv; } diff --git a/drivers/char/mwave/3780i.h b/drivers/char/mwave/3780i.h index 9ccb6b270b071cfbf03b250d1af442f28d75bafe..95164246afd1a313d3fb3a2726c6abcff35c77b0 100644 --- a/drivers/char/mwave/3780i.h +++ b/drivers/char/mwave/3780i.h @@ -68,7 +68,7 @@ typedef struct { unsigned char ClockControl:1; /* RW: Clock control: 0=normal, 1=stop 3780i clocks */ unsigned char SoftReset:1; /* RW: Soft reset 0=normal, 1=soft reset active */ unsigned char ConfigMode:1; /* RW: Configuration mode, 0=normal, 1=config mode */ - unsigned char Reserved:5; /* 0: Reserved */ + unsigned short Reserved:13; /* 0: Reserved */ } DSP_ISA_SLAVE_CONTROL; diff --git a/drivers/char/random.c b/drivers/char/random.c index 79164fce1bf900b823838646d1ead601af7d1f2d..e3743bb464f14e9519d75e300c430697b6252d55 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -461,6 +461,7 @@ static struct crng_state primary_crng = { * its value (from 0->1->2). */ static int crng_init = 0; +static bool crng_need_final_init = false; #define crng_ready() (likely(crng_init > 1)) static int crng_init_cnt = 0; static unsigned long crng_global_init_time = 0; @@ -838,6 +839,36 @@ static void __init crng_initialize_primary(struct crng_state *crng) crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1; } +static void crng_finalize_init(struct crng_state *crng) +{ + if (crng != &primary_crng || crng_init >= 2) + return; + if (!system_wq) { + /* We can't call numa_crng_init until we have workqueues, + * so mark this for processing later. */ + crng_need_final_init = true; + return; + } + + invalidate_batched_entropy(); + numa_crng_init(); + crng_init = 2; + process_random_ready_list(); + wake_up_interruptible(&crng_init_wait); + kill_fasync(&fasync, SIGIO, POLL_IN); + pr_notice("crng init done\n"); + if (unseeded_warning.missed) { + pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", + unseeded_warning.missed); + unseeded_warning.missed = 0; + } + if (urandom_warning.missed) { + pr_notice("%d urandom warning(s) missed due to ratelimiting\n", + urandom_warning.missed); + urandom_warning.missed = 0; + } +} + #ifdef CONFIG_NUMA static void do_numa_crng_init(struct work_struct *work) { @@ -853,8 +884,8 @@ static void do_numa_crng_init(struct work_struct *work) crng_initialize_secondary(crng); pool[i] = crng; } - mb(); - if (cmpxchg(&crng_node_pool, NULL, pool)) { + /* pairs with READ_ONCE() in select_crng() */ + if (cmpxchg_release(&crng_node_pool, NULL, pool) != NULL) { for_each_node(i) kfree(pool[i]); kfree(pool); @@ -867,18 +898,38 @@ static void numa_crng_init(void) { schedule_work(&numa_crng_init_work); } + +static struct crng_state *select_crng(void) +{ + struct crng_state **pool; + int nid = numa_node_id(); + + /* pairs with cmpxchg_release() in do_numa_crng_init() */ + pool = READ_ONCE(crng_node_pool); + if (pool && pool[nid]) + return pool[nid]; + + return &primary_crng; +} #else static void numa_crng_init(void) {} + +static struct crng_state *select_crng(void) +{ + return &primary_crng; +} #endif /* * crng_fast_load() can be called by code in the interrupt service - * path. So we can't afford to dilly-dally. + * path. So we can't afford to dilly-dally. Returns the number of + * bytes processed from cp. */ -static int crng_fast_load(const char *cp, size_t len) +static size_t crng_fast_load(const char *cp, size_t len) { unsigned long flags; char *p; + size_t ret = 0; if (!spin_trylock_irqsave(&primary_crng.lock, flags)) return 0; @@ -889,7 +940,7 @@ static int crng_fast_load(const char *cp, size_t len) p = (unsigned char *) &primary_crng.state[4]; while (len > 0 && crng_init_cnt < CRNG_INIT_CNT_THRESH) { p[crng_init_cnt % CHACHA_KEY_SIZE] ^= *cp; - cp++; crng_init_cnt++; len--; + cp++; crng_init_cnt++; len--; ret++; } spin_unlock_irqrestore(&primary_crng.lock, flags); if (crng_init_cnt >= CRNG_INIT_CNT_THRESH) { @@ -897,7 +948,7 @@ static int crng_fast_load(const char *cp, size_t len) crng_init = 1; pr_notice("fast init done\n"); } - return 1; + return ret; } /* @@ -972,38 +1023,23 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r) crng->state[i+4] ^= buf.key[i] ^ rv; } memzero_explicit(&buf, sizeof(buf)); - crng->init_time = jiffies; + WRITE_ONCE(crng->init_time, jiffies); spin_unlock_irqrestore(&crng->lock, flags); - if (crng == &primary_crng && crng_init < 2) { - invalidate_batched_entropy(); - numa_crng_init(); - crng_init = 2; - process_random_ready_list(); - wake_up_interruptible(&crng_init_wait); - kill_fasync(&fasync, SIGIO, POLL_IN); - pr_notice("crng init done\n"); - if (unseeded_warning.missed) { - pr_notice("%d get_random_xx warning(s) missed due to ratelimiting\n", - unseeded_warning.missed); - unseeded_warning.missed = 0; - } - if (urandom_warning.missed) { - pr_notice("%d urandom warning(s) missed due to ratelimiting\n", - urandom_warning.missed); - urandom_warning.missed = 0; - } - } + crng_finalize_init(crng); } static void _extract_crng(struct crng_state *crng, __u8 out[CHACHA_BLOCK_SIZE]) { - unsigned long v, flags; - - if (crng_ready() && - (time_after(crng_global_init_time, crng->init_time) || - time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))) - crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL); + unsigned long v, flags, init_time; + + if (crng_ready()) { + init_time = READ_ONCE(crng->init_time); + if (time_after(READ_ONCE(crng_global_init_time), init_time) || + time_after(jiffies, init_time + CRNG_RESEED_INTERVAL)) + crng_reseed(crng, crng == &primary_crng ? + &input_pool : NULL); + } spin_lock_irqsave(&crng->lock, flags); if (arch_get_random_long(&v)) crng->state[14] ^= v; @@ -1015,15 +1051,7 @@ static void _extract_crng(struct crng_state *crng, static void extract_crng(__u8 out[CHACHA_BLOCK_SIZE]) { - struct crng_state *crng = NULL; - -#ifdef CONFIG_NUMA - if (crng_node_pool) - crng = crng_node_pool[numa_node_id()]; - if (crng == NULL) -#endif - crng = &primary_crng; - _extract_crng(crng, out); + _extract_crng(select_crng(), out); } /* @@ -1052,15 +1080,7 @@ static void _crng_backtrack_protect(struct crng_state *crng, static void crng_backtrack_protect(__u8 tmp[CHACHA_BLOCK_SIZE], int used) { - struct crng_state *crng = NULL; - -#ifdef CONFIG_NUMA - if (crng_node_pool) - crng = crng_node_pool[numa_node_id()]; - if (crng == NULL) -#endif - crng = &primary_crng; - _crng_backtrack_protect(crng, tmp, used); + _crng_backtrack_protect(select_crng(), tmp, used); } static ssize_t extract_crng_user(void __user *buf, size_t nbytes) @@ -1279,7 +1299,7 @@ void add_interrupt_randomness(int irq, int irq_flags) if (unlikely(crng_init == 0)) { if ((fast_pool->count >= 64) && crng_fast_load((char *) fast_pool->pool, - sizeof(fast_pool->pool))) { + sizeof(fast_pool->pool)) > 0) { fast_pool->count = 0; fast_pool->last = now; } @@ -1786,6 +1806,8 @@ static void __init init_std_data(struct entropy_store *r) int __init rand_initialize(void) { init_std_data(&input_pool); + if (crng_need_final_init) + crng_finalize_init(&primary_crng); crng_initialize_primary(&primary_crng); crng_global_init_time = jiffies; if (ratelimit_disable) { @@ -1952,7 +1974,10 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; - input_pool.entropy_count = 0; + if (xchg(&input_pool.entropy_count, 0) && random_write_wakeup_bits) { + wake_up_interruptible(&random_write_wait); + kill_fasync(&fasync, SIGIO, POLL_OUT); + } return 0; case RNDRESEEDCRNG: if (!capable(CAP_SYS_ADMIN)) @@ -1960,7 +1985,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) if (crng_init < 2) return -ENODATA; crng_reseed(&primary_crng, &input_pool); - crng_global_init_time = jiffies - 1; + WRITE_ONCE(crng_global_init_time, jiffies - 1); return 0; default: return -EINVAL; @@ -2286,15 +2311,19 @@ void add_hwgenerator_randomness(const char *buffer, size_t count, struct entropy_store *poolp = &input_pool; if (unlikely(crng_init == 0)) { - crng_fast_load(buffer, count); - return; + size_t ret = crng_fast_load(buffer, count); + count -= ret; + buffer += ret; + if (!count || crng_init == 0) + return; } /* Suspend writing if we're above the trickle threshold. * We'll be woken up again once below random_write_wakeup_thresh, * or when the calling thread is about to terminate. */ - wait_event_interruptible(random_write_wait, kthread_should_stop() || + wait_event_interruptible(random_write_wait, + !system_wq || kthread_should_stop() || ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits); mix_pool_bytes(poolp, buffer, count); credit_entropy_bits(poolp, entropy); diff --git a/drivers/char/tpm/tpm2-space.c b/drivers/char/tpm/tpm2-space.c index 784b8b3cb903f5e098d1eaac235fec69f5a5a4d6..97e916856cf3e25445c94256dbe94569f5e84524 100644 --- a/drivers/char/tpm/tpm2-space.c +++ b/drivers/char/tpm/tpm2-space.c @@ -455,6 +455,9 @@ static int tpm2_map_response_body(struct tpm_chip *chip, u32 cc, u8 *rsp, if (be32_to_cpu(data->capability) != TPM2_CAP_HANDLES) return 0; + if (be32_to_cpu(data->count) > (UINT_MAX - TPM_HEADER_SIZE - 9) / 4) + return -EFAULT; + if (len != TPM_HEADER_SIZE + 9 + 4 * be32_to_cpu(data->count)) return -EFAULT; diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 69579efb247b3b7508df00d5796cb9d429db3889..dc56b976d8162cff62ad6e6d3af5ba60a239606a 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -48,6 +48,7 @@ static int wait_for_tpm_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, wait_queue_head_t *queue, bool check_cancel) { + struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev); unsigned long stop; long rc; u8 status; @@ -80,8 +81,8 @@ again: } } else { do { - usleep_range(TPM_TIMEOUT_USECS_MIN, - TPM_TIMEOUT_USECS_MAX); + usleep_range(priv->timeout_min, + priv->timeout_max); status = chip->ops->status(chip); if ((status & mask) == mask) return 0; @@ -945,9 +946,24 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, chip->timeout_b = msecs_to_jiffies(TIS_TIMEOUT_B_MAX); chip->timeout_c = msecs_to_jiffies(TIS_TIMEOUT_C_MAX); chip->timeout_d = msecs_to_jiffies(TIS_TIMEOUT_D_MAX); + priv->timeout_min = TPM_TIMEOUT_USECS_MIN; + priv->timeout_max = TPM_TIMEOUT_USECS_MAX; priv->phy_ops = phy_ops; + dev_set_drvdata(&chip->dev, priv); + rc = tpm_tis_read32(priv, TPM_DID_VID(0), &vendor); + if (rc < 0) + return rc; + + priv->manufacturer_id = vendor; + + if (priv->manufacturer_id == TPM_VID_ATML && + !(chip->flags & TPM_CHIP_FLAG_TPM2)) { + priv->timeout_min = TIS_TIMEOUT_MIN_ATML; + priv->timeout_max = TIS_TIMEOUT_MAX_ATML; + } + if (is_bsw()) { priv->ilb_base_addr = ioremap(INTEL_LEGACY_BLK_BASE_ADDR, ILB_REMAP_SIZE); @@ -978,7 +994,15 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, intmask |= TPM_INTF_CMD_READY_INT | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT | TPM_INTF_STS_VALID_INT; intmask &= ~TPM_GLOBAL_INT_ENABLE; + + rc = request_locality(chip, 0); + if (rc < 0) { + rc = -ENODEV; + goto out_err; + } + tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask); + release_locality(chip, 0); rc = tpm_chip_start(chip); if (rc) @@ -988,12 +1012,6 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, if (rc) goto out_err; - rc = tpm_tis_read32(priv, TPM_DID_VID(0), &vendor); - if (rc < 0) - goto out_err; - - priv->manufacturer_id = vendor; - rc = tpm_tis_read8(priv, TPM_RID(0), &rid); if (rc < 0) goto out_err; diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h index b2a3c6c72882d48feada725f9e3bcda3ca53010e..3be24f221e32af812e276acac08f88c29e60eb15 100644 --- a/drivers/char/tpm/tpm_tis_core.h +++ b/drivers/char/tpm/tpm_tis_core.h @@ -54,6 +54,8 @@ enum tis_defaults { TIS_MEM_LEN = 0x5000, TIS_SHORT_TIMEOUT = 750, /* ms */ TIS_LONG_TIMEOUT = 2000, /* 2 sec */ + TIS_TIMEOUT_MIN_ATML = 14700, /* usecs */ + TIS_TIMEOUT_MAX_ATML = 15000, /* usecs */ }; /* Some timeout values are needed before it is known whether the chip is @@ -98,6 +100,8 @@ struct tpm_tis_data { wait_queue_head_t read_queue; const struct tpm_tis_phy_ops *phy_ops; unsigned short rng_quality; + unsigned int timeout_min; /* usecs */ + unsigned int timeout_max; /* usecs */ }; struct tpm_tis_phy_ops { diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c index de4209003a448ea0e141538d51f8b67bf2aad0a9..d64bea3298a2907c571e3f3763600769cc9e2f39 100644 --- a/drivers/char/tpm/tpm_tis_spi_main.c +++ b/drivers/char/tpm/tpm_tis_spi_main.c @@ -263,6 +263,7 @@ static const struct spi_device_id tpm_tis_spi_id[] = { { "st33htpm-spi", (unsigned long)tpm_tis_spi_probe }, { "slb9670", (unsigned long)tpm_tis_spi_probe }, { "tpm_tis_spi", (unsigned long)tpm_tis_spi_probe }, + { "tpm_tis-spi", (unsigned long)tpm_tis_spi_probe }, { "cr50", (unsigned long)cr50_spi_probe }, {} }; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 673522874cec43e6e502b83c9795978671c0ef96..a5e9d1a8a53c2122f33193e6c101bd76677ceacb 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -28,6 +28,7 @@ #include "../tty/hvc/hvc_console.h" #define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC) +#define VIRTCONS_MAX_PORTS 0x8000 /* * This is a global struct for storing common data for all the devices @@ -2039,6 +2040,14 @@ static int virtcons_probe(struct virtio_device *vdev) virtio_cread_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT, struct virtio_console_config, max_nr_ports, &portdev->max_nr_ports) == 0) { + if (portdev->max_nr_ports == 0 || + portdev->max_nr_ports > VIRTCONS_MAX_PORTS) { + dev_err(&vdev->dev, + "Invalidate max_nr_ports %d", + portdev->max_nr_ports); + err = -EINVAL; + goto free; + } multiport = true; } diff --git a/drivers/clk/at91/clk-sam9x60-pll.c b/drivers/clk/at91/clk-sam9x60-pll.c index 78f458a7b2ef48e555ee4b22a40777a4c27f4d6b..5a9daa3643a72c5f1751512c3eccab7f4a4b4c8a 100644 --- a/drivers/clk/at91/clk-sam9x60-pll.c +++ b/drivers/clk/at91/clk-sam9x60-pll.c @@ -71,8 +71,8 @@ static unsigned long sam9x60_frac_pll_recalc_rate(struct clk_hw *hw, struct sam9x60_pll_core *core = to_sam9x60_pll_core(hw); struct sam9x60_frac *frac = to_sam9x60_frac(core); - return (parent_rate * (frac->mul + 1) + - ((u64)parent_rate * frac->frac >> 22)); + return parent_rate * (frac->mul + 1) + + DIV_ROUND_CLOSEST_ULL((u64)parent_rate * frac->frac, (1 << 22)); } static int sam9x60_frac_pll_prepare(struct clk_hw *hw) diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c index 20ee9dccee78792678a6839b51d5aec95525d7f8..b40035b011d0aeb99c3752a5cf9cabfce8560949 100644 --- a/drivers/clk/at91/pmc.c +++ b/drivers/clk/at91/pmc.c @@ -267,6 +267,11 @@ static int __init pmc_register_ops(void) if (!np) return -ENODEV; + if (!of_device_is_available(np)) { + of_node_put(np); + return -ENODEV; + } + pmcreg = device_node_to_regmap(np); of_node_put(np); if (IS_ERR(pmcreg)) diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c index 1ac803e14fa3e17bcc8775dcb12aed891e7ea629..178886823b90ca2a8449c3ae00f992607030c1f7 100644 --- a/drivers/clk/bcm/clk-bcm2835.c +++ b/drivers/clk/bcm/clk-bcm2835.c @@ -933,8 +933,7 @@ static int bcm2835_clock_is_on(struct clk_hw *hw) static u32 bcm2835_clock_choose_div(struct clk_hw *hw, unsigned long rate, - unsigned long parent_rate, - bool round_up) + unsigned long parent_rate) { struct bcm2835_clock *clock = bcm2835_clock_from_hw(hw); const struct bcm2835_clock_data *data = clock->data; @@ -946,10 +945,6 @@ static u32 bcm2835_clock_choose_div(struct clk_hw *hw, rem = do_div(temp, rate); div = temp; - - /* Round up and mask off the unused bits */ - if (round_up && ((div & unused_frac_mask) != 0 || rem != 0)) - div += unused_frac_mask + 1; div &= ~unused_frac_mask; /* different clamping limits apply for a mash clock */ @@ -1080,7 +1075,7 @@ static int bcm2835_clock_set_rate(struct clk_hw *hw, struct bcm2835_clock *clock = bcm2835_clock_from_hw(hw); struct bcm2835_cprman *cprman = clock->cprman; const struct bcm2835_clock_data *data = clock->data; - u32 div = bcm2835_clock_choose_div(hw, rate, parent_rate, false); + u32 div = bcm2835_clock_choose_div(hw, rate, parent_rate); u32 ctl; spin_lock(&cprman->regs_lock); @@ -1131,7 +1126,7 @@ static unsigned long bcm2835_clock_choose_div_and_prate(struct clk_hw *hw, if (!(BIT(parent_idx) & data->set_rate_parent)) { *prate = clk_hw_get_rate(parent); - *div = bcm2835_clock_choose_div(hw, rate, *prate, true); + *div = bcm2835_clock_choose_div(hw, rate, *prate); *avgrate = bcm2835_clock_rate_from_divisor(clock, *prate, *div); @@ -1217,7 +1212,7 @@ static int bcm2835_clock_determine_rate(struct clk_hw *hw, rate = bcm2835_clock_choose_div_and_prate(hw, i, req->rate, &div, &prate, &avgrate); - if (rate > best_rate && rate <= req->rate) { + if (abs(req->rate - rate) < abs(req->rate - best_rate)) { best_parent = parent; best_prate = prate; best_rate = rate; diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c index bc3be5f3eae15513bdf66e5ba559fcce54ba83fa..24dab2312bc6faf572e7a7b63c187de602e6df18 100644 --- a/drivers/clk/clk-ast2600.c +++ b/drivers/clk/clk-ast2600.c @@ -51,6 +51,8 @@ static DEFINE_SPINLOCK(aspeed_g6_clk_lock); static struct clk_hw_onecell_data *aspeed_g6_clk_data; static void __iomem *scu_g6_base; +/* AST2600 revision: A0, A1, A2, etc */ +static u8 soc_rev; /* * Clocks marked with CLK_IS_CRITICAL: @@ -191,9 +193,8 @@ static struct clk_hw *ast2600_calc_pll(const char *name, u32 val) static struct clk_hw *ast2600_calc_apll(const char *name, u32 val) { unsigned int mult, div; - u32 chip_id = readl(scu_g6_base + ASPEED_G6_SILICON_REV); - if (((chip_id & CHIP_REVISION_ID) >> 16) >= 2) { + if (soc_rev >= 2) { if (val & BIT(24)) { /* Pass through mode */ mult = div = 1; @@ -707,7 +708,7 @@ static const u32 ast2600_a1_axi_ahb200_tbl[] = { static void __init aspeed_g6_cc(struct regmap *map) { struct clk_hw *hw; - u32 val, div, divbits, chip_id, axi_div, ahb_div; + u32 val, div, divbits, axi_div, ahb_div; clk_hw_register_fixed_rate(NULL, "clkin", NULL, 0, 25000000); @@ -738,8 +739,7 @@ static void __init aspeed_g6_cc(struct regmap *map) axi_div = 2; divbits = (val >> 11) & 0x3; - regmap_read(map, ASPEED_G6_SILICON_REV, &chip_id); - if (chip_id & BIT(16)) { + if (soc_rev >= 1) { if (!divbits) { ahb_div = ast2600_a1_axi_ahb200_tbl[(val >> 8) & 0x3]; if (val & BIT(16)) @@ -784,6 +784,8 @@ static void __init aspeed_g6_cc_init(struct device_node *np) if (!scu_g6_base) return; + soc_rev = (readl(scu_g6_base + ASPEED_G6_SILICON_REV) & CHIP_REVISION_ID) >> 16; + aspeed_g6_clk_data = kzalloc(struct_size(aspeed_g6_clk_data, hws, ASPEED_G6_NUM_CLKS), GFP_KERNEL); if (!aspeed_g6_clk_data) diff --git a/drivers/clk/clk-bm1880.c b/drivers/clk/clk-bm1880.c index e6d6599d310a1da8b2011151c81d13efb8d42cb7..fad78a22218e8d7cb12a05bf49ed60bbf2b91072 100644 --- a/drivers/clk/clk-bm1880.c +++ b/drivers/clk/clk-bm1880.c @@ -522,14 +522,6 @@ static struct clk_hw *bm1880_clk_register_pll(struct bm1880_pll_hw_clock *pll_cl return hw; } -static void bm1880_clk_unregister_pll(struct clk_hw *hw) -{ - struct bm1880_pll_hw_clock *pll_hw = to_bm1880_pll_clk(hw); - - clk_hw_unregister(hw); - kfree(pll_hw); -} - static int bm1880_clk_register_plls(struct bm1880_pll_hw_clock *clks, int num_clks, struct bm1880_clock_data *data) @@ -555,7 +547,7 @@ static int bm1880_clk_register_plls(struct bm1880_pll_hw_clock *clks, err_clk: while (i--) - bm1880_clk_unregister_pll(data->hw_data.hws[clks[i].pll.id]); + clk_hw_unregister(data->hw_data.hws[clks[i].pll.id]); return PTR_ERR(hw); } @@ -695,14 +687,6 @@ static struct clk_hw *bm1880_clk_register_div(struct bm1880_div_hw_clock *div_cl return hw; } -static void bm1880_clk_unregister_div(struct clk_hw *hw) -{ - struct bm1880_div_hw_clock *div_hw = to_bm1880_div_clk(hw); - - clk_hw_unregister(hw); - kfree(div_hw); -} - static int bm1880_clk_register_divs(struct bm1880_div_hw_clock *clks, int num_clks, struct bm1880_clock_data *data) @@ -729,7 +713,7 @@ static int bm1880_clk_register_divs(struct bm1880_div_hw_clock *clks, err_clk: while (i--) - bm1880_clk_unregister_div(data->hw_data.hws[clks[i].div.id]); + clk_hw_unregister(data->hw_data.hws[clks[i].div.id]); return PTR_ERR(hw); } diff --git a/drivers/clk/clk-si5341.c b/drivers/clk/clk-si5341.c index eb22f4fdbc6b42c2f3384bafdc33ef93c8c8375e..772b48ad0cd7809c8dbede6a2fd30cc410e2a753 100644 --- a/drivers/clk/clk-si5341.c +++ b/drivers/clk/clk-si5341.c @@ -1576,7 +1576,7 @@ static int si5341_probe(struct i2c_client *client, clk_prepare(data->clk[i].hw.clk); } - err = of_clk_add_hw_provider(client->dev.of_node, of_clk_si5341_get, + err = devm_of_clk_add_hw_provider(&client->dev, of_clk_si5341_get, data); if (err) { dev_err(&client->dev, "unable to add clk provider\n"); diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index 5c75e3d906c209767b5716992b8f6b718cd3d098..682a18b392f0843ae4aeab2b56caaa7a62153add 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -129,7 +129,6 @@ static const struct stm32f4_gate_data stm32f429_gates[] __initconst = { { STM32F4_RCC_APB2ENR, 20, "spi5", "apb2_div" }, { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" }, { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" }, - { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" }, }; static const struct stm32f4_gate_data stm32f469_gates[] __initconst = { @@ -211,7 +210,6 @@ static const struct stm32f4_gate_data stm32f469_gates[] __initconst = { { STM32F4_RCC_APB2ENR, 20, "spi5", "apb2_div" }, { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" }, { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" }, - { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" }, }; static const struct stm32f4_gate_data stm32f746_gates[] __initconst = { @@ -286,7 +284,6 @@ static const struct stm32f4_gate_data stm32f746_gates[] __initconst = { { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" }, { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" }, { STM32F4_RCC_APB2ENR, 23, "sai2", "apb2_div" }, - { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" }, }; static const struct stm32f4_gate_data stm32f769_gates[] __initconst = { @@ -364,7 +361,6 @@ static const struct stm32f4_gate_data stm32f769_gates[] __initconst = { { STM32F4_RCC_APB2ENR, 21, "spi6", "apb2_div" }, { STM32F4_RCC_APB2ENR, 22, "sai1", "apb2_div" }, { STM32F4_RCC_APB2ENR, 23, "sai2", "apb2_div" }, - { STM32F4_RCC_APB2ENR, 26, "ltdc", "apb2_div" }, { STM32F4_RCC_APB2ENR, 30, "mdio", "apb2_div" }, }; diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index f047d33d765c19a0096be80ef83106ae7747e2bd..411ca7a1c0882da8982284b64109de3dcca9677c 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -3469,6 +3469,14 @@ static int __clk_core_init(struct clk_core *core) clk_prepare_lock(); + /* + * Set hw->core after grabbing the prepare_lock to synchronize with + * callers of clk_core_fill_parent_index() where we treat hw->core + * being NULL as the clk not being registered yet. This is crucial so + * that clks aren't parented until their parent is fully registered. + */ + core->hw->core = core; + ret = clk_pm_runtime_get(core); if (ret) goto unlock; @@ -3640,8 +3648,10 @@ static int __clk_core_init(struct clk_core *core) out: clk_pm_runtime_put(core); unlock: - if (ret) + if (ret) { hlist_del_init(&core->child_node); + core->hw->core = NULL; + } clk_prepare_unlock(); @@ -3905,7 +3915,6 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) core->num_parents = init->num_parents; core->min_rate = 0; core->max_rate = ULONG_MAX; - hw->core = core; ret = clk_core_populate_parent_map(core, init); if (ret) @@ -3923,7 +3932,7 @@ __clk_register(struct device *dev, struct device_node *np, struct clk_hw *hw) goto fail_create_clk; } - clk_core_link_consumer(hw->core, hw->clk); + clk_core_link_consumer(core, hw->clk); ret = __clk_core_init(core); if (!ret) diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c index 5dbb6a9377324d6b3d9b68cbe277012bdda286bb..206e4c43f68f80bb9a00e10c45a74bd8c3129ade 100644 --- a/drivers/clk/imx/clk-imx6ul.c +++ b/drivers/clk/imx/clk-imx6ul.c @@ -161,7 +161,6 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node) hws[IMX6UL_PLL5_BYPASS] = imx_clk_hw_mux_flags("pll5_bypass", base + 0xa0, 16, 1, pll5_bypass_sels, ARRAY_SIZE(pll5_bypass_sels), CLK_SET_RATE_PARENT); hws[IMX6UL_PLL6_BYPASS] = imx_clk_hw_mux_flags("pll6_bypass", base + 0xe0, 16, 1, pll6_bypass_sels, ARRAY_SIZE(pll6_bypass_sels), CLK_SET_RATE_PARENT); hws[IMX6UL_PLL7_BYPASS] = imx_clk_hw_mux_flags("pll7_bypass", base + 0x20, 16, 1, pll7_bypass_sels, ARRAY_SIZE(pll7_bypass_sels), CLK_SET_RATE_PARENT); - hws[IMX6UL_CLK_CSI_SEL] = imx_clk_hw_mux_flags("csi_sel", base + 0x3c, 9, 2, csi_sels, ARRAY_SIZE(csi_sels), CLK_SET_RATE_PARENT); /* Do not bypass PLLs initially */ clk_set_parent(hws[IMX6UL_PLL1_BYPASS]->clk, hws[IMX6UL_CLK_PLL1]->clk); @@ -270,6 +269,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node) hws[IMX6UL_CLK_ECSPI_SEL] = imx_clk_hw_mux("ecspi_sel", base + 0x38, 18, 1, ecspi_sels, ARRAY_SIZE(ecspi_sels)); hws[IMX6UL_CLK_LCDIF_PRE_SEL] = imx_clk_hw_mux_flags("lcdif_pre_sel", base + 0x38, 15, 3, lcdif_pre_sels, ARRAY_SIZE(lcdif_pre_sels), CLK_SET_RATE_PARENT); hws[IMX6UL_CLK_LCDIF_SEL] = imx_clk_hw_mux("lcdif_sel", base + 0x38, 9, 3, lcdif_sels, ARRAY_SIZE(lcdif_sels)); + hws[IMX6UL_CLK_CSI_SEL] = imx_clk_hw_mux("csi_sel", base + 0x3c, 9, 2, csi_sels, ARRAY_SIZE(csi_sels)); hws[IMX6UL_CLK_LDB_DI0_DIV_SEL] = imx_clk_hw_mux("ldb_di0", base + 0x20, 10, 1, ldb_di0_div_sels, ARRAY_SIZE(ldb_di0_div_sels)); hws[IMX6UL_CLK_LDB_DI1_DIV_SEL] = imx_clk_hw_mux("ldb_di1", base + 0x20, 11, 1, ldb_di1_div_sels, ARRAY_SIZE(ldb_di1_div_sels)); diff --git a/drivers/clk/imx/clk-imx8mn.c b/drivers/clk/imx/clk-imx8mn.c index 33a7ddc23cd24e7e643faf5b93b0dfabd98113a9..db122d94db583364a321094774ee4b055342d626 100644 --- a/drivers/clk/imx/clk-imx8mn.c +++ b/drivers/clk/imx/clk-imx8mn.c @@ -274,9 +274,9 @@ static const char * const imx8mn_pdm_sels[] = {"osc_24m", "sys_pll2_100m", "audi static const char * const imx8mn_dram_core_sels[] = {"dram_pll_out", "dram_alt_root", }; -static const char * const imx8mn_clko1_sels[] = {"osc_24m", "sys_pll1_800m", "osc_27m", - "sys_pll1_200m", "audio_pll2_out", "vpu_pll", - "sys_pll1_80m", }; +static const char * const imx8mn_clko1_sels[] = {"osc_24m", "sys_pll1_800m", "dummy", + "sys_pll1_200m", "audio_pll2_out", "sys_pll2_500m", + "dummy", "sys_pll1_80m", }; static const char * const imx8mn_clko2_sels[] = {"osc_24m", "sys_pll2_200m", "sys_pll1_400m", "sys_pll2_166m", "sys_pll3_out", "audio_pll1_out", "video_pll1_out", "osc_32k", }; diff --git a/drivers/clk/imx/clk-imx8qxp-lpcg.c b/drivers/clk/imx/clk-imx8qxp-lpcg.c index e947a70054acd8c47ce73d87ad0c701033852f21..522c03a12b693063aa3d79e8690442a5c5d11f01 100644 --- a/drivers/clk/imx/clk-imx8qxp-lpcg.c +++ b/drivers/clk/imx/clk-imx8qxp-lpcg.c @@ -231,7 +231,7 @@ static struct platform_driver imx8qxp_lpcg_clk_driver = { .probe = imx8qxp_lpcg_clk_probe, }; -builtin_platform_driver(imx8qxp_lpcg_clk_driver); +module_platform_driver(imx8qxp_lpcg_clk_driver); MODULE_AUTHOR("Aisheng Dong "); MODULE_DESCRIPTION("NXP i.MX8QXP LPCG clock driver"); diff --git a/drivers/clk/imx/clk-imx8qxp.c b/drivers/clk/imx/clk-imx8qxp.c index d650ca33cdc80c31a787ee3cb12e02fcf5b12cbb..8c14e0bbe1a24a6319101aec99631275ad554a11 100644 --- a/drivers/clk/imx/clk-imx8qxp.c +++ b/drivers/clk/imx/clk-imx8qxp.c @@ -151,7 +151,7 @@ static struct platform_driver imx8qxp_clk_driver = { }, .probe = imx8qxp_clk_probe, }; -builtin_platform_driver(imx8qxp_clk_driver); +module_platform_driver(imx8qxp_clk_driver); MODULE_AUTHOR("Aisheng Dong "); MODULE_DESCRIPTION("NXP i.MX8QXP clock driver"); diff --git a/drivers/clk/ingenic/cgu.c b/drivers/clk/ingenic/cgu.c index c8e9cb6c8e39cc8f27f99990e8053f764d07869e..2b9bb7d55efc8f9a3450cebbb23301083646313f 100644 --- a/drivers/clk/ingenic/cgu.c +++ b/drivers/clk/ingenic/cgu.c @@ -425,15 +425,15 @@ ingenic_clk_calc_div(const struct ingenic_cgu_clk_info *clk_info, } /* Impose hardware constraints */ - div = min_t(unsigned, div, 1 << clk_info->div.bits); - div = max_t(unsigned, div, 1); + div = clamp_t(unsigned int, div, clk_info->div.div, + clk_info->div.div << clk_info->div.bits); /* * If the divider value itself must be divided before being written to * the divider register, we must ensure we don't have any bits set that * would be lost as a result of doing so. */ - div /= clk_info->div.div; + div = DIV_ROUND_UP(div, clk_info->div.div); div *= clk_info->div.div; return div; diff --git a/drivers/clk/ingenic/jz4725b-cgu.c b/drivers/clk/ingenic/jz4725b-cgu.c index 8c38e72d14a79919f51569ee226f4b94558f6722..786e361a4a6a456ef090fa770a27704f4ba5c783 100644 --- a/drivers/clk/ingenic/jz4725b-cgu.c +++ b/drivers/clk/ingenic/jz4725b-cgu.c @@ -139,11 +139,10 @@ static const struct ingenic_cgu_clk_info jz4725b_cgu_clocks[] = { }, [JZ4725B_CLK_I2S] = { - "i2s", CGU_CLK_MUX | CGU_CLK_DIV | CGU_CLK_GATE, + "i2s", CGU_CLK_MUX | CGU_CLK_DIV, .parents = { JZ4725B_CLK_EXT, JZ4725B_CLK_PLL_HALF, -1, -1 }, .mux = { CGU_REG_CPCCR, 31, 1 }, .div = { CGU_REG_I2SCDR, 0, 1, 9, -1, -1, -1 }, - .gate = { CGU_REG_CLKGR, 6 }, }, [JZ4725B_CLK_SPI] = { diff --git a/drivers/clk/meson/gxbb.c b/drivers/clk/meson/gxbb.c index d6eed760327d031d1df0730eb016ddf14a9d211c..608e0e8ca49a8bec81330f9a59742f2a1373f1aa 100644 --- a/drivers/clk/meson/gxbb.c +++ b/drivers/clk/meson/gxbb.c @@ -713,6 +713,35 @@ static struct clk_regmap gxbb_mpll_prediv = { }; static struct clk_regmap gxbb_mpll0_div = { + .data = &(struct meson_clk_mpll_data){ + .sdm = { + .reg_off = HHI_MPLL_CNTL7, + .shift = 0, + .width = 14, + }, + .sdm_en = { + .reg_off = HHI_MPLL_CNTL, + .shift = 25, + .width = 1, + }, + .n2 = { + .reg_off = HHI_MPLL_CNTL7, + .shift = 16, + .width = 9, + }, + .lock = &meson_clk_lock, + }, + .hw.init = &(struct clk_init_data){ + .name = "mpll0_div", + .ops = &meson_clk_mpll_ops, + .parent_hws = (const struct clk_hw *[]) { + &gxbb_mpll_prediv.hw + }, + .num_parents = 1, + }, +}; + +static struct clk_regmap gxl_mpll0_div = { .data = &(struct meson_clk_mpll_data){ .sdm = { .reg_off = HHI_MPLL_CNTL7, @@ -749,7 +778,16 @@ static struct clk_regmap gxbb_mpll0 = { .hw.init = &(struct clk_init_data){ .name = "mpll0", .ops = &clk_regmap_gate_ops, - .parent_hws = (const struct clk_hw *[]) { &gxbb_mpll0_div.hw }, + .parent_data = &(const struct clk_parent_data) { + /* + * Note: + * GXL and GXBB have different SDM_EN registers. We + * fallback to the global naming string mechanism so + * mpll0_div picks up the appropriate one. + */ + .name = "mpll0_div", + .index = -1, + }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, }, @@ -3044,7 +3082,7 @@ static struct clk_hw_onecell_data gxl_hw_onecell_data = { [CLKID_VAPB_1] = &gxbb_vapb_1.hw, [CLKID_VAPB_SEL] = &gxbb_vapb_sel.hw, [CLKID_VAPB] = &gxbb_vapb.hw, - [CLKID_MPLL0_DIV] = &gxbb_mpll0_div.hw, + [CLKID_MPLL0_DIV] = &gxl_mpll0_div.hw, [CLKID_MPLL1_DIV] = &gxbb_mpll1_div.hw, [CLKID_MPLL2_DIV] = &gxbb_mpll2_div.hw, [CLKID_MPLL_PREDIV] = &gxbb_mpll_prediv.hw, @@ -3439,7 +3477,7 @@ static struct clk_regmap *const gxl_clk_regmaps[] = { &gxbb_mpll0, &gxbb_mpll1, &gxbb_mpll2, - &gxbb_mpll0_div, + &gxl_mpll0_div, &gxbb_mpll1_div, &gxbb_mpll2_div, &gxbb_cts_amclk_div, diff --git a/drivers/clk/mvebu/ap-cpu-clk.c b/drivers/clk/mvebu/ap-cpu-clk.c index b4259b60dcfd67b486beb3a3f630c0c2e4025df0..25de4b6da776fac799be1d2b9988124d2fc8e3b9 100644 --- a/drivers/clk/mvebu/ap-cpu-clk.c +++ b/drivers/clk/mvebu/ap-cpu-clk.c @@ -256,12 +256,15 @@ static int ap_cpu_clock_probe(struct platform_device *pdev) int cpu, err; err = of_property_read_u32(dn, "reg", &cpu); - if (WARN_ON(err)) + if (WARN_ON(err)) { + of_node_put(dn); return err; + } /* If cpu2 or cpu3 is enabled */ if (cpu & APN806_CLUSTER_NUM_MASK) { nclusters = 2; + of_node_put(dn); break; } } @@ -288,8 +291,10 @@ static int ap_cpu_clock_probe(struct platform_device *pdev) int cpu, err; err = of_property_read_u32(dn, "reg", &cpu); - if (WARN_ON(err)) + if (WARN_ON(err)) { + of_node_put(dn); return err; + } cluster_index = cpu & APN806_CLUSTER_NUM_MASK; cluster_index >>= APN806_CLUSTER_NUM_OFFSET; @@ -301,6 +306,7 @@ static int ap_cpu_clock_probe(struct platform_device *pdev) parent = of_clk_get(np, cluster_index); if (IS_ERR(parent)) { dev_err(dev, "Could not get the clock parent\n"); + of_node_put(dn); return -EINVAL; } parent_name = __clk_get_name(parent); @@ -319,8 +325,10 @@ static int ap_cpu_clock_probe(struct platform_device *pdev) init.parent_names = &parent_name; ret = devm_clk_hw_register(dev, &ap_cpu_clk[cluster_index].hw); - if (ret) + if (ret) { + of_node_put(dn); return ret; + } ap_cpu_data->hws[cluster_index] = &ap_cpu_clk[cluster_index].hw; } diff --git a/drivers/clk/qcom/clk-regmap-mux.c b/drivers/clk/qcom/clk-regmap-mux.c index b2d00b4519634614d2b6e1dfd6e1f648440e0321..45d9cca28064fb89465ceaa453ca31cac911c46b 100644 --- a/drivers/clk/qcom/clk-regmap-mux.c +++ b/drivers/clk/qcom/clk-regmap-mux.c @@ -28,7 +28,7 @@ static u8 mux_get_parent(struct clk_hw *hw) val &= mask; if (mux->parent_map) - return qcom_find_src_index(hw, mux->parent_map, val); + return qcom_find_cfg_index(hw, mux->parent_map, val); return val; } diff --git a/drivers/clk/qcom/common.c b/drivers/clk/qcom/common.c index 60d2a78d1395071fa344895d7e037f792f1e805b..2af04fc4abfa9b5041e87703fdc72c4d27bf978f 100644 --- a/drivers/clk/qcom/common.c +++ b/drivers/clk/qcom/common.c @@ -69,6 +69,18 @@ int qcom_find_src_index(struct clk_hw *hw, const struct parent_map *map, u8 src) } EXPORT_SYMBOL_GPL(qcom_find_src_index); +int qcom_find_cfg_index(struct clk_hw *hw, const struct parent_map *map, u8 cfg) +{ + int i, num_parents = clk_hw_get_num_parents(hw); + + for (i = 0; i < num_parents; i++) + if (cfg == map[i].cfg) + return i; + + return -ENOENT; +} +EXPORT_SYMBOL_GPL(qcom_find_cfg_index); + struct regmap * qcom_cc_map(struct platform_device *pdev, const struct qcom_cc_desc *desc) { diff --git a/drivers/clk/qcom/common.h b/drivers/clk/qcom/common.h index bb39a7e106d8a94950aabb25ab3885f16a421ddf..9c8f7b798d9fc92ceea235e93a68bb3e08d138de 100644 --- a/drivers/clk/qcom/common.h +++ b/drivers/clk/qcom/common.h @@ -49,6 +49,8 @@ extern void qcom_pll_set_fsm_mode(struct regmap *m, u32 reg, u8 bias_count, u8 lock_count); extern int qcom_find_src_index(struct clk_hw *hw, const struct parent_map *map, u8 src); +extern int qcom_find_cfg_index(struct clk_hw *hw, const struct parent_map *map, + u8 cfg); extern int qcom_cc_register_board_clk(struct device *dev, const char *path, const char *name, unsigned long rate); diff --git a/drivers/clk/qcom/gcc-msm8996.c b/drivers/clk/qcom/gcc-msm8996.c index 3c3a7ff0456219280e65fa14303745af5e1fa79a..9b1674b28d45ded36260dbdb36b79de181eb2687 100644 --- a/drivers/clk/qcom/gcc-msm8996.c +++ b/drivers/clk/qcom/gcc-msm8996.c @@ -2937,20 +2937,6 @@ static struct clk_branch gcc_smmu_aggre0_ahb_clk = { }, }; -static struct clk_branch gcc_aggre1_pnoc_ahb_clk = { - .halt_reg = 0x82014, - .clkr = { - .enable_reg = 0x82014, - .enable_mask = BIT(0), - .hw.init = &(struct clk_init_data){ - .name = "gcc_aggre1_pnoc_ahb_clk", - .parent_names = (const char *[]){ "periph_noc_clk_src" }, - .num_parents = 1, - .ops = &clk_branch2_ops, - }, - }, -}; - static struct clk_branch gcc_aggre2_ufs_axi_clk = { .halt_reg = 0x83014, .clkr = { @@ -3474,7 +3460,6 @@ static struct clk_regmap *gcc_msm8996_clocks[] = { [GCC_AGGRE0_CNOC_AHB_CLK] = &gcc_aggre0_cnoc_ahb_clk.clkr, [GCC_SMMU_AGGRE0_AXI_CLK] = &gcc_smmu_aggre0_axi_clk.clkr, [GCC_SMMU_AGGRE0_AHB_CLK] = &gcc_smmu_aggre0_ahb_clk.clkr, - [GCC_AGGRE1_PNOC_AHB_CLK] = &gcc_aggre1_pnoc_ahb_clk.clkr, [GCC_AGGRE2_UFS_AXI_CLK] = &gcc_aggre2_ufs_axi_clk.clkr, [GCC_AGGRE2_USB3_AXI_CLK] = &gcc_aggre2_usb3_axi_clk.clkr, [GCC_QSPI_AHB_CLK] = &gcc_qspi_ahb_clk.clkr, diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 4ece326ea233e90e51c7dd4e5eecf48b8d6e424d..cf23cfd7e46743703776d12b0d593a05524df31c 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2015, 2017-2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2015, 2017-2018, 2022, The Linux Foundation. All rights reserved. */ #include @@ -34,9 +34,14 @@ #define CFG_GDSCR_OFFSET 0x4 /* Wait 2^n CXO cycles between all states. Here, n=2 (4 cycles). */ -#define EN_REST_WAIT_VAL (0x2 << 20) -#define EN_FEW_WAIT_VAL (0x8 << 16) -#define CLK_DIS_WAIT_VAL (0x2 << 12) +#define EN_REST_WAIT_VAL 0x2 +#define EN_FEW_WAIT_VAL 0x8 +#define CLK_DIS_WAIT_VAL 0x2 + +/* Transition delay shifts */ +#define EN_REST_WAIT_SHIFT 20 +#define EN_FEW_WAIT_SHIFT 16 +#define CLK_DIS_WAIT_SHIFT 12 #define RETAIN_MEM BIT(14) #define RETAIN_PERIPH BIT(13) @@ -341,7 +346,18 @@ static int gdsc_init(struct gdsc *sc) */ mask = HW_CONTROL_MASK | SW_OVERRIDE_MASK | EN_REST_WAIT_MASK | EN_FEW_WAIT_MASK | CLK_DIS_WAIT_MASK; - val = EN_REST_WAIT_VAL | EN_FEW_WAIT_VAL | CLK_DIS_WAIT_VAL; + + if (!sc->en_rest_wait_val) + sc->en_rest_wait_val = EN_REST_WAIT_VAL; + if (!sc->en_few_wait_val) + sc->en_few_wait_val = EN_FEW_WAIT_VAL; + if (!sc->clk_dis_wait_val) + sc->clk_dis_wait_val = CLK_DIS_WAIT_VAL; + + val = sc->en_rest_wait_val << EN_REST_WAIT_SHIFT | + sc->en_few_wait_val << EN_FEW_WAIT_SHIFT | + sc->clk_dis_wait_val << CLK_DIS_WAIT_SHIFT; + ret = regmap_update_bits(sc->regmap, sc->gdscr, mask, val); if (ret) return ret; diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index 5bb396b344d16f814f23c0ce7f717eac859c7b54..762f1b5e1ec51b1a4d09f23630d1988ed8f7144f 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2015, 2017-2018, The Linux Foundation. All rights reserved. + * Copyright (c) 2015, 2017-2018, 2022, The Linux Foundation. All rights reserved. */ #ifndef __QCOM_GDSC_H__ @@ -22,6 +22,9 @@ struct reset_controller_dev; * @cxcs: offsets of branch registers to toggle mem/periph bits in * @cxc_count: number of @cxcs * @pwrsts: Possible powerdomain power states + * @en_rest_wait_val: transition delay value for receiving enr ack signal + * @en_few_wait_val: transition delay value for receiving enf ack signal + * @clk_dis_wait_val: transition delay value for halting clock * @resets: ids of resets associated with this gdsc * @reset_count: number of @resets * @rcdev: reset controller @@ -35,6 +38,9 @@ struct gdsc { unsigned int clamp_io_ctrl; unsigned int *cxcs; unsigned int cxc_count; + unsigned int en_rest_wait_val; + unsigned int en_few_wait_val; + unsigned int clk_dis_wait_val; const u8 pwrsts; /* Powerdomain allowable state bitfields */ #define PWRSTS_OFF BIT(0) diff --git a/drivers/clk/socfpga/clk-agilex.c b/drivers/clk/socfpga/clk-agilex.c index 7182afb4258a743308b4a0972bcf7e15ee5b406b..225636c2b5696b0ccbac31f879e688d459749520 100644 --- a/drivers/clk/socfpga/clk-agilex.c +++ b/drivers/clk/socfpga/clk-agilex.c @@ -165,13 +165,6 @@ static const struct clk_parent_data mpu_mux[] = { .name = "boot_clk", }, }; -static const struct clk_parent_data s2f_usr0_mux[] = { - { .fw_name = "f2s-free-clk", - .name = "f2s-free-clk", }, - { .fw_name = "boot_clk", - .name = "boot_clk", }, -}; - static const struct clk_parent_data emac_mux[] = { { .fw_name = "emaca_free_clk", .name = "emaca_free_clk", }, @@ -299,8 +292,6 @@ static const struct stratix10_gate_clock agilex_gate_clks[] = { 4, 0x44, 28, 1, 0, 0, 0}, { AGILEX_CS_TIMER_CLK, "cs_timer_clk", NULL, noc_mux, ARRAY_SIZE(noc_mux), 0, 0x24, 5, 0, 0, 0, 0x30, 1, 0}, - { AGILEX_S2F_USER0_CLK, "s2f_user0_clk", NULL, s2f_usr0_mux, ARRAY_SIZE(s2f_usr0_mux), 0, 0x24, - 6, 0, 0, 0, 0, 0, 0}, { AGILEX_EMAC0_CLK, "emac0_clk", NULL, emac_mux, ARRAY_SIZE(emac_mux), 0, 0x7C, 0, 0, 0, 0, 0x94, 26, 0}, { AGILEX_EMAC1_CLK, "emac1_clk", NULL, emac_mux, ARRAY_SIZE(emac_mux), 0, 0x7C, diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 54dac7c41519aa8ae3f4a66ef4e1f53f507f09dc..df50146f79f03db13ee1f155c5d7c6788dc917f8 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -24,6 +24,7 @@ config I8253_LOCK config OMAP_DM_TIMER bool + select TIMER_OF config CLKBLD_I8253 def_bool y if CLKSRC_I8253 || CLKEVT_I8253 || I8253_LOCK diff --git a/drivers/clocksource/timer-ti-dm-systimer.c b/drivers/clocksource/timer-ti-dm-systimer.c index b6f97960d8ee020f908dbd260b6c04c188c0b4d8..1fccb457fcc54233507a654813c3eeb220fb90c6 100644 --- a/drivers/clocksource/timer-ti-dm-systimer.c +++ b/drivers/clocksource/timer-ti-dm-systimer.c @@ -241,8 +241,7 @@ static void __init dmtimer_systimer_assign_alwon(void) bool quirk_unreliable_oscillator = false; /* Quirk unreliable 32 KiHz oscillator with incomplete dts */ - if (of_machine_is_compatible("ti,omap3-beagle") || - of_machine_is_compatible("timll,omap3-devkit8000")) { + if (of_machine_is_compatible("ti,omap3-beagle-ab4")) { quirk_unreliable_oscillator = true; counter_32k = -ENODEV; } diff --git a/drivers/counter/Kconfig b/drivers/counter/Kconfig index 2de53ab0dd252be4bb89f018b0827d3bf1fd6627..cbdf84200e278695d9da6149655c70ce0f66694a 100644 --- a/drivers/counter/Kconfig +++ b/drivers/counter/Kconfig @@ -41,7 +41,7 @@ config STM32_TIMER_CNT config STM32_LPTIMER_CNT tristate "STM32 LP Timer encoder counter driver" - depends on (MFD_STM32_LPTIMER || COMPILE_TEST) && IIO + depends on MFD_STM32_LPTIMER || COMPILE_TEST help Select this option to enable STM32 Low-Power Timer quadrature encoder and counter driver. diff --git a/drivers/counter/stm32-lptimer-cnt.c b/drivers/counter/stm32-lptimer-cnt.c index fd6828e2d34f532629217c5167d188c6e4241e2f..937439635d53f4a6fcc0fd334ca70caee19a521d 100644 --- a/drivers/counter/stm32-lptimer-cnt.c +++ b/drivers/counter/stm32-lptimer-cnt.c @@ -12,8 +12,8 @@ #include #include -#include #include +#include #include #include #include @@ -107,249 +107,27 @@ static int stm32_lptim_setup(struct stm32_lptim_cnt *priv, int enable) return regmap_update_bits(priv->regmap, STM32_LPTIM_CFGR, mask, val); } -static int stm32_lptim_write_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int val, int val2, long mask) -{ - struct stm32_lptim_cnt *priv = iio_priv(indio_dev); - int ret; - - switch (mask) { - case IIO_CHAN_INFO_ENABLE: - if (val < 0 || val > 1) - return -EINVAL; - - /* Check nobody uses the timer, or already disabled/enabled */ - ret = stm32_lptim_is_enabled(priv); - if ((ret < 0) || (!ret && !val)) - return ret; - if (val && ret) - return -EBUSY; - - ret = stm32_lptim_setup(priv, val); - if (ret) - return ret; - return stm32_lptim_set_enable_state(priv, val); - - default: - return -EINVAL; - } -} - -static int stm32_lptim_read_raw(struct iio_dev *indio_dev, - struct iio_chan_spec const *chan, - int *val, int *val2, long mask) -{ - struct stm32_lptim_cnt *priv = iio_priv(indio_dev); - u32 dat; - int ret; - - switch (mask) { - case IIO_CHAN_INFO_RAW: - ret = regmap_read(priv->regmap, STM32_LPTIM_CNT, &dat); - if (ret) - return ret; - *val = dat; - return IIO_VAL_INT; - - case IIO_CHAN_INFO_ENABLE: - ret = stm32_lptim_is_enabled(priv); - if (ret < 0) - return ret; - *val = ret; - return IIO_VAL_INT; - - case IIO_CHAN_INFO_SCALE: - /* Non-quadrature mode: scale = 1 */ - *val = 1; - *val2 = 0; - if (priv->quadrature_mode) { - /* - * Quadrature encoder mode: - * - both edges, quarter cycle, scale is 0.25 - * - either rising/falling edge scale is 0.5 - */ - if (priv->polarity > 1) - *val2 = 2; - else - *val2 = 1; - } - return IIO_VAL_FRACTIONAL_LOG2; - - default: - return -EINVAL; - } -} - -static const struct iio_info stm32_lptim_cnt_iio_info = { - .read_raw = stm32_lptim_read_raw, - .write_raw = stm32_lptim_write_raw, -}; - -static const char *const stm32_lptim_quadrature_modes[] = { - "non-quadrature", - "quadrature", -}; - -static int stm32_lptim_get_quadrature_mode(struct iio_dev *indio_dev, - const struct iio_chan_spec *chan) -{ - struct stm32_lptim_cnt *priv = iio_priv(indio_dev); - - return priv->quadrature_mode; -} - -static int stm32_lptim_set_quadrature_mode(struct iio_dev *indio_dev, - const struct iio_chan_spec *chan, - unsigned int type) -{ - struct stm32_lptim_cnt *priv = iio_priv(indio_dev); - - if (stm32_lptim_is_enabled(priv)) - return -EBUSY; - - priv->quadrature_mode = type; - - return 0; -} - -static const struct iio_enum stm32_lptim_quadrature_mode_en = { - .items = stm32_lptim_quadrature_modes, - .num_items = ARRAY_SIZE(stm32_lptim_quadrature_modes), - .get = stm32_lptim_get_quadrature_mode, - .set = stm32_lptim_set_quadrature_mode, -}; - -static const char * const stm32_lptim_cnt_polarity[] = { - "rising-edge", "falling-edge", "both-edges", -}; - -static int stm32_lptim_cnt_get_polarity(struct iio_dev *indio_dev, - const struct iio_chan_spec *chan) -{ - struct stm32_lptim_cnt *priv = iio_priv(indio_dev); - - return priv->polarity; -} - -static int stm32_lptim_cnt_set_polarity(struct iio_dev *indio_dev, - const struct iio_chan_spec *chan, - unsigned int type) -{ - struct stm32_lptim_cnt *priv = iio_priv(indio_dev); - - if (stm32_lptim_is_enabled(priv)) - return -EBUSY; - - priv->polarity = type; - - return 0; -} - -static const struct iio_enum stm32_lptim_cnt_polarity_en = { - .items = stm32_lptim_cnt_polarity, - .num_items = ARRAY_SIZE(stm32_lptim_cnt_polarity), - .get = stm32_lptim_cnt_get_polarity, - .set = stm32_lptim_cnt_set_polarity, -}; - -static ssize_t stm32_lptim_cnt_get_ceiling(struct stm32_lptim_cnt *priv, - char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%u\n", priv->ceiling); -} - -static ssize_t stm32_lptim_cnt_set_ceiling(struct stm32_lptim_cnt *priv, - const char *buf, size_t len) -{ - int ret; - - if (stm32_lptim_is_enabled(priv)) - return -EBUSY; - - ret = kstrtouint(buf, 0, &priv->ceiling); - if (ret) - return ret; - - if (priv->ceiling > STM32_LPTIM_MAX_ARR) - return -EINVAL; - - return len; -} - -static ssize_t stm32_lptim_cnt_get_preset_iio(struct iio_dev *indio_dev, - uintptr_t private, - const struct iio_chan_spec *chan, - char *buf) -{ - struct stm32_lptim_cnt *priv = iio_priv(indio_dev); - - return stm32_lptim_cnt_get_ceiling(priv, buf); -} - -static ssize_t stm32_lptim_cnt_set_preset_iio(struct iio_dev *indio_dev, - uintptr_t private, - const struct iio_chan_spec *chan, - const char *buf, size_t len) -{ - struct stm32_lptim_cnt *priv = iio_priv(indio_dev); - - return stm32_lptim_cnt_set_ceiling(priv, buf, len); -} - -/* LP timer with encoder */ -static const struct iio_chan_spec_ext_info stm32_lptim_enc_ext_info[] = { - { - .name = "preset", - .shared = IIO_SEPARATE, - .read = stm32_lptim_cnt_get_preset_iio, - .write = stm32_lptim_cnt_set_preset_iio, - }, - IIO_ENUM("polarity", IIO_SEPARATE, &stm32_lptim_cnt_polarity_en), - IIO_ENUM_AVAILABLE("polarity", &stm32_lptim_cnt_polarity_en), - IIO_ENUM("quadrature_mode", IIO_SEPARATE, - &stm32_lptim_quadrature_mode_en), - IIO_ENUM_AVAILABLE("quadrature_mode", &stm32_lptim_quadrature_mode_en), - {} -}; - -static const struct iio_chan_spec stm32_lptim_enc_channels = { - .type = IIO_COUNT, - .channel = 0, - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_ENABLE) | - BIT(IIO_CHAN_INFO_SCALE), - .ext_info = stm32_lptim_enc_ext_info, - .indexed = 1, -}; - -/* LP timer without encoder (counter only) */ -static const struct iio_chan_spec_ext_info stm32_lptim_cnt_ext_info[] = { - { - .name = "preset", - .shared = IIO_SEPARATE, - .read = stm32_lptim_cnt_get_preset_iio, - .write = stm32_lptim_cnt_set_preset_iio, - }, - IIO_ENUM("polarity", IIO_SEPARATE, &stm32_lptim_cnt_polarity_en), - IIO_ENUM_AVAILABLE("polarity", &stm32_lptim_cnt_polarity_en), - {} -}; - -static const struct iio_chan_spec stm32_lptim_cnt_channels = { - .type = IIO_COUNT, - .channel = 0, - .info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | - BIT(IIO_CHAN_INFO_ENABLE) | - BIT(IIO_CHAN_INFO_SCALE), - .ext_info = stm32_lptim_cnt_ext_info, - .indexed = 1, -}; - /** * enum stm32_lptim_cnt_function - enumerates LPTimer counter & encoder modes * @STM32_LPTIM_COUNTER_INCREASE: up count on IN1 rising, falling or both edges * @STM32_LPTIM_ENCODER_BOTH_EDGE: count on both edges (IN1 & IN2 quadrature) + * + * In non-quadrature mode, device counts up on active edge. + * In quadrature mode, encoder counting scenarios are as follows: + * +---------+----------+--------------------+--------------------+ + * | Active | Level on | IN1 signal | IN2 signal | + * | edge | opposite +----------+---------+----------+---------+ + * | | signal | Rising | Falling | Rising | Falling | + * +---------+----------+----------+---------+----------+---------+ + * | Rising | High -> | Down | - | Up | - | + * | edge | Low -> | Up | - | Down | - | + * +---------+----------+----------+---------+----------+---------+ + * | Falling | High -> | - | Up | - | Down | + * | edge | Low -> | - | Down | - | Up | + * +---------+----------+----------+---------+----------+---------+ + * | Both | High -> | Down | Up | Up | Down | + * | edges | Low -> | Up | Down | Down | Up | + * +---------+----------+----------+---------+----------+---------+ */ enum stm32_lptim_cnt_function { STM32_LPTIM_COUNTER_INCREASE, @@ -484,7 +262,7 @@ static ssize_t stm32_lptim_cnt_ceiling_read(struct counter_device *counter, { struct stm32_lptim_cnt *const priv = counter->priv; - return stm32_lptim_cnt_get_ceiling(priv, buf); + return snprintf(buf, PAGE_SIZE, "%u\n", priv->ceiling); } static ssize_t stm32_lptim_cnt_ceiling_write(struct counter_device *counter, @@ -493,8 +271,22 @@ static ssize_t stm32_lptim_cnt_ceiling_write(struct counter_device *counter, const char *buf, size_t len) { struct stm32_lptim_cnt *const priv = counter->priv; + unsigned int ceiling; + int ret; + + if (stm32_lptim_is_enabled(priv)) + return -EBUSY; + + ret = kstrtouint(buf, 0, &ceiling); + if (ret) + return ret; + + if (ceiling > STM32_LPTIM_MAX_ARR) + return -EINVAL; + + priv->ceiling = ceiling; - return stm32_lptim_cnt_set_ceiling(priv, buf, len); + return len; } static const struct counter_count_ext stm32_lptim_cnt_ext[] = { @@ -630,32 +422,19 @@ static int stm32_lptim_cnt_probe(struct platform_device *pdev) { struct stm32_lptimer *ddata = dev_get_drvdata(pdev->dev.parent); struct stm32_lptim_cnt *priv; - struct iio_dev *indio_dev; - int ret; if (IS_ERR_OR_NULL(ddata)) return -EINVAL; - indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*priv)); - if (!indio_dev) + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) return -ENOMEM; - priv = iio_priv(indio_dev); priv->dev = &pdev->dev; priv->regmap = ddata->regmap; priv->clk = ddata->clk; priv->ceiling = STM32_LPTIM_MAX_ARR; - /* Initialize IIO device */ - indio_dev->name = dev_name(&pdev->dev); - indio_dev->dev.of_node = pdev->dev.of_node; - indio_dev->info = &stm32_lptim_cnt_iio_info; - if (ddata->has_encoder) - indio_dev->channels = &stm32_lptim_enc_channels; - else - indio_dev->channels = &stm32_lptim_cnt_channels; - indio_dev->num_channels = 1; - /* Initialize Counter device */ priv->counter.name = dev_name(&pdev->dev); priv->counter.parent = &pdev->dev; @@ -673,10 +452,6 @@ static int stm32_lptim_cnt_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); - ret = devm_iio_device_register(&pdev->dev, indio_dev); - if (ret) - return ret; - return devm_counter_register(&pdev->dev, &priv->counter); } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 6498da359cbf561ad6357877e0940d54ded46e9d..a96bb9baa8e78825c54e7736a813b91b9087d8ca 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -543,7 +543,10 @@ EXPORT_SYMBOL_GPL(cpufreq_disable_fast_switch); unsigned int cpufreq_driver_resolve_freq(struct cpufreq_policy *policy, unsigned int target_freq) { + unsigned int old_target_freq = target_freq; + target_freq = clamp_val(target_freq, policy->min, policy->max); + trace_android_vh_cpufreq_resolve_freq(policy, &target_freq, old_target_freq); policy->cached_target_freq = target_freq; if (cpufreq_driver->target_index) { @@ -1015,10 +1018,9 @@ static struct kobj_type ktype_cpufreq = { .release = cpufreq_sysfs_release, }; -static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu) +static void add_cpu_dev_symlink(struct cpufreq_policy *policy, unsigned int cpu, + struct device *dev) { - struct device *dev = get_cpu_device(cpu); - if (unlikely(!dev)) return; @@ -1402,7 +1404,7 @@ static int cpufreq_online(unsigned int cpu) if (new_policy) { for_each_cpu(j, policy->related_cpus) { per_cpu(cpufreq_cpu_data, j) = policy; - add_cpu_dev_symlink(policy, j); + add_cpu_dev_symlink(policy, j, get_cpu_device(j)); } policy->min_freq_req = kzalloc(2 * sizeof(*policy->min_freq_req), @@ -1412,7 +1414,7 @@ static int cpufreq_online(unsigned int cpu) ret = freq_qos_add_request(&policy->constraints, policy->min_freq_req, FREQ_QOS_MIN, - policy->min); + FREQ_QOS_MIN_DEFAULT_VALUE); if (ret < 0) { /* * So we don't call freq_qos_remove_request() for an @@ -1432,7 +1434,7 @@ static int cpufreq_online(unsigned int cpu) ret = freq_qos_add_request(&policy->constraints, policy->max_freq_req, FREQ_QOS_MAX, - policy->max); + FREQ_QOS_MAX_DEFAULT_VALUE); if (ret < 0) { policy->max_freq_req = NULL; goto out_destroy_policy; @@ -1565,7 +1567,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) /* Create sysfs link on CPU registration */ policy = per_cpu(cpufreq_cpu_data, cpu); if (policy) - add_cpu_dev_symlink(policy, cpu); + add_cpu_dev_symlink(policy, cpu, dev); return 0; } @@ -2093,9 +2095,11 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, unsigned int target_freq) { unsigned int freq; + unsigned int old_target_freq = target_freq; int cpu; target_freq = clamp_val(target_freq, policy->min, policy->max); + trace_android_vh_cpufreq_fast_switch(policy, &target_freq, old_target_freq); freq = cpufreq_driver->fast_switch(policy, target_freq); if (!freq) @@ -2211,6 +2215,7 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy, /* Make sure that target_freq is within supported range */ target_freq = clamp_val(target_freq, policy->min, policy->max); + trace_android_vh_cpufreq_target(policy, &target_freq, old_target_freq); pr_debug("target for CPU %u: %u kHz, relation %u, requested %u kHz\n", policy->cpu, target_freq, relation, old_target_freq); diff --git a/drivers/cpufreq/cpufreq_governor_attr_set.c b/drivers/cpufreq/cpufreq_governor_attr_set.c index 66b05a326910eaa01c3e1c50e551f5e9e4cf530b..a6f365b9cc1ad8b026d906dcff015885cebdef40 100644 --- a/drivers/cpufreq/cpufreq_governor_attr_set.c +++ b/drivers/cpufreq/cpufreq_governor_attr_set.c @@ -74,8 +74,8 @@ unsigned int gov_attr_set_put(struct gov_attr_set *attr_set, struct list_head *l if (count) return count; - kobject_put(&attr_set->kobj); mutex_destroy(&attr_set->update_lock); + kobject_put(&attr_set->kobj); return 0; } EXPORT_SYMBOL_GPL(gov_attr_set_put); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 53ec9585ccd448eda6f8955553642d337745cef7..469e18547d06c83d4fe133b88d302e374783df77 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -488,6 +488,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) &kdev->kobj, "state%d", i); if (ret) { kobject_put(&kobj->kobj); + kfree(kobj); goto error_state; } cpuidle_add_s2idle_attr_group(kobj); @@ -619,6 +620,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) &kdev->kobj, "driver"); if (ret) { kobject_put(&kdrv->kobj); + kfree(kdrv); return ret; } @@ -705,7 +707,6 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) if (!kdev) return -ENOMEM; kdev->dev = dev; - dev->kobj_dev = kdev; init_completion(&kdev->kobj_unregister); @@ -713,9 +714,11 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) "cpuidle"); if (error) { kobject_put(&kdev->kobj); + kfree(kdev); return error; } + dev->kobj_dev = kdev; kobject_uevent(&kdev->kobj, KOBJ_ADD); return 0; diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index a780e627838ae9a3aa41835a32d353c4d4ea00ff..5a40c7d10cc9a6912e3dba2104c0d6990dc78111 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -5467,7 +5467,7 @@ int dpaa2_caam_enqueue(struct device *dev, struct caam_request *req) dpaa2_fd_set_len(&fd, dpaa2_fl_get_len(&req->fd_flt[1])); dpaa2_fd_set_flc(&fd, req->flc_dma); - ppriv = this_cpu_ptr(priv->ppriv); + ppriv = raw_cpu_ptr(priv->ppriv); for (i = 0; i < (priv->dpseci_attr.num_tx_queues << 1); i++) { err = dpaa2_io_service_enqueue_fq(ppriv->dpio, ppriv->req_fqid, &fd); diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index dd5f101e43f83ce1a1f358b06a3f3f1686e4a94c..3acc825da4cca37e95b6b825e3bede75438742b3 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -1152,16 +1152,27 @@ static struct caam_akcipher_alg caam_rsa = { int caam_pkc_init(struct device *ctrldev) { struct caam_drv_private *priv = dev_get_drvdata(ctrldev); - u32 pk_inst; + u32 pk_inst, pkha; int err; init_done = false; /* Determine public key hardware accelerator presence. */ - if (priv->era < 10) + if (priv->era < 10) { pk_inst = (rd_reg32(&priv->ctrl->perfmon.cha_num_ls) & CHA_ID_LS_PK_MASK) >> CHA_ID_LS_PK_SHIFT; - else - pk_inst = rd_reg32(&priv->ctrl->vreg.pkha) & CHA_VER_NUM_MASK; + } else { + pkha = rd_reg32(&priv->ctrl->vreg.pkha); + pk_inst = pkha & CHA_VER_NUM_MASK; + + /* + * Newer CAAMs support partially disabled functionality. If this is the + * case, the number is non-zero, but this bit is set to indicate that + * no encryption or decryption is supported. Only signing and verifying + * is supported. + */ + if (pkha & CHA_VER_MISC_PKHA_NO_CRYPT) + pk_inst = 0; + } /* Do not register algorithms if PKHA is not present. */ if (!pk_inst) diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index af61f3a2c0d46a780ff49ed5bc891f6356bfe106..3738625c025092840a96e9528e7bbf250ce1cb3a 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -322,6 +322,9 @@ struct version_regs { /* CHA Miscellaneous Information - AESA_MISC specific */ #define CHA_VER_MISC_AES_GCM BIT(1 + CHA_VER_MISC_SHIFT) +/* CHA Miscellaneous Information - PKHA_MISC specific */ +#define CHA_VER_MISC_PKHA_NO_CRYPT BIT(7 + CHA_VER_MISC_SHIFT) + /* * caam_perfmon - Performance Monitor/Secure Memory Status/ * CAAM Global Status/Component Version IDs diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index d6a8f4e4b14a8099649404ba0a34abadf9c99696..c15625e8ff66e0cb1f0ace209eaf75e97829424a 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -778,7 +778,7 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); if (ret) - goto e_ctx; + goto e_aad; if (in_place) { dst = src; @@ -863,7 +863,7 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) op.u.aes.size = 0; ret = cmd_q->ccp->vdata->perform->aes(&op); if (ret) - goto e_dst; + goto e_final_wa; if (aes->action == CCP_AES_ACTION_ENCRYPT) { /* Put the ciphered tag after the ciphertext. */ @@ -873,17 +873,19 @@ ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) ret = ccp_init_dm_workarea(&tag, cmd_q, authsize, DMA_BIDIRECTIONAL); if (ret) - goto e_tag; + goto e_final_wa; ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize); - if (ret) - goto e_tag; + if (ret) { + ccp_dm_free(&tag); + goto e_final_wa; + } ret = crypto_memneq(tag.address, final_wa.address, authsize) ? -EBADMSG : 0; ccp_dm_free(&tag); } -e_tag: +e_final_wa: ccp_dm_free(&final_wa); e_dst: diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 9b968ac4ee7b617302ae4748b4b130a10ae580e3..a196bb8b170103a28d2a9140d3dee7a3c3d51e35 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1302,7 +1302,7 @@ static int omap_aes_suspend(struct device *dev) static int omap_aes_resume(struct device *dev) { - pm_runtime_resume_and_get(dev); + pm_runtime_get_sync(dev); return 0; } #endif diff --git a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c index e829c6aaf16fdf4306828d20dedaa2ef5ea0c0fc..74afafc84c7164ca4dc2bfe873f2682df64b84a7 100644 --- a/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_pf2vf_msg.c @@ -111,37 +111,19 @@ static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr) mutex_lock(lock); - /* Check if PF2VF CSR is in use by remote function */ + /* Check if the PFVF CSR is in use by remote function */ val = ADF_CSR_RD(pmisc_bar_addr, pf2vf_offset); if ((val & remote_in_use_mask) == remote_in_use_pattern) { dev_dbg(&GET_DEV(accel_dev), - "PF2VF CSR in use by remote function\n"); + "PFVF CSR in use by remote function\n"); ret = -EBUSY; goto out; } - /* Attempt to get ownership of PF2VF CSR */ msg &= ~local_in_use_mask; msg |= local_in_use_pattern; - ADF_CSR_WR(pmisc_bar_addr, pf2vf_offset, msg); - /* Wait in case remote func also attempting to get ownership */ - msleep(ADF_IOV_MSG_COLLISION_DETECT_DELAY); - - val = ADF_CSR_RD(pmisc_bar_addr, pf2vf_offset); - if ((val & local_in_use_mask) != local_in_use_pattern) { - dev_dbg(&GET_DEV(accel_dev), - "PF2VF CSR in use by remote - collision detected\n"); - ret = -EBUSY; - goto out; - } - - /* - * This function now owns the PV2VF CSR. The IN_USE_BY pattern must - * remain in the PF2VF CSR for all writes including ACK from remote - * until this local function relinquishes the CSR. Send the message - * by interrupting the remote. - */ + /* Attempt to get ownership of the PFVF CSR */ ADF_CSR_WR(pmisc_bar_addr, pf2vf_offset, msg | int_bit); /* Wait for confirmation from remote func it received the message */ @@ -156,7 +138,14 @@ static int __adf_iov_putmsg(struct adf_accel_dev *accel_dev, u32 msg, u8 vf_nr) ret = -EIO; } - /* Finished with PF2VF CSR; relinquish it and leave msg in CSR */ + if (val != msg) { + dev_dbg(&GET_DEV(accel_dev), + "Collision - PFVF CSR overwritten by remote function\n"); + ret = -EIO; + goto out; + } + + /* Finished with the PFVF CSR; relinquish it and leave msg in CSR */ ADF_CSR_WR(pmisc_bar_addr, pf2vf_offset, val & ~local_in_use_mask); out: mutex_unlock(lock); @@ -164,12 +153,13 @@ out: } /** - * adf_iov_putmsg() - send PF2VF message + * adf_iov_putmsg() - send PFVF message * @accel_dev: Pointer to acceleration device. * @msg: Message to send - * @vf_nr: VF number to which the message will be sent + * @vf_nr: VF number to which the message will be sent if on PF, ignored + * otherwise * - * Function sends a messge from the PF to a VF + * Function sends a message through the PFVF channel * * Return: 0 on success, error code otherwise. */ @@ -198,6 +188,11 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) /* Read message from the VF */ msg = ADF_CSR_RD(pmisc_addr, hw_data->get_pf2vf_offset(vf_nr)); + if (!(msg & ADF_VF2PF_INT)) { + dev_info(&GET_DEV(accel_dev), + "Spurious VF2PF interrupt, msg %X. Ignored\n", msg); + goto out; + } /* To ACK, clear the VF2PFINT bit */ msg &= ~ADF_VF2PF_INT; @@ -281,6 +276,7 @@ void adf_vf2pf_req_hndl(struct adf_accel_vf_info *vf_info) if (resp && adf_iov_putmsg(accel_dev, resp, vf_nr)) dev_err(&GET_DEV(accel_dev), "Failed to send response to VF\n"); +out: /* re-enable interrupt on PF from this VF */ adf_enable_vf2pf_interrupts(accel_dev, (1 << vf_nr)); return; diff --git a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c index 54b738da829d899e4558ccbb4d7e07ac8c36e5d5..3e25fac051b25f005a29cf6275fc9aa4a539f4bf 100644 --- a/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c +++ b/drivers/crypto/qat/qat_common/adf_vf2pf_msg.c @@ -8,7 +8,7 @@ * adf_vf2pf_notify_init() - send init msg to PF * @accel_dev: Pointer to acceleration VF device. * - * Function sends an init messge from the VF to a PF + * Function sends an init message from the VF to a PF * * Return: 0 on success, error code otherwise. */ @@ -31,7 +31,7 @@ EXPORT_SYMBOL_GPL(adf_vf2pf_notify_init); * adf_vf2pf_notify_shutdown() - send shutdown msg to PF * @accel_dev: Pointer to acceleration VF device. * - * Function sends a shutdown messge from the VF to a PF + * Function sends a shutdown message from the VF to a PF * * Return: void */ diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index 024401ec9d1ae45b62671d722372990a3891b7c8..fa1b3a94155cca2c0b129b587f9b5df7a32162f4 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -79,6 +79,11 @@ static void adf_pf2vf_bh_handler(void *data) /* Read the message from PF */ msg = ADF_CSR_RD(pmisc_bar_addr, hw_data->get_pf2vf_offset(0)); + if (!(msg & ADF_PF2VF_INT)) { + dev_info(&GET_DEV(accel_dev), + "Spurious PF2VF interrupt, msg %X. Ignored\n", msg); + goto out; + } if (!(msg & ADF_PF2VF_MSGORIGIN_SYSTEM)) /* Ignore legacy non-system (non-kernel) PF2VF messages */ @@ -127,6 +132,7 @@ static void adf_pf2vf_bh_handler(void *data) msg &= ~ADF_PF2VF_INT; ADF_CSR_WR(pmisc_bar_addr, hw_data->get_pf2vf_offset(0), msg); +out: /* Re-enable PF2VF interrupts */ adf_enable_pf2vf_interrupts(accel_dev); return; diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c index 87be96a0b0bba65f2f4c58dbb4994fa6fa42d027..8b4e79d882af45c6a9e1effa893f57becba194bc 100644 --- a/drivers/crypto/qce/sha.c +++ b/drivers/crypto/qce/sha.c @@ -533,8 +533,8 @@ static int qce_ahash_register_one(const struct qce_ahash_def *def, ret = crypto_register_ahash(alg); if (ret) { - kfree(tmpl); dev_err(qce->dev, "%s registration failed\n", base->cra_name); + kfree(tmpl); return ret; } diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c index d8053789c8828a6dc6ac0e0a02ead50e3eacc248..89c7fc3efbd717511d72bbf7ef52333320641314 100644 --- a/drivers/crypto/qce/skcipher.c +++ b/drivers/crypto/qce/skcipher.c @@ -433,8 +433,8 @@ static int qce_skcipher_register_one(const struct qce_skcipher_def *def, ret = crypto_register_skcipher(alg); if (ret) { - kfree(tmpl); dev_err(qce->dev, "%s registration failed\n", alg->base.cra_name); + kfree(tmpl); return ret; } diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 88a6c853ffd73f4ba70c025c93ae469e20f15a72..c4a7a2bba0866ea2314f2b36cdf4d6855e05e832 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -2173,6 +2173,8 @@ static int s5p_aes_probe(struct platform_device *pdev) variant = find_s5p_sss_version(pdev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; /* * Note: HASH and PRNG uses the same registers in secss, avoid diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c index 75867c0b00172a013edcb2bfea5070e785ec5557..be1bf39a317de15b335d6289bf556e7e1205371b 100644 --- a/drivers/crypto/stm32/stm32-crc32.c +++ b/drivers/crypto/stm32/stm32-crc32.c @@ -279,7 +279,7 @@ static struct shash_alg algs[] = { .digestsize = CHKSUM_DIGEST_SIZE, .base = { .cra_name = "crc32", - .cra_driver_name = DRIVER_NAME, + .cra_driver_name = "stm32-crc32-crc32", .cra_priority = 200, .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, @@ -301,7 +301,7 @@ static struct shash_alg algs[] = { .digestsize = CHKSUM_DIGEST_SIZE, .base = { .cra_name = "crc32c", - .cra_driver_name = DRIVER_NAME, + .cra_driver_name = "stm32-crc32-crc32c", .cra_priority = 200, .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c index 7999b26a16ed057217a570cfe780c29d0d5a229c..81eb136b6c11df0308969c9ee0ec91f696a0213f 100644 --- a/drivers/crypto/stm32/stm32-cryp.c +++ b/drivers/crypto/stm32/stm32-cryp.c @@ -37,7 +37,6 @@ /* Mode mask = bits [15..0] */ #define FLG_MODE_MASK GENMASK(15, 0) /* Bit [31..16] status */ -#define FLG_CCM_PADDED_WA BIT(16) /* Registers */ #define CRYP_CR 0x00000000 @@ -105,8 +104,6 @@ /* Misc */ #define AES_BLOCK_32 (AES_BLOCK_SIZE / sizeof(u32)) #define GCM_CTR_INIT 2 -#define _walked_in (cryp->in_walk.offset - cryp->in_sg->offset) -#define _walked_out (cryp->out_walk.offset - cryp->out_sg->offset) #define CRYP_AUTOSUSPEND_DELAY 50 struct stm32_cryp_caps { @@ -144,26 +141,16 @@ struct stm32_cryp { size_t authsize; size_t hw_blocksize; - size_t total_in; - size_t total_in_save; - size_t total_out; - size_t total_out_save; + size_t payload_in; + size_t header_in; + size_t payload_out; - struct scatterlist *in_sg; struct scatterlist *out_sg; - struct scatterlist *out_sg_save; - - struct scatterlist in_sgl; - struct scatterlist out_sgl; - bool sgs_copied; - - int in_sg_len; - int out_sg_len; struct scatter_walk in_walk; struct scatter_walk out_walk; - u32 last_ctr[4]; + __be32 last_ctr[4]; u32 gcm_ctr; }; @@ -262,6 +249,7 @@ static inline int stm32_cryp_wait_output(struct stm32_cryp *cryp) } static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp); +static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err); static struct stm32_cryp *stm32_cryp_find_dev(struct stm32_cryp_ctx *ctx) { @@ -283,103 +271,6 @@ static struct stm32_cryp *stm32_cryp_find_dev(struct stm32_cryp_ctx *ctx) return cryp; } -static int stm32_cryp_check_aligned(struct scatterlist *sg, size_t total, - size_t align) -{ - int len = 0; - - if (!total) - return 0; - - if (!IS_ALIGNED(total, align)) - return -EINVAL; - - while (sg) { - if (!IS_ALIGNED(sg->offset, sizeof(u32))) - return -EINVAL; - - if (!IS_ALIGNED(sg->length, align)) - return -EINVAL; - - len += sg->length; - sg = sg_next(sg); - } - - if (len != total) - return -EINVAL; - - return 0; -} - -static int stm32_cryp_check_io_aligned(struct stm32_cryp *cryp) -{ - int ret; - - ret = stm32_cryp_check_aligned(cryp->in_sg, cryp->total_in, - cryp->hw_blocksize); - if (ret) - return ret; - - ret = stm32_cryp_check_aligned(cryp->out_sg, cryp->total_out, - cryp->hw_blocksize); - - return ret; -} - -static void sg_copy_buf(void *buf, struct scatterlist *sg, - unsigned int start, unsigned int nbytes, int out) -{ - struct scatter_walk walk; - - if (!nbytes) - return; - - scatterwalk_start(&walk, sg); - scatterwalk_advance(&walk, start); - scatterwalk_copychunks(buf, &walk, nbytes, out); - scatterwalk_done(&walk, out, 0); -} - -static int stm32_cryp_copy_sgs(struct stm32_cryp *cryp) -{ - void *buf_in, *buf_out; - int pages, total_in, total_out; - - if (!stm32_cryp_check_io_aligned(cryp)) { - cryp->sgs_copied = 0; - return 0; - } - - total_in = ALIGN(cryp->total_in, cryp->hw_blocksize); - pages = total_in ? get_order(total_in) : 1; - buf_in = (void *)__get_free_pages(GFP_ATOMIC, pages); - - total_out = ALIGN(cryp->total_out, cryp->hw_blocksize); - pages = total_out ? get_order(total_out) : 1; - buf_out = (void *)__get_free_pages(GFP_ATOMIC, pages); - - if (!buf_in || !buf_out) { - dev_err(cryp->dev, "Can't allocate pages when unaligned\n"); - cryp->sgs_copied = 0; - return -EFAULT; - } - - sg_copy_buf(buf_in, cryp->in_sg, 0, cryp->total_in, 0); - - sg_init_one(&cryp->in_sgl, buf_in, total_in); - cryp->in_sg = &cryp->in_sgl; - cryp->in_sg_len = 1; - - sg_init_one(&cryp->out_sgl, buf_out, total_out); - cryp->out_sg_save = cryp->out_sg; - cryp->out_sg = &cryp->out_sgl; - cryp->out_sg_len = 1; - - cryp->sgs_copied = 1; - - return 0; -} - static void stm32_cryp_hw_write_iv(struct stm32_cryp *cryp, __be32 *iv) { if (!iv) @@ -481,16 +372,99 @@ static int stm32_cryp_gcm_init(struct stm32_cryp *cryp, u32 cfg) /* Wait for end of processing */ ret = stm32_cryp_wait_enable(cryp); - if (ret) + if (ret) { dev_err(cryp->dev, "Timeout (gcm init)\n"); + return ret; + } - return ret; + /* Prepare next phase */ + if (cryp->areq->assoclen) { + cfg |= CR_PH_HEADER; + stm32_cryp_write(cryp, CRYP_CR, cfg); + } else if (stm32_cryp_get_input_text_len(cryp)) { + cfg |= CR_PH_PAYLOAD; + stm32_cryp_write(cryp, CRYP_CR, cfg); + } + + return 0; +} + +static void stm32_crypt_gcmccm_end_header(struct stm32_cryp *cryp) +{ + u32 cfg; + int err; + + /* Check if whole header written */ + if (!cryp->header_in) { + /* Wait for completion */ + err = stm32_cryp_wait_busy(cryp); + if (err) { + dev_err(cryp->dev, "Timeout (gcm/ccm header)\n"); + stm32_cryp_write(cryp, CRYP_IMSCR, 0); + stm32_cryp_finish_req(cryp, err); + return; + } + + if (stm32_cryp_get_input_text_len(cryp)) { + /* Phase 3 : payload */ + cfg = stm32_cryp_read(cryp, CRYP_CR); + cfg &= ~CR_CRYPEN; + stm32_cryp_write(cryp, CRYP_CR, cfg); + + cfg &= ~CR_PH_MASK; + cfg |= CR_PH_PAYLOAD | CR_CRYPEN; + stm32_cryp_write(cryp, CRYP_CR, cfg); + } else { + /* + * Phase 4 : tag. + * Nothing to read, nothing to write, caller have to + * end request + */ + } + } +} + +static void stm32_cryp_write_ccm_first_header(struct stm32_cryp *cryp) +{ + unsigned int i; + size_t written; + size_t len; + u32 alen = cryp->areq->assoclen; + u32 block[AES_BLOCK_32] = {0}; + u8 *b8 = (u8 *)block; + + if (alen <= 65280) { + /* Write first u32 of B1 */ + b8[0] = (alen >> 8) & 0xFF; + b8[1] = alen & 0xFF; + len = 2; + } else { + /* Build the two first u32 of B1 */ + b8[0] = 0xFF; + b8[1] = 0xFE; + b8[2] = (alen & 0xFF000000) >> 24; + b8[3] = (alen & 0x00FF0000) >> 16; + b8[4] = (alen & 0x0000FF00) >> 8; + b8[5] = alen & 0x000000FF; + len = 6; + } + + written = min_t(size_t, AES_BLOCK_SIZE - len, alen); + + scatterwalk_copychunks((char *)block + len, &cryp->in_walk, written, 0); + for (i = 0; i < AES_BLOCK_32; i++) + stm32_cryp_write(cryp, CRYP_DIN, block[i]); + + cryp->header_in -= written; + + stm32_crypt_gcmccm_end_header(cryp); } static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg) { int ret; - u8 iv[AES_BLOCK_SIZE], b0[AES_BLOCK_SIZE]; + u32 iv_32[AES_BLOCK_32], b0_32[AES_BLOCK_32]; + u8 *iv = (u8 *)iv_32, *b0 = (u8 *)b0_32; __be32 *bd; u32 *d; unsigned int i, textlen; @@ -531,10 +505,24 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg) /* Wait for end of processing */ ret = stm32_cryp_wait_enable(cryp); - if (ret) + if (ret) { dev_err(cryp->dev, "Timeout (ccm init)\n"); + return ret; + } - return ret; + /* Prepare next phase */ + if (cryp->areq->assoclen) { + cfg |= CR_PH_HEADER | CR_CRYPEN; + stm32_cryp_write(cryp, CRYP_CR, cfg); + + /* Write first (special) block (may move to next phase [payload]) */ + stm32_cryp_write_ccm_first_header(cryp); + } else if (stm32_cryp_get_input_text_len(cryp)) { + cfg |= CR_PH_PAYLOAD; + stm32_cryp_write(cryp, CRYP_CR, cfg); + } + + return 0; } static int stm32_cryp_hw_init(struct stm32_cryp *cryp) @@ -542,7 +530,7 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp) int ret; u32 cfg, hw_mode; - pm_runtime_resume_and_get(cryp->dev); + pm_runtime_get_sync(cryp->dev); /* Disable interrupt */ stm32_cryp_write(cryp, CRYP_IMSCR, 0); @@ -605,16 +593,6 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp) if (ret) return ret; - /* Phase 2 : header (authenticated data) */ - if (cryp->areq->assoclen) { - cfg |= CR_PH_HEADER; - } else if (stm32_cryp_get_input_text_len(cryp)) { - cfg |= CR_PH_PAYLOAD; - stm32_cryp_write(cryp, CRYP_CR, cfg); - } else { - cfg |= CR_PH_INIT; - } - break; case CR_DES_CBC: @@ -633,8 +611,6 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp) stm32_cryp_write(cryp, CRYP_CR, cfg); - cryp->flags &= ~FLG_CCM_PADDED_WA; - return 0; } @@ -644,28 +620,9 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err) /* Phase 4 : output tag */ err = stm32_cryp_read_auth_tag(cryp); - if (!err && (!(is_gcm(cryp) || is_ccm(cryp)))) + if (!err && (!(is_gcm(cryp) || is_ccm(cryp) || is_ecb(cryp)))) stm32_cryp_get_iv(cryp); - if (cryp->sgs_copied) { - void *buf_in, *buf_out; - int pages, len; - - buf_in = sg_virt(&cryp->in_sgl); - buf_out = sg_virt(&cryp->out_sgl); - - sg_copy_buf(buf_out, cryp->out_sg_save, 0, - cryp->total_out_save, 1); - - len = ALIGN(cryp->total_in_save, cryp->hw_blocksize); - pages = len ? get_order(len) : 1; - free_pages((unsigned long)buf_in, pages); - - len = ALIGN(cryp->total_out_save, cryp->hw_blocksize); - pages = len ? get_order(len) : 1; - free_pages((unsigned long)buf_out, pages); - } - pm_runtime_mark_last_busy(cryp->dev); pm_runtime_put_autosuspend(cryp->dev); @@ -674,8 +631,6 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err) else crypto_finalize_skcipher_request(cryp->engine, cryp->req, err); - - memset(cryp->ctx->key, 0, cryp->ctx->keylen); } static int stm32_cryp_cpu_start(struct stm32_cryp *cryp) @@ -801,7 +756,20 @@ static int stm32_cryp_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key, static int stm32_cryp_aes_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { - return authsize == AES_BLOCK_SIZE ? 0 : -EINVAL; + switch (authsize) { + case 4: + case 8: + case 12: + case 13: + case 14: + case 15: + case 16: + break; + default: + return -EINVAL; + } + + return 0; } static int stm32_cryp_aes_ccm_setauthsize(struct crypto_aead *tfm, @@ -825,31 +793,61 @@ static int stm32_cryp_aes_ccm_setauthsize(struct crypto_aead *tfm, static int stm32_cryp_aes_ecb_encrypt(struct skcipher_request *req) { + if (req->cryptlen % AES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_AES | FLG_ECB | FLG_ENCRYPT); } static int stm32_cryp_aes_ecb_decrypt(struct skcipher_request *req) { + if (req->cryptlen % AES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_AES | FLG_ECB); } static int stm32_cryp_aes_cbc_encrypt(struct skcipher_request *req) { + if (req->cryptlen % AES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_AES | FLG_CBC | FLG_ENCRYPT); } static int stm32_cryp_aes_cbc_decrypt(struct skcipher_request *req) { + if (req->cryptlen % AES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_AES | FLG_CBC); } static int stm32_cryp_aes_ctr_encrypt(struct skcipher_request *req) { + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_AES | FLG_CTR | FLG_ENCRYPT); } static int stm32_cryp_aes_ctr_decrypt(struct skcipher_request *req) { + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_AES | FLG_CTR); } @@ -863,53 +861,122 @@ static int stm32_cryp_aes_gcm_decrypt(struct aead_request *req) return stm32_cryp_aead_crypt(req, FLG_AES | FLG_GCM); } +static inline int crypto_ccm_check_iv(const u8 *iv) +{ + /* 2 <= L <= 8, so 1 <= L' <= 7. */ + if (iv[0] < 1 || iv[0] > 7) + return -EINVAL; + + return 0; +} + static int stm32_cryp_aes_ccm_encrypt(struct aead_request *req) { + int err; + + err = crypto_ccm_check_iv(req->iv); + if (err) + return err; + return stm32_cryp_aead_crypt(req, FLG_AES | FLG_CCM | FLG_ENCRYPT); } static int stm32_cryp_aes_ccm_decrypt(struct aead_request *req) { + int err; + + err = crypto_ccm_check_iv(req->iv); + if (err) + return err; + return stm32_cryp_aead_crypt(req, FLG_AES | FLG_CCM); } static int stm32_cryp_des_ecb_encrypt(struct skcipher_request *req) { + if (req->cryptlen % DES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_DES | FLG_ECB | FLG_ENCRYPT); } static int stm32_cryp_des_ecb_decrypt(struct skcipher_request *req) { + if (req->cryptlen % DES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_DES | FLG_ECB); } static int stm32_cryp_des_cbc_encrypt(struct skcipher_request *req) { + if (req->cryptlen % DES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_DES | FLG_CBC | FLG_ENCRYPT); } static int stm32_cryp_des_cbc_decrypt(struct skcipher_request *req) { + if (req->cryptlen % DES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_DES | FLG_CBC); } static int stm32_cryp_tdes_ecb_encrypt(struct skcipher_request *req) { + if (req->cryptlen % DES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_TDES | FLG_ECB | FLG_ENCRYPT); } static int stm32_cryp_tdes_ecb_decrypt(struct skcipher_request *req) { + if (req->cryptlen % DES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_TDES | FLG_ECB); } static int stm32_cryp_tdes_cbc_encrypt(struct skcipher_request *req) { + if (req->cryptlen % DES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC | FLG_ENCRYPT); } static int stm32_cryp_tdes_cbc_decrypt(struct skcipher_request *req) { + if (req->cryptlen % DES_BLOCK_SIZE) + return -EINVAL; + + if (req->cryptlen == 0) + return 0; + return stm32_cryp_crypt(req, FLG_TDES | FLG_CBC); } @@ -919,6 +986,7 @@ static int stm32_cryp_prepare_req(struct skcipher_request *req, struct stm32_cryp_ctx *ctx; struct stm32_cryp *cryp; struct stm32_cryp_reqctx *rctx; + struct scatterlist *in_sg; int ret; if (!req && !areq) @@ -944,76 +1012,55 @@ static int stm32_cryp_prepare_req(struct skcipher_request *req, if (req) { cryp->req = req; cryp->areq = NULL; - cryp->total_in = req->cryptlen; - cryp->total_out = cryp->total_in; + cryp->header_in = 0; + cryp->payload_in = req->cryptlen; + cryp->payload_out = req->cryptlen; + cryp->authsize = 0; } else { /* * Length of input and output data: * Encryption case: - * INPUT = AssocData || PlainText + * INPUT = AssocData || PlainText * <- assoclen -> <- cryptlen -> - * <------- total_in -----------> * - * OUTPUT = AssocData || CipherText || AuthTag - * <- assoclen -> <- cryptlen -> <- authsize -> - * <---------------- total_out -----------------> + * OUTPUT = AssocData || CipherText || AuthTag + * <- assoclen -> <-- cryptlen --> <- authsize -> * * Decryption case: - * INPUT = AssocData || CipherText || AuthTag - * <- assoclen -> <--------- cryptlen ---------> - * <- authsize -> - * <---------------- total_in ------------------> + * INPUT = AssocData || CipherTex || AuthTag + * <- assoclen ---> <---------- cryptlen ----------> * - * OUTPUT = AssocData || PlainText - * <- assoclen -> <- crypten - authsize -> - * <---------- total_out -----------------> + * OUTPUT = AssocData || PlainText + * <- assoclen -> <- cryptlen - authsize -> */ cryp->areq = areq; cryp->req = NULL; cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(areq)); - cryp->total_in = areq->assoclen + areq->cryptlen; - if (is_encrypt(cryp)) - /* Append auth tag to output */ - cryp->total_out = cryp->total_in + cryp->authsize; - else - /* No auth tag in output */ - cryp->total_out = cryp->total_in - cryp->authsize; + if (is_encrypt(cryp)) { + cryp->payload_in = areq->cryptlen; + cryp->header_in = areq->assoclen; + cryp->payload_out = areq->cryptlen; + } else { + cryp->payload_in = areq->cryptlen - cryp->authsize; + cryp->header_in = areq->assoclen; + cryp->payload_out = cryp->payload_in; + } } - cryp->total_in_save = cryp->total_in; - cryp->total_out_save = cryp->total_out; + in_sg = req ? req->src : areq->src; + scatterwalk_start(&cryp->in_walk, in_sg); - cryp->in_sg = req ? req->src : areq->src; cryp->out_sg = req ? req->dst : areq->dst; - cryp->out_sg_save = cryp->out_sg; - - cryp->in_sg_len = sg_nents_for_len(cryp->in_sg, cryp->total_in); - if (cryp->in_sg_len < 0) { - dev_err(cryp->dev, "Cannot get in_sg_len\n"); - ret = cryp->in_sg_len; - return ret; - } - - cryp->out_sg_len = sg_nents_for_len(cryp->out_sg, cryp->total_out); - if (cryp->out_sg_len < 0) { - dev_err(cryp->dev, "Cannot get out_sg_len\n"); - ret = cryp->out_sg_len; - return ret; - } - - ret = stm32_cryp_copy_sgs(cryp); - if (ret) - return ret; - - scatterwalk_start(&cryp->in_walk, cryp->in_sg); scatterwalk_start(&cryp->out_walk, cryp->out_sg); if (is_gcm(cryp) || is_ccm(cryp)) { /* In output, jump after assoc data */ - scatterwalk_advance(&cryp->out_walk, cryp->areq->assoclen); - cryp->total_out -= cryp->areq->assoclen; + scatterwalk_copychunks(NULL, &cryp->out_walk, cryp->areq->assoclen, 2); } + if (is_ctr(cryp)) + memset(cryp->last_ctr, 0, sizeof(cryp->last_ctr)); + ret = stm32_cryp_hw_init(cryp); return ret; } @@ -1061,8 +1108,7 @@ static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq) if (!cryp) return -ENODEV; - if (unlikely(!cryp->areq->assoclen && - !stm32_cryp_get_input_text_len(cryp))) { + if (unlikely(!cryp->payload_in && !cryp->header_in)) { /* No input data to process: get tag and finish */ stm32_cryp_finish_req(cryp, 0); return 0; @@ -1071,43 +1117,10 @@ static int stm32_cryp_aead_one_req(struct crypto_engine *engine, void *areq) return stm32_cryp_cpu_start(cryp); } -static u32 *stm32_cryp_next_out(struct stm32_cryp *cryp, u32 *dst, - unsigned int n) -{ - scatterwalk_advance(&cryp->out_walk, n); - - if (unlikely(cryp->out_sg->length == _walked_out)) { - cryp->out_sg = sg_next(cryp->out_sg); - if (cryp->out_sg) { - scatterwalk_start(&cryp->out_walk, cryp->out_sg); - return (sg_virt(cryp->out_sg) + _walked_out); - } - } - - return (u32 *)((u8 *)dst + n); -} - -static u32 *stm32_cryp_next_in(struct stm32_cryp *cryp, u32 *src, - unsigned int n) -{ - scatterwalk_advance(&cryp->in_walk, n); - - if (unlikely(cryp->in_sg->length == _walked_in)) { - cryp->in_sg = sg_next(cryp->in_sg); - if (cryp->in_sg) { - scatterwalk_start(&cryp->in_walk, cryp->in_sg); - return (sg_virt(cryp->in_sg) + _walked_in); - } - } - - return (u32 *)((u8 *)src + n); -} - static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) { - u32 cfg, size_bit, *dst, d32; - u8 *d8; - unsigned int i, j; + u32 cfg, size_bit; + unsigned int i; int ret = 0; /* Update Config */ @@ -1130,7 +1143,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) stm32_cryp_write(cryp, CRYP_DIN, size_bit); size_bit = is_encrypt(cryp) ? cryp->areq->cryptlen : - cryp->areq->cryptlen - AES_BLOCK_SIZE; + cryp->areq->cryptlen - cryp->authsize; size_bit *= 8; if (cryp->caps->swap_final) size_bit = (__force u32)cpu_to_be32(size_bit); @@ -1139,11 +1152,9 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) stm32_cryp_write(cryp, CRYP_DIN, size_bit); } else { /* CCM: write CTR0 */ - u8 iv[AES_BLOCK_SIZE]; - u32 *iv32 = (u32 *)iv; - __be32 *biv; - - biv = (void *)iv; + u32 iv32[AES_BLOCK_32]; + u8 *iv = (u8 *)iv32; + __be32 *biv = (__be32 *)iv32; memcpy(iv, cryp->areq->iv, AES_BLOCK_SIZE); memset(iv + AES_BLOCK_SIZE - 1 - iv[0], 0, iv[0] + 1); @@ -1165,39 +1176,18 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) } if (is_encrypt(cryp)) { + u32 out_tag[AES_BLOCK_32]; + /* Get and write tag */ - dst = sg_virt(cryp->out_sg) + _walked_out; + for (i = 0; i < AES_BLOCK_32; i++) + out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT); - for (i = 0; i < AES_BLOCK_32; i++) { - if (cryp->total_out >= sizeof(u32)) { - /* Read a full u32 */ - *dst = stm32_cryp_read(cryp, CRYP_DOUT); - - dst = stm32_cryp_next_out(cryp, dst, - sizeof(u32)); - cryp->total_out -= sizeof(u32); - } else if (!cryp->total_out) { - /* Empty fifo out (data from input padding) */ - stm32_cryp_read(cryp, CRYP_DOUT); - } else { - /* Read less than an u32 */ - d32 = stm32_cryp_read(cryp, CRYP_DOUT); - d8 = (u8 *)&d32; - - for (j = 0; j < cryp->total_out; j++) { - *((u8 *)dst) = *(d8++); - dst = stm32_cryp_next_out(cryp, dst, 1); - } - cryp->total_out = 0; - } - } + scatterwalk_copychunks(out_tag, &cryp->out_walk, cryp->authsize, 1); } else { /* Get and check tag */ u32 in_tag[AES_BLOCK_32], out_tag[AES_BLOCK_32]; - scatterwalk_map_and_copy(in_tag, cryp->in_sg, - cryp->total_in_save - cryp->authsize, - cryp->authsize, 0); + scatterwalk_copychunks(in_tag, &cryp->in_walk, cryp->authsize, 0); for (i = 0; i < AES_BLOCK_32; i++) out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT); @@ -1217,115 +1207,59 @@ static void stm32_cryp_check_ctr_counter(struct stm32_cryp *cryp) { u32 cr; - if (unlikely(cryp->last_ctr[3] == 0xFFFFFFFF)) { - cryp->last_ctr[3] = 0; - cryp->last_ctr[2]++; - if (!cryp->last_ctr[2]) { - cryp->last_ctr[1]++; - if (!cryp->last_ctr[1]) - cryp->last_ctr[0]++; - } + if (unlikely(cryp->last_ctr[3] == cpu_to_be32(0xFFFFFFFF))) { + /* + * In this case, we need to increment manually the ctr counter, + * as HW doesn't handle the U32 carry. + */ + crypto_inc((u8 *)cryp->last_ctr, sizeof(cryp->last_ctr)); cr = stm32_cryp_read(cryp, CRYP_CR); stm32_cryp_write(cryp, CRYP_CR, cr & ~CR_CRYPEN); - stm32_cryp_hw_write_iv(cryp, (u32 *)cryp->last_ctr); + stm32_cryp_hw_write_iv(cryp, cryp->last_ctr); stm32_cryp_write(cryp, CRYP_CR, cr); } - cryp->last_ctr[0] = stm32_cryp_read(cryp, CRYP_IV0LR); - cryp->last_ctr[1] = stm32_cryp_read(cryp, CRYP_IV0RR); - cryp->last_ctr[2] = stm32_cryp_read(cryp, CRYP_IV1LR); - cryp->last_ctr[3] = stm32_cryp_read(cryp, CRYP_IV1RR); + /* The IV registers are BE */ + cryp->last_ctr[0] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0LR)); + cryp->last_ctr[1] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0RR)); + cryp->last_ctr[2] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1LR)); + cryp->last_ctr[3] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1RR)); } -static bool stm32_cryp_irq_read_data(struct stm32_cryp *cryp) +static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp) { - unsigned int i, j; - u32 d32, *dst; - u8 *d8; - size_t tag_size; - - /* Do no read tag now (if any) */ - if (is_encrypt(cryp) && (is_gcm(cryp) || is_ccm(cryp))) - tag_size = cryp->authsize; - else - tag_size = 0; - - dst = sg_virt(cryp->out_sg) + _walked_out; + unsigned int i; + u32 block[AES_BLOCK_32]; - for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) { - if (likely(cryp->total_out - tag_size >= sizeof(u32))) { - /* Read a full u32 */ - *dst = stm32_cryp_read(cryp, CRYP_DOUT); + for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) + block[i] = stm32_cryp_read(cryp, CRYP_DOUT); - dst = stm32_cryp_next_out(cryp, dst, sizeof(u32)); - cryp->total_out -= sizeof(u32); - } else if (cryp->total_out == tag_size) { - /* Empty fifo out (data from input padding) */ - d32 = stm32_cryp_read(cryp, CRYP_DOUT); - } else { - /* Read less than an u32 */ - d32 = stm32_cryp_read(cryp, CRYP_DOUT); - d8 = (u8 *)&d32; - - for (j = 0; j < cryp->total_out - tag_size; j++) { - *((u8 *)dst) = *(d8++); - dst = stm32_cryp_next_out(cryp, dst, 1); - } - cryp->total_out = tag_size; - } - } - - return !(cryp->total_out - tag_size) || !cryp->total_in; + scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, + cryp->payload_out), 1); + cryp->payload_out -= min_t(size_t, cryp->hw_blocksize, + cryp->payload_out); } static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp) { - unsigned int i, j; - u32 *src; - u8 d8[4]; - size_t tag_size; - - /* Do no write tag (if any) */ - if (is_decrypt(cryp) && (is_gcm(cryp) || is_ccm(cryp))) - tag_size = cryp->authsize; - else - tag_size = 0; - - src = sg_virt(cryp->in_sg) + _walked_in; + unsigned int i; + u32 block[AES_BLOCK_32] = {0}; - for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) { - if (likely(cryp->total_in - tag_size >= sizeof(u32))) { - /* Write a full u32 */ - stm32_cryp_write(cryp, CRYP_DIN, *src); + scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, cryp->hw_blocksize, + cryp->payload_in), 0); + for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) + stm32_cryp_write(cryp, CRYP_DIN, block[i]); - src = stm32_cryp_next_in(cryp, src, sizeof(u32)); - cryp->total_in -= sizeof(u32); - } else if (cryp->total_in == tag_size) { - /* Write padding data */ - stm32_cryp_write(cryp, CRYP_DIN, 0); - } else { - /* Write less than an u32 */ - memset(d8, 0, sizeof(u32)); - for (j = 0; j < cryp->total_in - tag_size; j++) { - d8[j] = *((u8 *)src); - src = stm32_cryp_next_in(cryp, src, 1); - } - - stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8); - cryp->total_in = tag_size; - } - } + cryp->payload_in -= min_t(size_t, cryp->hw_blocksize, cryp->payload_in); } static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) { int err; - u32 cfg, tmp[AES_BLOCK_32]; - size_t total_in_ori = cryp->total_in; - struct scatterlist *out_sg_ori = cryp->out_sg; + u32 cfg, block[AES_BLOCK_32] = {0}; unsigned int i; /* 'Special workaround' procedure described in the datasheet */ @@ -1350,18 +1284,25 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) /* b) pad and write the last block */ stm32_cryp_irq_write_block(cryp); - cryp->total_in = total_in_ori; + /* wait end of process */ err = stm32_cryp_wait_output(cryp); if (err) { - dev_err(cryp->dev, "Timeout (write gcm header)\n"); + dev_err(cryp->dev, "Timeout (write gcm last data)\n"); return stm32_cryp_finish_req(cryp, err); } /* c) get and store encrypted data */ - stm32_cryp_irq_read_data(cryp); - scatterwalk_map_and_copy(tmp, out_sg_ori, - cryp->total_in_save - total_in_ori, - total_in_ori, 0); + /* + * Same code as stm32_cryp_irq_read_data(), but we want to store + * block value + */ + for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) + block[i] = stm32_cryp_read(cryp, CRYP_DOUT); + + scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, + cryp->payload_out), 1); + cryp->payload_out -= min_t(size_t, cryp->hw_blocksize, + cryp->payload_out); /* d) change mode back to AES GCM */ cfg &= ~CR_ALGO_MASK; @@ -1374,19 +1315,13 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) stm32_cryp_write(cryp, CRYP_CR, cfg); /* f) write padded data */ - for (i = 0; i < AES_BLOCK_32; i++) { - if (cryp->total_in) - stm32_cryp_write(cryp, CRYP_DIN, tmp[i]); - else - stm32_cryp_write(cryp, CRYP_DIN, 0); - - cryp->total_in -= min_t(size_t, sizeof(u32), cryp->total_in); - } + for (i = 0; i < AES_BLOCK_32; i++) + stm32_cryp_write(cryp, CRYP_DIN, block[i]); /* g) Empty fifo out */ err = stm32_cryp_wait_output(cryp); if (err) { - dev_err(cryp->dev, "Timeout (write gcm header)\n"); + dev_err(cryp->dev, "Timeout (write gcm padded data)\n"); return stm32_cryp_finish_req(cryp, err); } @@ -1399,16 +1334,14 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) static void stm32_cryp_irq_set_npblb(struct stm32_cryp *cryp) { - u32 cfg, payload_bytes; + u32 cfg; /* disable ip, set NPBLB and reneable ip */ cfg = stm32_cryp_read(cryp, CRYP_CR); cfg &= ~CR_CRYPEN; stm32_cryp_write(cryp, CRYP_CR, cfg); - payload_bytes = is_decrypt(cryp) ? cryp->total_in - cryp->authsize : - cryp->total_in; - cfg |= (cryp->hw_blocksize - payload_bytes) << CR_NBPBL_SHIFT; + cfg |= (cryp->hw_blocksize - cryp->payload_in) << CR_NBPBL_SHIFT; cfg |= CR_CRYPEN; stm32_cryp_write(cryp, CRYP_CR, cfg); } @@ -1417,13 +1350,11 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) { int err = 0; u32 cfg, iv1tmp; - u32 cstmp1[AES_BLOCK_32], cstmp2[AES_BLOCK_32], tmp[AES_BLOCK_32]; - size_t last_total_out, total_in_ori = cryp->total_in; - struct scatterlist *out_sg_ori = cryp->out_sg; + u32 cstmp1[AES_BLOCK_32], cstmp2[AES_BLOCK_32]; + u32 block[AES_BLOCK_32] = {0}; unsigned int i; /* 'Special workaround' procedure described in the datasheet */ - cryp->flags |= FLG_CCM_PADDED_WA; /* a) disable ip */ stm32_cryp_write(cryp, CRYP_IMSCR, 0); @@ -1453,7 +1384,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) /* b) pad and write the last block */ stm32_cryp_irq_write_block(cryp); - cryp->total_in = total_in_ori; + /* wait end of process */ err = stm32_cryp_wait_output(cryp); if (err) { dev_err(cryp->dev, "Timeout (wite ccm padded data)\n"); @@ -1461,13 +1392,16 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) } /* c) get and store decrypted data */ - last_total_out = cryp->total_out; - stm32_cryp_irq_read_data(cryp); + /* + * Same code as stm32_cryp_irq_read_data(), but we want to store + * block value + */ + for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) + block[i] = stm32_cryp_read(cryp, CRYP_DOUT); - memset(tmp, 0, sizeof(tmp)); - scatterwalk_map_and_copy(tmp, out_sg_ori, - cryp->total_out_save - last_total_out, - last_total_out, 0); + scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, + cryp->payload_out), 1); + cryp->payload_out -= min_t(size_t, cryp->hw_blocksize, cryp->payload_out); /* d) Load again CRYP_CSGCMCCMxR */ for (i = 0; i < ARRAY_SIZE(cstmp2); i++) @@ -1484,10 +1418,10 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) stm32_cryp_write(cryp, CRYP_CR, cfg); /* g) XOR and write padded data */ - for (i = 0; i < ARRAY_SIZE(tmp); i++) { - tmp[i] ^= cstmp1[i]; - tmp[i] ^= cstmp2[i]; - stm32_cryp_write(cryp, CRYP_DIN, tmp[i]); + for (i = 0; i < ARRAY_SIZE(block); i++) { + block[i] ^= cstmp1[i]; + block[i] ^= cstmp2[i]; + stm32_cryp_write(cryp, CRYP_DIN, block[i]); } /* h) wait for completion */ @@ -1501,30 +1435,34 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) static void stm32_cryp_irq_write_data(struct stm32_cryp *cryp) { - if (unlikely(!cryp->total_in)) { + if (unlikely(!cryp->payload_in)) { dev_warn(cryp->dev, "No more data to process\n"); return; } - if (unlikely(cryp->total_in < AES_BLOCK_SIZE && + if (unlikely(cryp->payload_in < AES_BLOCK_SIZE && (stm32_cryp_get_hw_mode(cryp) == CR_AES_GCM) && is_encrypt(cryp))) { /* Padding for AES GCM encryption */ - if (cryp->caps->padding_wa) + if (cryp->caps->padding_wa) { /* Special case 1 */ - return stm32_cryp_irq_write_gcm_padded_data(cryp); + stm32_cryp_irq_write_gcm_padded_data(cryp); + return; + } /* Setting padding bytes (NBBLB) */ stm32_cryp_irq_set_npblb(cryp); } - if (unlikely((cryp->total_in - cryp->authsize < AES_BLOCK_SIZE) && + if (unlikely((cryp->payload_in < AES_BLOCK_SIZE) && (stm32_cryp_get_hw_mode(cryp) == CR_AES_CCM) && is_decrypt(cryp))) { /* Padding for AES CCM decryption */ - if (cryp->caps->padding_wa) + if (cryp->caps->padding_wa) { /* Special case 2 */ - return stm32_cryp_irq_write_ccm_padded_data(cryp); + stm32_cryp_irq_write_ccm_padded_data(cryp); + return; + } /* Setting padding bytes (NBBLB) */ stm32_cryp_irq_set_npblb(cryp); @@ -1536,192 +1474,60 @@ static void stm32_cryp_irq_write_data(struct stm32_cryp *cryp) stm32_cryp_irq_write_block(cryp); } -static void stm32_cryp_irq_write_gcm_header(struct stm32_cryp *cryp) +static void stm32_cryp_irq_write_gcmccm_header(struct stm32_cryp *cryp) { - int err; - unsigned int i, j; - u32 cfg, *src; - - src = sg_virt(cryp->in_sg) + _walked_in; - - for (i = 0; i < AES_BLOCK_32; i++) { - stm32_cryp_write(cryp, CRYP_DIN, *src); - - src = stm32_cryp_next_in(cryp, src, sizeof(u32)); - cryp->total_in -= min_t(size_t, sizeof(u32), cryp->total_in); - - /* Check if whole header written */ - if ((cryp->total_in_save - cryp->total_in) == - cryp->areq->assoclen) { - /* Write padding if needed */ - for (j = i + 1; j < AES_BLOCK_32; j++) - stm32_cryp_write(cryp, CRYP_DIN, 0); - - /* Wait for completion */ - err = stm32_cryp_wait_busy(cryp); - if (err) { - dev_err(cryp->dev, "Timeout (gcm header)\n"); - return stm32_cryp_finish_req(cryp, err); - } - - if (stm32_cryp_get_input_text_len(cryp)) { - /* Phase 3 : payload */ - cfg = stm32_cryp_read(cryp, CRYP_CR); - cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); - - cfg &= ~CR_PH_MASK; - cfg |= CR_PH_PAYLOAD; - cfg |= CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); - } else { - /* Phase 4 : tag */ - stm32_cryp_write(cryp, CRYP_IMSCR, 0); - stm32_cryp_finish_req(cryp, 0); - } - - break; - } - - if (!cryp->total_in) - break; - } -} + unsigned int i; + u32 block[AES_BLOCK_32] = {0}; + size_t written; -static void stm32_cryp_irq_write_ccm_header(struct stm32_cryp *cryp) -{ - int err; - unsigned int i = 0, j, k; - u32 alen, cfg, *src; - u8 d8[4]; - - src = sg_virt(cryp->in_sg) + _walked_in; - alen = cryp->areq->assoclen; - - if (!_walked_in) { - if (cryp->areq->assoclen <= 65280) { - /* Write first u32 of B1 */ - d8[0] = (alen >> 8) & 0xFF; - d8[1] = alen & 0xFF; - d8[2] = *((u8 *)src); - src = stm32_cryp_next_in(cryp, src, 1); - d8[3] = *((u8 *)src); - src = stm32_cryp_next_in(cryp, src, 1); - - stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8); - i++; - - cryp->total_in -= min_t(size_t, 2, cryp->total_in); - } else { - /* Build the two first u32 of B1 */ - d8[0] = 0xFF; - d8[1] = 0xFE; - d8[2] = alen & 0xFF000000; - d8[3] = alen & 0x00FF0000; - - stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8); - i++; - - d8[0] = alen & 0x0000FF00; - d8[1] = alen & 0x000000FF; - d8[2] = *((u8 *)src); - src = stm32_cryp_next_in(cryp, src, 1); - d8[3] = *((u8 *)src); - src = stm32_cryp_next_in(cryp, src, 1); - - stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8); - i++; - - cryp->total_in -= min_t(size_t, 2, cryp->total_in); - } - } + written = min_t(size_t, AES_BLOCK_SIZE, cryp->header_in); - /* Write next u32 */ - for (; i < AES_BLOCK_32; i++) { - /* Build an u32 */ - memset(d8, 0, sizeof(u32)); - for (k = 0; k < sizeof(u32); k++) { - d8[k] = *((u8 *)src); - src = stm32_cryp_next_in(cryp, src, 1); - - cryp->total_in -= min_t(size_t, 1, cryp->total_in); - if ((cryp->total_in_save - cryp->total_in) == alen) - break; - } + scatterwalk_copychunks(block, &cryp->in_walk, written, 0); + for (i = 0; i < AES_BLOCK_32; i++) + stm32_cryp_write(cryp, CRYP_DIN, block[i]); - stm32_cryp_write(cryp, CRYP_DIN, *(u32 *)d8); - - if ((cryp->total_in_save - cryp->total_in) == alen) { - /* Write padding if needed */ - for (j = i + 1; j < AES_BLOCK_32; j++) - stm32_cryp_write(cryp, CRYP_DIN, 0); - - /* Wait for completion */ - err = stm32_cryp_wait_busy(cryp); - if (err) { - dev_err(cryp->dev, "Timeout (ccm header)\n"); - return stm32_cryp_finish_req(cryp, err); - } - - if (stm32_cryp_get_input_text_len(cryp)) { - /* Phase 3 : payload */ - cfg = stm32_cryp_read(cryp, CRYP_CR); - cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); - - cfg &= ~CR_PH_MASK; - cfg |= CR_PH_PAYLOAD; - cfg |= CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); - } else { - /* Phase 4 : tag */ - stm32_cryp_write(cryp, CRYP_IMSCR, 0); - stm32_cryp_finish_req(cryp, 0); - } + cryp->header_in -= written; - break; - } - } + stm32_crypt_gcmccm_end_header(cryp); } static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg) { struct stm32_cryp *cryp = arg; u32 ph; + u32 it_mask = stm32_cryp_read(cryp, CRYP_IMSCR); if (cryp->irq_status & MISR_OUT) /* Output FIFO IRQ: read data */ - if (unlikely(stm32_cryp_irq_read_data(cryp))) { - /* All bytes processed, finish */ - stm32_cryp_write(cryp, CRYP_IMSCR, 0); - stm32_cryp_finish_req(cryp, 0); - return IRQ_HANDLED; - } + stm32_cryp_irq_read_data(cryp); if (cryp->irq_status & MISR_IN) { - if (is_gcm(cryp)) { + if (is_gcm(cryp) || is_ccm(cryp)) { ph = stm32_cryp_read(cryp, CRYP_CR) & CR_PH_MASK; if (unlikely(ph == CR_PH_HEADER)) /* Write Header */ - stm32_cryp_irq_write_gcm_header(cryp); - else - /* Input FIFO IRQ: write data */ - stm32_cryp_irq_write_data(cryp); - cryp->gcm_ctr++; - } else if (is_ccm(cryp)) { - ph = stm32_cryp_read(cryp, CRYP_CR) & CR_PH_MASK; - if (unlikely(ph == CR_PH_HEADER)) - /* Write Header */ - stm32_cryp_irq_write_ccm_header(cryp); + stm32_cryp_irq_write_gcmccm_header(cryp); else /* Input FIFO IRQ: write data */ stm32_cryp_irq_write_data(cryp); + if (is_gcm(cryp)) + cryp->gcm_ctr++; } else { /* Input FIFO IRQ: write data */ stm32_cryp_irq_write_data(cryp); } } + /* Mask useless interrupts */ + if (!cryp->payload_in && !cryp->header_in) + it_mask &= ~IMSCR_IN; + if (!cryp->payload_out) + it_mask &= ~IMSCR_OUT; + stm32_cryp_write(cryp, CRYP_IMSCR, it_mask); + + if (!cryp->payload_in && !cryp->header_in && !cryp->payload_out) + stm32_cryp_finish_req(cryp, 0); + return IRQ_HANDLED; } @@ -1742,7 +1548,7 @@ static struct skcipher_alg crypto_algs[] = { .base.cra_flags = CRYPTO_ALG_ASYNC, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), - .base.cra_alignmask = 0xf, + .base.cra_alignmask = 0, .base.cra_module = THIS_MODULE, .init = stm32_cryp_init_tfm, @@ -1759,7 +1565,7 @@ static struct skcipher_alg crypto_algs[] = { .base.cra_flags = CRYPTO_ALG_ASYNC, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), - .base.cra_alignmask = 0xf, + .base.cra_alignmask = 0, .base.cra_module = THIS_MODULE, .init = stm32_cryp_init_tfm, @@ -1777,7 +1583,7 @@ static struct skcipher_alg crypto_algs[] = { .base.cra_flags = CRYPTO_ALG_ASYNC, .base.cra_blocksize = 1, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), - .base.cra_alignmask = 0xf, + .base.cra_alignmask = 0, .base.cra_module = THIS_MODULE, .init = stm32_cryp_init_tfm, @@ -1795,7 +1601,7 @@ static struct skcipher_alg crypto_algs[] = { .base.cra_flags = CRYPTO_ALG_ASYNC, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), - .base.cra_alignmask = 0xf, + .base.cra_alignmask = 0, .base.cra_module = THIS_MODULE, .init = stm32_cryp_init_tfm, @@ -1812,7 +1618,7 @@ static struct skcipher_alg crypto_algs[] = { .base.cra_flags = CRYPTO_ALG_ASYNC, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), - .base.cra_alignmask = 0xf, + .base.cra_alignmask = 0, .base.cra_module = THIS_MODULE, .init = stm32_cryp_init_tfm, @@ -1830,7 +1636,7 @@ static struct skcipher_alg crypto_algs[] = { .base.cra_flags = CRYPTO_ALG_ASYNC, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), - .base.cra_alignmask = 0xf, + .base.cra_alignmask = 0, .base.cra_module = THIS_MODULE, .init = stm32_cryp_init_tfm, @@ -1847,7 +1653,7 @@ static struct skcipher_alg crypto_algs[] = { .base.cra_flags = CRYPTO_ALG_ASYNC, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct stm32_cryp_ctx), - .base.cra_alignmask = 0xf, + .base.cra_alignmask = 0, .base.cra_module = THIS_MODULE, .init = stm32_cryp_init_tfm, @@ -1877,7 +1683,7 @@ static struct aead_alg aead_algs[] = { .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct stm32_cryp_ctx), - .cra_alignmask = 0xf, + .cra_alignmask = 0, .cra_module = THIS_MODULE, }, }, @@ -1897,7 +1703,7 @@ static struct aead_alg aead_algs[] = { .cra_flags = CRYPTO_ALG_ASYNC, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct stm32_cryp_ctx), - .cra_alignmask = 0xf, + .cra_alignmask = 0, .cra_module = THIS_MODULE, }, }, @@ -2025,8 +1831,6 @@ err_engine1: list_del(&cryp->list); spin_unlock(&cryp_list.lock); - pm_runtime_disable(dev); - pm_runtime_put_noidle(dev); pm_runtime_disable(dev); pm_runtime_put_noidle(dev); diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index ff5362da118d8c294f5a5a7bc2feb4b4feddb520..16bb52836b28dbb6095dddd113f774db7fd5cb28 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -812,7 +812,7 @@ static void stm32_hash_finish_req(struct ahash_request *req, int err) static int stm32_hash_hw_init(struct stm32_hash_dev *hdev, struct stm32_hash_request_ctx *rctx) { - pm_runtime_resume_and_get(hdev->dev); + pm_runtime_get_sync(hdev->dev); if (!(HASH_FLAGS_INIT & hdev->flags)) { stm32_hash_write(hdev, HASH_CR, HASH_CR_INIT); @@ -961,7 +961,7 @@ static int stm32_hash_export(struct ahash_request *req, void *out) u32 *preg; unsigned int i; - pm_runtime_resume_and_get(hdev->dev); + pm_runtime_get_sync(hdev->dev); while ((stm32_hash_read(hdev, HASH_SR) & HASH_SR_BUSY)) cpu_relax(); @@ -999,7 +999,7 @@ static int stm32_hash_import(struct ahash_request *req, const void *in) preg = rctx->hw_context; - pm_runtime_resume_and_get(hdev->dev); + pm_runtime_get_sync(hdev->dev); stm32_hash_write(hdev, HASH_IMR, *preg++); stm32_hash_write(hdev, HASH_STR, *preg++); diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c index d9948d58b3f4ca79d93bf599a4d87e5e48037a9a..a44db03329391395278c889d0ac6476ebc6baf1f 100644 --- a/drivers/dma-buf/dma-buf.c +++ b/drivers/dma-buf/dma-buf.c @@ -104,6 +104,7 @@ static void dma_buf_release(struct dentry *dentry) if (dmabuf->resv == (struct dma_resv *)&dmabuf[1]) dma_resv_fini(dmabuf->resv); + WARN_ON(!list_empty(&dmabuf->attachments)); module_put(dmabuf->owner); kfree(dmabuf->name); kfree(dmabuf); @@ -1399,6 +1400,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) if (ret) goto error_unlock; + spin_lock(&buf_obj->name_lock); seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n", buf_obj->size, buf_obj->file->f_flags, buf_obj->file->f_mode, @@ -1406,6 +1408,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused) buf_obj->exp_name, file_inode(buf_obj->file)->i_ino, buf_obj->name ?: ""); + spin_unlock(&buf_obj->name_lock); robj = buf_obj->resv; while (true) { diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index d3fbd950be944f7a3bc7c2d231a0573cea5eac70..3e07f961e2f3d75af19b0e88f9346318fc3e6eb2 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -104,7 +104,11 @@ static bool dma_fence_array_signaled(struct dma_fence *fence) { struct dma_fence_array *array = to_dma_fence_array(fence); - return atomic_read(&array->num_pending) <= 0; + if (atomic_read(&array->num_pending) > 0) + return false; + + dma_fence_array_clear_pending_error(array); + return true; } static void dma_fence_array_release(struct dma_fence *fence) diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 4fb22001b2ca9be72ddf65877f84401663c76dec..26d59b48f421cec0b206820f7fdde728a6c51436 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -172,6 +173,7 @@ static long dma_heap_ioctl(struct file *file, unsigned int ucmd, if (nr >= ARRAY_SIZE(dma_heap_ioctl_cmds)) return -EINVAL; + nr = array_index_nospec(nr, ARRAY_SIZE(dma_heap_ioctl_cmds)); /* Get the kernel ioctl cmd that matches */ kcmd = dma_heap_ioctl_cmds[nr]; diff --git a/drivers/dma-buf/heaps/page_pool.c b/drivers/dma-buf/heaps/page_pool.c index 7c34a1ba48bba38176d164693450882aa2034b0c..b79e737bac957b781c18af758a95c8b553136ac5 100644 --- a/drivers/dma-buf/heaps/page_pool.c +++ b/drivers/dma-buf/heaps/page_pool.c @@ -44,9 +44,9 @@ static void dmabuf_page_pool_add(struct dmabuf_page_pool *pool, struct page *pag mutex_lock(&pool->mutex); list_add_tail(&page->lru, &pool->items[index]); pool->count[index]++; - mutex_unlock(&pool->mutex); mod_node_page_state(page_pgdat(page), NR_KERNEL_MISC_RECLAIMABLE, 1 << pool->order); + mutex_unlock(&pool->mutex); } static struct page *dmabuf_page_pool_remove(struct dmabuf_page_pool *pool, int index) diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c index 5f71a73a00a446661f3d555b20e7f5dc2e383473..bbca2e195b4f4be5098ecc1e10d0e01eba10b2ca 100644 --- a/drivers/dma-buf/heaps/system_heap.c +++ b/drivers/dma-buf/heaps/system_heap.c @@ -49,11 +49,12 @@ struct dma_heap_attachment { bool uncached; }; +#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO | __GFP_COMP) +#define MID_ORDER_GFP (LOW_ORDER_GFP | __GFP_NOWARN) #define HIGH_ORDER_GFP (((GFP_HIGHUSER | __GFP_ZERO | __GFP_NOWARN \ | __GFP_NORETRY) & ~__GFP_RECLAIM) \ | __GFP_COMP) -#define LOW_ORDER_GFP (GFP_HIGHUSER | __GFP_ZERO | __GFP_COMP) -static gfp_t order_flags[] = {HIGH_ORDER_GFP, LOW_ORDER_GFP, LOW_ORDER_GFP}; +static gfp_t order_flags[] = {HIGH_ORDER_GFP, MID_ORDER_GFP, LOW_ORDER_GFP}; /* * The selection of the orders used for allocation (1MB, 64K, 4K) is designed * to match with the sizes often found in IOMMUs. Using order 4 pages instead @@ -337,7 +338,7 @@ static void system_heap_buf_free(struct deferred_freelist_item *item, reason = DF_UNDER_PRESSURE; // On failure, just free table = &buffer->sg_table; - for_each_sg(table->sgl, sg, table->nents, i) { + for_each_sgtable_sg(table, sg, i) { struct page *page = sg_page(sg); if (reason == DF_UNDER_PRESSURE) { diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c index 3b53115db2686a773f7958c70033c1c9db2ec70f..90afba0b36fe97514b661d8489be85b20b95966e 100644 --- a/drivers/dma/at_xdmac.c +++ b/drivers/dma/at_xdmac.c @@ -89,6 +89,7 @@ #define AT_XDMAC_CNDC_NDE (0x1 << 0) /* Channel x Next Descriptor Enable */ #define AT_XDMAC_CNDC_NDSUP (0x1 << 1) /* Channel x Next Descriptor Source Update */ #define AT_XDMAC_CNDC_NDDUP (0x1 << 2) /* Channel x Next Descriptor Destination Update */ +#define AT_XDMAC_CNDC_NDVIEW_MASK GENMASK(28, 27) #define AT_XDMAC_CNDC_NDVIEW_NDV0 (0x0 << 3) /* Channel x Next Descriptor View 0 */ #define AT_XDMAC_CNDC_NDVIEW_NDV1 (0x1 << 3) /* Channel x Next Descriptor View 1 */ #define AT_XDMAC_CNDC_NDVIEW_NDV2 (0x2 << 3) /* Channel x Next Descriptor View 2 */ @@ -145,7 +146,7 @@ #define AT_XDMAC_CC_WRIP (0x1 << 23) /* Write in Progress (read only) */ #define AT_XDMAC_CC_WRIP_DONE (0x0 << 23) #define AT_XDMAC_CC_WRIP_IN_PROGRESS (0x1 << 23) -#define AT_XDMAC_CC_PERID(i) (0x7f & (i) << 24) /* Channel Peripheral Identifier */ +#define AT_XDMAC_CC_PERID(i) ((0x7f & (i)) << 24) /* Channel Peripheral Identifier */ #define AT_XDMAC_CDS_MSP 0x2C /* Channel Data Stride Memory Set Pattern */ #define AT_XDMAC_CSUS 0x30 /* Channel Source Microblock Stride */ #define AT_XDMAC_CDUS 0x34 /* Channel Destination Microblock Stride */ @@ -220,15 +221,15 @@ struct at_xdmac { /* Linked List Descriptor */ struct at_xdmac_lld { - dma_addr_t mbr_nda; /* Next Descriptor Member */ - u32 mbr_ubc; /* Microblock Control Member */ - dma_addr_t mbr_sa; /* Source Address Member */ - dma_addr_t mbr_da; /* Destination Address Member */ - u32 mbr_cfg; /* Configuration Register */ - u32 mbr_bc; /* Block Control Register */ - u32 mbr_ds; /* Data Stride Register */ - u32 mbr_sus; /* Source Microblock Stride Register */ - u32 mbr_dus; /* Destination Microblock Stride Register */ + u32 mbr_nda; /* Next Descriptor Member */ + u32 mbr_ubc; /* Microblock Control Member */ + u32 mbr_sa; /* Source Address Member */ + u32 mbr_da; /* Destination Address Member */ + u32 mbr_cfg; /* Configuration Register */ + u32 mbr_bc; /* Block Control Register */ + u32 mbr_ds; /* Data Stride Register */ + u32 mbr_sus; /* Source Microblock Stride Register */ + u32 mbr_dus; /* Destination Microblock Stride Register */ }; /* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */ @@ -338,9 +339,6 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, dev_vdbg(chan2dev(&atchan->chan), "%s: desc 0x%p\n", __func__, first); - if (at_xdmac_chan_is_enabled(atchan)) - return; - /* Set transfer as active to not try to start it again. */ first->active_xfer = true; @@ -356,7 +354,8 @@ static void at_xdmac_start_xfer(struct at_xdmac_chan *atchan, */ if (at_xdmac_chan_is_cyclic(atchan)) reg = AT_XDMAC_CNDC_NDVIEW_NDV1; - else if (first->lld.mbr_ubc & AT_XDMAC_MBR_UBC_NDV3) + else if ((first->lld.mbr_ubc & + AT_XDMAC_CNDC_NDVIEW_MASK) == AT_XDMAC_MBR_UBC_NDV3) reg = AT_XDMAC_CNDC_NDVIEW_NDV3; else reg = AT_XDMAC_CNDC_NDVIEW_NDV2; @@ -427,13 +426,12 @@ static dma_cookie_t at_xdmac_tx_submit(struct dma_async_tx_descriptor *tx) spin_lock_irqsave(&atchan->lock, irqflags); cookie = dma_cookie_assign(tx); + list_add_tail(&desc->xfer_node, &atchan->xfers_list); + spin_unlock_irqrestore(&atchan->lock, irqflags); + dev_vdbg(chan2dev(tx->chan), "%s: atchan 0x%p, add desc 0x%p to xfers_list\n", __func__, atchan, desc); - list_add_tail(&desc->xfer_node, &atchan->xfers_list); - if (list_is_singular(&atchan->xfers_list)) - at_xdmac_start_xfer(atchan, desc); - spin_unlock_irqrestore(&atchan->lock, irqflags); return cookie; } @@ -1563,14 +1561,17 @@ static void at_xdmac_handle_cyclic(struct at_xdmac_chan *atchan) struct at_xdmac_desc *desc; struct dma_async_tx_descriptor *txd; - if (!list_empty(&atchan->xfers_list)) { - desc = list_first_entry(&atchan->xfers_list, - struct at_xdmac_desc, xfer_node); - txd = &desc->tx_dma_desc; - - if (txd->flags & DMA_PREP_INTERRUPT) - dmaengine_desc_get_callback_invoke(txd, NULL); + spin_lock_irq(&atchan->lock); + if (list_empty(&atchan->xfers_list)) { + spin_unlock_irq(&atchan->lock); + return; } + desc = list_first_entry(&atchan->xfers_list, struct at_xdmac_desc, + xfer_node); + spin_unlock_irq(&atchan->lock); + txd = &desc->tx_dma_desc; + if (txd->flags & DMA_PREP_INTERRUPT) + dmaengine_desc_get_callback_invoke(txd, NULL); } static void at_xdmac_handle_error(struct at_xdmac_chan *atchan) @@ -1724,11 +1725,9 @@ static void at_xdmac_issue_pending(struct dma_chan *chan) dev_dbg(chan2dev(&atchan->chan), "%s\n", __func__); - if (!at_xdmac_chan_is_cyclic(atchan)) { - spin_lock_irqsave(&atchan->lock, flags); - at_xdmac_advance_work(atchan); - spin_unlock_irqrestore(&atchan->lock, flags); - } + spin_lock_irqsave(&atchan->lock, flags); + at_xdmac_advance_work(atchan); + spin_unlock_irqrestore(&atchan->lock, flags); return; } diff --git a/drivers/dma/dmaengine.h b/drivers/dma/dmaengine.h index 1bfbd64b13717e0f7e1136c191986fc6384c99ac..53f16d3f002943c609ffe7027213e8dae23a25ab 100644 --- a/drivers/dma/dmaengine.h +++ b/drivers/dma/dmaengine.h @@ -176,7 +176,7 @@ dmaengine_desc_get_callback_invoke(struct dma_async_tx_descriptor *tx, static inline bool dmaengine_desc_callback_valid(struct dmaengine_desc_callback *cb) { - return (cb->callback) ? true : false; + return cb->callback || cb->callback_result; } struct dma_chan *dma_get_slave_channel(struct dma_chan *chan); diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index b84303be8edf55d5ed1dfdf80e5705d8f30fc918..4eb63f1ad224765d690256e9b5eef55d0abe50c7 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -728,12 +728,6 @@ static int mmp_pdma_config_write(struct dma_chan *dchan, chan->dir = direction; chan->dev_addr = addr; - /* FIXME: drivers should be ported over to use the filter - * function. Once that's done, the following two lines can - * be removed. - */ - if (cfg->slave_id) - chan->drcmr = cfg->slave_id; return 0; } diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index 349fb312c8725678a2a1268a4371047dbcb2ed11..b4ef4f19f7decb9c007a3ecfa369ba974fd1faca 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -911,13 +911,6 @@ static void pxad_get_config(struct pxad_chan *chan, *dcmd |= PXA_DCMD_BURST16; else if (maxburst == 32) *dcmd |= PXA_DCMD_BURST32; - - /* FIXME: drivers should be ported over to use the filter - * function. Once that's done, the following two lines can - * be removed. - */ - if (chan->cfg.slave_id) - chan->drcmr = chan->cfg.slave_id; } static struct dma_async_tx_descriptor * diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 991a7b5da29f085625356abf60344f4e8be8fc20..7c268d1bd205071d70f638d915c05f50fedd5cb2 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1844,8 +1844,13 @@ static int rcar_dmac_probe(struct platform_device *pdev) dmac->dev = &pdev->dev; platform_set_drvdata(pdev, dmac); - dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); - dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + ret = dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); + if (ret) + return ret; + + ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; ret = rcar_dmac_parse_of(&pdev->dev, dmac); if (ret < 0) diff --git a/drivers/dma/sh/shdma-base.c b/drivers/dma/sh/shdma-base.c index 7f72b3f4cd1aefd083e1c8d8017d682820091cf0..19ac95c0098f0f7a245c6657ee4af07dd9996f97 100644 --- a/drivers/dma/sh/shdma-base.c +++ b/drivers/dma/sh/shdma-base.c @@ -115,8 +115,10 @@ static dma_cookie_t shdma_tx_submit(struct dma_async_tx_descriptor *tx) ret = pm_runtime_get(schan->dev); spin_unlock_irq(&schan->chan_lock); - if (ret < 0) + if (ret < 0) { dev_err(schan->dev, "%s(): GET = %d\n", __func__, ret); + pm_runtime_put(schan->dev); + } pm_runtime_barrier(schan->dev); diff --git a/drivers/dma/st_fdma.c b/drivers/dma/st_fdma.c index 962b6e05287b5f46f03dcb7df7a0f56cf9c7c80c..d95c421877fb7361b1da14c8ed8a93e34faea985 100644 --- a/drivers/dma/st_fdma.c +++ b/drivers/dma/st_fdma.c @@ -874,4 +874,4 @@ MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("STMicroelectronics FDMA engine driver"); MODULE_AUTHOR("Ludovic.barre "); MODULE_AUTHOR("Peter Griffin "); -MODULE_ALIAS("platform: " DRIVER_NAME); +MODULE_ALIAS("platform:" DRIVER_NAME); diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c index bddd3b23f33fc8dc54ff25b1667ae1d4ad93297a..f04bcffd3c24a847ca2a88a80baf1c0345a04f14 100644 --- a/drivers/dma/stm32-dmamux.c +++ b/drivers/dma/stm32-dmamux.c @@ -292,10 +292,12 @@ static int stm32_dmamux_probe(struct platform_device *pdev) ret = of_dma_router_register(node, stm32_dmamux_route_allocate, &stm32_dmamux->dmarouter); if (ret) - goto err_clk; + goto pm_disable; return 0; +pm_disable: + pm_runtime_disable(&pdev->dev); err_clk: clk_disable_unprepare(stm32_dmamux->clk); diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 9d473923712adab957e8dcbc200d496b2c786c37..fe36738f2dd7e35f71b46669e6d1bfd761236add 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -184,7 +184,7 @@ #define STM32_MDMA_CTBR(x) (0x68 + 0x40 * (x)) #define STM32_MDMA_CTBR_DBUS BIT(17) #define STM32_MDMA_CTBR_SBUS BIT(16) -#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(7, 0) +#define STM32_MDMA_CTBR_TSEL_MASK GENMASK(5, 0) #define STM32_MDMA_CTBR_TSEL(n) STM32_MDMA_SET(n, \ STM32_MDMA_CTBR_TSEL_MASK) diff --git a/drivers/dma/uniphier-xdmac.c b/drivers/dma/uniphier-xdmac.c index d6b8a202474f4a8f817da25fb689701538fa7460..290836b7e1be2b9ac86dbd437038d8c828a0832e 100644 --- a/drivers/dma/uniphier-xdmac.c +++ b/drivers/dma/uniphier-xdmac.c @@ -131,8 +131,9 @@ uniphier_xdmac_next_desc(struct uniphier_xdmac_chan *xc) static void uniphier_xdmac_chan_start(struct uniphier_xdmac_chan *xc, struct uniphier_xdmac_desc *xd) { - u32 src_mode, src_addr, src_width; - u32 dst_mode, dst_addr, dst_width; + u32 src_mode, src_width; + u32 dst_mode, dst_width; + dma_addr_t src_addr, dst_addr; u32 val, its, tnum; enum dma_slave_buswidth buswidth; diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index e91cf1147a4e0f4740a65f39560c35bc5a5ecec0..be38fd71f731a5c4989cff9a340734e4d552adba 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c @@ -349,7 +349,7 @@ static int altr_sdram_probe(struct platform_device *pdev) if (irq < 0) { edac_printk(KERN_ERR, EDAC_MC, "No irq %d in DT\n", irq); - return -ENODEV; + return irq; } /* Arria10 has a 2nd IRQ */ diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index b36d5879b91e05df07052721339541fcf2130c55..f5635dfa9acf6024b0d6f4ab2a8d39ec9744e240 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -786,12 +786,14 @@ static void debug_dump_dramcfg_low(struct amd64_pvt *pvt, u32 dclr, int chan) #define CS_ODD_PRIMARY BIT(1) #define CS_EVEN_SECONDARY BIT(2) #define CS_ODD_SECONDARY BIT(3) +#define CS_3R_INTERLEAVE BIT(4) #define CS_EVEN (CS_EVEN_PRIMARY | CS_EVEN_SECONDARY) #define CS_ODD (CS_ODD_PRIMARY | CS_ODD_SECONDARY) static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) { + u8 base, count = 0; int cs_mode = 0; if (csrow_enabled(2 * dimm, ctrl, pvt)) @@ -804,6 +806,20 @@ static int f17_get_cs_mode(int dimm, u8 ctrl, struct amd64_pvt *pvt) if (csrow_sec_enabled(2 * dimm + 1, ctrl, pvt)) cs_mode |= CS_ODD_SECONDARY; + /* + * 3 Rank inteleaving support. + * There should be only three bases enabled and their two masks should + * be equal. + */ + for_each_chip_select(base, ctrl, pvt) + count += csrow_enabled(base, ctrl, pvt); + + if (count == 3 && + pvt->csels[ctrl].csmasks[0] == pvt->csels[ctrl].csmasks[1]) { + edac_dbg(1, "3R interleaving in use.\n"); + cs_mode |= CS_3R_INTERLEAVE; + } + return cs_mode; } @@ -1612,10 +1628,14 @@ static int f17_addr_mask_to_cs_size(struct amd64_pvt *pvt, u8 umc, * * The MSB is the number of bits in the full mask because BIT[0] is * always 0. + * + * In the special 3 Rank interleaving case, a single bit is flipped + * without swapping with the most significant bit. This can be handled + * by keeping the MSB where it is and ignoring the single zero bit. */ msb = fls(addr_mask_orig) - 1; weight = hweight_long(addr_mask_orig); - num_zero_bits = msb - weight; + num_zero_bits = msb - weight - !!(cs_mode & CS_3R_INTERLEAVE); /* Take the number of zero bits off from the top of the mask. */ addr_mask_deinterleaved = GENMASK_ULL(msb - num_zero_bits, 1); diff --git a/drivers/edac/armada_xp_edac.c b/drivers/edac/armada_xp_edac.c index e3e757513d1bc38eee01eb0c6836ecb8a63bd9a3..b1f46a974b9e0001b5333ae8c00fa49b4eafb271 100644 --- a/drivers/edac/armada_xp_edac.c +++ b/drivers/edac/armada_xp_edac.c @@ -178,7 +178,7 @@ static void axp_mc_check(struct mem_ctl_info *mci) "details unavailable (multiple errors)"); if (cnt_dbe) edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, - cnt_sbe, /* error count */ + cnt_dbe, /* error count */ 0, 0, 0, /* pfn, offset, syndrome */ -1, -1, -1, /* top, mid, low layer */ mci->ctl_name, diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 01ff71f7b64562f37459b5dd6dd86835458d215e..f4eb071327be08163e9463d9abbbb3ac1885f9c1 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -210,7 +210,7 @@ void *edac_align_ptr(void **p, unsigned int size, int n_elems) else return (char *)ptr; - r = (unsigned long)p % align; + r = (unsigned long)ptr % align; if (r == 0) return (char *)ptr; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 4c626fcd4dcbb5f43f1aa40749e9d561d8815116..1522d4aa2ca62a94149746415b4bff1ed194d2c2 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1052,7 +1052,7 @@ static u64 haswell_get_tohm(struct sbridge_pvt *pvt) pci_read_config_dword(pvt->info.pci_vtd, HASWELL_TOHM_1, ®); rc = ((reg << 6) | rc) << 26; - return rc | 0x1ffffff; + return rc | 0x3ffffff; } static u64 knl_get_tolm(struct sbridge_pvt *pvt) diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c index 1a801a5d3b08b3c66bd117a6a55b7cf475992e0c..92906b56b1a2b9f64045fe2acacbed2b7a39fa7a 100644 --- a/drivers/edac/synopsys_edac.c +++ b/drivers/edac/synopsys_edac.c @@ -1351,8 +1351,7 @@ static int mc_probe(struct platform_device *pdev) } } - if (of_device_is_compatible(pdev->dev.of_node, - "xlnx,zynqmp-ddrc-2.40a")) + if (priv->p_data->quirks & DDR_ECC_INTR_SUPPORT) setup_address_map(priv); #endif diff --git a/drivers/edac/xgene_edac.c b/drivers/edac/xgene_edac.c index 1d2c27a00a4a8348eb5e0d0c203eafa4520c7030..cd1eefeff1923192735c787d05143eb6fc7661e2 100644 --- a/drivers/edac/xgene_edac.c +++ b/drivers/edac/xgene_edac.c @@ -1919,7 +1919,7 @@ static int xgene_edac_probe(struct platform_device *pdev) irq = platform_get_irq(pdev, i); if (irq < 0) { dev_err(&pdev->dev, "No IRQ resource\n"); - rc = -EINVAL; + rc = irq; goto out_err; } rc = devm_request_irq(&pdev->dev, irq, diff --git a/drivers/extcon/extcon.c b/drivers/extcon/extcon.c index e7a9561a826d3908a8c0ffb3c7e61f8c9ac856d4..872c8aee8b2c7df1d8cf24b0782d73f22c985b88 100644 --- a/drivers/extcon/extcon.c +++ b/drivers/extcon/extcon.c @@ -486,7 +486,7 @@ EXPORT_SYMBOL_GPL(extcon_sync); * * Returns 0 if success or error number if fail. */ -int extcon_get_state(struct extcon_dev *edev, const unsigned int id) +int extcon_get_state(struct extcon_dev *edev, unsigned int id) { int index, state; unsigned long flags; diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 359f6516b5ac1e9add2b4d807823004e27746424..9a7fe24dfd0824d11571967198f3c31181dec222 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -296,6 +296,7 @@ config TURRIS_MOX_RWTM other manufacturing data and also utilize the Entropy Bit Generator for hardware random number generation. +source "drivers/firmware/arm_ffa/Kconfig" source "drivers/firmware/broadcom/Kconfig" source "drivers/firmware/google/Kconfig" source "drivers/firmware/efi/Kconfig" diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 523173cbff33568d58cdfabf824044dd7b634ded..3c2af2e98def81e9af5223c2042ce1d196bbcafe 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_TI_SCI_PROTOCOL) += ti_sci.o obj-$(CONFIG_TRUSTED_FOUNDATIONS) += trusted_foundations.o obj-$(CONFIG_TURRIS_MOX_RWTM) += turris-mox-rwtm.o +obj-y += arm_ffa/ obj-y += arm_scmi/ obj-y += broadcom/ obj-y += meson/ diff --git a/drivers/firmware/arm_ffa/Kconfig b/drivers/firmware/arm_ffa/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..5e3ae5cf82e8e1d4b84671ba749f7cdf5d18e8b8 --- /dev/null +++ b/drivers/firmware/arm_ffa/Kconfig @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0-only +config ARM_FFA_TRANSPORT + tristate "Arm Firmware Framework for Armv8-A" + depends on OF + depends on ARM64 + default n + help + This Firmware Framework(FF) for Arm A-profile processors describes + interfaces that standardize communication between the various + software images which includes communication between images in + the Secure world and Normal world. It also leverages the + virtualization extension to isolate software images provided + by an ecosystem of vendors from each other. + + This driver provides interface for all the client drivers making + use of the features offered by ARM FF-A. + +config ARM_FFA_SMCCC + bool + default ARM_FFA_TRANSPORT + depends on ARM64 && HAVE_ARM_SMCCC_DISCOVERY diff --git a/drivers/firmware/arm_ffa/Makefile b/drivers/firmware/arm_ffa/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..9d9f375232003e0aeaf66730fda99d1861f7b467 --- /dev/null +++ b/drivers/firmware/arm_ffa/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only +ffa-bus-y = bus.o +ffa-driver-y = driver.o +ffa-transport-$(CONFIG_ARM_FFA_SMCCC) += smccc.o +ffa-module-objs := $(ffa-bus-y) $(ffa-driver-y) $(ffa-transport-y) +obj-$(CONFIG_ARM_FFA_TRANSPORT) = ffa-module.o diff --git a/drivers/firmware/arm_ffa/bus.c b/drivers/firmware/arm_ffa/bus.c new file mode 100644 index 0000000000000000000000000000000000000000..fca1e311ea6c761ffeb0bd20b7eb23961d9f9c48 --- /dev/null +++ b/drivers/firmware/arm_ffa/bus.c @@ -0,0 +1,220 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 ARM Ltd. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" + +static int ffa_device_match(struct device *dev, struct device_driver *drv) +{ + const struct ffa_device_id *id_table; + struct ffa_device *ffa_dev; + + id_table = to_ffa_driver(drv)->id_table; + ffa_dev = to_ffa_dev(dev); + + while (!uuid_is_null(&id_table->uuid)) { + /* + * FF-A v1.0 doesn't provide discovery of UUIDs, just the + * partition IDs, so fetch the partitions IDs for this + * id_table UUID and assign the UUID to the device if the + * partition ID matches + */ + if (uuid_is_null(&ffa_dev->uuid)) + ffa_device_match_uuid(ffa_dev, &id_table->uuid); + + if (uuid_equal(&ffa_dev->uuid, &id_table->uuid)) + return 1; + id_table++; + } + + return 0; +} + +static int ffa_device_probe(struct device *dev) +{ + struct ffa_driver *ffa_drv = to_ffa_driver(dev->driver); + struct ffa_device *ffa_dev = to_ffa_dev(dev); + + return ffa_drv->probe(ffa_dev); +} + +static int ffa_device_remove(struct device *dev) +{ + struct ffa_driver *ffa_drv = to_ffa_driver(dev->driver); + + ffa_drv->remove(to_ffa_dev(dev)); + + return 0; +} + +static int ffa_device_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct ffa_device *ffa_dev = to_ffa_dev(dev); + + return add_uevent_var(env, "MODALIAS=arm_ffa:%04x:%pUb", + ffa_dev->vm_id, &ffa_dev->uuid); +} + +static ssize_t partition_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ffa_device *ffa_dev = to_ffa_dev(dev); + + return sprintf(buf, "0x%04x\n", ffa_dev->vm_id); +} +static DEVICE_ATTR_RO(partition_id); + +static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ffa_device *ffa_dev = to_ffa_dev(dev); + + return sprintf(buf, "%pUb\n", &ffa_dev->uuid); +} +static DEVICE_ATTR_RO(uuid); + +static struct attribute *ffa_device_attributes_attrs[] = { + &dev_attr_partition_id.attr, + &dev_attr_uuid.attr, + NULL, +}; +ATTRIBUTE_GROUPS(ffa_device_attributes); + +struct bus_type ffa_bus_type = { + .name = "arm_ffa", + .match = ffa_device_match, + .probe = ffa_device_probe, + .remove = ffa_device_remove, + .uevent = ffa_device_uevent, + .dev_groups = ffa_device_attributes_groups, +}; +EXPORT_SYMBOL_GPL(ffa_bus_type); + +int ffa_driver_register(struct ffa_driver *driver, struct module *owner, + const char *mod_name) +{ + int ret; + + if (!driver->probe) + return -EINVAL; + + driver->driver.bus = &ffa_bus_type; + driver->driver.name = driver->name; + driver->driver.owner = owner; + driver->driver.mod_name = mod_name; + + ret = driver_register(&driver->driver); + if (!ret) + pr_debug("registered new ffa driver %s\n", driver->name); + + return ret; +} +EXPORT_SYMBOL_GPL(ffa_driver_register); + +void ffa_driver_unregister(struct ffa_driver *driver) +{ + driver_unregister(&driver->driver); +} +EXPORT_SYMBOL_GPL(ffa_driver_unregister); + +static void ffa_release_device(struct device *dev) +{ + struct ffa_device *ffa_dev = to_ffa_dev(dev); + + kfree(ffa_dev); +} + +static int __ffa_devices_unregister(struct device *dev, void *data) +{ + device_unregister(dev); + + return 0; +} + +static void ffa_devices_unregister(void) +{ + bus_for_each_dev(&ffa_bus_type, NULL, NULL, + __ffa_devices_unregister); +} + +bool ffa_device_is_valid(struct ffa_device *ffa_dev) +{ + bool valid = false; + struct device *dev = NULL; + struct ffa_device *tmp_dev; + + do { + dev = bus_find_next_device(&ffa_bus_type, dev); + tmp_dev = to_ffa_dev(dev); + if (tmp_dev == ffa_dev) { + valid = true; + break; + } + put_device(dev); + } while (dev); + + put_device(dev); + + return valid; +} + +struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id) +{ + int ret; + struct device *dev; + struct ffa_device *ffa_dev; + + ffa_dev = kzalloc(sizeof(*ffa_dev), GFP_KERNEL); + if (!ffa_dev) + return NULL; + + dev = &ffa_dev->dev; + dev->bus = &ffa_bus_type; + dev->release = ffa_release_device; + dev_set_name(&ffa_dev->dev, "arm-ffa-%04x", vm_id); + + ffa_dev->vm_id = vm_id; + uuid_copy(&ffa_dev->uuid, uuid); + + ret = device_register(&ffa_dev->dev); + if (ret) { + dev_err(dev, "unable to register device %s err=%d\n", + dev_name(dev), ret); + put_device(dev); + return NULL; + } + + return ffa_dev; +} +EXPORT_SYMBOL_GPL(ffa_device_register); + +void ffa_device_unregister(struct ffa_device *ffa_dev) +{ + if (!ffa_dev) + return; + + device_unregister(&ffa_dev->dev); +} +EXPORT_SYMBOL_GPL(ffa_device_unregister); + +int arm_ffa_bus_init(void) +{ + return bus_register(&ffa_bus_type); +} + +void arm_ffa_bus_exit(void) +{ + ffa_devices_unregister(); + bus_unregister(&ffa_bus_type); +} diff --git a/drivers/firmware/arm_ffa/common.h b/drivers/firmware/arm_ffa/common.h new file mode 100644 index 0000000000000000000000000000000000000000..d6eccf1fd3f68a3b1f6ad34f4188cd4b1c983201 --- /dev/null +++ b/drivers/firmware/arm_ffa/common.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2021 ARM Ltd. + */ + +#ifndef _FFA_COMMON_H +#define _FFA_COMMON_H + +#include +#include +#include + +typedef struct arm_smccc_1_2_regs ffa_value_t; + +typedef void (ffa_fn)(ffa_value_t, ffa_value_t *); + +int arm_ffa_bus_init(void); +void arm_ffa_bus_exit(void); +bool ffa_device_is_valid(struct ffa_device *ffa_dev); +void ffa_device_match_uuid(struct ffa_device *ffa_dev, const uuid_t *uuid); + +#ifdef CONFIG_ARM_FFA_SMCCC +int __init ffa_transport_init(ffa_fn **invoke_ffa_fn); +#else +static inline int __init ffa_transport_init(ffa_fn **invoke_ffa_fn) +{ + return -EOPNOTSUPP; +} +#endif + +#endif /* _FFA_COMMON_H */ diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c new file mode 100644 index 0000000000000000000000000000000000000000..fc6d3b9bd4724fe3e342287194c8d22403c18f66 --- /dev/null +++ b/drivers/firmware/arm_ffa/driver.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Arm Firmware Framework for ARMv8-A(FFA) interface driver + * + * The Arm FFA specification[1] describes a software architecture to + * leverages the virtualization extension to isolate software images + * provided by an ecosystem of vendors from each other and describes + * interfaces that standardize communication between the various software + * images including communication between images in the Secure world and + * Normal world. Any Hypervisor could use the FFA interfaces to enable + * communication between VMs it manages. + * + * The Hypervisor a.k.a Partition managers in FFA terminology can assign + * system resources(Memory regions, Devices, CPU cycles) to the partitions + * and manage isolation amongst them. + * + * [1] https://developer.arm.com/docs/den0077/latest + * + * Copyright (C) 2021 ARM Ltd. + */ + +#define DRIVER_NAME "ARM FF-A" +#define pr_fmt(fmt) DRIVER_NAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" + +#define FFA_DRIVER_VERSION FFA_VERSION_1_0 +#define FFA_MIN_VERSION FFA_VERSION_1_0 + +#define SENDER_ID_MASK GENMASK(31, 16) +#define RECEIVER_ID_MASK GENMASK(15, 0) +#define SENDER_ID(x) ((u16)(FIELD_GET(SENDER_ID_MASK, (x)))) +#define RECEIVER_ID(x) ((u16)(FIELD_GET(RECEIVER_ID_MASK, (x)))) +#define PACK_TARGET_INFO(s, r) \ + (FIELD_PREP(SENDER_ID_MASK, (s)) | FIELD_PREP(RECEIVER_ID_MASK, (r))) + +/* + * Keeping RX TX buffer size as 4K for now + * 64K may be preferred to keep it min a page in 64K PAGE_SIZE config + */ +#define RXTX_BUFFER_SIZE SZ_4K + +static ffa_fn *invoke_ffa_fn; + +static const int ffa_linux_errmap[] = { + /* better than switch case as long as return value is continuous */ + 0, /* FFA_RET_SUCCESS */ + -EOPNOTSUPP, /* FFA_RET_NOT_SUPPORTED */ + -EINVAL, /* FFA_RET_INVALID_PARAMETERS */ + -ENOMEM, /* FFA_RET_NO_MEMORY */ + -EBUSY, /* FFA_RET_BUSY */ + -EINTR, /* FFA_RET_INTERRUPTED */ + -EACCES, /* FFA_RET_DENIED */ + -EAGAIN, /* FFA_RET_RETRY */ + -ECANCELED, /* FFA_RET_ABORTED */ +}; + +static inline int ffa_to_linux_errno(int errno) +{ + int err_idx = -errno; + + if (err_idx >= 0 && err_idx < ARRAY_SIZE(ffa_linux_errmap)) + return ffa_linux_errmap[err_idx]; + return -EINVAL; +} + +struct ffa_drv_info { + u32 version; + u16 vm_id; + struct mutex rx_lock; /* lock to protect Rx buffer */ + struct mutex tx_lock; /* lock to protect Tx buffer */ + void *rx_buffer; + void *tx_buffer; +}; + +static struct ffa_drv_info *drv_info; + +/* + * The driver must be able to support all the versions from the earliest + * supported FFA_MIN_VERSION to the latest supported FFA_DRIVER_VERSION. + * The specification states that if firmware supports a FFA implementation + * that is incompatible with and at a greater version number than specified + * by the caller(FFA_DRIVER_VERSION passed as parameter to FFA_VERSION), + * it must return the NOT_SUPPORTED error code. + */ +static u32 ffa_compatible_version_find(u32 version) +{ + u16 major = FFA_MAJOR_VERSION(version), minor = FFA_MINOR_VERSION(version); + u16 drv_major = FFA_MAJOR_VERSION(FFA_DRIVER_VERSION); + u16 drv_minor = FFA_MINOR_VERSION(FFA_DRIVER_VERSION); + + if ((major < drv_major) || (major == drv_major && minor <= drv_minor)) + return version; + + pr_info("Firmware version higher than driver version, downgrading\n"); + return FFA_DRIVER_VERSION; +} + +static int ffa_version_check(u32 *version) +{ + ffa_value_t ver; + + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_VERSION, .a1 = FFA_DRIVER_VERSION, + }, &ver); + + if (ver.a0 == FFA_RET_NOT_SUPPORTED) { + pr_info("FFA_VERSION returned not supported\n"); + return -EOPNOTSUPP; + } + + if (ver.a0 < FFA_MIN_VERSION) { + pr_err("Incompatible v%d.%d! Earliest supported v%d.%d\n", + FFA_MAJOR_VERSION(ver.a0), FFA_MINOR_VERSION(ver.a0), + FFA_MAJOR_VERSION(FFA_MIN_VERSION), + FFA_MINOR_VERSION(FFA_MIN_VERSION)); + return -EINVAL; + } + + pr_info("Driver version %d.%d\n", FFA_MAJOR_VERSION(FFA_DRIVER_VERSION), + FFA_MINOR_VERSION(FFA_DRIVER_VERSION)); + pr_info("Firmware version %d.%d found\n", FFA_MAJOR_VERSION(ver.a0), + FFA_MINOR_VERSION(ver.a0)); + *version = ffa_compatible_version_find(ver.a0); + + return 0; +} + +static int ffa_rx_release(void) +{ + ffa_value_t ret; + + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_RX_RELEASE, + }, &ret); + + if (ret.a0 == FFA_ERROR) + return ffa_to_linux_errno((int)ret.a2); + + /* check for ret.a0 == FFA_RX_RELEASE ? */ + + return 0; +} + +static int ffa_rxtx_map(phys_addr_t tx_buf, phys_addr_t rx_buf, u32 pg_cnt) +{ + ffa_value_t ret; + + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_FN_NATIVE(RXTX_MAP), + .a1 = tx_buf, .a2 = rx_buf, .a3 = pg_cnt, + }, &ret); + + if (ret.a0 == FFA_ERROR) + return ffa_to_linux_errno((int)ret.a2); + + return 0; +} + +static int ffa_rxtx_unmap(u16 vm_id) +{ + ffa_value_t ret; + + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_RXTX_UNMAP, .a1 = PACK_TARGET_INFO(vm_id, 0), + }, &ret); + + if (ret.a0 == FFA_ERROR) + return ffa_to_linux_errno((int)ret.a2); + + return 0; +} + +/* buffer must be sizeof(struct ffa_partition_info) * num_partitions */ +static int +__ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3, + struct ffa_partition_info *buffer, int num_partitions) +{ + int count; + ffa_value_t partition_info; + + mutex_lock(&drv_info->rx_lock); + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_PARTITION_INFO_GET, + .a1 = uuid0, .a2 = uuid1, .a3 = uuid2, .a4 = uuid3, + }, &partition_info); + + if (partition_info.a0 == FFA_ERROR) { + mutex_unlock(&drv_info->rx_lock); + return ffa_to_linux_errno((int)partition_info.a2); + } + + count = partition_info.a2; + + if (buffer && count <= num_partitions) + memcpy(buffer, drv_info->rx_buffer, sizeof(*buffer) * count); + + ffa_rx_release(); + + mutex_unlock(&drv_info->rx_lock); + + return count; +} + +/* buffer is allocated and caller must free the same if returned count > 0 */ +static int +ffa_partition_probe(const uuid_t *uuid, struct ffa_partition_info **buffer) +{ + int count; + u32 uuid0_4[4]; + struct ffa_partition_info *pbuf; + + export_uuid((u8 *)uuid0_4, uuid); + count = __ffa_partition_info_get(uuid0_4[0], uuid0_4[1], uuid0_4[2], + uuid0_4[3], NULL, 0); + if (count <= 0) + return count; + + pbuf = kcalloc(count, sizeof(*pbuf), GFP_KERNEL); + if (!pbuf) + return -ENOMEM; + + count = __ffa_partition_info_get(uuid0_4[0], uuid0_4[1], uuid0_4[2], + uuid0_4[3], pbuf, count); + if (count <= 0) + kfree(pbuf); + else + *buffer = pbuf; + + return count; +} + +#define VM_ID_MASK GENMASK(15, 0) +static int ffa_id_get(u16 *vm_id) +{ + ffa_value_t id; + + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_ID_GET, + }, &id); + + if (id.a0 == FFA_ERROR) + return ffa_to_linux_errno((int)id.a2); + + *vm_id = FIELD_GET(VM_ID_MASK, (id.a2)); + + return 0; +} + +static int ffa_msg_send_direct_req(u16 src_id, u16 dst_id, bool mode_32bit, + struct ffa_send_direct_data *data) +{ + u32 req_id, resp_id, src_dst_ids = PACK_TARGET_INFO(src_id, dst_id); + ffa_value_t ret; + + if (mode_32bit) { + req_id = FFA_MSG_SEND_DIRECT_REQ; + resp_id = FFA_MSG_SEND_DIRECT_RESP; + } else { + req_id = FFA_FN_NATIVE(MSG_SEND_DIRECT_REQ); + resp_id = FFA_FN_NATIVE(MSG_SEND_DIRECT_RESP); + } + + invoke_ffa_fn((ffa_value_t){ + .a0 = req_id, .a1 = src_dst_ids, .a2 = 0, + .a3 = data->data0, .a4 = data->data1, .a5 = data->data2, + .a6 = data->data3, .a7 = data->data4, + }, &ret); + + while (ret.a0 == FFA_INTERRUPT) + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_RUN, .a1 = ret.a1, + }, &ret); + + if (ret.a0 == FFA_ERROR) + return ffa_to_linux_errno((int)ret.a2); + + if (ret.a0 == resp_id) { + data->data0 = ret.a3; + data->data1 = ret.a4; + data->data2 = ret.a5; + data->data3 = ret.a6; + data->data4 = ret.a7; + return 0; + } + + return -EINVAL; +} + +static int ffa_mem_first_frag(u32 func_id, phys_addr_t buf, u32 buf_sz, + u32 frag_len, u32 len, u64 *handle) +{ + ffa_value_t ret; + + invoke_ffa_fn((ffa_value_t){ + .a0 = func_id, .a1 = len, .a2 = frag_len, + .a3 = buf, .a4 = buf_sz, + }, &ret); + + while (ret.a0 == FFA_MEM_OP_PAUSE) + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_MEM_OP_RESUME, + .a1 = ret.a1, .a2 = ret.a2, + }, &ret); + + if (ret.a0 == FFA_ERROR) + return ffa_to_linux_errno((int)ret.a2); + + if (ret.a0 != FFA_SUCCESS) + return -EOPNOTSUPP; + + if (handle) + *handle = PACK_HANDLE(ret.a2, ret.a3); + + return frag_len; +} + +static int ffa_mem_next_frag(u64 handle, u32 frag_len) +{ + ffa_value_t ret; + + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_MEM_FRAG_TX, + .a1 = HANDLE_LOW(handle), .a2 = HANDLE_HIGH(handle), + .a3 = frag_len, + }, &ret); + + while (ret.a0 == FFA_MEM_OP_PAUSE) + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_MEM_OP_RESUME, + .a1 = ret.a1, .a2 = ret.a2, + }, &ret); + + if (ret.a0 == FFA_ERROR) + return ffa_to_linux_errno((int)ret.a2); + + if (ret.a0 != FFA_MEM_FRAG_RX) + return -EOPNOTSUPP; + + return ret.a3; +} + +static int +ffa_transmit_fragment(u32 func_id, phys_addr_t buf, u32 buf_sz, u32 frag_len, + u32 len, u64 *handle, bool first) +{ + if (!first) + return ffa_mem_next_frag(*handle, frag_len); + + return ffa_mem_first_frag(func_id, buf, buf_sz, frag_len, len, handle); +} + +static u32 ffa_get_num_pages_sg(struct scatterlist *sg) +{ + u32 num_pages = 0; + + do { + num_pages += sg->length / FFA_PAGE_SIZE; + } while ((sg = sg_next(sg))); + + return num_pages; +} + +static int +ffa_setup_and_transmit(u32 func_id, void *buffer, u32 max_fragsize, + struct ffa_mem_ops_args *args) +{ + int rc = 0; + bool first = true; + phys_addr_t addr = 0; + struct ffa_composite_mem_region *composite; + struct ffa_mem_region_addr_range *constituents; + struct ffa_mem_region_attributes *ep_mem_access; + struct ffa_mem_region *mem_region = buffer; + u32 idx, frag_len, length, buf_sz = 0, num_entries = sg_nents(args->sg); + + mem_region->tag = args->tag; + mem_region->flags = args->flags; + mem_region->sender_id = drv_info->vm_id; + mem_region->attributes = FFA_MEM_NORMAL | FFA_MEM_WRITE_BACK | + FFA_MEM_INNER_SHAREABLE; + ep_mem_access = &mem_region->ep_mem_access[0]; + + for (idx = 0; idx < args->nattrs; idx++, ep_mem_access++) { + ep_mem_access->receiver = args->attrs[idx].receiver; + ep_mem_access->attrs = args->attrs[idx].attrs; + ep_mem_access->composite_off = COMPOSITE_OFFSET(args->nattrs); + } + mem_region->ep_count = args->nattrs; + + composite = buffer + COMPOSITE_OFFSET(args->nattrs); + composite->total_pg_cnt = ffa_get_num_pages_sg(args->sg); + composite->addr_range_cnt = num_entries; + + length = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, num_entries); + frag_len = COMPOSITE_CONSTITUENTS_OFFSET(args->nattrs, 0); + if (frag_len > max_fragsize) + return -ENXIO; + + if (!args->use_txbuf) { + addr = virt_to_phys(buffer); + buf_sz = max_fragsize / FFA_PAGE_SIZE; + } + + constituents = buffer + frag_len; + idx = 0; + do { + if (frag_len == max_fragsize) { + rc = ffa_transmit_fragment(func_id, addr, buf_sz, + frag_len, length, + &args->g_handle, first); + if (rc < 0) + return -ENXIO; + + first = false; + idx = 0; + frag_len = 0; + constituents = buffer; + } + + if ((void *)constituents - buffer > max_fragsize) { + pr_err("Memory Region Fragment > Tx Buffer size\n"); + return -EFAULT; + } + + constituents->address = sg_phys(args->sg); + constituents->pg_cnt = args->sg->length / FFA_PAGE_SIZE; + constituents++; + frag_len += sizeof(struct ffa_mem_region_addr_range); + } while ((args->sg = sg_next(args->sg))); + + return ffa_transmit_fragment(func_id, addr, buf_sz, frag_len, + length, &args->g_handle, first); +} + +static int ffa_memory_ops(u32 func_id, struct ffa_mem_ops_args *args) +{ + int ret; + void *buffer; + + if (!args->use_txbuf) { + buffer = alloc_pages_exact(RXTX_BUFFER_SIZE, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + } else { + buffer = drv_info->tx_buffer; + mutex_lock(&drv_info->tx_lock); + } + + ret = ffa_setup_and_transmit(func_id, buffer, RXTX_BUFFER_SIZE, args); + + if (args->use_txbuf) + mutex_unlock(&drv_info->tx_lock); + else + free_pages_exact(buffer, RXTX_BUFFER_SIZE); + + return ret < 0 ? ret : 0; +} + +static int ffa_memory_reclaim(u64 g_handle, u32 flags) +{ + ffa_value_t ret; + + invoke_ffa_fn((ffa_value_t){ + .a0 = FFA_MEM_RECLAIM, + .a1 = HANDLE_LOW(g_handle), .a2 = HANDLE_HIGH(g_handle), + .a3 = flags, + }, &ret); + + if (ret.a0 == FFA_ERROR) + return ffa_to_linux_errno((int)ret.a2); + + return 0; +} + +static u32 ffa_api_version_get(void) +{ + return drv_info->version; +} + +static int ffa_partition_info_get(const char *uuid_str, + struct ffa_partition_info *buffer) +{ + int count; + uuid_t uuid; + struct ffa_partition_info *pbuf; + + if (uuid_parse(uuid_str, &uuid)) { + pr_err("invalid uuid (%s)\n", uuid_str); + return -ENODEV; + } + + count = ffa_partition_probe(&uuid_null, &pbuf); + if (count <= 0) + return -ENOENT; + + memcpy(buffer, pbuf, sizeof(*pbuf) * count); + kfree(pbuf); + return 0; +} + +static void ffa_mode_32bit_set(struct ffa_device *dev) +{ + dev->mode_32bit = true; +} + +static int ffa_sync_send_receive(struct ffa_device *dev, + struct ffa_send_direct_data *data) +{ + return ffa_msg_send_direct_req(drv_info->vm_id, dev->vm_id, + dev->mode_32bit, data); +} + +static int +ffa_memory_share(struct ffa_device *dev, struct ffa_mem_ops_args *args) +{ + if (dev->mode_32bit) + return ffa_memory_ops(FFA_MEM_SHARE, args); + + return ffa_memory_ops(FFA_FN_NATIVE(MEM_SHARE), args); +} + +static int +ffa_memory_lend(struct ffa_device *dev, struct ffa_mem_ops_args *args) +{ + /* Note that upon a successful MEM_LEND request the caller + * must ensure that the memory region specified is not accessed + * until a successful MEM_RECALIM call has been made. + * On systems with a hypervisor present this will been enforced, + * however on systems without a hypervisor the responsibility + * falls to the calling kernel driver to prevent access. + */ + if (dev->mode_32bit) + return ffa_memory_ops(FFA_MEM_LEND, args); + + return ffa_memory_ops(FFA_FN_NATIVE(MEM_LEND), args); +} + +static const struct ffa_dev_ops ffa_ops = { + .api_version_get = ffa_api_version_get, + .partition_info_get = ffa_partition_info_get, + .mode_32bit_set = ffa_mode_32bit_set, + .sync_send_receive = ffa_sync_send_receive, + .memory_reclaim = ffa_memory_reclaim, + .memory_share = ffa_memory_share, + .memory_lend = ffa_memory_lend, +}; + +const struct ffa_dev_ops *ffa_dev_ops_get(struct ffa_device *dev) +{ + if (ffa_device_is_valid(dev)) + return &ffa_ops; + + return NULL; +} +EXPORT_SYMBOL_GPL(ffa_dev_ops_get); + +void ffa_device_match_uuid(struct ffa_device *ffa_dev, const uuid_t *uuid) +{ + int count, idx; + struct ffa_partition_info *pbuf, *tpbuf; + + count = ffa_partition_probe(uuid, &pbuf); + if (count <= 0) + return; + + for (idx = 0, tpbuf = pbuf; idx < count; idx++, tpbuf++) + if (tpbuf->id == ffa_dev->vm_id) + uuid_copy(&ffa_dev->uuid, uuid); + kfree(pbuf); +} + +static void ffa_setup_partitions(void) +{ + int count, idx; + struct ffa_device *ffa_dev; + struct ffa_partition_info *pbuf, *tpbuf; + + count = ffa_partition_probe(&uuid_null, &pbuf); + if (count <= 0) { + pr_info("%s: No partitions found, error %d\n", __func__, count); + return; + } + + for (idx = 0, tpbuf = pbuf; idx < count; idx++, tpbuf++) { + /* Note that the &uuid_null parameter will require + * ffa_device_match() to find the UUID of this partition id + * with help of ffa_device_match_uuid(). Once the FF-A spec + * is updated to provide correct UUID here for each partition + * as part of the discovery API, we need to pass the + * discovered UUID here instead. + */ + ffa_dev = ffa_device_register(&uuid_null, tpbuf->id); + if (!ffa_dev) { + pr_err("%s: failed to register partition ID 0x%x\n", + __func__, tpbuf->id); + continue; + } + + ffa_dev_set_drvdata(ffa_dev, drv_info); + } + kfree(pbuf); +} + +static int __init ffa_init(void) +{ + int ret; + + ret = ffa_transport_init(&invoke_ffa_fn); + if (ret) + return ret; + + ret = arm_ffa_bus_init(); + if (ret) + return ret; + + drv_info = kzalloc(sizeof(*drv_info), GFP_KERNEL); + if (!drv_info) { + ret = -ENOMEM; + goto ffa_bus_exit; + } + + ret = ffa_version_check(&drv_info->version); + if (ret) + goto free_drv_info; + + if (ffa_id_get(&drv_info->vm_id)) { + pr_err("failed to obtain VM id for self\n"); + ret = -ENODEV; + goto free_drv_info; + } + + drv_info->rx_buffer = alloc_pages_exact(RXTX_BUFFER_SIZE, GFP_KERNEL); + if (!drv_info->rx_buffer) { + ret = -ENOMEM; + goto free_pages; + } + + drv_info->tx_buffer = alloc_pages_exact(RXTX_BUFFER_SIZE, GFP_KERNEL); + if (!drv_info->tx_buffer) { + ret = -ENOMEM; + goto free_pages; + } + + ret = ffa_rxtx_map(virt_to_phys(drv_info->tx_buffer), + virt_to_phys(drv_info->rx_buffer), + RXTX_BUFFER_SIZE / FFA_PAGE_SIZE); + if (ret) { + pr_err("failed to register FFA RxTx buffers\n"); + goto free_pages; + } + + mutex_init(&drv_info->rx_lock); + mutex_init(&drv_info->tx_lock); + + ffa_setup_partitions(); + + return 0; +free_pages: + if (drv_info->tx_buffer) + free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE); + free_pages_exact(drv_info->rx_buffer, RXTX_BUFFER_SIZE); +free_drv_info: + kfree(drv_info); +ffa_bus_exit: + arm_ffa_bus_exit(); + return ret; +} +subsys_initcall(ffa_init); + +static void __exit ffa_exit(void) +{ + ffa_rxtx_unmap(drv_info->vm_id); + free_pages_exact(drv_info->tx_buffer, RXTX_BUFFER_SIZE); + free_pages_exact(drv_info->rx_buffer, RXTX_BUFFER_SIZE); + kfree(drv_info); + arm_ffa_bus_exit(); +} +module_exit(ffa_exit); + +MODULE_ALIAS("arm-ffa"); +MODULE_AUTHOR("Sudeep Holla "); +MODULE_DESCRIPTION("Arm FF-A interface driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/firmware/arm_ffa/smccc.c b/drivers/firmware/arm_ffa/smccc.c new file mode 100644 index 0000000000000000000000000000000000000000..4d85bfff0a4e6749990ad4db90c0225326c63857 --- /dev/null +++ b/drivers/firmware/arm_ffa/smccc.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 ARM Ltd. + */ + +#include + +#include "common.h" + +static void __arm_ffa_fn_smc(ffa_value_t args, ffa_value_t *res) +{ + arm_smccc_1_2_smc(&args, res); +} + +static void __arm_ffa_fn_hvc(ffa_value_t args, ffa_value_t *res) +{ + arm_smccc_1_2_hvc(&args, res); +} + +int __init ffa_transport_init(ffa_fn **invoke_ffa_fn) +{ + enum arm_smccc_conduit conduit; + + if (arm_smccc_get_version() < ARM_SMCCC_VERSION_1_2) + return -EOPNOTSUPP; + + conduit = arm_smccc_1_1_get_conduit(); + if (conduit == SMCCC_CONDUIT_NONE) { + pr_err("%s: invalid SMCCC conduit\n", __func__); + return -EOPNOTSUPP; + } + + if (conduit == SMCCC_CONDUIT_SMC) + *invoke_ffa_fn = __arm_ffa_fn_smc; + else + *invoke_ffa_fn = __arm_ffa_fn_hvc; + + return 0; +} diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 1732fb3e332b9982bf7d24e70e2ce1b71bbc5155..e594d739f43701cb36b6fb149d48d6cba00b08f7 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -1638,7 +1638,7 @@ static void __exit scmi_driver_exit(void) } module_exit(scmi_driver_exit); -MODULE_ALIAS("platform: arm-scmi"); +MODULE_ALIAS("platform:arm-scmi"); MODULE_AUTHOR("Sudeep Holla "); MODULE_DESCRIPTION("ARM SCMI protocol driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/firmware/arm_scmi/scmi_pm_domain.c b/drivers/firmware/arm_scmi/scmi_pm_domain.c index b85141e7e6bc3c53aac8c7b607b7e65efe418239..bcc5cf1c99d60976813f88798e172d1fb8b894e0 100644 --- a/drivers/firmware/arm_scmi/scmi_pm_domain.c +++ b/drivers/firmware/arm_scmi/scmi_pm_domain.c @@ -112,9 +112,7 @@ static int scmi_pm_domain_probe(struct scmi_device *sdev) scmi_pd_data->domains = domains; scmi_pd_data->num_domains = num_domains; - of_genpd_add_provider_onecell(np, scmi_pd_data); - - return 0; + return of_genpd_add_provider_onecell(np, scmi_pd_data); } static const struct scmi_device_id scmi_id_table[] = { diff --git a/drivers/firmware/arm_scmi/sensors.c b/drivers/firmware/arm_scmi/sensors.c index 1187547eba4fc0e596e1279fa60ed49ef41df0e0..d426b8478543c2408dc0ed3a36f952e81e85556e 100644 --- a/drivers/firmware/arm_scmi/sensors.c +++ b/drivers/firmware/arm_scmi/sensors.c @@ -637,7 +637,7 @@ static int scmi_sensor_config_get(const struct scmi_protocol_handle *ph, if (ret) return ret; - put_unaligned_le32(cpu_to_le32(sensor_id), t->tx.buf); + put_unaligned_le32(sensor_id, t->tx.buf); ret = ph->xops->do_xfer(ph, t); if (!ret) { struct sensors_info *si = ph->get_priv(ph); diff --git a/drivers/firmware/arm_scmi/voltage.c b/drivers/firmware/arm_scmi/voltage.c index 3a0cd5a531f118e3647d93201d27d0a737f1f66c..ab22f4c449aabe7ec5d160af04edf424ef687ad3 100644 --- a/drivers/firmware/arm_scmi/voltage.c +++ b/drivers/firmware/arm_scmi/voltage.c @@ -155,7 +155,7 @@ static int scmi_voltage_descriptors_get(const struct scmi_protocol_handle *ph, int cnt; cmd->domain_id = cpu_to_le32(v->id); - cmd->level_index = desc_index; + cmd->level_index = cpu_to_le32(desc_index); ret = ph->xops->do_xfer(ph, tl); if (ret) break; diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index ea7ca74fc1730e021273dcdfc77c8def8f5c8ed6..232c092c4c9703007c5f6178130c1f4e0cbf66e9 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -25,8 +25,6 @@ #include #include -static char rcd_decode_str[CPER_REC_LEN]; - /* * CPER record ID need to be unique even after reboot, because record * ID is used as index for ERST storage, while CPER records from @@ -313,6 +311,7 @@ const char *cper_mem_err_unpack(struct trace_seq *p, struct cper_mem_err_compact *cmem) { const char *ret = trace_seq_buffer_ptr(p); + char rcd_decode_str[CPER_REC_LEN]; if (cper_mem_err_location(cmem, rcd_decode_str)) trace_seq_printf(p, "%s", rcd_decode_str); @@ -327,6 +326,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem, int len) { struct cper_mem_err_compact cmem; + char rcd_decode_str[CPER_REC_LEN]; /* Don't trust UEFI 2.1/2.2 structure with bad validation bits */ if (len == sizeof(struct cper_sec_mem_err_old) && diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 847f33ffc4aedee73d07980e56859530626143e3..9fa86288b78a987490a9826ccac9727d580dba10 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -719,6 +719,13 @@ void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr, systab_hdr->revision >> 16, systab_hdr->revision & 0xffff, vendor); + + if (IS_ENABLED(CONFIG_X86_64) && + systab_hdr->revision > EFI_1_10_SYSTEM_TABLE_REVISION && + !strcmp(vendor, "Apple")) { + pr_info("Apple Mac detected, using EFI v1.10 runtime services only\n"); + efi.runtime_version = EFI_1_10_SYSTEM_TABLE_REVISION; + } } static __initdata char memory_type_name[][13] = { diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index 7bb20caf74b3e7a39e6651bc8a41f5731a99adce..7f4bafcd9d335b65d855bbd34a298699608b34eb 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -119,9 +119,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, if (image->image_base != _text) efi_err("FIRMWARE BUG: efi_loaded_image_t::image_base has bogus value\n"); - if (!IS_ALIGNED((u64)_text, EFI_KIMG_ALIGN)) - efi_err("FIRMWARE BUG: kernel image not aligned on %ldk boundary\n", - EFI_KIMG_ALIGN >> 10); + if (!IS_ALIGNED((u64)_text, SEGMENT_ALIGN)) + efi_err("FIRMWARE BUG: kernel image not aligned on %dk boundary\n", + SEGMENT_ALIGN >> 10); kernel_size = _edata - _text; kernel_memsize = kernel_size + (_end - _edata); diff --git a/drivers/firmware/efi/libstub/riscv-stub.c b/drivers/firmware/efi/libstub/riscv-stub.c index 380e4e2513994e9d21fa7ecc520c00218d645ce3..9c460843442f5ad56a8865a19088d7a2cb3dbcbd 100644 --- a/drivers/firmware/efi/libstub/riscv-stub.c +++ b/drivers/firmware/efi/libstub/riscv-stub.c @@ -25,7 +25,7 @@ typedef void __noreturn (*jump_kernel_func)(unsigned int, unsigned long); static u32 hartid; -static u32 get_boot_hartid_from_fdt(void) +static int get_boot_hartid_from_fdt(void) { const void *fdt; int chosen_node, len; @@ -33,23 +33,26 @@ static u32 get_boot_hartid_from_fdt(void) fdt = get_efi_config_table(DEVICE_TREE_GUID); if (!fdt) - return U32_MAX; + return -EINVAL; chosen_node = fdt_path_offset(fdt, "/chosen"); if (chosen_node < 0) - return U32_MAX; + return -EINVAL; prop = fdt_getprop((void *)fdt, chosen_node, "boot-hartid", &len); if (!prop || len != sizeof(u32)) - return U32_MAX; + return -EINVAL; - return fdt32_to_cpu(*prop); + hartid = fdt32_to_cpu(*prop); + return 0; } efi_status_t check_platform_features(void) { - hartid = get_boot_hartid_from_fdt(); - if (hartid == U32_MAX) { + int ret; + + ret = get_boot_hartid_from_fdt(); + if (ret) { efi_err("/chosen/boot-hartid missing or invalid!\n"); return EFI_UNSUPPORTED; } diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index 1410beaef5c30404e3cb1503b3e7b3c509d36674..f3e54f6616f02475b95afb593fdc1c29a4eef449 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -414,7 +414,7 @@ static void virt_efi_reset_system(int reset_type, unsigned long data_size, efi_char16_t *data) { - if (down_interruptible(&efi_runtime_lock)) { + if (down_trylock(&efi_runtime_lock)) { pr_warn("failed to invoke the reset_system() runtime service:\n" "could not get exclusive access to the firmware\n"); return; diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index abdc8a6a396318a915455dd73e26439b88c1b003..cae590bd08f27c3c769459802d3546d1542c2ff8 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -742,6 +742,7 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, { const struct efivar_operations *ops; efi_status_t status; + unsigned long varsize; if (!__efivars) return -EINVAL; @@ -764,15 +765,17 @@ int efivar_entry_set_safe(efi_char16_t *name, efi_guid_t vendor, u32 attributes, return efivar_entry_set_nonblocking(name, vendor, attributes, size, data); + varsize = size + ucs2_strsize(name, 1024); if (!block) { if (down_trylock(&efivars_lock)) return -EBUSY; + status = check_var_size_nonblocking(attributes, varsize); } else { if (down_interruptible(&efivars_lock)) return -EINTR; + status = check_var_size(attributes, varsize); } - status = check_var_size(attributes, size + ucs2_strsize(name, 1024)); if (status != EFI_SUCCESS) { up(&efivars_lock); return -ENOSPC; diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig index 97968aece54f889bbb6083fe956aceb1b2e9f162..931544c9f63d4b8e0ed169e491b51c15df2259b3 100644 --- a/drivers/firmware/google/Kconfig +++ b/drivers/firmware/google/Kconfig @@ -3,9 +3,9 @@ menuconfig GOOGLE_FIRMWARE bool "Google Firmware Drivers" default n help - These firmware drivers are used by Google's servers. They are - only useful if you are working directly on one of their - proprietary servers. If in doubt, say "N". + These firmware drivers are used by Google servers, + Chromebooks and other devices using coreboot firmware. + If in doubt, say "N". if GOOGLE_FIRMWARE diff --git a/drivers/firmware/psci/psci_checker.c b/drivers/firmware/psci/psci_checker.c index 9a369a2eda71d2614b21b18e407b0bbca0564afd..116eb465cdb4220476663f6bf49296842c1d6c2a 100644 --- a/drivers/firmware/psci/psci_checker.c +++ b/drivers/firmware/psci/psci_checker.c @@ -155,7 +155,7 @@ static int alloc_init_cpu_groups(cpumask_var_t **pcpu_groups) if (!alloc_cpumask_var(&tmp, GFP_KERNEL)) return -ENOMEM; - cpu_groups = kcalloc(nb_available_cpus, sizeof(cpu_groups), + cpu_groups = kcalloc(nb_available_cpus, sizeof(*cpu_groups), GFP_KERNEL); if (!cpu_groups) { free_cpumask_var(tmp); diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index cfc47602d23105d531e862b79fc08cbfa53100ee..0de70a6f8b2a5a839ffad057471f064d1d4b651f 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -252,7 +252,7 @@ static bool __qcom_scm_is_call_available(struct device *dev, u32 svc_id, break; default: pr_err("Unknown SMC convention being used\n"); - return -EINVAL; + return false; } ret = qcom_scm_call(dev, &desc, &res); diff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 172c751a4f6c24acc1e5e8cb04b594b1843a5b86..f08e056ed0ae4506db5f6336534b4095ea4cc388 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c @@ -388,9 +388,7 @@ static void fw_cfg_sysfs_cache_cleanup(void) struct fw_cfg_sysfs_entry *entry, *next; list_for_each_entry_safe(entry, next, &fw_cfg_entry_cache, list) { - /* will end up invoking fw_cfg_sysfs_cache_delist() - * via each object's release() method (i.e. destructor) - */ + fw_cfg_sysfs_cache_delist(entry); kobject_put(&entry->kobj); } } @@ -448,7 +446,6 @@ static void fw_cfg_sysfs_release_entry(struct kobject *kobj) { struct fw_cfg_sysfs_entry *entry = to_entry(kobj); - fw_cfg_sysfs_cache_delist(entry); kfree(entry); } @@ -601,20 +598,18 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f) /* set file entry information */ entry->size = be32_to_cpu(f->size); entry->select = be16_to_cpu(f->select); - memcpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH); + strscpy(entry->name, f->name, FW_CFG_MAX_FILE_PATH); /* register entry under "/sys/firmware/qemu_fw_cfg/by_key/" */ err = kobject_init_and_add(&entry->kobj, &fw_cfg_sysfs_entry_ktype, fw_cfg_sel_ko, "%d", entry->select); - if (err) { - kobject_put(&entry->kobj); - return err; - } + if (err) + goto err_put_entry; /* add raw binary content access */ err = sysfs_create_bin_file(&entry->kobj, &fw_cfg_sysfs_attr_raw); if (err) - goto err_add_raw; + goto err_del_entry; /* try adding "/sys/firmware/qemu_fw_cfg/by_name/" symlink */ fw_cfg_build_symlink(fw_cfg_fname_kset, &entry->kobj, entry->name); @@ -623,9 +618,10 @@ static int fw_cfg_register_file(const struct fw_cfg_file *f) fw_cfg_sysfs_cache_enlist(entry); return 0; -err_add_raw: +err_del_entry: kobject_del(&entry->kobj); - kfree(entry); +err_put_entry: + kobject_put(&entry->kobj); return err; } diff --git a/drivers/firmware/scpi_pm_domain.c b/drivers/firmware/scpi_pm_domain.c index 51201600d789b9cf6dd4e6c72b59597625b8550d..800673910b5111c837e5aa98dec0096f8848dfc9 100644 --- a/drivers/firmware/scpi_pm_domain.c +++ b/drivers/firmware/scpi_pm_domain.c @@ -16,7 +16,6 @@ struct scpi_pm_domain { struct generic_pm_domain genpd; struct scpi_ops *ops; u32 domain; - char name[30]; }; /* @@ -110,8 +109,13 @@ static int scpi_pm_domain_probe(struct platform_device *pdev) scpi_pd->domain = i; scpi_pd->ops = scpi_ops; - sprintf(scpi_pd->name, "%pOFn.%d", np, i); - scpi_pd->genpd.name = scpi_pd->name; + scpi_pd->genpd.name = devm_kasprintf(dev, GFP_KERNEL, + "%pOFn.%d", np, i); + if (!scpi_pd->genpd.name) { + dev_err(dev, "Failed to allocate genpd name:%pOFn.%d\n", + np, i); + continue; + } scpi_pd->genpd.power_off = scpi_pd_power_off; scpi_pd->genpd.power_on = scpi_pd_power_on; diff --git a/drivers/firmware/smccc/kvm_guest.c b/drivers/firmware/smccc/kvm_guest.c index 2d3e866decaa671f3cf28e6d74de24d390ef5b86..56169e73252a3b3a7be1e6ba3f00a31e63ec8797 100644 --- a/drivers/firmware/smccc/kvm_guest.c +++ b/drivers/firmware/smccc/kvm_guest.c @@ -9,6 +9,8 @@ #include +void __weak kvm_arm_init_hyp_services(void) {} + static DECLARE_BITMAP(__kvm_arm_hyp_services, ARM_SMCCC_KVM_NUM_FUNCS) __ro_after_init = { }; void __init kvm_init_hyp_services(void) @@ -38,6 +40,8 @@ void __init kvm_init_hyp_services(void) pr_info("hypervisor services detected (0x%08lx 0x%08lx 0x%08lx 0x%08lx)\n", res.a3, res.a2, res.a1, res.a0); + + kvm_arm_init_hyp_services(); } bool kvm_arm_hyp_service_available(u32 func_id) diff --git a/drivers/firmware/smccc/soc_id.c b/drivers/firmware/smccc/soc_id.c index 581aa5e9b0778bea6e7c853cd7c23a9bbc07f3e3..dd7c3d5e8b0bbabd32cf9395e3e39f6bf5c77d20 100644 --- a/drivers/firmware/smccc/soc_id.c +++ b/drivers/firmware/smccc/soc_id.c @@ -50,7 +50,7 @@ static int __init smccc_soc_init(void) arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_SOC_ID, &res); - if (res.a0 == SMCCC_RET_NOT_SUPPORTED) { + if ((int)res.a0 == SMCCC_RET_NOT_SUPPORTED) { pr_info("ARCH_SOC_ID not implemented, skipping ....\n"); return 0; } diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index dfd8a4876a27ac4baa5cac2aaffdbfb0223aab54..d5f25246404d9e28e434046f921b5052525172c2 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -330,7 +330,8 @@ static int gpio_fwd_get(struct gpio_chip *chip, unsigned int offset) { struct gpiochip_fwd *fwd = gpiochip_get_data(chip); - return gpiod_get_value(fwd->descs[offset]); + return chip->can_sleep ? gpiod_get_value_cansleep(fwd->descs[offset]) + : gpiod_get_value(fwd->descs[offset]); } static int gpio_fwd_get_multiple(struct gpiochip_fwd *fwd, unsigned long *mask, @@ -349,7 +350,10 @@ static int gpio_fwd_get_multiple(struct gpiochip_fwd *fwd, unsigned long *mask, for_each_set_bit(i, mask, fwd->chip.ngpio) descs[j++] = fwd->descs[i]; - error = gpiod_get_array_value(j, descs, NULL, values); + if (fwd->chip.can_sleep) + error = gpiod_get_array_value_cansleep(j, descs, NULL, values); + else + error = gpiod_get_array_value(j, descs, NULL, values); if (error) return error; @@ -384,7 +388,10 @@ static void gpio_fwd_set(struct gpio_chip *chip, unsigned int offset, int value) { struct gpiochip_fwd *fwd = gpiochip_get_data(chip); - gpiod_set_value(fwd->descs[offset], value); + if (chip->can_sleep) + gpiod_set_value_cansleep(fwd->descs[offset], value); + else + gpiod_set_value(fwd->descs[offset], value); } static void gpio_fwd_set_multiple(struct gpiochip_fwd *fwd, unsigned long *mask, @@ -403,7 +410,10 @@ static void gpio_fwd_set_multiple(struct gpiochip_fwd *fwd, unsigned long *mask, descs[j++] = fwd->descs[i]; } - gpiod_set_array_value(j, descs, NULL, values); + if (fwd->chip.can_sleep) + gpiod_set_array_value_cansleep(j, descs, NULL, values); + else + gpiod_set_array_value(j, descs, NULL, values); } static void gpio_fwd_set_multiple_locked(struct gpio_chip *chip, diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index b966f5e28ebffd5f3a97bcd43f1001156641b43d..e0d5d80ec8e0f2f650804c523f4f624b48f4ded7 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -53,7 +53,7 @@ struct aspeed_gpio_config { struct aspeed_gpio { struct gpio_chip chip; struct irq_chip irqc; - spinlock_t lock; + raw_spinlock_t lock; void __iomem *base; int irq; const struct aspeed_gpio_config *config; @@ -413,14 +413,14 @@ static void aspeed_gpio_set(struct gpio_chip *gc, unsigned int offset, unsigned long flags; bool copro; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); copro = aspeed_gpio_copro_request(gpio, offset); __aspeed_gpio_set(gc, offset, val); if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); } static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset) @@ -435,7 +435,7 @@ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset) if (!have_input(gpio, offset)) return -ENOTSUPP; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); reg = ioread32(addr); reg &= ~GPIO_BIT(offset); @@ -445,7 +445,7 @@ static int aspeed_gpio_dir_in(struct gpio_chip *gc, unsigned int offset) if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return 0; } @@ -463,7 +463,7 @@ static int aspeed_gpio_dir_out(struct gpio_chip *gc, if (!have_output(gpio, offset)) return -ENOTSUPP; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); reg = ioread32(addr); reg |= GPIO_BIT(offset); @@ -474,7 +474,7 @@ static int aspeed_gpio_dir_out(struct gpio_chip *gc, if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return 0; } @@ -492,11 +492,11 @@ static int aspeed_gpio_get_direction(struct gpio_chip *gc, unsigned int offset) if (!have_output(gpio, offset)) return GPIO_LINE_DIRECTION_IN; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); val = ioread32(bank_reg(gpio, bank, reg_dir)) & GPIO_BIT(offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return val ? GPIO_LINE_DIRECTION_OUT : GPIO_LINE_DIRECTION_IN; } @@ -539,14 +539,14 @@ static void aspeed_gpio_irq_ack(struct irq_data *d) status_addr = bank_reg(gpio, bank, reg_irq_status); - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); copro = aspeed_gpio_copro_request(gpio, offset); iowrite32(bit, status_addr); if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); } static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set) @@ -565,7 +565,7 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set) addr = bank_reg(gpio, bank, reg_irq_enable); - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); copro = aspeed_gpio_copro_request(gpio, offset); reg = ioread32(addr); @@ -577,7 +577,7 @@ static void aspeed_gpio_irq_set_mask(struct irq_data *d, bool set) if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); } static void aspeed_gpio_irq_mask(struct irq_data *d) @@ -629,7 +629,7 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type) return -EINVAL; } - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); copro = aspeed_gpio_copro_request(gpio, offset); addr = bank_reg(gpio, bank, reg_irq_type0); @@ -649,7 +649,7 @@ static int aspeed_gpio_set_type(struct irq_data *d, unsigned int type) if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); irq_set_handler_locked(d, handler); @@ -719,7 +719,7 @@ static int aspeed_gpio_reset_tolerance(struct gpio_chip *chip, treg = bank_reg(gpio, to_bank(offset), reg_tolerance); - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); copro = aspeed_gpio_copro_request(gpio, offset); val = readl(treg); @@ -733,7 +733,7 @@ static int aspeed_gpio_reset_tolerance(struct gpio_chip *chip, if (copro) aspeed_gpio_copro_release(gpio, offset); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return 0; } @@ -859,7 +859,7 @@ static int enable_debounce(struct gpio_chip *chip, unsigned int offset, return rc; } - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); if (timer_allocation_registered(gpio, offset)) { rc = unregister_allocated_timer(gpio, offset); @@ -919,7 +919,7 @@ static int enable_debounce(struct gpio_chip *chip, unsigned int offset, configure_timer(gpio, offset, i); out: - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return rc; } @@ -930,13 +930,13 @@ static int disable_debounce(struct gpio_chip *chip, unsigned int offset) unsigned long flags; int rc; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); rc = unregister_allocated_timer(gpio, offset); if (!rc) configure_timer(gpio, offset, 0); - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return rc; } @@ -1018,7 +1018,7 @@ int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc, return -EINVAL; bindex = offset >> 3; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); /* Sanity check, this shouldn't happen */ if (gpio->cf_copro_bankmap[bindex] == 0xff) { @@ -1039,7 +1039,7 @@ int aspeed_gpio_copro_grab_gpio(struct gpio_desc *desc, if (bit) *bit = GPIO_OFFSET(offset); bail: - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return rc; } EXPORT_SYMBOL_GPL(aspeed_gpio_copro_grab_gpio); @@ -1063,7 +1063,7 @@ int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc) return -EINVAL; bindex = offset >> 3; - spin_lock_irqsave(&gpio->lock, flags); + raw_spin_lock_irqsave(&gpio->lock, flags); /* Sanity check, this shouldn't happen */ if (gpio->cf_copro_bankmap[bindex] == 0) { @@ -1077,7 +1077,7 @@ int aspeed_gpio_copro_release_gpio(struct gpio_desc *desc) aspeed_gpio_change_cmd_source(gpio, bank, bindex, GPIO_CMDSRC_ARM); bail: - spin_unlock_irqrestore(&gpio->lock, flags); + raw_spin_unlock_irqrestore(&gpio->lock, flags); return rc; } EXPORT_SYMBOL_GPL(aspeed_gpio_copro_release_gpio); @@ -1151,7 +1151,7 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev) if (IS_ERR(gpio->base)) return PTR_ERR(gpio->base); - spin_lock_init(&gpio->lock); + raw_spin_lock_init(&gpio->lock); gpio_id = of_match_node(aspeed_gpio_of_table, pdev->dev.of_node); if (!gpio_id) diff --git a/drivers/gpio/gpio-dln2.c b/drivers/gpio/gpio-dln2.c index 4c5f6d0c8d745d8455cd70ba47628f37f4214ff8..22f11dd5210db15cf1b4d803dc690e450009bafa 100644 --- a/drivers/gpio/gpio-dln2.c +++ b/drivers/gpio/gpio-dln2.c @@ -46,6 +46,7 @@ struct dln2_gpio { struct platform_device *pdev; struct gpio_chip gpio; + struct irq_chip irqchip; /* * Cache pin direction to save us one transfer, since the hardware has @@ -383,15 +384,6 @@ static void dln2_irq_bus_unlock(struct irq_data *irqd) mutex_unlock(&dln2->irq_lock); } -static struct irq_chip dln2_gpio_irqchip = { - .name = "dln2-irq", - .irq_mask = dln2_irq_mask, - .irq_unmask = dln2_irq_unmask, - .irq_set_type = dln2_irq_set_type, - .irq_bus_lock = dln2_irq_bus_lock, - .irq_bus_sync_unlock = dln2_irq_bus_unlock, -}; - static void dln2_gpio_event(struct platform_device *pdev, u16 echo, const void *data, int len) { @@ -477,8 +469,15 @@ static int dln2_gpio_probe(struct platform_device *pdev) dln2->gpio.direction_output = dln2_gpio_direction_output; dln2->gpio.set_config = dln2_gpio_set_config; + dln2->irqchip.name = "dln2-irq", + dln2->irqchip.irq_mask = dln2_irq_mask, + dln2->irqchip.irq_unmask = dln2_irq_unmask, + dln2->irqchip.irq_set_type = dln2_irq_set_type, + dln2->irqchip.irq_bus_lock = dln2_irq_bus_lock, + dln2->irqchip.irq_bus_sync_unlock = dln2_irq_bus_unlock, + girq = &dln2->gpio.irq; - girq->chip = &dln2_gpio_irqchip; + girq->chip = &dln2->irqchip; /* The event comes from the outside so no parent handler */ girq->parent_handler = NULL; girq->num_parents = 0; diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c index befa5e1099439e2a46b84baa38d7263ec63dd2f6..d4b250b470b412da7b940df981a98e578f6ad614 100644 --- a/drivers/gpio/gpio-mlxbf2.c +++ b/drivers/gpio/gpio-mlxbf2.c @@ -268,6 +268,11 @@ mlxbf2_gpio_probe(struct platform_device *pdev) NULL, 0); + if (ret) { + dev_err(dev, "bgpio_init failed\n"); + return ret; + } + gc->direction_input = mlxbf2_gpio_direction_input; gc->direction_output = mlxbf2_gpio_direction_output; gc->ngpio = npins; diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 7cc7d137133aac218f9a42918999ad69ee776f61..a78167b2c9ca2a5dee7566d5f4f776a2fb30882f 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -467,15 +467,8 @@ static int pca953x_gpio_get_value(struct gpio_chip *gc, unsigned off) mutex_lock(&chip->i2c_lock); ret = regmap_read(chip->regmap, inreg, ®_val); mutex_unlock(&chip->i2c_lock); - if (ret < 0) { - /* - * NOTE: - * diagnostic already emitted; that's all we should - * do unless gpio_*_value_cansleep() calls become different - * from their nonsleeping siblings (and report faults). - */ - return 0; - } + if (ret < 0) + return ret; return !!(reg_val & bit); } @@ -565,21 +558,21 @@ static int pca953x_gpio_set_pull_up_down(struct pca953x_chip *chip, mutex_lock(&chip->i2c_lock); - /* Disable pull-up/pull-down */ - ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, 0); - if (ret) - goto exit; - /* Configure pull-up/pull-down */ if (config == PIN_CONFIG_BIAS_PULL_UP) ret = regmap_write_bits(chip->regmap, pull_sel_reg, bit, bit); else if (config == PIN_CONFIG_BIAS_PULL_DOWN) ret = regmap_write_bits(chip->regmap, pull_sel_reg, bit, 0); + else + ret = 0; if (ret) goto exit; - /* Enable pull-up/pull-down */ - ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, bit); + /* Disable/Enable pull-up/pull-down */ + if (config == PIN_CONFIG_BIAS_DISABLE) + ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, 0); + else + ret = regmap_write_bits(chip->regmap, pull_en_reg, bit, bit); exit: mutex_unlock(&chip->i2c_lock); @@ -593,7 +586,9 @@ static int pca953x_gpio_set_config(struct gpio_chip *gc, unsigned int offset, switch (pinconf_to_config_param(config)) { case PIN_CONFIG_BIAS_PULL_UP: + case PIN_CONFIG_BIAS_PULL_PIN_DEFAULT: case PIN_CONFIG_BIAS_PULL_DOWN: + case PIN_CONFIG_BIAS_DISABLE: return pca953x_gpio_set_pull_up_down(chip, offset, config); default: return -ENOTSUPP; diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c index d5eb9ca119016c6ba7882334ee2eaf16ce77949a..4f28fa73450c175bb7baa5ed8e78db0db161f0b0 100644 --- a/drivers/gpio/gpio-sifive.c +++ b/drivers/gpio/gpio-sifive.c @@ -206,7 +206,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) NULL, chip->base + SIFIVE_GPIO_OUTPUT_EN, chip->base + SIFIVE_GPIO_INPUT_EN, - 0); + BGPIOF_READ_OUTPUT_REG_SET); if (ret) { dev_err(dev, "unable to init generic GPIO\n"); return ret; diff --git a/drivers/gpio/gpio-tegra186.c b/drivers/gpio/gpio-tegra186.c index 9500074b1f1b55e00951af9b7b9f2d956f37a7ef..7fbe5f0681b956b582a03f12c5b409e9f7f6de14 100644 --- a/drivers/gpio/gpio-tegra186.c +++ b/drivers/gpio/gpio-tegra186.c @@ -337,9 +337,12 @@ static int tegra186_gpio_of_xlate(struct gpio_chip *chip, return offset + pin; } +#define to_tegra_gpio(x) container_of((x), struct tegra_gpio, gpio) + static void tegra186_irq_ack(struct irq_data *data) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; base = tegra186_gpio_get_base(gpio, data->hwirq); @@ -351,7 +354,8 @@ static void tegra186_irq_ack(struct irq_data *data) static void tegra186_irq_mask(struct irq_data *data) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; u32 value; @@ -366,7 +370,8 @@ static void tegra186_irq_mask(struct irq_data *data) static void tegra186_irq_unmask(struct irq_data *data) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; u32 value; @@ -381,7 +386,8 @@ static void tegra186_irq_unmask(struct irq_data *data) static int tegra186_irq_set_type(struct irq_data *data, unsigned int type) { - struct tegra_gpio *gpio = irq_data_get_irq_chip_data(data); + struct gpio_chip *gc = irq_data_get_irq_chip_data(data); + struct tegra_gpio *gpio = to_tegra_gpio(gc); void __iomem *base; u32 value; diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c index d885032cf814d89e184f7a10ab51fa09b2efc033..d918d2df4de2cbf536a67483b496f5079640ebfd 100644 --- a/drivers/gpio/gpio-ts4900.c +++ b/drivers/gpio/gpio-ts4900.c @@ -1,7 +1,7 @@ /* * Digital I/O driver for Technologic Systems I2C FPGA Core * - * Copyright (C) 2015 Technologic Systems + * Copyright (C) 2015, 2018 Technologic Systems * Copyright (C) 2016 Savoir-Faire Linux * * This program is free software; you can redistribute it and/or @@ -55,19 +55,33 @@ static int ts4900_gpio_direction_input(struct gpio_chip *chip, { struct ts4900_gpio_priv *priv = gpiochip_get_data(chip); - /* - * This will clear the output enable bit, the other bits are - * dontcare when this is cleared + /* Only clear the OE bit here, requires a RMW. Prevents potential issue + * with OE and data getting to the physical pin at different times. */ - return regmap_write(priv->regmap, offset, 0); + return regmap_update_bits(priv->regmap, offset, TS4900_GPIO_OE, 0); } static int ts4900_gpio_direction_output(struct gpio_chip *chip, unsigned int offset, int value) { struct ts4900_gpio_priv *priv = gpiochip_get_data(chip); + unsigned int reg; int ret; + /* If changing from an input to an output, we need to first set the + * proper data bit to what is requested and then set OE bit. This + * prevents a glitch that can occur on the IO line + */ + regmap_read(priv->regmap, offset, ®); + if (!(reg & TS4900_GPIO_OE)) { + if (value) + reg = TS4900_GPIO_OUT; + else + reg &= ~TS4900_GPIO_OUT; + + regmap_write(priv->regmap, offset, reg); + } + if (value) ret = regmap_write(priv->regmap, offset, TS4900_GPIO_OE | TS4900_GPIO_OUT); diff --git a/drivers/gpio/gpio-xgs-iproc.c b/drivers/gpio/gpio-xgs-iproc.c index ad5489a65d542a04311a4739552b11c1d50c9b53..dd40277b9d06dd046043bec9af6c8c9285f97494 100644 --- a/drivers/gpio/gpio-xgs-iproc.c +++ b/drivers/gpio/gpio-xgs-iproc.c @@ -224,7 +224,7 @@ static int iproc_gpio_probe(struct platform_device *pdev) } chip->gc.label = dev_name(dev); - if (of_property_read_u32(dn, "ngpios", &num_gpios)) + if (!of_property_read_u32(dn, "ngpios", &num_gpios)) chip->gc.ngpio = num_gpios; irq = platform_get_irq(pdev, 0); diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 6f11714ce02396abcc0268019336d542080b2a2a..55e4f402ec8b6116d4cd54ae07456f6872685350 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -969,10 +969,17 @@ int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int ind irq_flags = acpi_dev_get_irq_type(info.triggering, info.polarity); - /* Set type if specified and different than the current one */ - if (irq_flags != IRQ_TYPE_NONE && - irq_flags != irq_get_trigger_type(irq)) - irq_set_irq_type(irq, irq_flags); + /* + * If the IRQ is not already in use then set type + * if specified and different than the current one. + */ + if (can_request_irq(irq, irq_flags)) { + if (irq_flags != IRQ_TYPE_NONE && + irq_flags != irq_get_trigger_type(irq)) + irq_set_irq_type(irq, irq_flags); + } else { + dev_dbg(&adev->dev, "IRQ %d already in use\n", irq); + } return irq; } diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 200f151a061f4da24ce629cb09bb19fafba1ce91..0a8ad0d2742a961a68d5d0f5cc605b460141a824 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -3216,6 +3216,16 @@ int gpiod_to_irq(const struct gpio_desc *desc) return retirq; } +#ifdef CONFIG_GPIOLIB_IRQCHIP + if (gc->irq.chip) { + /* + * Avoid race condition with other code, which tries to lookup + * an IRQ before the irqchip has been properly registered, + * i.e. while gpiochip is still being brought up. + */ + return -EPROBE_DEFER; + } +#endif return -ENXIO; } EXPORT_SYMBOL_GPL(gpiod_to_irq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0544460653b95e0017cf3632a4fb9869e189923b..fb6230c62daadb5843da9e39cf62afd464d36c5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -47,12 +47,8 @@ int amdgpu_amdkfd_init(void) amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; amdgpu_amdkfd_total_mem_size *= si.mem_unit; -#ifdef CONFIG_HSA_AMD ret = kgd2kfd_init(); amdgpu_amdkfd_gpuvm_init_mem_limits(); -#else - ret = -ENOENT; -#endif kfd_initialized = !ret; return ret; @@ -194,6 +190,16 @@ void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm) kgd2kfd_suspend(adev->kfd.dev, run_pm); } +int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev) +{ + int r = 0; + + if (adev->kfd.dev) + r = kgd2kfd_resume_iommu(adev->kfd.dev); + + return r; +} + int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm) { int r = 0; @@ -695,86 +701,3 @@ bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) return adev->have_atomics_support; } - -#ifndef CONFIG_HSA_AMD -bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) -{ - return false; -} - -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) -{ -} - -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) -{ - return 0; -} - -void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm) -{ -} - -struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) -{ - return NULL; -} - -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) -{ - return 0; -} - -struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, - unsigned int asic_type, bool vf) -{ - return NULL; -} - -bool kgd2kfd_device_init(struct kfd_dev *kfd, - struct drm_device *ddev, - const struct kgd2kfd_shared_resources *gpu_resources) -{ - return false; -} - -void kgd2kfd_device_exit(struct kfd_dev *kfd) -{ -} - -void kgd2kfd_exit(void) -{ -} - -void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) -{ -} - -int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) -{ - return 0; -} - -int kgd2kfd_pre_reset(struct kfd_dev *kfd) -{ - return 0; -} - -int kgd2kfd_post_reset(struct kfd_dev *kfd) -{ - return 0; -} - -void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) -{ -} - -void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) -{ -} - -void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) -{ -} -#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index ea391ca7f2f1cfb5ded5015b8694b7b6edbc9a5a..32e385f287cb63e5dc1c0c8d3f012d391bac7619 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -94,11 +94,6 @@ enum kgd_engine_type { KGD_ENGINE_MAX }; -struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm); -bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); -struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); struct amdkfd_process_info { /* List head of all VMs that belong to a KFD process */ @@ -126,14 +121,13 @@ int amdgpu_amdkfd_init(void); void amdgpu_amdkfd_fini(void); void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool run_pm); +int amdgpu_amdkfd_resume_iommu(struct amdgpu_device *adev); int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool run_pm); void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry); void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); - -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); @@ -153,6 +147,38 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, int queue_bit); +struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, + struct mm_struct *mm); +#if IS_ENABLED(CONFIG_HSA_AMD) +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); +#else +static inline +bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) +{ + return false; +} + +static inline +struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) +{ + return NULL; +} + +static inline +int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) +{ + return 0; +} + +static inline +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) +{ + return 0; +} +#endif /* Shared API */ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, @@ -215,8 +241,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, void **vm, void **process_info, struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, - struct amdgpu_vm *vm); void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); @@ -236,23 +260,43 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, struct kgd_mem *mem, void **kptr, uint64_t *size); int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); - int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); - int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct dma_buf *dmabuf, uint64_t va, void *vm, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset); - -void amdgpu_amdkfd_gpuvm_init_mem_limits(void); -void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); - int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, struct tile_config *config); +#if IS_ENABLED(CONFIG_HSA_AMD) +void amdgpu_amdkfd_gpuvm_init_mem_limits(void); +void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, + struct amdgpu_vm *vm); +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); +#else +static inline +void amdgpu_amdkfd_gpuvm_init_mem_limits(void) +{ +} +static inline +void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ +} + +static inline +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) +{ +} +#endif /* KGD2KFD callbacks */ +int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_resume_mm(struct mm_struct *mm); +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, + struct dma_fence *fence); +#if IS_ENABLED(CONFIG_HSA_AMD) int kgd2kfd_init(void); void kgd2kfd_exit(void); struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, @@ -262,15 +306,78 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, const struct kgd2kfd_shared_resources *gpu_resources); void kgd2kfd_device_exit(struct kfd_dev *kfd); void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); +int kgd2kfd_resume_iommu(struct kfd_dev *kfd); int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm); int kgd2kfd_pre_reset(struct kfd_dev *kfd); int kgd2kfd_post_reset(struct kfd_dev *kfd); void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); -int kgd2kfd_quiesce_mm(struct mm_struct *mm); -int kgd2kfd_resume_mm(struct mm_struct *mm); -int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, - struct dma_fence *fence); void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd); void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask); +#else +static inline int kgd2kfd_init(void) +{ + return -ENOENT; +} +static inline void kgd2kfd_exit(void) +{ +} + +static inline +struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, + unsigned int asic_type, bool vf) +{ + return NULL; +} + +static inline +bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, + const struct kgd2kfd_shared_resources *gpu_resources) +{ + return false; +} + +static inline void kgd2kfd_device_exit(struct kfd_dev *kfd) +{ +} + +static inline void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm) +{ +} + +static int __maybe_unused kgd2kfd_resume_iommu(struct kfd_dev *kfd) +{ + return 0; +} + +static inline int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) +{ + return 0; +} + +static inline int kgd2kfd_pre_reset(struct kfd_dev *kfd) +{ + return 0; +} + +static inline int kgd2kfd_post_reset(struct kfd_dev *kfd) +{ + return 0; +} + +static inline +void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) +{ +} + +static inline +void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) +{ +} + +static inline +void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint32_t throttle_bitmask) +{ +} +#endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index 15c45b2a398350eed6b9c6546fab6e1e951ed282..714178f1b6c6edb83464fbd2fcfbfc5d16ec3eab 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -61,7 +61,7 @@ static void amdgpu_bo_list_free(struct kref *ref) int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, struct drm_amdgpu_bo_list_entry *info, - unsigned num_entries, struct amdgpu_bo_list **result) + size_t num_entries, struct amdgpu_bo_list **result) { unsigned last_entry = 0, first_userptr = num_entries; struct amdgpu_bo_list_entry *array; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index a130e766cbdbe24de4b4b21cf678bd7c5acaa272..529d52a204cf4a5bc8a92d261b02d92ae2982630 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -60,7 +60,7 @@ int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in, int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, struct drm_amdgpu_bo_list_entry *info, - unsigned num_entries, + size_t num_entries, struct amdgpu_bo_list **list); static inline struct amdgpu_bo_list_entry * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index b9c11c2b2885a32ca6633450d43eebf66edb4148..df1f9b88a53f9fb04d5c20c5ad703a99a1208e45 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -387,6 +387,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) native_mode->vdisplay != 0 && native_mode->clock != 0) { mode = drm_mode_duplicate(dev, native_mode); + if (!mode) + return NULL; + mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; drm_mode_set_name(mode); @@ -401,6 +404,9 @@ amdgpu_connector_lcd_native_mode(struct drm_encoder *encoder) * simpler. */ mode = drm_cvt_mode(dev, native_mode->hdisplay, native_mode->vdisplay, 60, true, false, false); + if (!mode) + return NULL; + mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; DRM_DEBUG_KMS("Adding cvt approximation of native panel mode %s\n", mode->name); } @@ -827,6 +833,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector) amdgpu_connector_get_edid(connector); ret = amdgpu_connector_ddc_get_modes(connector); + amdgpu_get_native_mode(connector); return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 97723f2b5ece73fba330f79befbbe56c67975f5b..f262c4e7a48a24809bacde91e95aeaf0bcdc4df4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2913,6 +2913,10 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r; + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + return r; + r = amdgpu_device_ip_resume_phase1(adev); if (r) return r; @@ -4296,6 +4300,10 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, if (!r) { dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + r = amdgpu_amdkfd_resume_iommu(tmp_adev); + if (r) + goto out; + r = amdgpu_device_ip_resume_phase1(tmp_adev); if (r) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index bfb95143ba5e8fc335a6f84901c814aa2241edd3..ec6bfa316daa36a44c723755e666618c9d17f5da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -372,10 +372,15 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, return -EINVAL; } +union gc_info { + struct gc_info_v1_0 v1; + struct gc_info_v2_0 v2; +}; + int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) { struct binary_header *bhdr; - struct gc_info_v1_0 *gc_info; + union gc_info *gc_info; if (!adev->mman.discovery_bin) { DRM_ERROR("ip discovery uninitialized\n"); @@ -383,27 +388,54 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) } bhdr = (struct binary_header *)adev->mman.discovery_bin; - gc_info = (struct gc_info_v1_0 *)(adev->mman.discovery_bin + + gc_info = (union gc_info *)(adev->mman.discovery_bin + le16_to_cpu(bhdr->table_list[GC].offset)); - - adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se); - adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + - le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); - adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); - adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); - adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); - adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); - adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds); - adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth); - adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth); - adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer); - adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size); - adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd); - adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu); - adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size); - adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) / - le32_to_cpu(gc_info->gc_num_sa_per_se); - adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc); - + switch (gc_info->v1.header.version_major) { + case 1: + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v1.gc_num_se); + adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->v1.gc_num_wgp0_per_sa) + + le32_to_cpu(gc_info->v1.gc_num_wgp1_per_sa)); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v1.gc_num_sa_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v1.gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v1.gc_num_gl2c); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v1.gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v1.gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v1.gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v1.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v1.gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v1.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v1.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v1.gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v1.gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v1.gc_num_sc_per_se) / + le32_to_cpu(gc_info->v1.gc_num_sa_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v1.gc_num_packer_per_sc); + break; + case 2: + adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->v2.gc_num_se); + adev->gfx.config.max_cu_per_sh = le32_to_cpu(gc_info->v2.gc_num_cu_per_sh); + adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->v2.gc_num_sh_per_se); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->v2.gc_num_rb_per_se); + adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->v2.gc_num_tccs); + adev->gfx.config.max_gprs = le32_to_cpu(gc_info->v2.gc_num_gprs); + adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->v2.gc_num_max_gs_thds); + adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->v2.gc_gs_table_depth); + adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->v2.gc_gsprim_buff_depth); + adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->v2.gc_double_offchip_lds_buffer); + adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->v2.gc_wave_size); + adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->v2.gc_max_waves_per_simd); + adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->v2.gc_max_scratch_slots_per_cu); + adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->v2.gc_lds_size); + adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->v2.gc_num_sc_per_se) / + le32_to_cpu(gc_info->v2.gc_num_sh_per_se); + adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->v2.gc_num_packer_per_sc); + break; + default: + dev_err(adev->dev, + "Unhandled GC info table %d.%d\n", + gc_info->v1.header.version_major, + gc_info->v1.header.version_minor); + return -EINVAL; + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 2f70fdd6104f20df7192687ed1fbe28028c5f4da..582055136cdbfc9c7765fe696fbcc444437c7e0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -267,7 +267,6 @@ int amdgpu_irq_init(struct amdgpu_device *adev) if (!amdgpu_device_has_dc_support(adev)) { if (!adev->enable_virtual_display) /* Disable vblank IRQs aggressively for power-saving */ - /* XXX: can this be enabled for DC? */ adev_to_drm(adev)->vblank_disable_immediate = true; r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index efda38349a0322f108c2eb7d1abd2a4fa85d233a..917b94002f4b70170cc04616670fd55d7e9b9817 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -766,9 +766,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.high_va_offset = AMDGPU_GMC_HOLE_END; dev_info.high_va_max = AMDGPU_GMC_HOLE_END | vm_size; } - dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); + dev_info.virtual_address_alignment = max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; - dev_info.gart_page_size = AMDGPU_GPU_PAGE_SIZE; + dev_info.gart_page_size = max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); dev_info.cu_active_number = adev->gfx.cu_info.number; dev_info.cu_ao_mask = adev->gfx.cu_info.ao_cu_mask; dev_info.ce_ram_size = adev->gfx.ce_ram_size; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 46292a1a607181a0a7fff3e77d7710efce2f417f..ed4c3d5b2fea68d5974c37f73480294f18865c58 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -2120,7 +2120,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, unsigned i; int r; - if (direct_submit && !ring->sched.ready) { + if (!direct_submit && !ring->sched.ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b47829ff30af79237cad7cdf2b463d6b583b0e5b..635601d8b131052c50ed1f2bd7d5463c0348be18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -715,11 +715,17 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, * Check if all VM PDs/PTs are ready for updates * * Returns: - * True if eviction list is empty. + * True if VM is not evicting. */ bool amdgpu_vm_ready(struct amdgpu_vm *vm) { - return list_empty(&vm->evicted); + bool ret; + + amdgpu_vm_eviction_lock(vm); + ret = !vm->evicting; + amdgpu_vm_eviction_unlock(vm); + + return ret && list_empty(&vm->evicted); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 0526dec1d736eff8a2537245d3879160f2615dc3..042c85fc528bbcffbf05baa59ecddb53f6b36f77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -358,6 +358,7 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) "%s", "xgmi_hive_info"); if (ret) { dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n"); + kobject_put(&hive->kobj); kfree(hive); hive = NULL; goto pro_end; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ac3a88197b2fc4473f065afec77633e0ab48e5bd..b19f7bd37781f5fea472c83d3878dd0e71f4c4d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -137,6 +137,11 @@ MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin"); #define mmTCP_CHAN_STEER_5_ARCT 0x0b0c #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX 0 +#define mmGOLDEN_TSC_COUNT_UPPER_Renoir 0x0025 +#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX 1 +#define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 +#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 + enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -2997,8 +3002,8 @@ static void gfx_v9_0_init_pg(struct amdgpu_device *adev) AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GDS | AMD_PG_SUPPORT_RLC_SMU_HS)) { - WREG32(mmRLC_JUMP_TABLE_RESTORE, - adev->gfx.rlc.cp_table_gpu_addr >> 8); + WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE, + adev->gfx.rlc.cp_table_gpu_addr >> 8); gfx_v9_0_init_gfx_power_gating(adev); } } @@ -3542,7 +3547,7 @@ static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring) /* set static priority for a queue/ring */ gfx_v9_0_mqd_set_priority(ring, mqd); - mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM); + mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM); /* map_queues packet doesn't need activate the queue, * so only kiq need set this field. @@ -4147,19 +4152,38 @@ failed_kiq_read: static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) { - uint64_t clock; + uint64_t clock, clock_lo, clock_hi, hi_check; - amdgpu_gfx_off_ctrl(adev, false); - mutex_lock(&adev->gfx.gpu_clock_mutex); - if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { - clock = gfx_v9_0_kiq_read_clock(adev); - } else { - WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); - clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | - ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + switch (adev->asic_type) { + case CHIP_RENOIR: + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); + hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir); + /* The SMUIO TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { + clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir); + clock_hi = hi_check; + } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; + default: + amdgpu_gfx_off_ctrl(adev, false); + mutex_lock(&adev->gfx.gpu_clock_mutex); + if (adev->asic_type == CHIP_VEGA10 && amdgpu_sriov_runtime(adev)) { + clock = gfx_v9_0_kiq_read_clock(adev); + } else { + WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1); + clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) | + ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL); + } + mutex_unlock(&adev->gfx.gpu_clock_mutex); + amdgpu_gfx_off_ctrl(adev, true); + break; } - mutex_unlock(&adev->gfx.gpu_clock_mutex); - amdgpu_gfx_off_ctrl(adev, true); return clock; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index dbc8b76b9b78e650524775b95b3b1f06aae69ba6..150fa5258fb6fc0d0e783bfc4cb2c0c2b922bd1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1018,6 +1018,8 @@ static int gmc_v10_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gmc_v10_0_gart_disable(adev); + if (amdgpu_sriov_vf(adev)) { /* full access mode, so don't touch any GMC register */ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); @@ -1026,7 +1028,6 @@ static int gmc_v10_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); - gmc_v10_0_gart_disable(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 95a9117e95640e5a31cb569d678cff8f1716e4a6..861d0cc45fc1032057beae3086160f37e3633184 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -842,12 +842,12 @@ static int gmc_v6_0_sw_init(void *handle) adev->gmc.mc_mask = 0xffffffffffULL; - r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(44)); + r = dma_set_mask_and_coherent(adev->dev, DMA_BIT_MASK(40)); if (r) { dev_warn(adev->dev, "No suitable DMA available.\n"); return r; } - adev->need_swiotlb = drm_need_swiotlb(44); + adev->need_swiotlb = drm_need_swiotlb(40); r = gmc_v6_0_init_microcode(adev); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 9ab65ca7df777f02c519b3c2d3b3a9cee45eac3f..873bc33912e23e74fee63e8404537dfb942a8dd3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -524,10 +524,10 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev) static int gmc_v8_0_mc_init(struct amdgpu_device *adev) { int r; + u32 tmp; adev->gmc.vram_width = amdgpu_atombios_get_vram_width(adev); if (!adev->gmc.vram_width) { - u32 tmp; int chansize, numchan; /* Get VRAM informations */ @@ -571,8 +571,15 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) adev->gmc.vram_width = numchan * chansize; } /* size in MB on si */ - adev->gmc.mc_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; - adev->gmc.real_vram_size = RREG32(mmCONFIG_MEMSIZE) * 1024ULL * 1024ULL; + tmp = RREG32(mmCONFIG_MEMSIZE); + /* some boards may have garbage in the upper 16 bits */ + if (tmp & 0xffff0000) { + DRM_INFO("Probable bad vram size: 0x%08x\n", tmp); + if (tmp & 0xffff) + tmp &= 0xffff; + } + adev->gmc.mc_vram_size = tmp * 1024ULL * 1024ULL; + adev->gmc.real_vram_size = adev->gmc.mc_vram_size; if (!(adev->flags & AMD_IS_APU)) { r = amdgpu_device_resize_fb_bar(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 3ebbddb63705cfa457dd03327d48b8892a6bb007..3a864041968f64bff614861c03bb0b860a892782 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1677,6 +1677,8 @@ static int gmc_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + gmc_v9_0_gart_disable(adev); + if (amdgpu_sriov_vf(adev)) { /* full access mode, so don't touch any GMC register */ DRM_DEBUG("For SRIOV client, shouldn't do anything.\n"); @@ -1685,7 +1687,6 @@ static int gmc_v9_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gmc.ecc_irq, 0); amdgpu_irq_put(adev, &adev->gmc.vm_fault, 0); - gmc_v9_0_gart_disable(adev); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 37226cbbbd11a4c3d79dbe41ac9b581eb6f049e0..7212b9900e0abafdf8386235789ec6f25167384d 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1194,8 +1194,11 @@ static int soc15_common_early_init(void *handle) AMD_CG_SUPPORT_SDMA_MGCG | AMD_CG_SUPPORT_SDMA_LS; + /* + * MMHUB PG needs to be disabled for Picasso for + * stability reasons. + */ adev->pg_flags = AMD_PG_SUPPORT_SDMA | - AMD_PG_SUPPORT_MMHUB | AMD_PG_SUPPORT_VCN; } else { adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index e8737fa438f06c7284219d2a85cb19d4f83c1eb0..7115f6dbb1372753b0565236f6939f126cabba7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -254,6 +254,13 @@ static int vcn_v1_0_suspend(void *handle) { int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool idle_work_unexecuted; + + idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work); + if (idle_work_unexecuted) { + if (adev->pm.dpm_enabled) + amdgpu_dpm_enable_uvd(adev, false); + } r = vcn_v1_0_hw_fini(adev); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index f493b5c3d382b85df17f0e6d22519b7cfede69fd..79bcc78f77045176c77d6e6ba1c5a3d2f2b1cfd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -22,6 +22,7 @@ */ #include +#include #include "amdgpu.h" #include "amdgpu_vcn.h" @@ -192,11 +193,14 @@ static int vcn_v2_0_sw_init(void *handle) */ static int vcn_v2_0_sw_fini(void *handle) { - int r; + int r, idx; struct amdgpu_device *adev = (struct amdgpu_device *)handle; volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared_cpu_addr; - fw_shared->present_flag_0 = 0; + if (drm_dev_enter(&adev->ddev, &idx)) { + fw_shared->present_flag_0 = 0; + drm_dev_exit(idx); + } amdgpu_virt_free_mm_table(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index ce64d4016f903827a5cfa5b35fdb64773057e027..381839d005db9b7f2885f4610f3497d718a90673 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -22,6 +22,7 @@ */ #include +#include #include "amdgpu.h" #include "amdgpu_vcn.h" @@ -233,17 +234,21 @@ static int vcn_v2_5_sw_init(void *handle) */ static int vcn_v2_5_sw_fini(void *handle) { - int i, r; + int i, r, idx; struct amdgpu_device *adev = (struct amdgpu_device *)handle; volatile struct amdgpu_fw_shared *fw_shared; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; - fw_shared->present_flag_0 = 0; + if (drm_dev_enter(&adev->ddev, &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + if (adev->vcn.harvest_config & (1 << i)) + continue; + fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr; + fw_shared->present_flag_0 = 0; + } + drm_dev_exit(idx); } + if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 903170e59342c45f6a73ef36aee3393cb852d203..84313135c2eae0f6ce5134a2ce2e6c387f40f2a7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -744,12 +744,16 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, kfd_double_confirm_iommu_support(kfd); if (kfd_iommu_device_init(kfd)) { + kfd->use_iommu_v2 = false; dev_err(kfd_device, "Error initializing iommuv2\n"); goto device_iommu_error; } kfd_cwsr_init(kfd); + if(kgd2kfd_resume_iommu(kfd)) + goto device_iommu_error; + if (kfd_resume(kfd)) goto kfd_resume_error; @@ -895,17 +899,21 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm) return ret; } -static int kfd_resume(struct kfd_dev *kfd) +int kgd2kfd_resume_iommu(struct kfd_dev *kfd) { int err = 0; err = kfd_iommu_resume(kfd); - if (err) { + if (err) dev_err(kfd_device, "Failed to resume IOMMU for device %x:%x\n", kfd->pdev->vendor, kfd->pdev->device); - return err; - } + return err; +} + +static int kfd_resume(struct kfd_dev *kfd) +{ + int err = 0; err = kfd->dqm->ops.start(kfd->dqm); if (err) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 352a32dc609b2026a039caef5a4e40688f9291fb..2645ebc63a14d8a64152159cbba3f11d7e1fb391 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1207,6 +1207,11 @@ static int stop_cpsch(struct device_queue_manager *dqm) bool hanging; dqm_lock(dqm); + if (!dqm->sched_running) { + dqm_unlock(dqm); + return 0; + } + if (!dqm->is_hws_hang) unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); hanging = dqm->is_hws_hang || dqm->is_resetting; diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 3c410d236c4919f608ce0782b4e22a3c441d2373..f3274eb6b3418693326999dc4d9ecda0621d430b 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -33,6 +33,8 @@ config DRM_AMD_DC_HDCP config DRM_AMD_DC_SI bool "AMD DC support for Southern Islands ASICs" + depends on DRM_AMDGPU_SI + depends on DRM_AMD_DC default n help Choose this option to enable new AMD DC support for SI asics diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index ce21a21ddb23592d72935105c034e8072d0c2963..6c8f141103da47b0b581067c6a2e06363fbc05c0 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -951,6 +951,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) init_data.asic_id.pci_revision_id = adev->pdev->revision; init_data.asic_id.hw_internal_rev = adev->external_rev_id; + init_data.asic_id.chip_id = adev->pdev->device; init_data.asic_id.vram_width = adev->gmc.vram_width; /* TODO: initialize init_data.asic_id.vram_type here!!!! */ @@ -1068,6 +1069,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) adev_to_drm(adev)->mode_config.cursor_width = adev->dm.dc->caps.max_cursor_size; adev_to_drm(adev)->mode_config.cursor_height = adev->dm.dc->caps.max_cursor_size; + /* Disable vblank IRQs aggressively for power-saving */ + adev_to_drm(adev)->vblank_disable_immediate = true; + if (drm_vblank_init(adev_to_drm(adev), adev->dm.display_indexes_num)) { DRM_ERROR( "amdgpu: failed to initialize sw for display support.\n"); @@ -1962,8 +1966,8 @@ static int dm_resume(void *handle) for (i = 0; i < dc_state->stream_count; i++) { dc_state->streams[i]->mode_changed = true; - for (j = 0; j < dc_state->stream_status->plane_count; j++) { - dc_state->stream_status->plane_states[j]->update_flags.raw + for (j = 0; j < dc_state->stream_status[i].plane_count; j++) { + dc_state->stream_status[i].plane_states[j]->update_flags.raw = 0xffffffff; } } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c index e00a30e7d2529417a7f4bffe68f6c4b10a766475..04c20ce6e94dfe559c1e022afb44cc4f5b7b8aef 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c @@ -226,6 +226,14 @@ int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name) ret = -EINVAL; goto cleanup; } + + if ((aconn->base.connector_type != DRM_MODE_CONNECTOR_DisplayPort) && + (aconn->base.connector_type != DRM_MODE_CONNECTOR_eDP)) { + DRM_DEBUG_DRIVER("No DP connector available for CRC source\n"); + ret = -EINVAL; + goto cleanup; + } + } if (amdgpu_dm_crtc_configure_crc_source(crtc, crtc_state, source)) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c index fbb65c95464b318817181290c7fd9396ff0bc41a..e43f82bcb231abec2905b575aba6a3b96f516687 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c @@ -264,7 +264,7 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf, if (!wr_buf) return -ENOSPC; - if (parse_write_buffer_into_params(wr_buf, size, + if (parse_write_buffer_into_params(wr_buf, wr_buf_size, (long *)param, buf, max_param_num, ¶m_nums)) { diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 955a055bd9800f738a466f7840e9c8226034ba19..d617e98afb76d715610ad5aed5db556891c6f429 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -36,6 +36,8 @@ #include "dm_helpers.h" #include "dc_link_ddc.h" +#include "ddc_service_types.h" +#include "dpcd_defs.h" #include "i2caux_interface.h" #if defined(CONFIG_DEBUG_FS) @@ -152,6 +154,16 @@ static const struct drm_connector_funcs dm_dp_mst_connector_funcs = { }; #if defined(CONFIG_DRM_AMD_DC_DCN) +static bool needs_dsc_aux_workaround(struct dc_link *link) +{ + if (link->dpcd_caps.branch_dev_id == DP_BRANCH_DEVICE_ID_90CC24 && + (link->dpcd_caps.dpcd_rev.raw == DPCD_REV_14 || link->dpcd_caps.dpcd_rev.raw == DPCD_REV_12) && + link->dpcd_caps.sink_count.bits.SINK_COUNT >= 2) + return true; + + return false; +} + static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnector) { struct dc_sink *dc_sink = aconnector->dc_sink; @@ -159,7 +171,7 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto u8 dsc_caps[16] = { 0 }; aconnector->dsc_aux = drm_dp_mst_dsc_aux_for_port(port); -#if defined(CONFIG_HP_HOOK_WORKAROUND) + /* * drm_dp_mst_dsc_aux_for_port() will return NULL for certain configs * because it only check the dsc/fec caps of the "port variable" and not the dock @@ -169,10 +181,10 @@ static bool validate_dsc_caps_on_connector(struct amdgpu_dm_connector *aconnecto * Workaround: explicitly check the use case above and use the mst dock's aux as dsc_aux * */ - - if (!aconnector->dsc_aux && !port->parent->port_parent) + if (!aconnector->dsc_aux && !port->parent->port_parent && + needs_dsc_aux_workaround(aconnector->dc_link)) aconnector->dsc_aux = &aconnector->mst_port->dm_dp_aux.aux; -#endif + if (!aconnector->dsc_aux) return false; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 284ed1c8a35acef795887ec7b2abc42fc9cf2751..93f5229c303e7bc484244f71bfd03edd5c6a51af 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2436,7 +2436,8 @@ static void commit_planes_for_stream(struct dc *dc, } if ((update_type != UPDATE_TYPE_FAST) && stream->update_flags.bits.dsc_changed) - if (top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { + if (top_pipe_to_program && + top_pipe_to_program->stream_res.tg->funcs->lock_doublebuffer_enable) { if (should_use_dmub_lock(stream->link)) { union dmub_hw_lock_flags hw_locks = { 0 }; struct dmub_hw_lock_inst_flags inst_flags = { 0 }; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index a7f8caf1086b95a62f15aa39b08c2ca807eb306c..0e359a299f9ecb68198cf1965918f7713da41401 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -3587,6 +3587,26 @@ static bool retrieve_link_cap(struct dc_link *link) dp_hw_fw_revision.ieee_fw_rev, sizeof(dp_hw_fw_revision.ieee_fw_rev)); + /* Quirk for Apple MBP 2018 15" Retina panels: wrong DP_MAX_LINK_RATE */ + { + uint8_t str_mbp_2018[] = { 101, 68, 21, 103, 98, 97 }; + uint8_t fwrev_mbp_2018[] = { 7, 4 }; + uint8_t fwrev_mbp_2018_vega[] = { 8, 4 }; + + /* We also check for the firmware revision as 16,1 models have an + * identical device id and are incorrectly quirked otherwise. + */ + if ((link->dpcd_caps.sink_dev_id == 0x0010fa) && + !memcmp(link->dpcd_caps.sink_dev_id_str, str_mbp_2018, + sizeof(str_mbp_2018)) && + (!memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018, + sizeof(fwrev_mbp_2018)) || + !memcmp(link->dpcd_caps.sink_fw_revision, fwrev_mbp_2018_vega, + sizeof(fwrev_mbp_2018_vega)))) { + link->reported_link_cap.link_rate = LINK_RATE_RBR2; + } + } + memset(&link->dpcd_caps.dsc_caps, '\0', sizeof(link->dpcd_caps.dsc_caps)); memset(&link->dpcd_caps.fec_cap, '\0', sizeof(link->dpcd_caps.fec_cap)); diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index 59d48cf819ea8e1a51ac024b979996566cd3c2c3..5f4cdb05c4db90ebe0efa7076095beab2bc2fca9 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1698,6 +1698,10 @@ bool dc_is_stream_unchanged( if (old_stream->ignore_msa_timing_param != stream->ignore_msa_timing_param) return false; + // Only Have Audio left to check whether it is same or not. This is a corner case for Tiled sinks + if (old_stream->audio_info.mode_count != stream->audio_info.mode_count) + return false; + return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c index b24c8ae8b1ece87b4eaa87add1cd93b8cf098415..7e228c181b29825ff593ccc8f9bddc3f4f484ee9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_init.c @@ -77,6 +77,7 @@ static const struct hw_sequencer_funcs dcn10_funcs = { .get_clock = dcn10_get_clock, .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, + .power_down = dce110_power_down, .set_backlight_level = dce110_set_backlight_level, .set_abm_immediate_disable = dce110_set_abm_immediate_disable, .set_pipe = dce110_set_pipe, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index 5dbc290bcbe868240bed4c35f2870ce065695624..53ac826935328d35e69cfaaf3cca8b24d1bddc81 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -1852,7 +1852,9 @@ static void swizzle_to_dml_params( case DC_SW_VAR_D_X: *sw_mode = dm_sw_var_d_x; break; - + case DC_SW_VAR_R_X: + *sw_mode = dm_sw_var_r_x; + break; default: ASSERT(0); /* Not supported */ break; @@ -3754,16 +3756,22 @@ static bool init_soc_bounding_box(struct dc *dc, clock_limits_available = (status == PP_SMU_RESULT_OK); } - if (clock_limits_available && uclk_states_available && num_states) + if (clock_limits_available && uclk_states_available && num_states) { + DC_FP_START(); dcn20_update_bounding_box(dc, loaded_bb, &max_clocks, uclk_states, num_states); - else if (clock_limits_available) + DC_FP_END(); + } else if (clock_limits_available) { + DC_FP_START(); dcn20_cap_soc_clocks(loaded_bb, max_clocks); + DC_FP_END(); + } } loaded_ip->max_num_otg = pool->base.res_cap->num_timing_generator; loaded_ip->max_num_dpp = pool->base.pipe_count; + DC_FP_START(); dcn20_patch_bounding_box(dc, loaded_bb); - + DC_FP_END(); return true; } @@ -3783,8 +3791,6 @@ static bool dcn20_resource_construct( enum dml_project dml_project_version = get_dml_project_version(ctx->asic_id.hw_internal_rev); - DC_FP_START(); - ctx->dc_bios->regs = &bios_regs; pool->base.funcs = &dcn20_res_pool_funcs; @@ -4128,12 +4134,10 @@ static bool dcn20_resource_construct( pool->base.oem_device = NULL; } - DC_FP_END(); return true; create_fail: - DC_FP_END(); dcn20_resource_destruct(pool); return false; diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h index 64f9c735f74d8a80c5b02bcf6ad84c0264a52d88..e73cee275729c3033430a61eab1774ff3d22ee42 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h +++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h @@ -80,11 +80,11 @@ enum dm_swizzle_mode { dm_sw_SPARE_13 = 24, dm_sw_64kb_s_x = 25, dm_sw_64kb_d_x = 26, - dm_sw_SPARE_14 = 27, + dm_sw_64kb_r_x = 27, dm_sw_SPARE_15 = 28, dm_sw_var_s_x = 29, dm_sw_var_d_x = 30, - dm_sw_64kb_r_x, + dm_sw_var_r_x = 31, dm_sw_gfx7_2d_thin_l_vp, dm_sw_gfx7_2d_thin_gl, }; diff --git a/drivers/gpu/drm/amd/include/discovery.h b/drivers/gpu/drm/amd/include/discovery.h index 7ec4331e67f26e5860268637c2e5f77b28a9fe25..a486769b66c6ab11b3cc8861602600d1163f7de3 100644 --- a/drivers/gpu/drm/amd/include/discovery.h +++ b/drivers/gpu/drm/amd/include/discovery.h @@ -143,6 +143,55 @@ struct gc_info_v1_0 { uint32_t gc_num_gl2a; }; +struct gc_info_v1_1 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_wgp0_per_sa; + uint32_t gc_num_wgp1_per_sa; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_gl2c; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_sa_per_se; + uint32_t gc_num_packer_per_sc; + uint32_t gc_num_gl2a; + uint32_t gc_num_tcp_per_sa; + uint32_t gc_num_sdp_interface; + uint32_t gc_num_tcps; +}; + +struct gc_info_v2_0 { + struct gpu_info_header header; + + uint32_t gc_num_se; + uint32_t gc_num_cu_per_sh; + uint32_t gc_num_sh_per_se; + uint32_t gc_num_rb_per_se; + uint32_t gc_num_tccs; + uint32_t gc_num_gprs; + uint32_t gc_num_max_gs_thds; + uint32_t gc_gs_table_depth; + uint32_t gc_gsprim_buff_depth; + uint32_t gc_parameter_cache_depth; + uint32_t gc_double_offchip_lds_buffer; + uint32_t gc_wave_size; + uint32_t gc_max_waves_per_simd; + uint32_t gc_max_scratch_slots_per_cu; + uint32_t gc_lds_size; + uint32_t gc_num_sc_per_se; + uint32_t gc_num_packer_per_sc; +}; + typedef struct harvest_info_header { uint32_t signature; /* Table Signature */ uint32_t version; /* Table Version */ diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 9f383b9041d28c4e585060792974cb7bcdc16076..49109614510b8c8ef38ab94e2f4cac1db0867ac0 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -2098,6 +2098,12 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } } + /* setting should not be allowed from VF */ + if (amdgpu_sriov_vf(adev)) { + dev_attr->attr.mode &= ~S_IWUGO; + dev_attr->store = NULL; + } + #undef DEVICE_ATTR_IS return 0; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 7907c9e0b5decc0f0647c6f37e16daf2894b2daa..b938fd12da4d5e77fafa0b672748f8ef79e5a806 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -187,6 +187,9 @@ int smu_v12_0_fini_smc_tables(struct smu_context *smu) kfree(smu_table->watermarks_table); smu_table->watermarks_table = NULL; + kfree(smu_table->gpu_metrics_table); + smu_table->gpu_metrics_table = NULL; + return 0; } diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index a3c2f76668abeab6bd72f72ea2b622280ba675d9..d27f2840b9555a8272e088e0090be6966889ff1a 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -857,7 +857,10 @@ static void ast_crtc_reset(struct drm_crtc *crtc) if (crtc->state) crtc->funcs->atomic_destroy_state(crtc, crtc->state); - __drm_atomic_helper_crtc_reset(crtc, &ast_state->base); + if (ast_state) + __drm_atomic_helper_crtc_reset(crtc, &ast_state->base); + else + __drm_atomic_helper_crtc_reset(crtc, NULL); } static struct drm_crtc_state * diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c index 914c569ab8c15d10e58e3194f7b4b921816cdb93..cab3f5c4e2fc8368e8819fc5fc27d6cae1df16c9 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c @@ -1086,11 +1086,21 @@ int analogix_dp_send_psr_spd(struct analogix_dp_device *dp, if (!blocking) return 0; + /* + * db[1]!=0: entering PSR, wait for fully active remote frame buffer. + * db[1]==0: exiting PSR, wait for either + * (a) ACTIVE_RESYNC - the sink "must display the + * incoming active frames from the Source device with no visible + * glitches and/or artifacts", even though timings may still be + * re-synchronizing; or + * (b) INACTIVE - the transition is fully complete. + */ ret = readx_poll_timeout(analogix_dp_get_psr_status, dp, psr_status, psr_status >= 0 && ((vsc->db[1] && psr_status == DP_PSR_SINK_ACTIVE_RFB) || - (!vsc->db[1] && psr_status == DP_PSR_SINK_INACTIVE)), 1500, - DP_TIMEOUT_PSR_LOOP_MS * 1000); + (!vsc->db[1] && (psr_status == DP_PSR_SINK_ACTIVE_RESYNC || + psr_status == DP_PSR_SINK_INACTIVE))), + 1500, DP_TIMEOUT_PSR_LOOP_MS * 1000); if (ret) { dev_warn(dp->dev, "Failed to apply PSR %d\n", ret); return ret; diff --git a/drivers/gpu/drm/bridge/display-connector.c b/drivers/gpu/drm/bridge/display-connector.c index 4d278573cdb99817bab4e58705fedcd9f43f8949..544a47335cac4a8f130af61c4c03467c7205cb3a 100644 --- a/drivers/gpu/drm/bridge/display-connector.c +++ b/drivers/gpu/drm/bridge/display-connector.c @@ -104,7 +104,7 @@ static int display_connector_probe(struct platform_device *pdev) { struct display_connector *conn; unsigned int type; - const char *label; + const char *label = NULL; int ret; conn = devm_kzalloc(&pdev->dev, sizeof(*conn), GFP_KERNEL); diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c index d2808c4a6fb1cfa12ddaab5e4ada9ae578955bb7..cce98bf2a4e7324f85c1d4e113e44ae80abed7a7 100644 --- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c +++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c @@ -306,19 +306,10 @@ out: mutex_unlock(&ge_b850v3_lvds_dev_mutex); } -static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, - const struct i2c_device_id *id) +static int ge_b850v3_register(void) { + struct i2c_client *stdp4028_i2c = ge_b850v3_lvds_ptr->stdp4028_i2c; struct device *dev = &stdp4028_i2c->dev; - int ret; - - ret = ge_b850v3_lvds_init(dev); - - if (ret) - return ret; - - ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c; - i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr); /* drm bridge initialization */ ge_b850v3_lvds_ptr->bridge.funcs = &ge_b850v3_lvds_funcs; @@ -343,6 +334,27 @@ static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, "ge-b850v3-lvds-dp", ge_b850v3_lvds_ptr); } +static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c, + const struct i2c_device_id *id) +{ + struct device *dev = &stdp4028_i2c->dev; + int ret; + + ret = ge_b850v3_lvds_init(dev); + + if (ret) + return ret; + + ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c; + i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr); + + /* Only register after both bridges are probed */ + if (!ge_b850v3_lvds_ptr->stdp2690_i2c) + return 0; + + return ge_b850v3_register(); +} + static int stdp4028_ge_b850v3_fw_remove(struct i2c_client *stdp4028_i2c) { ge_b850v3_lvds_remove(); @@ -386,7 +398,11 @@ static int stdp2690_ge_b850v3_fw_probe(struct i2c_client *stdp2690_i2c, ge_b850v3_lvds_ptr->stdp2690_i2c = stdp2690_i2c; i2c_set_clientdata(stdp2690_i2c, ge_b850v3_lvds_ptr); - return 0; + /* Only register after both bridges are probed */ + if (!ge_b850v3_lvds_ptr->stdp4028_i2c) + return 0; + + return ge_b850v3_register(); } static int stdp2690_ge_b850v3_fw_remove(struct i2c_client *stdp2690_i2c) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c index d0db1acf11d7319ec722f33202cb91db27971620..7d2ed0ed2fe26c75cebb8f484028ec2be13973ab 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-ahb-audio.c @@ -320,13 +320,17 @@ static int dw_hdmi_open(struct snd_pcm_substream *substream) struct snd_pcm_runtime *runtime = substream->runtime; struct snd_dw_hdmi *dw = substream->private_data; void __iomem *base = dw->data.base; + u8 *eld; int ret; runtime->hw = dw_hdmi_hw; - ret = snd_pcm_hw_constraint_eld(runtime, dw->data.eld); - if (ret < 0) - return ret; + eld = dw->data.get_eld(dw->data.hdmi); + if (eld) { + ret = snd_pcm_hw_constraint_eld(runtime, eld); + if (ret < 0) + return ret; + } ret = snd_pcm_limit_hw_rates(runtime); if (ret < 0) diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h index cb07dc0da5a70462972ae0592370b6ef22a35e16..f72d27208ebef5fb14825df3afa11a772be32ce5 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-audio.h @@ -9,15 +9,15 @@ struct dw_hdmi_audio_data { void __iomem *base; int irq; struct dw_hdmi *hdmi; - u8 *eld; + u8 *(*get_eld)(struct dw_hdmi *hdmi); }; struct dw_hdmi_i2s_audio_data { struct dw_hdmi *hdmi; - u8 *eld; void (*write)(struct dw_hdmi *hdmi, u8 val, int offset); u8 (*read)(struct dw_hdmi *hdmi, int offset); + u8 *(*get_eld)(struct dw_hdmi *hdmi); }; #endif diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c index 9fef6413741dcbd674d78cae5d5478aa0bb63ce0..9682416056ed61929ab81e348152025932360756 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c @@ -135,8 +135,15 @@ static int dw_hdmi_i2s_get_eld(struct device *dev, void *data, uint8_t *buf, size_t len) { struct dw_hdmi_i2s_audio_data *audio = data; + u8 *eld; + + eld = audio->get_eld(audio->hdmi); + if (eld) + memcpy(buf, eld, min_t(size_t, MAX_ELD_BYTES, len)); + else + /* Pass en empty ELD if connector not available */ + memset(buf, 0, len); - memcpy(buf, audio->eld, min_t(size_t, MAX_ELD_BYTES, len)); return 0; } diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c index 0c79a9ba48bb6407d7a001b2e03cca139d4ed28d..29c0eb4bd7546d3452533607ffafff65c618cebe 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c @@ -756,6 +756,14 @@ static void hdmi_enable_audio_clk(struct dw_hdmi *hdmi, bool enable) hdmi_writeb(hdmi, hdmi->mc_clkdis, HDMI_MC_CLKDIS); } +static u8 *hdmi_audio_get_eld(struct dw_hdmi *hdmi) +{ + if (!hdmi->curr_conn) + return NULL; + + return hdmi->curr_conn->eld; +} + static void dw_hdmi_ahb_audio_enable(struct dw_hdmi *hdmi) { hdmi_set_cts_n(hdmi, hdmi->audio_cts, hdmi->audio_n); @@ -3395,7 +3403,7 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, audio.base = hdmi->regs; audio.irq = irq; audio.hdmi = hdmi; - audio.eld = hdmi->connector.eld; + audio.get_eld = hdmi_audio_get_eld; hdmi->enable_audio = dw_hdmi_ahb_audio_enable; hdmi->disable_audio = dw_hdmi_ahb_audio_disable; @@ -3408,7 +3416,7 @@ struct dw_hdmi *dw_hdmi_probe(struct platform_device *pdev, struct dw_hdmi_i2s_audio_data audio; audio.hdmi = hdmi; - audio.eld = hdmi->connector.eld; + audio.get_eld = hdmi_audio_get_eld; audio.write = hdmi_writeb; audio.read = hdmi_readb; hdmi->enable_audio = dw_hdmi_i2s_audio_enable; diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c index ecdf9b01340f5e6413f10c26054a722560ea72d2..1a58481037b3fec5b2b762031855f7856de619f4 100644 --- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c +++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c @@ -171,6 +171,7 @@ static const struct regmap_config ti_sn_bridge_regmap_config = { .val_bits = 8, .volatile_table = &ti_sn_bridge_volatile_table, .cache_type = REGCACHE_NONE, + .max_register = 0xFF, }; static void ti_sn_bridge_write_u16(struct ti_sn_bridge *pdata, diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 717c4e7271b0422692c485d8c8cb426de0bc006f..5163433ac561b884e2dfd60f1aa4509c9399b5c3 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -2155,6 +2155,9 @@ EXPORT_SYMBOL(drm_connector_attach_max_bpc_property); void drm_connector_set_vrr_capable_property( struct drm_connector *connector, bool capable) { + if (!connector->vrr_capable_property) + return; + drm_object_property_set_value(&connector->base, connector->vrr_capable_property, capable); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index cd162d406078aa6d6aa802e5f90ee6124be61b4b..006e3b896caea7ed28a9c3df19bb5b134eef9ab9 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -577,6 +577,7 @@ static int drm_dev_init(struct drm_device *dev, struct drm_driver *driver, struct device *parent) { + struct inode *inode; int ret; if (!drm_core_init_complete) { @@ -613,13 +614,15 @@ static int drm_dev_init(struct drm_device *dev, if (ret) return ret; - dev->anon_inode = drm_fs_inode_new(); - if (IS_ERR(dev->anon_inode)) { - ret = PTR_ERR(dev->anon_inode); + inode = drm_fs_inode_new(); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); DRM_ERROR("Cannot allocate anonymous inode: %d\n", ret); goto err; } + dev->anon_inode = inode; + if (drm_core_check_feature(dev, DRIVER_RENDER)) { ret = drm_minor_alloc(dev, DRM_MINOR_RENDER); if (ret) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index b7ddf504e0249d1ee67e46ea16d12bb29a49c79e..3d7593ea79f14b6270154878f909679c401c0b5a 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1835,11 +1835,20 @@ static void connector_bad_edid(struct drm_connector *connector, u8 *edid, int num_blocks) { int i; - u8 num_of_ext = edid[0x7e]; + u8 last_block; + + /* + * 0x7e in the EDID is the number of extension blocks. The EDID + * is 1 (base block) + num_ext_blocks big. That means we can think + * of 0x7e in the EDID of the _index_ of the last block in the + * combined chunk of memory. + */ + last_block = edid[0x7e]; /* Calculate real checksum for the last edid extension block data */ - connector->real_edid_checksum = - drm_edid_block_checksum(edid + num_of_ext * EDID_LENGTH); + if (last_block < num_blocks) + connector->real_edid_checksum = + drm_edid_block_checksum(edid + last_block * EDID_LENGTH); if (connector->bad_edid_counter++ && !drm_debug_enabled(DRM_UT_KMS)) return; @@ -5123,6 +5132,7 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi if (!(edid->input & DRM_EDID_INPUT_DIGITAL)) return quirks; + info->color_formats |= DRM_COLOR_FORMAT_RGB444; drm_parse_cea_ext(connector, edid); /* @@ -5171,7 +5181,6 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi DRM_DEBUG("%s: Assigning EDID-1.4 digital sink color depth as %d bpc.\n", connector->name, info->bpc); - info->color_formats |= DRM_COLOR_FORMAT_RGB444; if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB444) info->color_formats |= DRM_COLOR_FORMAT_YCRCB444; if (edid->features & DRM_EDID_FEATURE_RGB_YCRCB422) diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index f6bdec7fa92535973940dbecbabd430323a3f019..448c2f2d803a6296e67c4b72b92d00afb95d83cc 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -109,6 +109,18 @@ static const struct drm_dmi_panel_orientation_data lcd1200x1920_rightside_up = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; +static const struct drm_dmi_panel_orientation_data lcd1280x1920_rightside_up = { + .width = 1280, + .height = 1920, + .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, +}; + +static const struct drm_dmi_panel_orientation_data lcd1600x2560_leftside_up = { + .width = 1600, + .height = 2560, + .orientation = DRM_MODE_PANEL_ORIENTATION_LEFT_UP, +}; + static const struct dmi_system_id orientation_data[] = { { /* Acer One 10 (S1003) */ .matches = { @@ -134,6 +146,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T103HAF"), }, .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* AYA NEO 2021 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "AYADEVICE"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "AYA NEO 2021"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* GPD MicroPC (generic strings, also match on bios date) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Default string"), @@ -185,6 +203,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "Default string"), }, .driver_data = (void *)&gpd_win2, + }, { /* GPD Win 3 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "GPD"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "G1618-03") + }, + .driver_data = (void *)&lcd720x1280_rightside_up, }, { /* I.T.Works TW891 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "To be filled by O.E.M."), @@ -193,6 +217,13 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "TW891"), }, .driver_data = (void *)&itworks_tw891, + }, { /* KD Kurio Smart C15200 2-in-1 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "KD Interactive"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Kurio Smart"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "KDM960BCP"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* * Lenovo Ideapad Miix 310 laptop, only some production batches * have a portrait screen, the resolution checks makes the quirk @@ -211,13 +242,24 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"), }, .driver_data = (void *)&lcd800x1280_rightside_up, - }, { /* Lenovo Ideapad D330 */ + }, { /* Lenovo Ideapad D330-10IGM (HD) */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Lenovo Ideapad D330-10IGM (FHD) */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "81H3"), DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo ideapad D330-10IGM"), }, .driver_data = (void *)&lcd1200x1920_rightside_up, + }, { /* Lenovo Yoga Book X90F / X91F / X91L */ + .matches = { + /* Non exact match to match all versions */ + DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* OneGX1 Pro */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SYSTEM_MANUFACTURER"), @@ -225,6 +267,25 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Default string"), }, .driver_data = (void *)&onegx1_pro, + }, { /* OneXPlayer */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ONE-NETBOOK TECHNOLOGY CO., LTD."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), + }, + .driver_data = (void *)&lcd1600x2560_leftside_up, + }, { /* Samsung GalaxyBook 10.6 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Galaxy Book 10.6"), + }, + .driver_data = (void *)&lcd1280x1920_rightside_up, + }, { /* Valve Steam Deck */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Valve"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jupiter"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "1"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* VIOS LTH17 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"), diff --git a/drivers/gpu/drm/drm_plane_helper.c b/drivers/gpu/drm/drm_plane_helper.c index 3aae7ea522f2337956d1456f8552904531005b3b..c3f2292dc93d53bfd440c589c9b17bf3470dc75c 100644 --- a/drivers/gpu/drm/drm_plane_helper.c +++ b/drivers/gpu/drm/drm_plane_helper.c @@ -123,7 +123,6 @@ static int drm_plane_helper_check_update(struct drm_plane *plane, .crtc_w = drm_rect_width(dst), .crtc_h = drm_rect_height(dst), .rotation = rotation, - .visible = *visible, }; struct drm_crtc_state crtc_state = { .crtc = crtc, diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 3491460498491d0d8f5f96058b5adbf6a7ef6c80..993898a2c7791a1ee9f145d2b1216d37fac32a46 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -391,8 +391,17 @@ int drm_syncobj_find_fence(struct drm_file *file_private, if (*fence) { ret = dma_fence_chain_find_seqno(fence, point); - if (!ret) + if (!ret) { + /* If the requested seqno is already signaled + * drm_syncobj_find_fence may return a NULL + * fence. To make sure the recipient gets + * signalled, use a new fence instead. + */ + if (!*fence) + *fence = dma_fence_get_stub(); + goto out; + } dma_fence_put(*fence); } else { ret = -EINVAL; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c index 5f24cc52c2878660179a73e656c226c8db30bc09..ddf539f26f2dad3891fac462060b60ea7f7317c7 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c @@ -469,6 +469,12 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data, return -EINVAL; } + if (args->stream_size > SZ_128K || args->nr_relocs > SZ_128K || + args->nr_bos > SZ_128K || args->nr_pmrs > 128) { + DRM_ERROR("submit arguments out of size limits\n"); + return -EINVAL; + } + /* * Copy the command submission and bo array to kernel space in * one go, and do this outside of any locks. diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h index 1c75c8ed5bcea2c5bec1d923ed35fc9a78884c67..85eddd492774d59c34189f6768e24e3f42dd8766 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h @@ -130,6 +130,7 @@ struct etnaviv_gpu { /* hang detection */ u32 hangcheck_dma_addr; + u32 hangcheck_fence; void __iomem *mmio; int irq; diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index cd46c882269cc959070822de22ea9a61d09cf544..026b6c0731198093cb33b444aa85c688fafcc949 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -106,8 +106,10 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job) */ dma_addr = gpu_read(gpu, VIVS_FE_DMA_ADDRESS); change = dma_addr - gpu->hangcheck_dma_addr; - if (change < 0 || change > 16) { + if (gpu->completed_fence != gpu->hangcheck_fence || + change < 0 || change > 16) { gpu->hangcheck_dma_addr = dma_addr; + gpu->hangcheck_fence = gpu->completed_fence; goto out_no_timeout; } diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 1e1cb245fca778e71d0553cb15e66693f1f0f2c6..8eb9bf3a1617e6bed7bf08e16ea914969a58a12c 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -100,6 +100,7 @@ config DRM_I915_USERPTR config DRM_I915_GVT bool "Enable Intel GVT-g graphics virtualization host support" depends on DRM_I915 + depends on X86 depends on 64BIT default n help diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c index 65d73eb5e155c84bc441e2d14161218a45355485..1c1931f5c958b138b80f146a4d2a4a56923a410e 100644 --- a/drivers/gpu/drm/i915/display/intel_dp.c +++ b/drivers/gpu/drm/i915/display/intel_dp.c @@ -154,6 +154,12 @@ static void vlv_steal_power_sequencer(struct drm_i915_private *dev_priv, enum pipe pipe); static void intel_dp_unset_edid(struct intel_dp *intel_dp); +static void intel_dp_set_default_sink_rates(struct intel_dp *intel_dp) +{ + intel_dp->sink_rates[0] = 162000; + intel_dp->num_sink_rates = 1; +} + /* update sink rates from dpcd */ static void intel_dp_set_sink_rates(struct intel_dp *intel_dp) { @@ -4678,6 +4684,9 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp) */ intel_psr_init_dpcd(intel_dp); + /* Clear the default sink rates */ + intel_dp->num_sink_rates = 0; + /* Read the eDP 1.4+ supported link rates. */ if (intel_dp->edp_dpcd[0] >= DP_EDP_14) { __le16 sink_rates[DP_MAX_SUPPORTED_RATES]; @@ -7779,6 +7788,8 @@ intel_dp_init_connector(struct intel_digital_port *dig_port, return false; intel_dp_set_source_rates(intel_dp); + intel_dp_set_default_sink_rates(intel_dp); + intel_dp_set_common_rates(intel_dp); intel_dp->reset_link_params = true; intel_dp->pps_pipe = INVALID_PIPE; diff --git a/drivers/gpu/drm/i915/display/intel_opregion.c b/drivers/gpu/drm/i915/display/intel_opregion.c index de995362f42832206be9f512aa6f0effc68cda6e..abff2d6cedd12ca99ecb9004635d5d20026d1240 100644 --- a/drivers/gpu/drm/i915/display/intel_opregion.c +++ b/drivers/gpu/drm/i915/display/intel_opregion.c @@ -361,6 +361,21 @@ int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, port++; } + /* + * The port numbering and mapping here is bizarre. The now-obsolete + * swsci spec supports ports numbered [0..4]. Port E is handled as a + * special case, but port F and beyond are not. The functionality is + * supposed to be obsolete for new platforms. Just bail out if the port + * number is out of bounds after mapping. + */ + if (port > 4) { + drm_dbg_kms(&dev_priv->drm, + "[ENCODER:%d:%s] port %c (index %u) out of bounds for display power state notification\n", + intel_encoder->base.base.id, intel_encoder->base.name, + port_name(intel_encoder->port), port); + return -EINVAL; + } + if (!enable) parm |= 4 << 8; diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 0e60aec0bb19122a161899f6190a54f752454ca0..b561e9e00153e6ee0140fb380995cbd69078feac 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -932,6 +932,9 @@ static int check_overlay_dst(struct intel_overlay *overlay, const struct intel_crtc_state *pipe_config = overlay->crtc->config; + if (rec->dst_height == 0 || rec->dst_width == 0) + return -EINVAL; + if (rec->dst_x < pipe_config->pipe_src_w && rec->dst_x + rec->dst_width <= pipe_config->pipe_src_w && rec->dst_y < pipe_config->pipe_src_h && diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h index d6711caa7f399564120d7eb9dca039bcf3183bd0..dbc88fc7136bfeaff9916635a063bf0ae5928f27 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h @@ -159,6 +159,7 @@ struct drm_i915_gem_object { #define I915_BO_ALLOC_VOLATILE BIT(1) #define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | I915_BO_ALLOC_VOLATILE) #define I915_BO_READONLY BIT(2) +#define I915_BO_WAS_BOUND_BIT 3 /* * Is the object to be mapped as read-only to the GPU diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index f60ca6dc911f29d7d8881b566bc51f11e4554bb4..27d24cb38c0d2c42b406160e8b95f6189202f6c9 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -10,6 +10,8 @@ #include "i915_gem_lmem.h" #include "i915_gem_mman.h" +#include "gt/intel_gt.h" + void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages, unsigned int sg_page_sizes) @@ -186,6 +188,14 @@ __i915_gem_object_unset_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0; + if (test_and_clear_bit(I915_BO_WAS_BOUND_BIT, &obj->flags)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + intel_wakeref_t wakeref; + + with_intel_runtime_pm_if_active(&i915->runtime_pm, wakeref) + intel_gt_invalidate_tlbs(&i915->gt); + } + return pages; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 39b428c5049c04f1dbb27a917e20c58b015acd03..6615eb5147e234795b8014cc9846aa794c1823c9 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -26,6 +26,8 @@ void intel_gt_init_early(struct intel_gt *gt, struct drm_i915_private *i915) spin_lock_init(>->irq_lock); + mutex_init(>->tlb_invalidate_lock); + INIT_LIST_HEAD(>->closed_vma); spin_lock_init(>->closed_lock); @@ -661,3 +663,103 @@ void intel_gt_info_print(const struct intel_gt_info *info, intel_sseu_dump(&info->sseu, p); } + +struct reg_and_bit { + i915_reg_t reg; + u32 bit; +}; + +static struct reg_and_bit +get_reg_and_bit(const struct intel_engine_cs *engine, const bool gen8, + const i915_reg_t *regs, const unsigned int num) +{ + const unsigned int class = engine->class; + struct reg_and_bit rb = { }; + + if (drm_WARN_ON_ONCE(&engine->i915->drm, + class >= num || !regs[class].reg)) + return rb; + + rb.reg = regs[class]; + if (gen8 && class == VIDEO_DECODE_CLASS) + rb.reg.reg += 4 * engine->instance; /* GEN8_M2TCR */ + else + rb.bit = engine->instance; + + rb.bit = BIT(rb.bit); + + return rb; +} + +void intel_gt_invalidate_tlbs(struct intel_gt *gt) +{ + static const i915_reg_t gen8_regs[] = { + [RENDER_CLASS] = GEN8_RTCR, + [VIDEO_DECODE_CLASS] = GEN8_M1TCR, /* , GEN8_M2TCR */ + [VIDEO_ENHANCEMENT_CLASS] = GEN8_VTCR, + [COPY_ENGINE_CLASS] = GEN8_BTCR, + }; + static const i915_reg_t gen12_regs[] = { + [RENDER_CLASS] = GEN12_GFX_TLB_INV_CR, + [VIDEO_DECODE_CLASS] = GEN12_VD_TLB_INV_CR, + [VIDEO_ENHANCEMENT_CLASS] = GEN12_VE_TLB_INV_CR, + [COPY_ENGINE_CLASS] = GEN12_BLT_TLB_INV_CR, + }; + struct drm_i915_private *i915 = gt->i915; + struct intel_uncore *uncore = gt->uncore; + struct intel_engine_cs *engine; + enum intel_engine_id id; + const i915_reg_t *regs; + unsigned int num = 0; + + if (I915_SELFTEST_ONLY(gt->awake == -ENODEV)) + return; + + if (INTEL_GEN(i915) == 12) { + regs = gen12_regs; + num = ARRAY_SIZE(gen12_regs); + } else if (INTEL_GEN(i915) >= 8 && INTEL_GEN(i915) <= 11) { + regs = gen8_regs; + num = ARRAY_SIZE(gen8_regs); + } else if (INTEL_GEN(i915) < 8) { + return; + } + + if (drm_WARN_ONCE(&i915->drm, !num, + "Platform does not implement TLB invalidation!")) + return; + + GEM_TRACE("\n"); + + assert_rpm_wakelock_held(&i915->runtime_pm); + + mutex_lock(>->tlb_invalidate_lock); + intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL); + + for_each_engine(engine, gt, id) { + /* + * HW architecture suggest typical invalidation time at 40us, + * with pessimistic cases up to 100us and a recommendation to + * cap at 1ms. We go a bit higher just in case. + */ + const unsigned int timeout_us = 100; + const unsigned int timeout_ms = 4; + struct reg_and_bit rb; + + rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num); + if (!i915_mmio_reg_offset(rb.reg)) + continue; + + intel_uncore_write_fw(uncore, rb.reg, rb.bit); + if (__intel_wait_for_register_fw(uncore, + rb.reg, rb.bit, 0, + timeout_us, timeout_ms, + NULL)) + drm_err_ratelimited(>->i915->drm, + "%s TLB invalidation did not complete in %ums!\n", + engine->name, timeout_ms); + } + + intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL); + mutex_unlock(>->tlb_invalidate_lock); +} diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 9157c7411f60398cf6daabb029a7135cbd5b70fa..d9a1168172ae33af369c39e530a181c5f324f1cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -77,4 +77,6 @@ static inline bool intel_gt_is_wedged(const struct intel_gt *gt) void intel_gt_info_print(const struct intel_gt_info *info, struct drm_printer *p); +void intel_gt_invalidate_tlbs(struct intel_gt *gt); + #endif /* __INTEL_GT_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 6d39a4a11bf3983e81a57915f129c23e6f28fa1e..78c061614d8bba9e7c6a32fcdb5e929877be7301 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -36,6 +36,8 @@ struct intel_gt { struct intel_uc uc; + struct mutex tlb_invalidate_lock; + struct intel_gt_timelines { spinlock_t lock; /* protects active_list */ struct list_head active_list; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ce8c91c5fdd3b4cc2285523a50c95da8e10e9237..12488996a7f4f759bb966f7da7320384b3d8b49c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2639,6 +2639,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING (1 << 28) #define GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT (1 << 24) +#define GEN8_RTCR _MMIO(0x4260) +#define GEN8_M1TCR _MMIO(0x4264) +#define GEN8_M2TCR _MMIO(0x4268) +#define GEN8_BTCR _MMIO(0x426c) +#define GEN8_VTCR _MMIO(0x4270) + #if 0 #define PRB0_TAIL _MMIO(0x2030) #define PRB0_HEAD _MMIO(0x2034) @@ -2728,6 +2734,11 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define FAULT_VA_HIGH_BITS (0xf << 0) #define FAULT_GTT_SEL (1 << 4) +#define GEN12_GFX_TLB_INV_CR _MMIO(0xced8) +#define GEN12_VD_TLB_INV_CR _MMIO(0xcedc) +#define GEN12_VE_TLB_INV_CR _MMIO(0xcee0) +#define GEN12_BLT_TLB_INV_CR _MMIO(0xcee4) + #define GEN12_AUX_ERR_DBG _MMIO(0x43f4) #define FPGA_DBG _MMIO(0x42300) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index d8fef42ca38e1b8727f7deb9e6004bd17f132fe2..896389f930294ac36dae9f7f785cfa594b3a9570 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -776,8 +776,6 @@ static void __i915_request_ctor(void *arg) i915_sw_fence_init(&rq->submit, submit_notify); i915_sw_fence_init(&rq->semaphore, semaphore_notify); - dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, 0, 0); - rq->capture_list = NULL; init_llist_head(&rq->execute_cb); @@ -840,17 +838,12 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; - kref_init(&rq->fence.refcount); - rq->fence.flags = 0; - rq->fence.error = 0; - INIT_LIST_HEAD(&rq->fence.cb_list); - ret = intel_timeline_get_seqno(tl, rq, &seqno); if (ret) goto err_free; - rq->fence.context = tl->fence_context; - rq->fence.seqno = seqno; + dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, + tl->fence_context, seqno); RCU_INIT_POINTER(rq->timeline, tl); RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index caa9b041616b0d0903788589ad4a7ae7965339c6..50a86fd89d00524023c7fae53ef7ced05a2270a0 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -439,6 +439,9 @@ int i915_vma_bind(struct i915_vma *vma, vma->ops->bind_vma(vma->vm, NULL, vma, cache_level, bind_flags); } + if (vma->obj) + set_bit(I915_BO_WAS_BOUND_BIT, &vma->obj->flags); + atomic_or(bind_flags, &vma->flags); return 0; } diff --git a/drivers/gpu/drm/i915/intel_pch.c b/drivers/gpu/drm/i915/intel_pch.c index 6c97192e9ca87ea923d522ca0fcc75c40f7f0a79..a0d5e95234fd0a340405e3f51d39fda4e22271e6 100644 --- a/drivers/gpu/drm/i915/intel_pch.c +++ b/drivers/gpu/drm/i915/intel_pch.c @@ -110,6 +110,7 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) /* Comet Lake V PCH is based on KBP, which is SPT compatible */ return PCH_SPT; case INTEL_PCH_ICP_DEVICE_ID_TYPE: + case INTEL_PCH_ICP2_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Ice Lake PCH\n"); drm_WARN_ON(&dev_priv->drm, !IS_ICELAKE(dev_priv)); return PCH_ICP; @@ -124,7 +125,6 @@ intel_pch_type(const struct drm_i915_private *dev_priv, unsigned short id) !IS_ROCKETLAKE(dev_priv)); return PCH_TGP; case INTEL_PCH_JSP_DEVICE_ID_TYPE: - case INTEL_PCH_JSP2_DEVICE_ID_TYPE: drm_dbg_kms(&dev_priv->drm, "Found Jasper Lake PCH\n"); drm_WARN_ON(&dev_priv->drm, !IS_ELKHARTLAKE(dev_priv)); return PCH_JSP; diff --git a/drivers/gpu/drm/i915/intel_pch.h b/drivers/gpu/drm/i915/intel_pch.h index 06d2cd50af0b9d4d8b28ee48a9d1fa23cdfd476f..49325022b3c9628b605723735043556e35296cfc 100644 --- a/drivers/gpu/drm/i915/intel_pch.h +++ b/drivers/gpu/drm/i915/intel_pch.h @@ -48,11 +48,11 @@ enum intel_pch { #define INTEL_PCH_CMP2_DEVICE_ID_TYPE 0x0680 #define INTEL_PCH_CMP_V_DEVICE_ID_TYPE 0xA380 #define INTEL_PCH_ICP_DEVICE_ID_TYPE 0x3480 +#define INTEL_PCH_ICP2_DEVICE_ID_TYPE 0x3880 #define INTEL_PCH_MCC_DEVICE_ID_TYPE 0x4B00 #define INTEL_PCH_TGP_DEVICE_ID_TYPE 0xA080 #define INTEL_PCH_TGP2_DEVICE_ID_TYPE 0x4380 #define INTEL_PCH_JSP_DEVICE_ID_TYPE 0x4D80 -#define INTEL_PCH_JSP2_DEVICE_ID_TYPE 0x3880 #define INTEL_PCH_P2X_DEVICE_ID_TYPE 0x7100 #define INTEL_PCH_P3X_DEVICE_ID_TYPE 0x7000 #define INTEL_PCH_QEMU_DEVICE_ID_TYPE 0x2900 /* qemu q35 has 2918 */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1f23cb6ece58854a29a82f254b8250dc0b00399c..472aaea75ef84303ec901d4b81df104c25c15e3d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3044,9 +3044,9 @@ static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv) * The BIOS provided WM memory latency values are often * inadequate for high resolution displays. Adjust them. */ - changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12) | - ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12) | - ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); + changed = ilk_increase_wm_latency(dev_priv, dev_priv->wm.pri_latency, 12); + changed |= ilk_increase_wm_latency(dev_priv, dev_priv->wm.spr_latency, 12); + changed |= ilk_increase_wm_latency(dev_priv, dev_priv->wm.cur_latency, 12); if (!changed) return; @@ -3996,6 +3996,17 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) return ret; } + if (intel_can_enable_sagv(dev_priv, new_bw_state) != + intel_can_enable_sagv(dev_priv, old_bw_state)) { + ret = intel_atomic_serialize_global_state(&new_bw_state->base); + if (ret) + return ret; + } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) { + ret = intel_atomic_lock_global_state(&new_bw_state->base); + if (ret) + return ret; + } + for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) { struct skl_pipe_wm *pipe_wm = &new_crtc_state->wm.skl.optimal; @@ -4010,17 +4021,6 @@ static int intel_compute_sagv_mask(struct intel_atomic_state *state) intel_can_enable_sagv(dev_priv, new_bw_state); } - if (intel_can_enable_sagv(dev_priv, new_bw_state) != - intel_can_enable_sagv(dev_priv, old_bw_state)) { - ret = intel_atomic_serialize_global_state(&new_bw_state->base); - if (ret) - return ret; - } else if (new_bw_state->pipe_sagv_reject != old_bw_state->pipe_sagv_reject) { - ret = intel_atomic_lock_global_state(&new_bw_state->base); - if (ret) - return ret; - } - return 0; } diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 97ded2a59cf4c50838148c09c0004013bcb091ea..01849840ac560064d5fab2b285c98d0f21e3f5a1 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -694,7 +694,8 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore *uncore, } static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, - enum forcewake_domains fw_domains) + enum forcewake_domains fw_domains, + bool delayed) { struct intel_uncore_forcewake_domain *domain; unsigned int tmp; @@ -709,7 +710,11 @@ static void __intel_uncore_forcewake_put(struct intel_uncore *uncore, continue; } - uncore->funcs.force_wake_put(uncore, domain->mask); + if (delayed && + !(domain->uncore->fw_domains_timer & domain->mask)) + fw_domain_arm_timer(domain); + else + uncore->funcs.force_wake_put(uncore, domain->mask); } } @@ -730,7 +735,20 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore, return; spin_lock_irqsave(&uncore->lock, irqflags); - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); + spin_unlock_irqrestore(&uncore->lock, irqflags); +} + +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains fw_domains) +{ + unsigned long irqflags; + + if (!uncore->funcs.force_wake_put) + return; + + spin_lock_irqsave(&uncore->lock, irqflags); + __intel_uncore_forcewake_put(uncore, fw_domains, true); spin_unlock_irqrestore(&uncore->lock, irqflags); } @@ -772,7 +790,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore *uncore, if (!uncore->funcs.force_wake_put) return; - __intel_uncore_forcewake_put(uncore, fw_domains); + __intel_uncore_forcewake_put(uncore, fw_domains, false); } void assert_forcewakes_inactive(struct intel_uncore *uncore) diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index c4b22d9d0b45108aab6a3d0dee5f5f7cea8efac4..034f04e0de8b7774ff8cd6eab16c9e577cb375f9 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -211,6 +211,8 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore, enum forcewake_domains domains); void intel_uncore_forcewake_put(struct intel_uncore *uncore, enum forcewake_domains domains); +void intel_uncore_forcewake_put_delayed(struct intel_uncore *uncore, + enum forcewake_domains domains); void intel_uncore_forcewake_flush(struct intel_uncore *uncore, enum forcewake_domains fw_domains); diff --git a/drivers/gpu/drm/lima/lima_device.c b/drivers/gpu/drm/lima/lima_device.c index 65fdca366e41f00d940f13a0865234fc47df5595..36c99058942788fe6e31cc127b7b22c666bc5c21 100644 --- a/drivers/gpu/drm/lima/lima_device.c +++ b/drivers/gpu/drm/lima/lima_device.c @@ -357,6 +357,7 @@ int lima_device_init(struct lima_device *ldev) int err, i; dma_set_coherent_mask(ldev->dev, DMA_BIT_MASK(32)); + dma_set_max_seg_size(ldev->dev, UINT_MAX); err = lima_clk_init(ldev); if (err) diff --git a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c index 8cee2591e7284ba06e4a02475eba09331e47e1a2..ccc742dc78bd99a262385c0088c6c7ca48abc270 100644 --- a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c +++ b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c @@ -147,6 +147,8 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev) return -ENOMEM; mipi_tx->driver_data = of_device_get_match_data(dev); + if (!mipi_tx->driver_data) + return -ENODEV; mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); mipi_tx->regs = devm_ioremap_resource(dev, mem); diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig index dabb4a1ccdcf70150db023a2815e460e87ea6333..1aad34b5ffd7f42d9e605ed79de1cec10bcbb5c6 100644 --- a/drivers/gpu/drm/msm/Kconfig +++ b/drivers/gpu/drm/msm/Kconfig @@ -60,6 +60,7 @@ config DRM_MSM_HDMI_HDCP config DRM_MSM_DP bool "Enable DisplayPort support in MSM DRM driver" depends on DRM_MSM + select RATIONAL default y help Compile in support for DP driver in MSM DRM driver. DP external diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 2dcbe02846cd94ad41b21c0d6df5aec819544cef..9e09805575db40792313cfe280b026b8fa899406 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -99,7 +99,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, u32 asid; u64 memptr = rbmemptr(ring, ttbr0); - if (ctx == a6xx_gpu->cur_ctx) + if (ctx->seqno == a6xx_gpu->cur_ctx_seqno) return; if (msm_iommu_pagetable_params(ctx->aspace->mmu, &ttbr, &asid)) @@ -132,7 +132,7 @@ static void a6xx_set_pagetable(struct a6xx_gpu *a6xx_gpu, OUT_PKT7(ring, CP_EVENT_WRITE, 1); OUT_RING(ring, 0x31); - a6xx_gpu->cur_ctx = ctx; + a6xx_gpu->cur_ctx_seqno = ctx->seqno; } static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) @@ -887,7 +887,7 @@ static int a6xx_hw_init(struct msm_gpu *gpu) /* Always come up on rb 0 */ a6xx_gpu->cur_ring = gpu->rb[0]; - a6xx_gpu->cur_ctx = NULL; + a6xx_gpu->cur_ctx_seqno = 0; /* Enable the SQE_to start the CP engine */ gpu_write(gpu, REG_A6XX_CP_SQE_CNTL, 1); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 69765a722cae6823c4ae333298a028541b305f70..f923edbd5daaf559fdda3e742241c3ef43488e8d 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -19,7 +19,16 @@ struct a6xx_gpu { uint64_t sqe_iova; struct msm_ringbuffer *cur_ring; - struct msm_file_private *cur_ctx; + + /** + * cur_ctx_seqno: + * + * The ctx->seqno value of the context with current pgtables + * installed. Tracked by seqno rather than pointer value to + * avoid dangling pointers, and cases where a ctx can be freed + * and a new one created with the same address. + */ + int cur_ctx_seqno; struct a6xx_gmu gmu; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c index e9ede19193b0eb06275d1a8f4422a606c4844428..87d845d6d3bc3c5da551d757b3a0e33e659d039a 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c @@ -777,12 +777,12 @@ static void a6xx_get_gmu_registers(struct msm_gpu *gpu, struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); a6xx_state->gmu_registers = state_kcalloc(a6xx_state, - 2, sizeof(*a6xx_state->gmu_registers)); + 3, sizeof(*a6xx_state->gmu_registers)); if (!a6xx_state->gmu_registers) return; - a6xx_state->nr_gmu_registers = 2; + a6xx_state->nr_gmu_registers = 3; /* Get the CX GMU registers from AHB */ _a6xx_get_gmu_registers(gpu, a6xx_state, &a6xx_gmu_reglist[0], diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c index a7a24539921f3a58f18aa4c7c87021dd166d68b2..a6efc11eba93fdc2e7786312d71d52717e374fcf 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dspp.c @@ -26,9 +26,16 @@ static void dpu_setup_dspp_pcc(struct dpu_hw_dspp *ctx, struct dpu_hw_pcc_cfg *cfg) { - u32 base = ctx->cap->sblk->pcc.base; + u32 base; - if (!ctx || !base) { + if (!ctx) { + DRM_ERROR("invalid ctx %pK\n", ctx); + return; + } + + base = ctx->cap->sblk->pcc.base; + + if (!base) { DRM_ERROR("invalid ctx %pK pcc base 0x%x\n", ctx, base); return; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c index c940b69435e162d1a10d2854a497331067f9d584..016c462bdb5d229f2725fc70734173b08f5183d0 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c @@ -138,11 +138,13 @@ static int _sspp_subblk_offset(struct dpu_hw_pipe *ctx, u32 *idx) { int rc = 0; - const struct dpu_sspp_sub_blks *sblk = ctx->cap->sblk; + const struct dpu_sspp_sub_blks *sblk; - if (!ctx) + if (!ctx || !ctx->cap || !ctx->cap->sblk) return -EINVAL; + sblk = ctx->cap->sblk; + switch (s_id) { case DPU_SSPP_SRC: *idx = sblk->src_blk.base; @@ -419,7 +421,7 @@ static void _dpu_hw_sspp_setup_scaler3(struct dpu_hw_pipe *ctx, (void)pe; if (_sspp_subblk_offset(ctx, DPU_SSPP_SCALER_QSEED3, &idx) || !sspp - || !scaler3_cfg || !ctx || !ctx->cap || !ctx->cap->sblk) + || !scaler3_cfg) return; dpu_hw_setup_scaler3(&ctx->hw, scaler3_cfg, idx, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c index c8217f4858a1530f41f3ebdb8795eb8058e261ae..08e082d0443af22af0087f49d60ba5f14110c2f2 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c @@ -71,8 +71,8 @@ static int _dpu_danger_signal_status(struct seq_file *s, &status); } else { seq_puts(s, "\nSafe signal status:\n"); - if (kms->hw_mdp->ops.get_danger_status) - kms->hw_mdp->ops.get_danger_status(kms->hw_mdp, + if (kms->hw_mdp->ops.get_safe_status) + kms->hw_mdp->ops.get_safe_status(kms->hw_mdp, &status); } pm_runtime_put_sync(&kms->pdev->dev); @@ -846,6 +846,10 @@ static int _dpu_kms_mmu_init(struct dpu_kms *dpu_kms) return 0; mmu = msm_iommu_new(dpu_kms->dev->dev, domain); + if (IS_ERR(mmu)) { + iommu_domain_free(domain); + return PTR_ERR(mmu); + } aspace = msm_gem_address_space_create(mmu, "dpu1", 0x1000, 0x100000000 - 0x1000); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index 7d7668998501af903d54fff23df77f0d5e30e2df..a8fa084dfa494063da43748f2c1cfd6ae7094e84 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -1119,6 +1119,20 @@ static void mdp5_crtc_reset(struct drm_crtc *crtc) __drm_atomic_helper_crtc_reset(crtc, &mdp5_cstate->base); } +static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = { + .set_config = drm_atomic_helper_set_config, + .destroy = mdp5_crtc_destroy, + .page_flip = drm_atomic_helper_page_flip, + .reset = mdp5_crtc_reset, + .atomic_duplicate_state = mdp5_crtc_duplicate_state, + .atomic_destroy_state = mdp5_crtc_destroy_state, + .atomic_print_state = mdp5_crtc_atomic_print_state, + .get_vblank_counter = mdp5_crtc_get_vblank_counter, + .enable_vblank = msm_crtc_enable_vblank, + .disable_vblank = msm_crtc_disable_vblank, + .get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp, +}; + static const struct drm_crtc_funcs mdp5_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .destroy = mdp5_crtc_destroy, @@ -1307,6 +1321,8 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev, mdp5_crtc->lm_cursor_enabled = cursor_plane ? false : true; drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane, + cursor_plane ? + &mdp5_crtc_no_lm_cursor_funcs : &mdp5_crtc_funcs, NULL); drm_flip_work_init(&mdp5_crtc->unref_cursor_work, diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c index 7e364b9c9f9e102b64f5e9cf4ef850f652e6bfe2..f845333593daa7d1821f54536e8dbc41fb901786 100644 --- a/drivers/gpu/drm/msm/dsi/dsi.c +++ b/drivers/gpu/drm/msm/dsi/dsi.c @@ -33,7 +33,12 @@ static int dsi_get_phy(struct msm_dsi *msm_dsi) of_node_put(phy_node); - if (!phy_pdev || !msm_dsi->phy) { + if (!phy_pdev) { + DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); + return -EPROBE_DEFER; + } + if (!msm_dsi->phy) { + put_device(&phy_pdev->dev); DRM_DEV_ERROR(&pdev->dev, "%s: phy driver is not ready\n", __func__); return -EPROBE_DEFER; } @@ -208,8 +213,10 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev, goto fail; } - if (!msm_dsi_manager_validate_current_config(msm_dsi->id)) + if (!msm_dsi_manager_validate_current_config(msm_dsi->id)) { + ret = -EINVAL; goto fail; + } msm_dsi->encoder = encoder; diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index b17ac6c2755490cc5f73a7bc875dfe42040a6e6f..64454a63bbacfe79e6d5e8ea63345e7cd5c98318 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -464,7 +464,7 @@ static int dsi_bus_clk_enable(struct msm_dsi_host *msm_host) return 0; err: - for (; i > 0; i--) + while (--i >= 0) clk_disable_unprepare(msm_host->bus_clks[i]); return ret; @@ -1692,6 +1692,8 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host, if (!prop) { DRM_DEV_DEBUG(dev, "failed to find data lane mapping, using default\n"); + /* Set the number of date lanes to 4 by default. */ + msm_host->num_data_lanes = 4; return 0; } diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index e8c1a727179cc29b8afceda742353b71c72a7a96..e07986ab52c2235a115b61b70541dc002dfd1c1d 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -769,12 +769,14 @@ void __exit msm_dsi_phy_driver_unregister(void) int msm_dsi_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, struct msm_dsi_phy_clk_request *clk_req) { - struct device *dev = &phy->pdev->dev; + struct device *dev; int ret; if (!phy || !phy->cfg->ops.enable) return -EINVAL; + dev = &phy->pdev->dev; + ret = dsi_phy_enable_resource(phy); if (ret) { DRM_DEV_ERROR(dev, "%s: resource enable failed, %d\n", diff --git a/drivers/gpu/drm/msm/edp/edp_ctrl.c b/drivers/gpu/drm/msm/edp/edp_ctrl.c index 0d9657cc70dbb7b2f8e4113456386f2c64d047ae..937b4abb1552668d9bacd054e75006c7cf5a135a 100644 --- a/drivers/gpu/drm/msm/edp/edp_ctrl.c +++ b/drivers/gpu/drm/msm/edp/edp_ctrl.c @@ -1116,7 +1116,7 @@ void msm_edp_ctrl_power(struct edp_ctrl *ctrl, bool on) int msm_edp_ctrl_init(struct msm_edp *edp) { struct edp_ctrl *ctrl = NULL; - struct device *dev = &edp->pdev->dev; + struct device *dev; int ret; if (!edp) { @@ -1124,6 +1124,7 @@ int msm_edp_ctrl_init(struct msm_edp *edp) return -EINVAL; } + dev = &edp->pdev->dev; ctrl = devm_kzalloc(dev, sizeof(*ctrl), GFP_KERNEL); if (!ctrl) return -ENOMEM; diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index 737453b6e596646b9a530ee16a18901af83c69e8..94f948ef279d107408e1c4b820176f5d78245c60 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -97,10 +97,15 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi) of_node_put(phy_node); - if (!phy_pdev || !hdmi->phy) { + if (!phy_pdev) { DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); return -EPROBE_DEFER; } + if (!hdmi->phy) { + DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n"); + put_device(&phy_pdev->dev); + return -EPROBE_DEFER; + } hdmi->phy_dev = get_device(&phy_pdev->dev); diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index ee2e270f464c1d7b664abff78f9aea390e258d2a..7a7ccad65c9221882ffd3600e6a025a18fd4bf5f 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -77,6 +77,7 @@ static int msm_gpu_open(struct inode *inode, struct file *file) goto free_priv; pm_runtime_get_sync(&gpu->pdev->dev); + msm_gpu_hw_init(gpu); show_priv->state = gpu->funcs->gpu_state_get(gpu); pm_runtime_put_sync(&gpu->pdev->dev); diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index edee4c2a76ce4af37558134902d5976f5ff31905..e37e5afc680a2b49bf0afcc3e232ca1bc58f8384 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -350,7 +350,7 @@ static int msm_init_vram(struct drm_device *dev) of_node_put(node); if (ret) return ret; - size = r.end - r.start; + size = r.end - r.start + 1; DRM_INFO("using VRAM carveout: %lx@%pa\n", size, &r.start); /* if we have no IOMMU, then we need to use carveout allocator. @@ -581,6 +581,7 @@ static void load_gpu(struct drm_device *dev) static int context_init(struct drm_device *dev, struct drm_file *file) { + static atomic_t ident = ATOMIC_INIT(0); struct msm_drm_private *priv = dev->dev_private; struct msm_file_private *ctx; @@ -594,6 +595,8 @@ static int context_init(struct drm_device *dev, struct drm_file *file) ctx->aspace = msm_gpu_create_private_address_space(priv->gpu, current); file->driver_priv = ctx; + ctx->seqno = atomic_inc_return(&ident); + return 0; } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 0b2686b060c7310c76da1961ca57684926746e0c..1fe809add8f629476d103141fdce46903ad79f15 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -58,6 +58,7 @@ struct msm_file_private { int queueid; struct msm_gem_address_space *aspace; struct kref ref; + int seqno; }; enum msm_mdp_plane_property { @@ -543,7 +544,7 @@ static inline int align_pitch(int width, int bpp) static inline unsigned long timeout_to_jiffies(const ktime_t *timeout) { ktime_t now = ktime_get(); - unsigned long remaining_jiffies; + s64 remaining_jiffies; if (ktime_compare(*timeout, now) < 0) { remaining_jiffies = 0; @@ -552,7 +553,7 @@ static inline unsigned long timeout_to_jiffies(const ktime_t *timeout) remaining_jiffies = ktime_divns(rem, NSEC_PER_SEC / HZ); } - return remaining_jiffies; + return clamp(remaining_jiffies, 0LL, (s64)INT_MAX); } #endif /* __MSM_DRV_H__ */ diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 04be4cfcccc1851f0349b0d1eebe9a30dde3d870..819567e40565c092819f7de1e7970dee9e347fa5 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1061,7 +1061,7 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, ret = msm_gem_new_impl(dev, size, flags, &obj); if (ret) - goto fail; + return ERR_PTR(ret); msm_obj = to_msm_bo(obj); @@ -1149,7 +1149,7 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, ret = msm_gem_new_impl(dev, size, MSM_BO_WC, &obj); if (ret) - goto fail; + return ERR_PTR(ret); drm_gem_private_object_init(dev, obj, size); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 55d16489d0f3fb05e770243adeb5362e952370fa..90c26da1090262d130c3bbb79d446381428b917c 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -376,7 +376,7 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, state->bos = kcalloc(nr, sizeof(struct msm_gpu_state_bo), GFP_KERNEL); - for (i = 0; i < submit->nr_bos; i++) { + for (i = 0; state->bos && i < submit->nr_bos; i++) { if (should_dump(submit, i)) { msm_gpu_crashstate_get_bo(state, submit->bos[i].obj, submit->bos[i].iova, submit->bos[i].flags); diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index f31e8ef3c258cc82973ee222e624ea0ea26a889e..25e422d2e7f8f48c27991849d4bba21b91be7080 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -268,7 +268,11 @@ static void mxsfb_irq_disable(struct drm_device *drm) struct mxsfb_drm_private *mxsfb = drm->dev_private; mxsfb_enable_axi_clk(mxsfb); - mxsfb->crtc.funcs->disable_vblank(&mxsfb->crtc); + + /* Disable and clear VBLANK IRQ */ + writel(CTRL1_CUR_FRAME_DONE_IRQ_EN, mxsfb->base + LCDC_CTRL1 + REG_CLR); + writel(CTRL1_CUR_FRAME_DONE_IRQ, mxsfb->base + LCDC_CTRL1 + REG_CLR); + mxsfb_disable_axi_clk(mxsfb); } diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c index 7739f46470d3e1215402e541f55194d6764d34a9..99fee4d8cd3187dc0549f412ff384926f82a10b6 100644 --- a/drivers/gpu/drm/nouveau/dispnv04/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c @@ -205,7 +205,7 @@ nv04_display_destroy(struct drm_device *dev) nvif_notify_dtor(&disp->flip); nouveau_display(dev)->priv = NULL; - kfree(disp); + vfree(disp); nvif_object_unmap(&drm->client.device.object); } @@ -223,7 +223,7 @@ nv04_display_create(struct drm_device *dev) struct nv04_display *disp; int i, ret; - disp = kzalloc(sizeof(*disp), GFP_KERNEL); + disp = vzalloc(sizeof(*disp)); if (!disp) return -ENOMEM; diff --git a/drivers/gpu/drm/nouveau/dispnv50/crc.c b/drivers/gpu/drm/nouveau/dispnv50/crc.c index b8c31b697797ee294d9804621314a4e61ffa5593..66f32d965c7239496f38f783ddebf2ca0f5b0e33 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/crc.c +++ b/drivers/gpu/drm/nouveau/dispnv50/crc.c @@ -704,6 +704,7 @@ static const struct file_operations nv50_crc_flip_threshold_fops = { .open = nv50_crc_debugfs_flip_threshold_open, .read = seq_read, .write = nv50_crc_debugfs_flip_threshold_set, + .release = single_release, }; int nv50_head_crc_late_register(struct nv50_head *head) diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c index 61826cac3061ac9e934259acdf03583ad266740e..be649d14f8797636a9f1279fce6ab5e2ae4c3007 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/head.c +++ b/drivers/gpu/drm/nouveau/dispnv50/head.c @@ -51,6 +51,7 @@ nv50_head_flush_clr(struct nv50_head *head, void nv50_head_flush_set_wndw(struct nv50_head *head, struct nv50_head_atom *asyh) { + if (asyh->set.curs ) head->func->curs_set(head, asyh); if (asyh->set.olut ) { asyh->olut.offset = nv50_lut_load(&head->olut, asyh->olut.buffer, @@ -66,7 +67,6 @@ nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh) if (asyh->set.view ) head->func->view (head, asyh); if (asyh->set.mode ) head->func->mode (head, asyh); if (asyh->set.core ) head->func->core_set(head, asyh); - if (asyh->set.curs ) head->func->curs_set(head, asyh); if (asyh->set.base ) head->func->base (head, asyh); if (asyh->set.ovly ) head->func->ovly (head, asyh); if (asyh->set.dither ) head->func->dither (head, asyh); diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index c2bc05eb2e54a2e2dbcbdd4eb13d8cae56f7a770..1cbe01048b9304fe838252fbd70c2217b60dda8d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -207,6 +207,7 @@ static const struct file_operations nouveau_pstate_fops = { .open = nouveau_debugfs_pstate_open, .read = seq_read, .write = nouveau_debugfs_pstate_set, + .release = single_release, }; static struct drm_info_list nouveau_debugfs_list[] = { diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 42fc5c813a9bbfd7195514af99eb20b6456854c0..ac96b6ab44c07a6fe0d5d0d4cbb35ead5921b738 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -557,6 +557,7 @@ nouveau_drm_device_init(struct drm_device *dev) nvkm_dbgopt(nouveau_debug, "DRM"); INIT_LIST_HEAD(&drm->clients); + mutex_init(&drm->clients_lock); spin_lock_init(&drm->tile.lock); /* workaround an odd issue on nvc1 by disabling the device's @@ -627,6 +628,7 @@ fail_alloc: static void nouveau_drm_device_fini(struct drm_device *dev) { + struct nouveau_cli *cli, *temp_cli; struct nouveau_drm *drm = nouveau_drm(dev); if (nouveau_pmops_runtime()) { @@ -651,9 +653,28 @@ nouveau_drm_device_fini(struct drm_device *dev) nouveau_ttm_fini(drm); nouveau_vga_fini(drm); + /* + * There may be existing clients from as-yet unclosed files. For now, + * clean them up here rather than deferring until the file is closed, + * but this likely not correct if we want to support hot-unplugging + * properly. + */ + mutex_lock(&drm->clients_lock); + list_for_each_entry_safe(cli, temp_cli, &drm->clients, head) { + list_del(&cli->head); + mutex_lock(&cli->mutex); + if (cli->abi16) + nouveau_abi16_fini(cli->abi16); + mutex_unlock(&cli->mutex); + nouveau_cli_fini(cli); + kfree(cli); + } + mutex_unlock(&drm->clients_lock); + nouveau_cli_fini(&drm->client); nouveau_cli_fini(&drm->master); nvif_parent_dtor(&drm->parent); + mutex_destroy(&drm->clients_lock); kfree(drm); } @@ -792,7 +813,7 @@ nouveau_drm_device_remove(struct drm_device *dev) struct nvkm_client *client; struct nvkm_device *device; - drm_dev_unregister(dev); + drm_dev_unplug(dev); dev->irq_enabled = false; client = nvxx_client(&drm->client.base); @@ -1086,9 +1107,9 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv) fpriv->driver_priv = cli; - mutex_lock(&drm->client.mutex); + mutex_lock(&drm->clients_lock); list_add(&cli->head, &drm->clients); - mutex_unlock(&drm->client.mutex); + mutex_unlock(&drm->clients_lock); done: if (ret && cli) { @@ -1106,6 +1127,16 @@ nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv) { struct nouveau_cli *cli = nouveau_cli(fpriv); struct nouveau_drm *drm = nouveau_drm(dev); + int dev_index; + + /* + * The device is gone, and as it currently stands all clients are + * cleaned up in the removal codepath. In the future this may change + * so that we can support hot-unplugging, but for now we immediately + * return to avoid a double-free situation. + */ + if (!drm_dev_enter(dev, &dev_index)) + return; pm_runtime_get_sync(dev->dev); @@ -1114,14 +1145,15 @@ nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv) nouveau_abi16_fini(cli->abi16); mutex_unlock(&cli->mutex); - mutex_lock(&drm->client.mutex); + mutex_lock(&drm->clients_lock); list_del(&cli->head); - mutex_unlock(&drm->client.mutex); + mutex_unlock(&drm->clients_lock); nouveau_cli_fini(cli); kfree(cli); pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); + drm_dev_exit(dev_index); } static const struct drm_ioctl_desc diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index b8025507a9e4c9de9331979cf7f8a7a7bbed675a..8b252dca0fc3e981bf0bdc0f16d84de40920ba03 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -142,6 +142,11 @@ struct nouveau_drm { struct list_head clients; + /** + * @clients_lock: Protects access to the @clients list of &struct nouveau_cli. + */ + struct mutex clients_lock; + u8 old_pm_cap; struct { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index c2051380d18c05486c30c4c4640720a2a486fc58..6504ebec1190179cb3b523ada04efe327eaeb0a0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -196,10 +196,8 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, } ret = nouveau_bo_init(nvbo, size, align, domain, NULL, NULL); - if (ret) { - nouveau_bo_ref(NULL, &nvbo); + if (ret) return ret; - } /* we restrict allowed domains on nv50+ to only the types * that were requested at creation time. not possibly on diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c index 1c3f890377d2c283220c2868c389c4135c332eae..f67700c028c75b57c9dff57aa736ddc0b4f2698b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_svm.c +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c @@ -156,10 +156,14 @@ nouveau_svmm_bind(struct drm_device *dev, void *data, */ mm = get_task_mm(current); + if (!mm) { + return -EINVAL; + } mmap_read_lock(mm); if (!cli->svm.svmm) { mmap_read_unlock(mm); + mmput(mm); return -EINVAL; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c index 6e3c450eaacef779b2a8cae771da37457c79e465..3ff49344abc771a61993691149fd91c292fafb80 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdmigv100.c @@ -62,7 +62,6 @@ gv100_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, nvkm_wr32(device, 0x6f0108 + hdmi, vendor_infoframe.header); nvkm_wr32(device, 0x6f010c + hdmi, vendor_infoframe.subpack0_low); nvkm_wr32(device, 0x6f0110 + hdmi, vendor_infoframe.subpack0_high); - nvkm_wr32(device, 0x6f0110 + hdmi, 0x00000000); nvkm_wr32(device, 0x6f0114 + hdmi, 0x00000000); nvkm_wr32(device, 0x6f0118 + hdmi, 0x00000000); nvkm_wr32(device, 0x6f011c + hdmi, 0x00000000); diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c index c6a3448180d6fdb9102026bbbd17bffad1ca1775..93d9575181c67f7ae01203ff9b34e3c91e3cc6ec 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c @@ -119,8 +119,12 @@ nvkm_falcon_disable(struct nvkm_falcon *falcon) int nvkm_falcon_reset(struct nvkm_falcon *falcon) { - nvkm_falcon_disable(falcon); - return nvkm_falcon_enable(falcon); + if (!falcon->func->reset) { + nvkm_falcon_disable(falcon); + return nvkm_falcon_enable(falcon); + } + + return falcon->func->reset(falcon); } int diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c index cd41b2e6cc8795feca6152c8188028be96163d22..18502fd6ebaa0090d159344194fad5b92a61e0e1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c @@ -207,11 +207,13 @@ int gm200_acr_wpr_parse(struct nvkm_acr *acr) { const struct wpr_header *hdr = (void *)acr->wpr_fw->data; + struct nvkm_acr_lsfw *lsfw; while (hdr->falcon_id != WPR_HEADER_V0_FALCON_ID_INVALID) { wpr_header_dump(&acr->subdev, hdr); - if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id)) - return -ENOMEM; + lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id); + if (IS_ERR(lsfw)) + return PTR_ERR(lsfw); } return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c index 80eb9d8dbc8037f5de38aaf0f25027298cef1ad1..e5c8303a5b7b7c183b28a90bfc90ccd4059b5410 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c @@ -161,11 +161,13 @@ int gp102_acr_wpr_parse(struct nvkm_acr *acr) { const struct wpr_header_v1 *hdr = (void *)acr->wpr_fw->data; + struct nvkm_acr_lsfw *lsfw; while (hdr->falcon_id != WPR_HEADER_V1_FALCON_ID_INVALID) { wpr_header_v1_dump(&acr->subdev, hdr); - if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id)) - return -ENOMEM; + lsfw = nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->falcon_id); + if (IS_ERR(lsfw)) + return PTR_ERR(lsfw); } return 0; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c index f3c30b2a788e8534fe72c23729779afd5e39d573..8bff14ae16b0e5bd76ef0f7541040dda3cb80e70 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c @@ -38,7 +38,7 @@ nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size) *addr += bios->imaged_addr; } - if (unlikely(*addr + size >= bios->size)) { + if (unlikely(*addr + size > bios->size)) { nvkm_error(&bios->subdev, "OOB %d %08x %08x\n", size, p, *addr); return false; } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c index a0fe607c9c07fc5f59f88c45ff4c4538dc5badc2..3bfc55c571b5e0aca53846dd68a0eb460d6b743e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c @@ -94,20 +94,13 @@ nvkm_pmu_fini(struct nvkm_subdev *subdev, bool suspend) return 0; } -static int +static void nvkm_pmu_reset(struct nvkm_pmu *pmu) { struct nvkm_device *device = pmu->subdev.device; if (!pmu->func->enabled(pmu)) - return 0; - - /* Inhibit interrupts, and wait for idle. */ - nvkm_wr32(device, 0x10a014, 0x0000ffff); - nvkm_msec(device, 2000, - if (!nvkm_rd32(device, 0x10a04c)) - break; - ); + return; /* Reset. */ if (pmu->func->reset) @@ -118,25 +111,37 @@ nvkm_pmu_reset(struct nvkm_pmu *pmu) if (!(nvkm_rd32(device, 0x10a10c) & 0x00000006)) break; ); - - return 0; } static int nvkm_pmu_preinit(struct nvkm_subdev *subdev) { struct nvkm_pmu *pmu = nvkm_pmu(subdev); - return nvkm_pmu_reset(pmu); + nvkm_pmu_reset(pmu); + return 0; } static int nvkm_pmu_init(struct nvkm_subdev *subdev) { struct nvkm_pmu *pmu = nvkm_pmu(subdev); - int ret = nvkm_pmu_reset(pmu); - if (ret == 0 && pmu->func->init) - ret = pmu->func->init(pmu); - return ret; + struct nvkm_device *device = pmu->subdev.device; + + if (!pmu->func->init) + return 0; + + if (pmu->func->enabled(pmu)) { + /* Inhibit interrupts, and wait for idle. */ + nvkm_wr32(device, 0x10a014, 0x0000ffff); + nvkm_msec(device, 2000, + if (!nvkm_rd32(device, 0x10a04c)) + break; + ); + + nvkm_pmu_reset(pmu); + } + + return pmu->func->init(pmu); } static void * diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c index 383376addb41c4a55dd1261ea65c748c846233fa..a9d6c36195ed1f175cdb83b32f203b8695302f72 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c @@ -23,9 +23,38 @@ */ #include "priv.h" +static int +gm200_pmu_flcn_reset(struct nvkm_falcon *falcon) +{ + struct nvkm_pmu *pmu = container_of(falcon, typeof(*pmu), falcon); + + nvkm_falcon_wr32(falcon, 0x014, 0x0000ffff); + pmu->func->reset(pmu); + return nvkm_falcon_enable(falcon); +} + +const struct nvkm_falcon_func +gm200_pmu_flcn = { + .debug = 0xc08, + .fbif = 0xe00, + .load_imem = nvkm_falcon_v1_load_imem, + .load_dmem = nvkm_falcon_v1_load_dmem, + .read_dmem = nvkm_falcon_v1_read_dmem, + .bind_context = nvkm_falcon_v1_bind_context, + .wait_for_halt = nvkm_falcon_v1_wait_for_halt, + .clear_interrupt = nvkm_falcon_v1_clear_interrupt, + .set_start_addr = nvkm_falcon_v1_set_start_addr, + .start = nvkm_falcon_v1_start, + .enable = nvkm_falcon_v1_enable, + .disable = nvkm_falcon_v1_disable, + .reset = gm200_pmu_flcn_reset, + .cmdq = { 0x4a0, 0x4b0, 4 }, + .msgq = { 0x4c8, 0x4cc, 0 }, +}; + static const struct nvkm_pmu_func gm200_pmu = { - .flcn = >215_pmu_flcn, + .flcn = &gm200_pmu_flcn, .enabled = gf100_pmu_enabled, .reset = gf100_pmu_reset, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c index 8f6ed5373ea16e046a7748717639a304ac1327f9..7938722b4da17df0fdd580b7aeb64d890ef2cf04 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c @@ -211,7 +211,7 @@ gm20b_pmu_recv(struct nvkm_pmu *pmu) static const struct nvkm_pmu_func gm20b_pmu = { - .flcn = >215_pmu_flcn, + .flcn = &gm200_pmu_flcn, .enabled = gf100_pmu_enabled, .intr = gt215_pmu_intr, .recv = gm20b_pmu_recv, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c index 3d8ce14dba7bf1fb2b7a1d4c54dd9ced9f2f6ed0..3dfb3e8522f6a096aaa90fddcf5a79e2e05ec8a6 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c @@ -39,7 +39,7 @@ gp102_pmu_enabled(struct nvkm_pmu *pmu) static const struct nvkm_pmu_func gp102_pmu = { - .flcn = >215_pmu_flcn, + .flcn = &gm200_pmu_flcn, .enabled = gp102_pmu_enabled, .reset = gp102_pmu_reset, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c index 9c237c426599b543e38f39e3d2fd3888bb70e510..7f5f9d5448360a69b5d9a5f832690c88233c593d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c @@ -78,7 +78,7 @@ gp10b_pmu_acr = { static const struct nvkm_pmu_func gp10b_pmu = { - .flcn = >215_pmu_flcn, + .flcn = &gm200_pmu_flcn, .enabled = gf100_pmu_enabled, .intr = gt215_pmu_intr, .recv = gm20b_pmu_recv, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h index 276b6d778e532fc54b0049833ddb88d498c6f21e..b945ec320cd2ef15fa1e5749d6425c4c30d7dd4d 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h @@ -44,6 +44,8 @@ void gf100_pmu_reset(struct nvkm_pmu *); void gk110_pmu_pgob(struct nvkm_pmu *, bool); +extern const struct nvkm_falcon_func gm200_pmu_flcn; + void gm20b_pmu_acr_bld_patch(struct nvkm_acr *, u32, s64); void gm20b_pmu_acr_bld_write(struct nvkm_acr *, u32, struct nvkm_acr_lsfw *); int gm20b_pmu_acr_boot(struct nvkm_falcon *); diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index b9dbedf8f15e8a79505af4b1ff985f55586c0c1b..6153972e0127a85adffd5cf7f365b86ba3eec21a 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -233,6 +233,7 @@ config DRM_PANEL_OLIMEX_LCD_OLINUXINO depends on OF depends on I2C depends on BACKLIGHT_CLASS_DEVICE + select CRC32 help The panel is used with different sizes LCDs, from 480x272 to 1280x800, and 24 bit per pixel. diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c index 0145129d7c661ce0e9ca1f095faec49b758df9a3..534dd7414d428cddead1d2ddfb23cba2c30cb504 100644 --- a/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c +++ b/drivers/gpu/drm/panel/panel-ilitek-ili9881c.c @@ -590,14 +590,14 @@ static const struct drm_display_mode k101_im2byl02_default_mode = { .clock = 69700, .hdisplay = 800, - .hsync_start = 800 + 6, - .hsync_end = 800 + 6 + 15, - .htotal = 800 + 6 + 15 + 16, + .hsync_start = 800 + 52, + .hsync_end = 800 + 52 + 8, + .htotal = 800 + 52 + 8 + 48, .vdisplay = 1280, - .vsync_start = 1280 + 8, - .vsync_end = 1280 + 8 + 48, - .vtotal = 1280 + 8 + 48 + 52, + .vsync_start = 1280 + 16, + .vsync_end = 1280 + 16 + 6, + .vtotal = 1280 + 16 + 6 + 15, .width_mm = 135, .height_mm = 217, diff --git a/drivers/gpu/drm/panel/panel-innolux-p079zca.c b/drivers/gpu/drm/panel/panel-innolux-p079zca.c index aea31622539146ebbf9bfbcb9f08a3c65b92e633..f194b62e290cae561b2c2b188701481f6aa4ee73 100644 --- a/drivers/gpu/drm/panel/panel-innolux-p079zca.c +++ b/drivers/gpu/drm/panel/panel-innolux-p079zca.c @@ -484,6 +484,7 @@ static void innolux_panel_del(struct innolux_panel *innolux) static int innolux_panel_probe(struct mipi_dsi_device *dsi) { const struct panel_desc *desc; + struct innolux_panel *innolux; int err; desc = of_device_get_match_data(&dsi->dev); @@ -495,7 +496,14 @@ static int innolux_panel_probe(struct mipi_dsi_device *dsi) if (err < 0) return err; - return mipi_dsi_attach(dsi); + err = mipi_dsi_attach(dsi); + if (err < 0) { + innolux = mipi_dsi_get_drvdata(dsi); + innolux_panel_del(innolux); + return err; + } + + return 0; } static int innolux_panel_remove(struct mipi_dsi_device *dsi) diff --git a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c index 86e4213e8bb13f6e834c91bb755d599143f3adc4..daccb1fd5fdad82dcdfd8caacfe89e43a5bb5746 100644 --- a/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c +++ b/drivers/gpu/drm/panel/panel-kingdisplay-kd097d04.c @@ -406,7 +406,13 @@ static int kingdisplay_panel_probe(struct mipi_dsi_device *dsi) if (err < 0) return err; - return mipi_dsi_attach(dsi); + err = mipi_dsi_attach(dsi); + if (err < 0) { + kingdisplay_panel_del(kingdisplay); + return err; + } + + return 0; } static int kingdisplay_panel_remove(struct mipi_dsi_device *dsi) diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c index 204674fccd6465fed6c03166c2008efbefc011ee..7ffd2a04ab23ad4758d0952be357e89d6e32c420 100644 --- a/drivers/gpu/drm/panel/panel-simple.c +++ b/drivers/gpu/drm/panel/panel-simple.c @@ -557,6 +557,7 @@ static int panel_simple_probe(struct device *dev, const struct panel_desc *desc) err = panel_dpi_probe(dev, panel); if (err) goto free_ddc; + desc = panel->desc; } else { if (!of_get_display_timing(dev->of_node, "panel-timing", &dt)) panel_simple_parse_panel_timing_node(dev, panel, &dt); diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c index cc5ee1b3af84f27462ddcaf64b33d20ceab3c96a..12aa7877a625a389a418cd3e8f3f9835595bc86e 100644 --- a/drivers/gpu/drm/radeon/atombios_encoders.c +++ b/drivers/gpu/drm/radeon/atombios_encoders.c @@ -197,7 +197,8 @@ void radeon_atom_backlight_init(struct radeon_encoder *radeon_encoder, * so don't register a backlight device */ if ((rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) && - (rdev->pdev->device == 0x6741)) + (rdev->pdev->device == 0x6741) && + !dmi_match(DMI_PRODUCT_NAME, "iMac12,1")) return; if (!radeon_encoder->enc_priv) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 8c0a572940e826aa05d49645f0a28b14cf8e285a..32070e94f6c4941120a0a31a7a1f4691278a7376 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -634,6 +634,8 @@ void radeon_driver_lastclose_kms(struct drm_device *dev) int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) { struct radeon_device *rdev = dev->dev_private; + struct radeon_fpriv *fpriv; + struct radeon_vm *vm; int r; file_priv->driver_priv = NULL; @@ -646,48 +648,52 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) /* new gpu have virtual address space support */ if (rdev->family >= CHIP_CAYMAN) { - struct radeon_fpriv *fpriv; - struct radeon_vm *vm; fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL); if (unlikely(!fpriv)) { r = -ENOMEM; - goto out_suspend; + goto err_suspend; } if (rdev->accel_working) { vm = &fpriv->vm; r = radeon_vm_init(rdev, vm); - if (r) { - kfree(fpriv); - goto out_suspend; - } + if (r) + goto err_fpriv; r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false); - if (r) { - radeon_vm_fini(rdev, vm); - kfree(fpriv); - goto out_suspend; - } + if (r) + goto err_vm_fini; /* map the ib pool buffer read only into * virtual address space */ vm->ib_bo_va = radeon_vm_bo_add(rdev, vm, rdev->ring_tmp_bo.bo); + if (!vm->ib_bo_va) { + r = -ENOMEM; + goto err_vm_fini; + } + r = radeon_vm_bo_set_addr(rdev, vm->ib_bo_va, RADEON_VA_IB_OFFSET, RADEON_VM_PAGE_READABLE | RADEON_VM_PAGE_SNOOPED); - if (r) { - radeon_vm_fini(rdev, vm); - kfree(fpriv); - goto out_suspend; - } + if (r) + goto err_vm_fini; } file_priv->driver_priv = fpriv; } -out_suspend: + pm_runtime_mark_last_busy(dev->dev); + pm_runtime_put_autosuspend(dev->dev); + return 0; + +err_vm_fini: + radeon_vm_fini(rdev, vm); +err_fpriv: + kfree(fpriv); + +err_suspend: pm_runtime_mark_last_busy(dev->dev); pm_runtime_put_autosuspend(dev->dev); return r; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index 1b9738e44909d45a3942671a8872a8daa846adb9..065604c5837de3ab4870c862570cd9b8b85a702a 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -215,6 +215,7 @@ static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc) const struct drm_display_mode *mode = &rcrtc->crtc.state->adjusted_mode; struct rcar_du_device *rcdu = rcrtc->dev; unsigned long mode_clock = mode->clock * 1000; + unsigned int hdse_offset; u32 dsmr; u32 escr; @@ -298,10 +299,15 @@ static void rcar_du_crtc_set_display_timing(struct rcar_du_crtc *rcrtc) | DSMR_DIPM_DISP | DSMR_CSPM; rcar_du_crtc_write(rcrtc, DSMR, dsmr); + hdse_offset = 19; + if (rcrtc->group->cmms_mask & BIT(rcrtc->index % 2)) + hdse_offset += 25; + /* Display timings */ - rcar_du_crtc_write(rcrtc, HDSR, mode->htotal - mode->hsync_start - 19); + rcar_du_crtc_write(rcrtc, HDSR, mode->htotal - mode->hsync_start - + hdse_offset); rcar_du_crtc_write(rcrtc, HDER, mode->htotal - mode->hsync_start + - mode->hdisplay - 19); + mode->hdisplay - hdse_offset); rcar_du_crtc_write(rcrtc, HSWR, mode->hsync_end - mode->hsync_start - 1); rcar_du_crtc_write(rcrtc, HCR, mode->htotal - 1); @@ -831,6 +837,7 @@ rcar_du_crtc_mode_valid(struct drm_crtc *crtc, struct rcar_du_crtc *rcrtc = to_rcar_crtc(crtc); struct rcar_du_device *rcdu = rcrtc->dev; bool interlaced = mode->flags & DRM_MODE_FLAG_INTERLACE; + unsigned int min_sync_porch; unsigned int vbp; if (interlaced && !rcar_du_has(rcdu, RCAR_DU_FEATURE_INTERLACED)) @@ -838,9 +845,14 @@ rcar_du_crtc_mode_valid(struct drm_crtc *crtc, /* * The hardware requires a minimum combined horizontal sync and back - * porch of 20 pixels and a minimum vertical back porch of 3 lines. + * porch of 20 pixels (when CMM isn't used) or 45 pixels (when CMM is + * used), and a minimum vertical back porch of 3 lines. */ - if (mode->htotal - mode->hsync_start < 20) + min_sync_porch = 20; + if (rcrtc->group->cmms_mask & BIT(rcrtc->index % 2)) + min_sync_porch += 25; + + if (mode->htotal - mode->hsync_start < min_sync_porch) return MODE_HBLANK_NARROW; vbp = (mode->vtotal - mode->vsync_end) / (interlaced ? 2 : 1); diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c index d0c9610ad22025140f9e15c84a54e681eccc73ac..b0fb3c3cba5962a151263af9365677838737d23a 100644 --- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c @@ -243,6 +243,8 @@ struct dw_mipi_dsi_rockchip { struct dw_mipi_dsi *dmd; const struct rockchip_dw_dsi_chip_data *cdata; struct dw_mipi_dsi_plat_data pdata; + + bool dsi_bound; }; struct dphy_pll_parameter_map { @@ -753,10 +755,6 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder) if (mux < 0) return; - pm_runtime_get_sync(dsi->dev); - if (dsi->slave) - pm_runtime_get_sync(dsi->slave->dev); - /* * For the RK3399, the clk of grf must be enabled before writing grf * register. And for RK3288 or other soc, this grf_clk must be NULL, @@ -775,20 +773,10 @@ static void dw_mipi_dsi_encoder_enable(struct drm_encoder *encoder) clk_disable_unprepare(dsi->grf_clk); } -static void dw_mipi_dsi_encoder_disable(struct drm_encoder *encoder) -{ - struct dw_mipi_dsi_rockchip *dsi = to_dsi(encoder); - - if (dsi->slave) - pm_runtime_put(dsi->slave->dev); - pm_runtime_put(dsi->dev); -} - static const struct drm_encoder_helper_funcs dw_mipi_dsi_encoder_helper_funcs = { .atomic_check = dw_mipi_dsi_encoder_atomic_check, .enable = dw_mipi_dsi_encoder_enable, - .disable = dw_mipi_dsi_encoder_disable, }; static int rockchip_dsi_drm_create_encoder(struct dw_mipi_dsi_rockchip *dsi, @@ -918,10 +906,14 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev, put_device(second); } + pm_runtime_get_sync(dsi->dev); + if (dsi->slave) + pm_runtime_get_sync(dsi->slave->dev); + ret = clk_prepare_enable(dsi->pllref_clk); if (ret) { DRM_DEV_ERROR(dev, "Failed to enable pllref_clk: %d\n", ret); - return ret; + goto out_pm_runtime; } /* @@ -933,7 +925,7 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev, ret = clk_prepare_enable(dsi->grf_clk); if (ret) { DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret); - return ret; + goto out_pll_clk; } dw_mipi_dsi_rockchip_config(dsi); @@ -945,16 +937,27 @@ static int dw_mipi_dsi_rockchip_bind(struct device *dev, ret = rockchip_dsi_drm_create_encoder(dsi, drm_dev); if (ret) { DRM_DEV_ERROR(dev, "Failed to create drm encoder\n"); - return ret; + goto out_pll_clk; } ret = dw_mipi_dsi_bind(dsi->dmd, &dsi->encoder); if (ret) { DRM_DEV_ERROR(dev, "Failed to bind: %d\n", ret); - return ret; + goto out_pll_clk; } + dsi->dsi_bound = true; + return 0; + +out_pll_clk: + clk_disable_unprepare(dsi->pllref_clk); +out_pm_runtime: + pm_runtime_put(dsi->dev); + if (dsi->slave) + pm_runtime_put(dsi->slave->dev); + + return ret; } static void dw_mipi_dsi_rockchip_unbind(struct device *dev, @@ -966,9 +969,15 @@ static void dw_mipi_dsi_rockchip_unbind(struct device *dev, if (dsi->is_slave) return; + dsi->dsi_bound = false; + dw_mipi_dsi_unbind(dsi->dmd); clk_disable_unprepare(dsi->pllref_clk); + + pm_runtime_put(dsi->dev); + if (dsi->slave) + pm_runtime_put(dsi->slave->dev); } static const struct component_ops dw_mipi_dsi_rockchip_ops = { @@ -1026,6 +1035,36 @@ static const struct dw_mipi_dsi_host_ops dw_mipi_dsi_rockchip_host_ops = { .detach = dw_mipi_dsi_rockchip_host_detach, }; +static int __maybe_unused dw_mipi_dsi_rockchip_resume(struct device *dev) +{ + struct dw_mipi_dsi_rockchip *dsi = dev_get_drvdata(dev); + int ret; + + /* + * Re-configure DSI state, if we were previously initialized. We need + * to do this before rockchip_drm_drv tries to re-enable() any panels. + */ + if (dsi->dsi_bound) { + ret = clk_prepare_enable(dsi->grf_clk); + if (ret) { + DRM_DEV_ERROR(dsi->dev, "Failed to enable grf_clk: %d\n", ret); + return ret; + } + + dw_mipi_dsi_rockchip_config(dsi); + if (dsi->slave) + dw_mipi_dsi_rockchip_config(dsi->slave); + + clk_disable_unprepare(dsi->grf_clk); + } + + return 0; +} + +static const struct dev_pm_ops dw_mipi_dsi_rockchip_pm_ops = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(NULL, dw_mipi_dsi_rockchip_resume) +}; + static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1126,14 +1165,10 @@ static int dw_mipi_dsi_rockchip_probe(struct platform_device *pdev) if (ret != -EPROBE_DEFER) DRM_DEV_ERROR(dev, "Failed to probe dw_mipi_dsi: %d\n", ret); - goto err_clkdisable; + return ret; } return 0; - -err_clkdisable: - clk_disable_unprepare(dsi->pllref_clk); - return ret; } static int dw_mipi_dsi_rockchip_remove(struct platform_device *pdev) @@ -1249,6 +1284,7 @@ struct platform_driver dw_mipi_dsi_rockchip_driver = { .remove = dw_mipi_dsi_rockchip_remove, .driver = { .of_match_table = dw_mipi_dsi_rockchip_dt_ids, + .pm = &dw_mipi_dsi_rockchip_pm_ops, .name = "dw-mipi-dsi-rockchip", }, }; diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c index 23de359a1dec60116f29a22f045f663b2b7ec707..515e6f187dc777aa6abcdab46a45733c058d8b9a 100644 --- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c +++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c @@ -529,13 +529,6 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, return ret; } - ret = clk_prepare_enable(hdmi->vpll_clk); - if (ret) { - DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI vpll: %d\n", - ret); - return ret; - } - hdmi->phy = devm_phy_optional_get(dev, "hdmi"); if (IS_ERR(hdmi->phy)) { ret = PTR_ERR(hdmi->phy); @@ -544,6 +537,13 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master, return ret; } + ret = clk_prepare_enable(hdmi->vpll_clk); + if (ret) { + DRM_DEV_ERROR(hdmi->dev, "Failed to enable HDMI vpll: %d\n", + ret); + return ret; + } + drm_encoder_helper_add(encoder, &dw_hdmi_rockchip_encoder_helper_funcs); drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS); diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c index a6fe03c3748aad414911cddccc6062c712940432..39e1e1ebea92841dd0ca037da8d9975d50a1ab0d 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c @@ -873,6 +873,7 @@ static const struct vop_win_phy rk3399_win01_data = { .enable = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 0), .format = VOP_REG(RK3288_WIN0_CTRL0, 0x7, 1), .rb_swap = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 12), + .x_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 21), .y_mir_en = VOP_REG(RK3288_WIN0_CTRL0, 0x1, 22), .act_info = VOP_REG(RK3288_WIN0_ACT_INFO, 0x1fff1fff, 0), .dsp_info = VOP_REG(RK3288_WIN0_DSP_INFO, 0x0fff0fff, 0), @@ -883,6 +884,7 @@ static const struct vop_win_phy rk3399_win01_data = { .uv_vir = VOP_REG(RK3288_WIN0_VIR, 0x3fff, 16), .src_alpha_ctl = VOP_REG(RK3288_WIN0_SRC_ALPHA_CTRL, 0xff, 0), .dst_alpha_ctl = VOP_REG(RK3288_WIN0_DST_ALPHA_CTRL, 0xff, 0), + .channel = VOP_REG(RK3288_WIN0_CTRL2, 0xff, 0), }; /* @@ -893,11 +895,11 @@ static const struct vop_win_phy rk3399_win01_data = { static const struct vop_win_data rk3399_vop_win_data[] = { { .base = 0x00, .phy = &rk3399_win01_data, .type = DRM_PLANE_TYPE_PRIMARY }, - { .base = 0x40, .phy = &rk3288_win01_data, + { .base = 0x40, .phy = &rk3368_win01_data, .type = DRM_PLANE_TYPE_OVERLAY }, - { .base = 0x00, .phy = &rk3288_win23_data, + { .base = 0x00, .phy = &rk3368_win23_data, .type = DRM_PLANE_TYPE_OVERLAY }, - { .base = 0x50, .phy = &rk3288_win23_data, + { .base = 0x50, .phy = &rk3368_win23_data, .type = DRM_PLANE_TYPE_CURSOR }, }; diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig index 5755f0432e77435bca7942430c05ee2dc9e84d31..8c796de53222c950999b897635cc36e6f44f9408 100644 --- a/drivers/gpu/drm/sun4i/Kconfig +++ b/drivers/gpu/drm/sun4i/Kconfig @@ -46,6 +46,7 @@ config DRM_SUN6I_DSI default MACH_SUN8I select CRC_CCITT select DRM_MIPI_DSI + select RESET_CONTROLLER select PHY_SUN6I_MIPI_DPHY help Choose this option if you want have an Allwinner SoC with diff --git a/drivers/gpu/drm/sun4i/sun8i_csc.h b/drivers/gpu/drm/sun4i/sun8i_csc.h index a55a38ad849c1304df484f0d234d182a79a8708a..022cafa6c06cb040d3739182ac432187742aa251 100644 --- a/drivers/gpu/drm/sun4i/sun8i_csc.h +++ b/drivers/gpu/drm/sun4i/sun8i_csc.h @@ -16,8 +16,8 @@ struct sun8i_mixer; #define CCSC10_OFFSET 0xA0000 #define CCSC11_OFFSET 0xF0000 -#define SUN8I_CSC_CTRL(base) (base + 0x0) -#define SUN8I_CSC_COEFF(base, i) (base + 0x10 + 4 * i) +#define SUN8I_CSC_CTRL(base) ((base) + 0x0) +#define SUN8I_CSC_COEFF(base, i) ((base) + 0x10 + 4 * (i)) #define SUN8I_CSC_CTRL_EN BIT(0) diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c index f75fb157f2ff757da981031f438f80afda33d355..016b877051dabfc3b1b9eddc7189cb815896c7f1 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.c @@ -216,11 +216,13 @@ static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master, goto err_disable_clk_tmds; } + ret = sun8i_hdmi_phy_init(hdmi->phy); + if (ret) + goto err_disable_clk_tmds; + drm_encoder_helper_add(encoder, &sun8i_dw_hdmi_encoder_helper_funcs); drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS); - sun8i_hdmi_phy_init(hdmi->phy); - plat_data->mode_valid = hdmi->quirks->mode_valid; plat_data->use_drm_infoframe = hdmi->quirks->use_drm_infoframe; sun8i_hdmi_phy_set_ops(hdmi->phy, plat_data); @@ -262,6 +264,7 @@ static void sun8i_dw_hdmi_unbind(struct device *dev, struct device *master, struct sun8i_dw_hdmi *hdmi = dev_get_drvdata(dev); dw_hdmi_unbind(hdmi->hdmi); + sun8i_hdmi_phy_deinit(hdmi->phy); clk_disable_unprepare(hdmi->clk_tmds); reset_control_assert(hdmi->rst_ctrl); gpiod_set_value(hdmi->ddc_en, 0); diff --git a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h index 74f6ed0e25709f6ae3bbfebb430ae89bf75534cb..bffe1b9cd3dcb435f019d25668cd3e9c02beaf2c 100644 --- a/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h +++ b/drivers/gpu/drm/sun4i/sun8i_dw_hdmi.h @@ -169,6 +169,7 @@ struct sun8i_hdmi_phy { struct clk *clk_phy; struct clk *clk_pll0; struct clk *clk_pll1; + struct device *dev; unsigned int rcal; struct regmap *regs; struct reset_control *rst_phy; @@ -205,7 +206,8 @@ encoder_to_sun8i_dw_hdmi(struct drm_encoder *encoder) int sun8i_hdmi_phy_get(struct sun8i_dw_hdmi *hdmi, struct device_node *node); -void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy); +int sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy); +void sun8i_hdmi_phy_deinit(struct sun8i_hdmi_phy *phy); void sun8i_hdmi_phy_set_ops(struct sun8i_hdmi_phy *phy, struct dw_hdmi_plat_data *plat_data); diff --git a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c index c9239708d398cdaa85ebffba7ca4a41ecba5a760..b64d93da651d22d03e74057018cea28d37004a02 100644 --- a/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c +++ b/drivers/gpu/drm/sun4i/sun8i_hdmi_phy.c @@ -506,9 +506,60 @@ static void sun8i_hdmi_phy_init_h3(struct sun8i_hdmi_phy *phy) phy->rcal = (val & SUN8I_HDMI_PHY_ANA_STS_RCAL_MASK) >> 2; } -void sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy) +int sun8i_hdmi_phy_init(struct sun8i_hdmi_phy *phy) { + int ret; + + ret = reset_control_deassert(phy->rst_phy); + if (ret) { + dev_err(phy->dev, "Cannot deassert phy reset control: %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(phy->clk_bus); + if (ret) { + dev_err(phy->dev, "Cannot enable bus clock: %d\n", ret); + goto err_assert_rst_phy; + } + + ret = clk_prepare_enable(phy->clk_mod); + if (ret) { + dev_err(phy->dev, "Cannot enable mod clock: %d\n", ret); + goto err_disable_clk_bus; + } + + if (phy->variant->has_phy_clk) { + ret = sun8i_phy_clk_create(phy, phy->dev, + phy->variant->has_second_pll); + if (ret) { + dev_err(phy->dev, "Couldn't create the PHY clock\n"); + goto err_disable_clk_mod; + } + + clk_prepare_enable(phy->clk_phy); + } + phy->variant->phy_init(phy); + + return 0; + +err_disable_clk_mod: + clk_disable_unprepare(phy->clk_mod); +err_disable_clk_bus: + clk_disable_unprepare(phy->clk_bus); +err_assert_rst_phy: + reset_control_assert(phy->rst_phy); + + return ret; +} + +void sun8i_hdmi_phy_deinit(struct sun8i_hdmi_phy *phy) +{ + clk_disable_unprepare(phy->clk_mod); + clk_disable_unprepare(phy->clk_bus); + clk_disable_unprepare(phy->clk_phy); + + reset_control_assert(phy->rst_phy); } void sun8i_hdmi_phy_set_ops(struct sun8i_hdmi_phy *phy, @@ -638,6 +689,7 @@ static int sun8i_hdmi_phy_probe(struct platform_device *pdev) return -ENOMEM; phy->variant = (struct sun8i_hdmi_phy_variant *)match->data; + phy->dev = dev; ret = of_address_to_resource(node, 0, &res); if (ret) { @@ -696,47 +748,10 @@ static int sun8i_hdmi_phy_probe(struct platform_device *pdev) goto err_put_clk_pll1; } - ret = reset_control_deassert(phy->rst_phy); - if (ret) { - dev_err(dev, "Cannot deassert phy reset control: %d\n", ret); - goto err_put_rst_phy; - } - - ret = clk_prepare_enable(phy->clk_bus); - if (ret) { - dev_err(dev, "Cannot enable bus clock: %d\n", ret); - goto err_deassert_rst_phy; - } - - ret = clk_prepare_enable(phy->clk_mod); - if (ret) { - dev_err(dev, "Cannot enable mod clock: %d\n", ret); - goto err_disable_clk_bus; - } - - if (phy->variant->has_phy_clk) { - ret = sun8i_phy_clk_create(phy, dev, - phy->variant->has_second_pll); - if (ret) { - dev_err(dev, "Couldn't create the PHY clock\n"); - goto err_disable_clk_mod; - } - - clk_prepare_enable(phy->clk_phy); - } - platform_set_drvdata(pdev, phy); return 0; -err_disable_clk_mod: - clk_disable_unprepare(phy->clk_mod); -err_disable_clk_bus: - clk_disable_unprepare(phy->clk_bus); -err_deassert_rst_phy: - reset_control_assert(phy->rst_phy); -err_put_rst_phy: - reset_control_put(phy->rst_phy); err_put_clk_pll1: clk_put(phy->clk_pll1); err_put_clk_pll0: @@ -753,12 +768,6 @@ static int sun8i_hdmi_phy_remove(struct platform_device *pdev) { struct sun8i_hdmi_phy *phy = platform_get_drvdata(pdev); - clk_disable_unprepare(phy->clk_mod); - clk_disable_unprepare(phy->clk_bus); - clk_disable_unprepare(phy->clk_phy); - - reset_control_assert(phy->rst_phy); - reset_control_put(phy->rst_phy); clk_put(phy->clk_pll0); diff --git a/drivers/gpu/drm/sun4i/sun8i_mixer.h b/drivers/gpu/drm/sun4i/sun8i_mixer.h index 7576b523fdbb1409c720ca5874425a71be9cc411..b0178c045267c6f6e584d46a076e5780c8144bb8 100644 --- a/drivers/gpu/drm/sun4i/sun8i_mixer.h +++ b/drivers/gpu/drm/sun4i/sun8i_mixer.h @@ -113,10 +113,10 @@ /* format 13 is semi-planar YUV411 VUVU */ #define SUN8I_MIXER_FBFMT_YUV411 14 /* format 15 doesn't exist */ -/* format 16 is P010 YVU */ -#define SUN8I_MIXER_FBFMT_P010_YUV 17 -/* format 18 is P210 YVU */ -#define SUN8I_MIXER_FBFMT_P210_YUV 19 +#define SUN8I_MIXER_FBFMT_P010_YUV 16 +/* format 17 is P010 YVU */ +#define SUN8I_MIXER_FBFMT_P210_YUV 18 +/* format 19 is P210 YVU */ /* format 20 is packed YVU444 10-bit */ /* format 21 is packed YUV444 10-bit */ diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index b77f726303d89d186427e6d6285ab3563e367a20..ec0e4d8f0aade7e9f8f2b89ba687cdda1886f92f 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -265,10 +266,8 @@ static int vic_load_firmware(struct vic *vic) if (!client->group) { virt = dma_alloc_coherent(vic->dev, size, &iova, GFP_KERNEL); - - err = dma_mapping_error(vic->dev, iova); - if (err < 0) - return err; + if (!virt) + return -ENOMEM; } else { virt = tegra_drm_alloc(tegra, size, &iova); } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index eb4b7df02ca034ab55c963e7c5d00430bd302b19..f673292eec9dbb6b5c050bfca006cbc4c6a151cc 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -789,6 +789,8 @@ int ttm_mem_evict_first(struct ttm_bo_device *bdev, ret = ttm_bo_evict(bo, ctx); if (locked) ttm_bo_unreserve(bo); + else + ttm_bo_move_to_lru_tail_unlocked(bo); ttm_bo_put(bo); return ret; diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 98a006fc30a58db3b1e097ca309af1d8556a6765..0b1daf442425fa84ecd73a1c497a6448090a40ee 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -500,11 +500,6 @@ int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, switch (bo->mem.mem_type) { case TTM_PL_SYSTEM: - if (unlikely(bo->ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)) { - ret = ttm_tt_swapin(bo->ttm); - if (unlikely(ret != 0)) - return ret; - } fallthrough; case TTM_PL_TT: ret = ttm_bo_vm_access_kmap(bo, offset, buf, len, write); diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index cdc1c42e16695215f28b6f4acec56a0a85e4eea0..aac41a809924e64a2a7b58e84840515848a1612f 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -30,7 +30,7 @@ static int udl_get_edid_block(void *data, u8 *buf, unsigned int block, ret = usb_control_msg(udl->udev, usb_rcvctrlpipe(udl->udev, 0), (0x02), (0x80 | (0x02 << 5)), bval, - 0xA1, read_buff, 2, HZ); + 0xA1, read_buff, 2, 1000); if (ret < 1) { DRM_ERROR("Read EDID byte %d failed err %x\n", i, ret); kfree(read_buff); diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c index 182c586525eb844807588c2e62064e779f887d9f..64fe63c1938f5255147f6edf95dd2a2d6766c18f 100644 --- a/drivers/gpu/drm/v3d/v3d_gem.c +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -195,8 +195,8 @@ v3d_clean_caches(struct v3d_dev *v3d) V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & - V3D_L2TCACTL_L2TFLS), 100)) { - DRM_ERROR("Timeout waiting for L1T write combiner flush\n"); + V3D_L2TCACTL_TMUWCF), 100)) { + DRM_ERROR("Timeout waiting for TMU write combiner flush\n"); } mutex_lock(&v3d->cache_clean_lock); diff --git a/drivers/gpu/drm/vboxvideo/vbox_main.c b/drivers/gpu/drm/vboxvideo/vbox_main.c index d68d9bad76747015c446b3f5908014ca81dd3b70..c5ea880d17b29bc227d523e59a7bb07502ea4751 100644 --- a/drivers/gpu/drm/vboxvideo/vbox_main.c +++ b/drivers/gpu/drm/vboxvideo/vbox_main.c @@ -123,8 +123,8 @@ int vbox_hw_init(struct vbox_private *vbox) /* Create guest-heap mem-pool use 2^4 = 16 byte chunks */ vbox->guest_pool = devm_gen_pool_create(vbox->ddev.dev, 4, -1, "vboxvideo-accel"); - if (!vbox->guest_pool) - return -ENOMEM; + if (IS_ERR(vbox->guest_pool)) + return PTR_ERR(vbox->guest_pool); ret = gen_pool_add_virt(vbox->guest_pool, (unsigned long)vbox->guest_heap, diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index cc74a3f3a07af960564935aeca7614343c1d9b7e..9006b9861c90c2fe1e346278f970fc01f6e4b9ca 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -389,7 +389,7 @@ struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size) bo = kzalloc(sizeof(*bo), GFP_KERNEL); if (!bo) - return ERR_PTR(-ENOMEM); + return NULL; bo->madv = VC4_MADV_WILLNEED; refcount_set(&bo->usecnt, 0); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index ee293f061f0a8706a2997bece79927c6809733fd..a308f2d05d17386c9b9e48351c5439bc01c99def 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -79,6 +79,7 @@ # define VC4_HD_M_SW_RST BIT(2) # define VC4_HD_M_ENABLE BIT(0) +#define HSM_MIN_CLOCK_FREQ 120000000 #define CEC_CLOCK_FREQ 40000 #define VC4_HSM_MID_CLOCK 149985000 @@ -799,6 +800,7 @@ static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder, unsigned long long tmds_rate; if (vc4_hdmi->variant->unsupported_odd_h_timings && + !(mode->flags & DRM_MODE_FLAG_DBLCLK) && ((mode->hdisplay % 2) || (mode->hsync_start % 2) || (mode->hsync_end % 2) || (mode->htotal % 2))) return -EINVAL; @@ -833,6 +835,7 @@ vc4_hdmi_encoder_mode_valid(struct drm_encoder *encoder, struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); if (vc4_hdmi->variant->unsupported_odd_h_timings && + !(mode->flags & DRM_MODE_FLAG_DBLCLK) && ((mode->hdisplay % 2) || (mode->hsync_start % 2) || (mode->hsync_end % 2) || (mode->htotal % 2))) return MODE_H_ILLEGAL; @@ -1806,6 +1809,19 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4_hdmi->disable_wifi_frequencies = of_property_read_bool(dev->of_node, "wifi-2.4ghz-coexistence"); + /* + * If we boot without any cable connected to the HDMI connector, + * the firmware will skip the HSM initialization and leave it + * with a rate of 0, resulting in a bus lockup when we're + * accessing the registers even if it's enabled. + * + * Let's put a sensible default at runtime_resume so that we + * don't end up in this situation. + */ + ret = clk_set_min_rate(vc4_hdmi->hsm_clock, HSM_MIN_CLOCK_FREQ); + if (ret) + goto err_put_ddc; + if (vc4_hdmi->variant->reset) vc4_hdmi->variant->reset(vc4_hdmi); diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 07945ca238e2d93741a7df8db1c21c56f56e7f74..5e40fa0f5e8f2dc8f439971a7e2a5dfc4903760f 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -91,9 +91,7 @@ virtio_gpu_get_vbuf(struct virtio_gpu_device *vgdev, { struct virtio_gpu_vbuffer *vbuf; - vbuf = kmem_cache_zalloc(vgdev->vbufs, GFP_KERNEL); - if (!vbuf) - return ERR_PTR(-ENOMEM); + vbuf = kmem_cache_zalloc(vgdev->vbufs, GFP_KERNEL | __GFP_NOFAIL); BUG_ON(size > MAX_INLINE_CMD_SIZE || size < sizeof(struct virtio_gpu_ctrl_hdr)); @@ -147,10 +145,6 @@ static void *virtio_gpu_alloc_cmd_resp(struct virtio_gpu_device *vgdev, vbuf = virtio_gpu_get_vbuf(vgdev, cmd_size, resp_size, resp_buf, cb); - if (IS_ERR(vbuf)) { - *vbuffer_p = NULL; - return ERR_CAST(vbuf); - } *vbuffer_p = vbuf; return (struct virtio_gpu_command *)vbuf->buf; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 1523b51a7284c03928195c67063db5ab96762f8d..ad208a5f4ebe52e1f14c982bd0c75a2aa3e5fef0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1088,15 +1088,14 @@ extern int vmw_execbuf_fence_commands(struct drm_file *file_priv, struct vmw_private *dev_priv, struct vmw_fence_obj **p_fence, uint32_t *p_handle); -extern void vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, +extern int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, struct vmw_fpriv *vmw_fp, int ret, struct drm_vmw_fence_rep __user *user_fence_rep, struct vmw_fence_obj *fence, uint32_t fence_handle, - int32_t out_fence_fd, - struct sync_file *sync_file); + int32_t out_fence_fd); bool vmw_cmd_describe(const void *buf, u32 *size, char const **cmd); /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 83e1b54eb8647d0a9cc3e0d8493172ed16d12349..739cbc77d8867cdb98fb4555467096c8eca4c6ea 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -3816,17 +3816,17 @@ int vmw_execbuf_fence_commands(struct drm_file *file_priv, * Also if copying fails, user-space will be unable to signal the fence object * so we wait for it immediately, and then unreference the user-space reference. */ -void +int vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, struct vmw_fpriv *vmw_fp, int ret, struct drm_vmw_fence_rep __user *user_fence_rep, struct vmw_fence_obj *fence, uint32_t fence_handle, - int32_t out_fence_fd, struct sync_file *sync_file) + int32_t out_fence_fd) { struct drm_vmw_fence_rep fence_rep; if (user_fence_rep == NULL) - return; + return 0; memset(&fence_rep, 0, sizeof(fence_rep)); @@ -3854,20 +3854,14 @@ vmw_execbuf_copy_fence_user(struct vmw_private *dev_priv, * handle. */ if (unlikely(ret != 0) && (fence_rep.error == 0)) { - if (sync_file) - fput(sync_file->file); - - if (fence_rep.fd != -1) { - put_unused_fd(fence_rep.fd); - fence_rep.fd = -1; - } - ttm_ref_object_base_unref(vmw_fp->tfile, fence_handle, TTM_REF_USAGE); VMW_DEBUG_USER("Fence copy error. Syncing.\n"); (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); } + + return ret ? -EFAULT : 0; } /** @@ -4209,16 +4203,23 @@ int vmw_execbuf_process(struct drm_file *file_priv, (void) vmw_fence_obj_wait(fence, false, false, VMW_FENCE_WAIT_TIMEOUT); + } + } + + ret = vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, + user_fence_rep, fence, handle, out_fence_fd); + + if (sync_file) { + if (ret) { + /* usercopy of fence failed, put the file object */ + fput(sync_file->file); + put_unused_fd(out_fence_fd); } else { /* Link the fence with the FD created earlier */ fd_install(out_fence_fd, sync_file->file); } } - vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, - user_fence_rep, fence, handle, out_fence_fd, - sync_file); - /* Don't unreference when handing fence out */ if (unlikely(out_fence != NULL)) { *out_fence = fence; @@ -4236,7 +4237,7 @@ int vmw_execbuf_process(struct drm_file *file_priv, */ vmw_validation_unref_lists(&val_ctx); - return 0; + return ret; out_unlock_binding: mutex_unlock(&dev_priv->binding_mutex); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 0f8d293971576a4f6d2faf78c6e24e8825457a58..8bc41ec97d71a29fb9bf804ca8fc41671e716e82 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -1171,7 +1171,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data, } vmw_execbuf_copy_fence_user(dev_priv, vmw_fp, 0, user_fence_rep, fence, - handle, -1, NULL); + handle, -1); vmw_fence_obj_unreference(&fence); return 0; out_no_create: diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 312ed0881a99b63c106abe60ab5bf2daa43a8aa8..e58112997c88136e4ecce5c238e99d366f2f5b6c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2479,7 +2479,7 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, if (file_priv) vmw_execbuf_copy_fence_user(dev_priv, vmw_fpriv(file_priv), ret, user_fence_rep, fence, - handle, -1, NULL); + handle, -1); if (out_fence) *out_fence = fence; else diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index d0ebb70e2fdd6e56750db8a0772b8491f8b7bf54..a2c09dca4eef945e76a2bf68da9f0db52bb39263 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -18,6 +18,10 @@ #include #undef CREATE_TRACE_POINTS +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include +#endif + #include "bus.h" #include "channel.h" #include "debug.h" @@ -232,6 +236,17 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host) struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); int err; +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (host->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(host->dev); + arm_iommu_detach_device(host->dev); + arm_iommu_release_mapping(mapping); + + domain = iommu_get_domain_for_dev(host->dev); + } +#endif + /* * We may not always want to enable IOMMU support (for example if the * host1x firewall is already enabled and we don't support addressing diff --git a/drivers/hid/Kconfig b/drivers/hid/Kconfig index a347368ecaef30022ec568df8d604750e3f063e6..f5dc693a6403f83c78d05d1f42823d2a322bf9dd 100644 --- a/drivers/hid/Kconfig +++ b/drivers/hid/Kconfig @@ -207,14 +207,14 @@ config HID_CHERRY config HID_CHICONY tristate "Chicony devices" - depends on HID + depends on USB_HID default !EXPERT help Support for Chicony Tactical pad and special keys on Chicony keyboards. config HID_CORSAIR tristate "Corsair devices" - depends on HID && USB && LEDS_CLASS + depends on USB_HID && LEDS_CLASS help Support for Corsair devices that are not fully compliant with the HID standard. @@ -245,7 +245,7 @@ config HID_MACALLY config HID_PRODIKEYS tristate "Prodikeys PC-MIDI Keyboard support" - depends on HID && SND + depends on USB_HID && SND select SND_RAWMIDI help Support for Prodikeys PC-MIDI Keyboard device support. @@ -541,7 +541,7 @@ config HID_LENOVO config HID_LOGITECH tristate "Logitech devices" - depends on HID + depends on USB_HID depends on LEDS_CLASS default !EXPERT help @@ -918,7 +918,7 @@ config HID_SAITEK config HID_SAMSUNG tristate "Samsung InfraRed remote control or keyboards" - depends on HID + depends on USB_HID help Support for Samsung InfraRed remote control or keyboards. diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index 6b8f0d004d3450d5b48f8fc145b855f7ef755e00..e5d2e7e9541b8296b421c2933ec299a46bc99fe7 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -322,12 +322,19 @@ static int apple_event(struct hid_device *hdev, struct hid_field *field, /* * MacBook JIS keyboard has wrong logical maximum + * Magic Keyboard JIS has wrong logical maximum */ static __u8 *apple_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct apple_sc *asc = hid_get_drvdata(hdev); + if(*rsize >=71 && rdesc[70] == 0x65 && rdesc[64] == 0x65) { + hid_info(hdev, + "fixing up Magic Keyboard JIS report descriptor\n"); + rdesc[64] = rdesc[70] = 0xe7; + } + if ((asc->quirks & APPLE_RDESC_JIS) && *rsize >= 60 && rdesc[53] == 0x65 && rdesc[59] == 0x65) { hid_info(hdev, @@ -408,7 +415,7 @@ static int apple_input_configured(struct hid_device *hdev, if ((asc->quirks & APPLE_HAS_FN) && !asc->fn_found) { hid_info(hdev, "Fn key not found (Apple Wireless Keyboard clone?), disabling Fn key handling\n"); - asc->quirks = 0; + asc->quirks &= ~APPLE_HAS_FN; } return 0; diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index c183caf89d49215afffb38cc5a3f394ff8fa919a..f85c6e3309a09f55eeaaeeb1564eb9bb27729e89 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -918,8 +918,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) if (drvdata->quirks & QUIRK_IS_MULTITOUCH) drvdata->tp = &asus_i2c_tp; - if ((drvdata->quirks & QUIRK_T100_KEYBOARD) && - hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if ((drvdata->quirks & QUIRK_T100_KEYBOARD) && hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); if (intf->altsetting->desc.bInterfaceNumber == T100_TPAD_INTF) { @@ -947,8 +946,7 @@ static int asus_probe(struct hid_device *hdev, const struct hid_device_id *id) drvdata->tp = &asus_t100chi_tp; } - if ((drvdata->quirks & QUIRK_MEDION_E1239T) && - hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if ((drvdata->quirks & QUIRK_MEDION_E1239T) && hid_is_usb(hdev)) { struct usb_host_interface *alt = to_usb_interface(hdev->dev.parent)->altsetting; diff --git a/drivers/hid/hid-betopff.c b/drivers/hid/hid-betopff.c index 0790fbd3fc9a26c8a4e7c518f541aac7bec5a10a..467d789f9bc2d3a0d846c661b618d1b640a63188 100644 --- a/drivers/hid/hid-betopff.c +++ b/drivers/hid/hid-betopff.c @@ -56,15 +56,22 @@ static int betopff_init(struct hid_device *hid) { struct betopff_device *betopff; struct hid_report *report; - struct hid_input *hidinput = - list_first_entry(&hid->inputs, struct hid_input, list); + struct hid_input *hidinput; struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list; - struct input_dev *dev = hidinput->input; + struct input_dev *dev; int field_count = 0; int error; int i, j; + if (list_empty(&hid->inputs)) { + hid_err(hid, "no inputs found\n"); + return -ENODEV; + } + + hidinput = list_first_entry(&hid->inputs, struct hid_input, list); + dev = hidinput->input; + if (list_empty(report_list)) { hid_err(hid, "no output reports found\n"); return -ENODEV; diff --git a/drivers/hid/hid-bigbenff.c b/drivers/hid/hid-bigbenff.c index db6da21ade06315c457d087447cecf563eac8c55..74ad8bf98bfd5acea3d24ecff58300bdab434a26 100644 --- a/drivers/hid/hid-bigbenff.c +++ b/drivers/hid/hid-bigbenff.c @@ -191,7 +191,7 @@ static void bigben_worker(struct work_struct *work) struct bigben_device, worker); struct hid_field *report_field = bigben->report->field[0]; - if (bigben->removed) + if (bigben->removed || !report_field) return; if (bigben->work_led) { diff --git a/drivers/hid/hid-chicony.c b/drivers/hid/hid-chicony.c index 3f0ed6a95223499e14cf7f85608676945af5474b..e19e2b59733968d4b1efddf3f0551dd6d722c3d3 100644 --- a/drivers/hid/hid-chicony.c +++ b/drivers/hid/hid-chicony.c @@ -58,8 +58,12 @@ static int ch_input_mapping(struct hid_device *hdev, struct hid_input *hi, static __u8 *ch_switch12_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - + struct usb_interface *intf; + + if (!hid_is_usb(hdev)) + return rdesc; + + intf = to_usb_interface(hdev->dev.parent); if (intf->cur_altsetting->desc.bInterfaceNumber == 1) { /* Change usage maximum and logical maximum from 0x7fff to * 0x2fff, so they don't exceed HID_MAX_USAGES */ diff --git a/drivers/hid/hid-corsair.c b/drivers/hid/hid-corsair.c index 902a60e249ed268489b22809d6c1340d638ed954..8c895c820b672772340ec17d34dc60e198eb56a5 100644 --- a/drivers/hid/hid-corsair.c +++ b/drivers/hid/hid-corsair.c @@ -553,7 +553,12 @@ static int corsair_probe(struct hid_device *dev, const struct hid_device_id *id) int ret; unsigned long quirks = id->driver_data; struct corsair_drvdata *drvdata; - struct usb_interface *usbif = to_usb_interface(dev->dev.parent); + struct usb_interface *usbif; + + if (!hid_is_usb(dev)) + return -EINVAL; + + usbif = to_usb_interface(dev->dev.parent); drvdata = devm_kzalloc(&dev->dev, sizeof(struct corsair_drvdata), GFP_KERNEL); diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index 982737827b871278239c2372984e8a0e29ca0cd0..f4e2e6937758952ed499e6adbaf89ed147221945 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -823,7 +823,9 @@ static const char *keys[KEY_MAX + 1] = { [KEY_F22] = "F22", [KEY_F23] = "F23", [KEY_F24] = "F24", [KEY_PLAYCD] = "PlayCD", [KEY_PAUSECD] = "PauseCD", [KEY_PROG3] = "Prog3", - [KEY_PROG4] = "Prog4", [KEY_SUSPEND] = "Suspend", + [KEY_PROG4] = "Prog4", + [KEY_ALL_APPLICATIONS] = "AllApplications", + [KEY_SUSPEND] = "Suspend", [KEY_CLOSE] = "Close", [KEY_PLAY] = "Play", [KEY_FASTFORWARD] = "FastForward", [KEY_BASSBOOST] = "BassBoost", [KEY_PRINT] = "Print", [KEY_HP] = "HP", @@ -930,6 +932,7 @@ static const char *keys[KEY_MAX + 1] = { [KEY_SCREENSAVER] = "ScreenSaver", [KEY_VOICECOMMAND] = "VoiceCommand", [KEY_EMOJI_PICKER] = "EmojiPicker", + [KEY_DICTATE] = "Dictate", [KEY_BRIGHTNESS_MIN] = "BrightnessMin", [KEY_BRIGHTNESS_MAX] = "BrightnessMax", [KEY_BRIGHTNESS_AUTO] = "BrightnessAuto", diff --git a/drivers/hid/hid-elan.c b/drivers/hid/hid-elan.c index dae193749d443490390255df260fda27edc3e531..0e8f424025fea6eb1f3b3f3c25211f90faea1c47 100644 --- a/drivers/hid/hid-elan.c +++ b/drivers/hid/hid-elan.c @@ -50,7 +50,7 @@ struct elan_drvdata { static int is_not_elan_touchpad(struct hid_device *hdev) { - if (hdev->bus == BUS_USB) { + if (hid_is_usb(hdev)) { struct usb_interface *intf = to_usb_interface(hdev->dev.parent); return (intf->altsetting->desc.bInterfaceNumber != diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c index 0d22713a38742b23465f0dbe216d2107c25edc52..2876cb6a7dcab0b22791f217ee1bd21e366ad756 100644 --- a/drivers/hid/hid-elo.c +++ b/drivers/hid/hid-elo.c @@ -229,6 +229,9 @@ static int elo_probe(struct hid_device *hdev, const struct hid_device_id *id) struct elo_priv *priv; int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 2a176f77b32e9dca8cc38ec32d68f63a7115916e..047630198396496931239e31046d8bc7612d7de5 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -528,6 +528,8 @@ static void hammer_remove(struct hid_device *hdev) static const struct hid_device_id hammer_devices[] = { { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_DON) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_EEL) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_HAMMER) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, diff --git a/drivers/hid/hid-holtek-kbd.c b/drivers/hid/hid-holtek-kbd.c index 0a38e8e9bc783051f8bc634cff597885fdcb6161..403506b9697e75a3c2ac9042299bac8e1e2d342e 100644 --- a/drivers/hid/hid-holtek-kbd.c +++ b/drivers/hid/hid-holtek-kbd.c @@ -140,12 +140,17 @@ static int holtek_kbd_input_event(struct input_dev *dev, unsigned int type, static int holtek_kbd_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - int ret = hid_parse(hdev); + struct usb_interface *intf; + int ret; + + if (!hid_is_usb(hdev)) + return -EINVAL; + ret = hid_parse(hdev); if (!ret) ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); + intf = to_usb_interface(hdev->dev.parent); if (!ret && intf->cur_altsetting->desc.bInterfaceNumber == 1) { struct hid_input *hidinput; list_for_each_entry(hidinput, &hdev->inputs, list) { diff --git a/drivers/hid/hid-holtek-mouse.c b/drivers/hid/hid-holtek-mouse.c index 195b735b001d03d56f366b63b0ceb14b2d5d7552..7c907939bfae1d9a5398f574a284b66190fcbfd5 100644 --- a/drivers/hid/hid-holtek-mouse.c +++ b/drivers/hid/hid-holtek-mouse.c @@ -62,6 +62,29 @@ static __u8 *holtek_mouse_report_fixup(struct hid_device *hdev, __u8 *rdesc, return rdesc; } +static int holtek_mouse_probe(struct hid_device *hdev, + const struct hid_device_id *id) +{ + int ret; + + if (!hid_is_usb(hdev)) + return -EINVAL; + + ret = hid_parse(hdev); + if (ret) { + hid_err(hdev, "hid parse failed: %d\n", ret); + return ret; + } + + ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT); + if (ret) { + hid_err(hdev, "hw start failed: %d\n", ret); + return ret; + } + + return 0; +} + static const struct hid_device_id holtek_mouse_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_MOUSE_A067) }, @@ -83,6 +106,7 @@ static struct hid_driver holtek_mouse_driver = { .name = "holtek_mouse", .id_table = holtek_mouse_devices, .report_fixup = holtek_mouse_report_fixup, + .probe = holtek_mouse_probe, }; module_hid_driver(holtek_mouse_driver); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 3bed3731bfcea72cfd3a36acc8afc90cd6979acb..476cf05dc0979a6733e8cfc81e89d59eaef4bdf8 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -488,6 +488,7 @@ #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d #define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044 #define USB_DEVICE_ID_GOOGLE_DON 0x5050 +#define USB_DEVICE_ID_GOOGLE_EEL 0x5057 #define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f @@ -865,6 +866,7 @@ #define USB_DEVICE_ID_MS_TOUCH_COVER_2 0x07a7 #define USB_DEVICE_ID_MS_TYPE_COVER_2 0x07a9 #define USB_DEVICE_ID_MS_POWER_COVER 0x07da +#define USB_DEVICE_ID_MS_SURFACE3_COVER 0x07de #define USB_DEVICE_ID_MS_XBOX_ONE_S_CONTROLLER 0x02fd #define USB_DEVICE_ID_MS_PIXART_MOUSE 0x00cb #define USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS 0x02e0 @@ -1320,6 +1322,7 @@ #define USB_VENDOR_ID_UGTIZER 0x2179 #define USB_DEVICE_ID_UGTIZER_TABLET_GP0610 0x0053 #define USB_DEVICE_ID_UGTIZER_TABLET_GT5040 0x0077 +#define USB_DEVICE_ID_UGTIZER_TABLET_WP5540 0x0004 #define USB_VENDOR_ID_VIEWSONIC 0x0543 #define USB_DEVICE_ID_VIEWSONIC_PD1011 0xe621 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index 580d378342c41d7d99dcf60587ace66ba0807258..a17d1dda95703718087f59aa1ed349f9a1af6c4d 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -956,6 +956,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x0cd: map_key_clear(KEY_PLAYPAUSE); break; case 0x0cf: map_key_clear(KEY_VOICECOMMAND); break; + case 0x0d8: map_key_clear(KEY_DICTATE); break; case 0x0d9: map_key_clear(KEY_EMOJI_PICKER); break; case 0x0e0: map_abs_clear(ABS_VOLUME); break; @@ -1047,6 +1048,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x29d: map_key_clear(KEY_KBD_LAYOUT_NEXT); break; + case 0x2a2: map_key_clear(KEY_ALL_APPLICATIONS); break; + case 0x2c7: map_key_clear(KEY_KBDINPUTASSIST_PREV); break; case 0x2c8: map_key_clear(KEY_KBDINPUTASSIST_NEXT); break; case 0x2c9: map_key_clear(KEY_KBDINPUTASSIST_PREVGROUP); break; @@ -1288,6 +1291,12 @@ void hidinput_hid_event(struct hid_device *hid, struct hid_field *field, struct input = field->hidinput->input; + if (usage->type == EV_ABS && + (((*quirks & HID_QUIRK_X_INVERT) && usage->code == ABS_X) || + ((*quirks & HID_QUIRK_Y_INVERT) && usage->code == ABS_Y))) { + value = field->logical_maximum - value; + } + if (usage->hat_min < usage->hat_max || usage->hat_dir) { int hat_dir = usage->hat_dir; if (!hat_dir) diff --git a/drivers/hid/hid-lg.c b/drivers/hid/hid-lg.c index 0dc7cdfc56f77470b8a8c853ebd0077b4955853c..2c7e7c089bf990f77be33ea14df06f3a242a71e2 100644 --- a/drivers/hid/hid-lg.c +++ b/drivers/hid/hid-lg.c @@ -769,12 +769,18 @@ static int lg_raw_event(struct hid_device *hdev, struct hid_report *report, static int lg_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *iface = to_usb_interface(hdev->dev.parent); - __u8 iface_num = iface->cur_altsetting->desc.bInterfaceNumber; + struct usb_interface *iface; + __u8 iface_num; unsigned int connect_mask = HID_CONNECT_DEFAULT; struct lg_drv_data *drv_data; int ret; + if (!hid_is_usb(hdev)) + return -EINVAL; + + iface = to_usb_interface(hdev->dev.parent); + iface_num = iface->cur_altsetting->desc.bInterfaceNumber; + /* G29 only work with the 1st interface */ if ((hdev->product == USB_DEVICE_ID_LOGITECH_G29_WHEEL) && (iface_num != 0)) { diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 271bd8d2433958c8470bdb4e0308e05eb21da32c..a311b0a33eba7ff45501c396281daba4919a79b8 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1693,7 +1693,7 @@ static int logi_dj_probe(struct hid_device *hdev, case recvr_type_27mhz: no_dj_interfaces = 2; break; case recvr_type_bluetooth: no_dj_interfaces = 2; break; } - if (hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if (hid_is_usb(hdev)) { intf = to_usb_interface(hdev->dev.parent); if (intf && intf->altsetting->desc.bInterfaceNumber >= no_dj_interfaces) { diff --git a/drivers/hid/hid-prodikeys.c b/drivers/hid/hid-prodikeys.c index 2666af02d5c1a11f1a32f098eecb61ddc458cf0a..e4e9471d0f1e92d9b17a3c0f90d1b7dc0ceeb7de 100644 --- a/drivers/hid/hid-prodikeys.c +++ b/drivers/hid/hid-prodikeys.c @@ -798,12 +798,18 @@ static int pk_raw_event(struct hid_device *hdev, struct hid_report *report, static int pk_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - unsigned short ifnum = intf->cur_altsetting->desc.bInterfaceNumber; + struct usb_interface *intf; + unsigned short ifnum; unsigned long quirks = id->driver_data; struct pk_device *pk; struct pcmidi_snd *pm = NULL; + if (!hid_is_usb(hdev)) + return -EINVAL; + + intf = to_usb_interface(hdev->dev.parent); + ifnum = intf->cur_altsetting->desc.bInterfaceNumber; + pk = kzalloc(sizeof(*pk), GFP_KERNEL); if (pk == NULL) { hid_err(hdev, "can't alloc descriptor\n"); diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c index be53c723c729d80aba003354f2fadc6d336ae9c7..2ab71d717bb03935688b2282bb1de4e28343f7f8 100644 --- a/drivers/hid/hid-quirks.c +++ b/drivers/hid/hid-quirks.c @@ -124,6 +124,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MCS, USB_DEVICE_ID_MCS_GAMEPADBLOCK), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PIXART_MOUSE), HID_QUIRK_ALWAYS_POLL }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), HID_QUIRK_NO_INIT_REPORTS }, + { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE3_COVER), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_SURFACE_PRO_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TOUCH_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_TYPE_COVER_2), HID_QUIRK_NO_INIT_REPORTS }, @@ -186,6 +187,7 @@ static const struct hid_device_id hid_quirks[] = { { HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD), HID_QUIRK_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60), HID_QUIRK_MULTI_INPUT }, + { HID_USB_DEVICE(USB_VENDOR_ID_UGTIZER, USB_DEVICE_ID_UGTIZER_TABLET_WP5540), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_10_6_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_MEDIA_TABLET_14_1_INCH), HID_QUIRK_MULTI_INPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT }, diff --git a/drivers/hid/hid-roccat-arvo.c b/drivers/hid/hid-roccat-arvo.c index ffcd444ae2ba6e01d2a6aebebb7dd876b47ae6a9..4b18e1a4fc7acf2c7d80ac66d8b7bffe8137984f 100644 --- a/drivers/hid/hid-roccat-arvo.c +++ b/drivers/hid/hid-roccat-arvo.c @@ -344,6 +344,9 @@ static int arvo_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-isku.c b/drivers/hid/hid-roccat-isku.c index ce5f22519956acf91527d390d99f3ff35a424420..e95d59cd8d075dc82716fb47a4193286bd0c2b09 100644 --- a/drivers/hid/hid-roccat-isku.c +++ b/drivers/hid/hid-roccat-isku.c @@ -324,6 +324,9 @@ static int isku_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-kone.c b/drivers/hid/hid-roccat-kone.c index 1ca64481145ee4dd5446d41d66b6bb90d249b82e..e8522eacf797342690abf1dc64169928842b03da 100644 --- a/drivers/hid/hid-roccat-kone.c +++ b/drivers/hid/hid-roccat-kone.c @@ -749,6 +749,9 @@ static int kone_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-koneplus.c b/drivers/hid/hid-roccat-koneplus.c index 0316edf8c5bb44e4d72e5a9a6ab858b53d0eefbb..1896c69ea512f787b264ed27f82f13d813fd82ea 100644 --- a/drivers/hid/hid-roccat-koneplus.c +++ b/drivers/hid/hid-roccat-koneplus.c @@ -431,6 +431,9 @@ static int koneplus_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-konepure.c b/drivers/hid/hid-roccat-konepure.c index 5248b3c7cf7859abbd62dab6fed53e88d1b37773..cf8eeb33a12571bf47c2c57118ff7935909cd865 100644 --- a/drivers/hid/hid-roccat-konepure.c +++ b/drivers/hid/hid-roccat-konepure.c @@ -133,6 +133,9 @@ static int konepure_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-kovaplus.c b/drivers/hid/hid-roccat-kovaplus.c index 960012881570569df2d31c368b33c06f4f67ba4d..6fb9b9563769dfbcc1cfc521c1f113feb442d411 100644 --- a/drivers/hid/hid-roccat-kovaplus.c +++ b/drivers/hid/hid-roccat-kovaplus.c @@ -501,6 +501,9 @@ static int kovaplus_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-lua.c b/drivers/hid/hid-roccat-lua.c index 4a88a76d5c62293b890c56093d469cf6687a7baf..d5ddf0d68346b2c147f3c888badb77c06d77f46c 100644 --- a/drivers/hid/hid-roccat-lua.c +++ b/drivers/hid/hid-roccat-lua.c @@ -160,6 +160,9 @@ static int lua_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-pyra.c b/drivers/hid/hid-roccat-pyra.c index 989927defe8db84d6d1916424a9c1d50fc5c3911..4fcc8e7d276f228cb2c29f99df8e6d5db9eef119 100644 --- a/drivers/hid/hid-roccat-pyra.c +++ b/drivers/hid/hid-roccat-pyra.c @@ -449,6 +449,9 @@ static int pyra_probe(struct hid_device *hdev, const struct hid_device_id *id) { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-ryos.c b/drivers/hid/hid-roccat-ryos.c index 3956a6c9c5217efdb88e25c918f3598a9e5e61fb..5bf1971a2b14d83c6388d3d1e9541617ff415fa6 100644 --- a/drivers/hid/hid-roccat-ryos.c +++ b/drivers/hid/hid-roccat-ryos.c @@ -141,6 +141,9 @@ static int ryos_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-roccat-savu.c b/drivers/hid/hid-roccat-savu.c index 818701f7a028178b2c565e1506a28360c1deee08..a784bb4ee6512d8b39a0ede9a653b29cf901bf5d 100644 --- a/drivers/hid/hid-roccat-savu.c +++ b/drivers/hid/hid-roccat-savu.c @@ -113,6 +113,9 @@ static int savu_probe(struct hid_device *hdev, { int retval; + if (!hid_is_usb(hdev)) + return -EINVAL; + retval = hid_parse(hdev); if (retval) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-samsung.c b/drivers/hid/hid-samsung.c index 2e1c31156eca046417d65d76d0e8aa847719ecb3..cf5992e97094003628a4aaacbded99a9f3603a10 100644 --- a/drivers/hid/hid-samsung.c +++ b/drivers/hid/hid-samsung.c @@ -152,6 +152,9 @@ static int samsung_probe(struct hid_device *hdev, int ret; unsigned int cmask = HID_CONNECT_DEFAULT; + if (!hid_is_usb(hdev)) + return -EINVAL; + ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); diff --git a/drivers/hid/hid-u2fzero.c b/drivers/hid/hid-u2fzero.c index 95e0807878c7ea9137e4b29159bca278c761152b..ac3fd870673d28f6df13f53d9aacc1b315e3f786 100644 --- a/drivers/hid/hid-u2fzero.c +++ b/drivers/hid/hid-u2fzero.c @@ -132,7 +132,7 @@ static int u2fzero_recv(struct u2fzero_device *dev, ret = (wait_for_completion_timeout( &ctx.done, msecs_to_jiffies(USB_CTRL_SET_TIMEOUT))); - if (ret < 0) { + if (ret == 0) { usb_kill_urb(dev->urb); hid_err(hdev, "urb submission timed out"); } else { @@ -191,6 +191,8 @@ static int u2fzero_rng_read(struct hwrng *rng, void *data, struct u2f_hid_msg resp; int ret; size_t actual_length; + /* valid packets must have a correct header */ + int min_length = offsetof(struct u2f_hid_msg, init.data); if (!dev->present) { hid_dbg(dev->hdev, "device not present"); @@ -198,12 +200,14 @@ static int u2fzero_rng_read(struct hwrng *rng, void *data, } ret = u2fzero_recv(dev, &req, &resp); - if (ret < 0) + + /* ignore errors or packets without data */ + if (ret < min_length) return 0; /* only take the minimum amount of data it is safe to take */ - actual_length = min3((size_t)ret - offsetof(struct u2f_hid_msg, - init.data), U2F_HID_MSG_LEN(resp), max); + actual_length = min3((size_t)ret - min_length, + U2F_HID_MSG_LEN(resp), max); memcpy(data, resp.init.data, actual_length); @@ -286,7 +290,7 @@ static int u2fzero_probe(struct hid_device *hdev, unsigned int minor; int ret; - if (!hid_is_using_ll_driver(hdev, &usb_hid_driver)) + if (!hid_is_usb(hdev)) return -EINVAL; dev = devm_kzalloc(&hdev->dev, sizeof(*dev), GFP_KERNEL); diff --git a/drivers/hid/hid-uclogic-core.c b/drivers/hid/hid-uclogic-core.c index 8e9c9e646cb7dd45d2e84c299e8c34b0936898fd..4edb241957040209672137bc513efa3abf13d970 100644 --- a/drivers/hid/hid-uclogic-core.c +++ b/drivers/hid/hid-uclogic-core.c @@ -164,6 +164,9 @@ static int uclogic_probe(struct hid_device *hdev, struct uclogic_drvdata *drvdata = NULL; bool params_initialized = false; + if (!hid_is_usb(hdev)) + return -EINVAL; + /* * libinput requires the pad interface to be on a different node * than the pen, so use QUIRK_MULTI_INPUT for all tablets. diff --git a/drivers/hid/hid-uclogic-params.c b/drivers/hid/hid-uclogic-params.c index d26d8cd98efcfddc869155a07779828c54eee6bc..38f9bbad81c1774508f6291e16b3159fe5e3844e 100644 --- a/drivers/hid/hid-uclogic-params.c +++ b/drivers/hid/hid-uclogic-params.c @@ -65,7 +65,7 @@ static int uclogic_params_get_str_desc(__u8 **pbuf, struct hid_device *hdev, __u8 idx, size_t len) { int rc; - struct usb_device *udev = hid_to_usb_dev(hdev); + struct usb_device *udev; __u8 *buf = NULL; /* Check arguments */ @@ -74,6 +74,8 @@ static int uclogic_params_get_str_desc(__u8 **pbuf, struct hid_device *hdev, goto cleanup; } + udev = hid_to_usb_dev(hdev); + buf = kmalloc(len, GFP_KERNEL); if (buf == NULL) { rc = -ENOMEM; @@ -449,7 +451,7 @@ static int uclogic_params_frame_init_v1_buttonpad( { int rc; bool found = false; - struct usb_device *usb_dev = hid_to_usb_dev(hdev); + struct usb_device *usb_dev; char *str_buf = NULL; const size_t str_len = 16; @@ -459,6 +461,8 @@ static int uclogic_params_frame_init_v1_buttonpad( goto cleanup; } + usb_dev = hid_to_usb_dev(hdev); + /* * Enable generic button mode */ @@ -705,9 +709,9 @@ static int uclogic_params_huion_init(struct uclogic_params *params, struct hid_device *hdev) { int rc; - struct usb_device *udev = hid_to_usb_dev(hdev); - struct usb_interface *iface = to_usb_interface(hdev->dev.parent); - __u8 bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber; + struct usb_device *udev; + struct usb_interface *iface; + __u8 bInterfaceNumber; bool found; /* The resulting parameters (noop) */ struct uclogic_params p = {0, }; @@ -721,6 +725,10 @@ static int uclogic_params_huion_init(struct uclogic_params *params, goto cleanup; } + udev = hid_to_usb_dev(hdev); + iface = to_usb_interface(hdev->dev.parent); + bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber; + /* If it's not a pen interface */ if (bInterfaceNumber != 0) { /* TODO: Consider marking the interface invalid */ @@ -832,21 +840,25 @@ int uclogic_params_init(struct uclogic_params *params, struct hid_device *hdev) { int rc; - struct usb_device *udev = hid_to_usb_dev(hdev); - __u8 bNumInterfaces = udev->config->desc.bNumInterfaces; - struct usb_interface *iface = to_usb_interface(hdev->dev.parent); - __u8 bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber; + struct usb_device *udev; + __u8 bNumInterfaces; + struct usb_interface *iface; + __u8 bInterfaceNumber; bool found; /* The resulting parameters (noop) */ struct uclogic_params p = {0, }; /* Check arguments */ - if (params == NULL || hdev == NULL || - !hid_is_using_ll_driver(hdev, &usb_hid_driver)) { + if (params == NULL || hdev == NULL || !hid_is_usb(hdev)) { rc = -EINVAL; goto cleanup; } + udev = hid_to_usb_dev(hdev); + bNumInterfaces = udev->config->desc.bNumInterfaces; + iface = to_usb_interface(hdev->dev.parent); + bInterfaceNumber = iface->cur_altsetting->desc.bInterfaceNumber; + /* * Set replacement report descriptor if the original matches the * specified size. Otherwise keep interface unchanged. diff --git a/drivers/hid/hid-vivaldi.c b/drivers/hid/hid-vivaldi.c index cd7ada48b1d9fffb2bca0f026bed8842e497f9bf..d57ec17670379cf75b28dfd8bab713332fa30b4a 100644 --- a/drivers/hid/hid-vivaldi.c +++ b/drivers/hid/hid-vivaldi.c @@ -57,6 +57,9 @@ static int vivaldi_probe(struct hid_device *hdev, int ret; drvdata = devm_kzalloc(&hdev->dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + hid_set_drvdata(hdev, drvdata); ret = hid_parse(hdev); @@ -71,10 +74,11 @@ static void vivaldi_feature_mapping(struct hid_device *hdev, struct hid_usage *usage) { struct vivaldi_data *drvdata = hid_get_drvdata(hdev); + struct hid_report *report = field->report; int fn_key; int ret; u32 report_len; - u8 *buf; + u8 *report_data, *buf; if (field->logical != HID_USAGE_FN_ROW_PHYSMAP || (usage->hid & HID_USAGE_PAGE) != HID_UP_ORDINAL) @@ -86,12 +90,24 @@ static void vivaldi_feature_mapping(struct hid_device *hdev, if (fn_key > drvdata->max_function_row_key) drvdata->max_function_row_key = fn_key; - buf = hid_alloc_report_buf(field->report, GFP_KERNEL); - if (!buf) + report_data = buf = hid_alloc_report_buf(report, GFP_KERNEL); + if (!report_data) return; - report_len = hid_report_len(field->report); - ret = hid_hw_raw_request(hdev, field->report->id, buf, + report_len = hid_report_len(report); + if (!report->id) { + /* + * hid_hw_raw_request() will stuff report ID (which will be 0) + * into the first byte of the buffer even for unnumbered + * reports, so we need to account for this to avoid getting + * -EOVERFLOW in return. + * Note that hid_alloc_report_buf() adds 7 bytes to the size + * so we can safely say that we have space for an extra byte. + */ + report_len++; + } + + ret = hid_hw_raw_request(hdev, report->id, report_data, report_len, HID_FEATURE_REPORT, HID_REQ_GET_REPORT); if (ret < 0) { @@ -100,7 +116,16 @@ static void vivaldi_feature_mapping(struct hid_device *hdev, goto out; } - ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, buf, + if (!report->id) { + /* + * Undo the damage from hid_hw_raw_request() for unnumbered + * reports. + */ + report_data++; + report_len--; + } + + ret = hid_report_raw_event(hdev, HID_FEATURE_REPORT, report_data, report_len, 0); if (ret) { dev_warn(&hdev->dev, "failed to report feature %d\n", @@ -118,7 +143,7 @@ out: static int vivaldi_input_configured(struct hid_device *hdev, struct hid_input *hidinput) { - return sysfs_create_group(&hdev->dev.kobj, &input_attribute_group); + return devm_device_add_group(&hdev->dev, &input_attribute_group); } static const struct hid_device_id vivaldi_table[] = { diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c index 8fe3efcb832715c4a978625387dc0bb0ea5b00a2..fc06d8bb42e0fe94e75167fe360c600283f80a4d 100644 --- a/drivers/hid/uhid.c +++ b/drivers/hid/uhid.c @@ -28,11 +28,22 @@ struct uhid_device { struct mutex devlock; + + /* This flag tracks whether the HID device is usable for commands from + * userspace. The flag is already set before hid_add_device(), which + * runs in workqueue context, to allow hid_add_device() to communicate + * with userspace. + * However, if hid_add_device() fails, the flag is cleared without + * holding devlock. + * We guarantee that if @running changes from true to false while you're + * holding @devlock, it's still fine to access @hid. + */ bool running; __u8 *rd_data; uint rd_size; + /* When this is NULL, userspace may use UHID_CREATE/UHID_CREATE2. */ struct hid_device *hid; struct uhid_event input_buf; @@ -63,9 +74,18 @@ static void uhid_device_add_worker(struct work_struct *work) if (ret) { hid_err(uhid->hid, "Cannot register HID device: error %d\n", ret); - hid_destroy_device(uhid->hid); - uhid->hid = NULL; + /* We used to call hid_destroy_device() here, but that's really + * messy to get right because we have to coordinate with + * concurrent writes from userspace that might be in the middle + * of using uhid->hid. + * Just leave uhid->hid as-is for now, and clean it up when + * userspace tries to close or reinitialize the uhid instance. + * + * However, we do have to clear the ->running flag and do a + * wakeup to make sure userspace knows that the device is gone. + */ uhid->running = false; + wake_up_interruptible(&uhid->report_wait); } } @@ -474,7 +494,7 @@ static int uhid_dev_create2(struct uhid_device *uhid, void *rd_data; int ret; - if (uhid->running) + if (uhid->hid) return -EALREADY; rd_size = ev->u.create2.rd_size; @@ -556,7 +576,7 @@ static int uhid_dev_create(struct uhid_device *uhid, static int uhid_dev_destroy(struct uhid_device *uhid) { - if (!uhid->running) + if (!uhid->hid) return -EINVAL; uhid->running = false; @@ -565,6 +585,7 @@ static int uhid_dev_destroy(struct uhid_device *uhid) cancel_work_sync(&uhid->worker); hid_destroy_device(uhid->hid); + uhid->hid = NULL; kfree(uhid->rd_data); return 0; diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 8d4ac4b9fb9dade3edcadcd5646366ee2003ce47..009a0469d54f6aa4ebe11b5f6f7af8146eeb620f 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -503,7 +503,7 @@ static void hid_ctrl(struct urb *urb) if (unplug) { usbhid->ctrltail = usbhid->ctrlhead; - } else { + } else if (usbhid->ctrlhead != usbhid->ctrltail) { usbhid->ctrltail = (usbhid->ctrltail + 1) & (HID_CONTROL_FIFO_SIZE - 1); if (usbhid->ctrlhead != usbhid->ctrltail && @@ -1221,9 +1221,20 @@ static void usbhid_stop(struct hid_device *hid) mutex_lock(&usbhid->mutex); clear_bit(HID_STARTED, &usbhid->iofl); + spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ set_bit(HID_DISCONNECTED, &usbhid->iofl); + while (usbhid->ctrltail != usbhid->ctrlhead) { + if (usbhid->ctrl[usbhid->ctrltail].dir == USB_DIR_OUT) { + kfree(usbhid->ctrl[usbhid->ctrltail].raw_report); + usbhid->ctrl[usbhid->ctrltail].raw_report = NULL; + } + + usbhid->ctrltail = (usbhid->ctrltail + 1) & + (HID_CONTROL_FIFO_SIZE - 1); + } spin_unlock_irq(&usbhid->lock); + usb_kill_urb(usbhid->urbin); usb_kill_urb(usbhid->urbout); usb_kill_urb(usbhid->urbctrl); diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 73dafa60080f1fd8c4b5008ca8f21f692c90999f..329bb1a46f90e8ba9ebc3997ed16b812f7b90fd5 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -726,7 +726,7 @@ static void wacom_retrieve_hid_descriptor(struct hid_device *hdev, * Skip the query for this type and modify defaults based on * interface number. */ - if (features->type == WIRELESS) { + if (features->type == WIRELESS && intf) { if (intf->cur_altsetting->desc.bInterfaceNumber == 0) features->device_type = WACOM_DEVICETYPE_WL_MONITOR; else @@ -2217,7 +2217,7 @@ static void wacom_update_name(struct wacom *wacom, const char *suffix) if ((features->type == HID_GENERIC) && !strcmp("Wacom HID", features->name)) { char *product_name = wacom->hdev->name; - if (hid_is_using_ll_driver(wacom->hdev, &usb_hid_driver)) { + if (hid_is_usb(wacom->hdev)) { struct usb_interface *intf = to_usb_interface(wacom->hdev->dev.parent); struct usb_device *dev = interface_to_usbdev(intf); product_name = dev->product; @@ -2448,6 +2448,9 @@ static void wacom_wireless_work(struct work_struct *work) wacom_destroy_battery(wacom); + if (!usbdev) + return; + /* Stylus interface */ hdev1 = usb_get_intfdata(usbdev->config->interface[1]); wacom1 = hid_get_drvdata(hdev1); @@ -2727,8 +2730,6 @@ static void wacom_mode_change_work(struct work_struct *work) static int wacom_probe(struct hid_device *hdev, const struct hid_device_id *id) { - struct usb_interface *intf = to_usb_interface(hdev->dev.parent); - struct usb_device *dev = interface_to_usbdev(intf); struct wacom *wacom; struct wacom_wac *wacom_wac; struct wacom_features *features; @@ -2763,8 +2764,14 @@ static int wacom_probe(struct hid_device *hdev, wacom_wac->hid_data.inputmode = -1; wacom_wac->mode_report = -1; - wacom->usbdev = dev; - wacom->intf = intf; + if (hid_is_usb(hdev)) { + struct usb_interface *intf = to_usb_interface(hdev->dev.parent); + struct usb_device *dev = interface_to_usbdev(intf); + + wacom->usbdev = dev; + wacom->intf = intf; + } + mutex_init(&wacom->lock); INIT_DELAYED_WORK(&wacom->init_work, wacom_init_work); INIT_WORK(&wacom->wireless_work, wacom_wireless_work); diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 4228ddc3df0e65ed23b00aca506a05a0d110b044..d90bfa8b7313e4b05a6d697d7a14bc81ac952c45 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2566,6 +2566,24 @@ static void wacom_wac_finger_slot(struct wacom_wac *wacom_wac, } } +static bool wacom_wac_slot_is_active(struct input_dev *dev, int key) +{ + struct input_mt *mt = dev->mt; + struct input_mt_slot *s; + + if (!mt) + return false; + + for (s = mt->slots; s != mt->slots + mt->num_slots; s++) { + if (s->key == key && + input_mt_get_value(s, ABS_MT_TRACKING_ID) >= 0) { + return true; + } + } + + return false; +} + static void wacom_wac_finger_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { @@ -2578,6 +2596,9 @@ static void wacom_wac_finger_event(struct hid_device *hdev, return; switch (equivalent_usage) { + case HID_DG_CONFIDENCE: + wacom_wac->hid_data.confidence = value; + break; case HID_GD_X: wacom_wac->hid_data.x = value; break; @@ -2610,8 +2631,14 @@ static void wacom_wac_finger_event(struct hid_device *hdev, } if (usage->usage_index + 1 == field->report_count) { - if (equivalent_usage == wacom_wac->hid_data.last_slot_field) - wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input); + if (equivalent_usage == wacom_wac->hid_data.last_slot_field) { + bool touch_removed = wacom_wac_slot_is_active(wacom_wac->touch_input, + wacom_wac->hid_data.id) && !wacom_wac->hid_data.tipswitch; + + if (wacom_wac->hid_data.confidence || touch_removed) { + wacom_wac_finger_slot(wacom_wac, wacom_wac->touch_input); + } + } } } @@ -2625,6 +2652,12 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev, wacom_wac->is_invalid_bt_frame = false; + hid_data->confidence = true; + + hid_data->cc_report = 0; + hid_data->cc_index = -1; + hid_data->cc_value_index = -1; + for (i = 0; i < report->maxfield; i++) { struct hid_field *field = report->field[i]; int j; @@ -2658,11 +2691,14 @@ static void wacom_wac_finger_pre_report(struct hid_device *hdev, hid_data->cc_index >= 0) { struct hid_field *field = report->field[hid_data->cc_index]; int value = field->value[hid_data->cc_value_index]; - if (value) + if (value) { hid_data->num_expected = value; + hid_data->num_received = 0; + } } else { hid_data->num_expected = wacom_wac->features.touch_max; + hid_data->num_received = 0; } } @@ -2686,6 +2722,7 @@ static void wacom_wac_finger_report(struct hid_device *hdev, input_sync(input); wacom_wac->hid_data.num_received = 0; + wacom_wac->hid_data.num_expected = 0; /* keep touch state for pen event */ wacom_wac->shared->touch_down = wacom_wac_finger_count_touches(wacom_wac); @@ -4715,6 +4752,12 @@ static const struct wacom_features wacom_features_0x393 = { "Wacom Intuos Pro S", 31920, 19950, 8191, 63, INTUOSP2S_BT, WACOM_INTUOS3_RES, WACOM_INTUOS3_RES, 7, .touch_max = 10 }; +static const struct wacom_features wacom_features_0x3c6 = + { "Wacom Intuos BT S", 15200, 9500, 4095, 63, + INTUOSHT3_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4 }; +static const struct wacom_features wacom_features_0x3c8 = + { "Wacom Intuos BT M", 21600, 13500, 4095, 63, + INTUOSHT3_BT, WACOM_INTUOS_RES, WACOM_INTUOS_RES, 4 }; static const struct wacom_features wacom_features_HID_ANY_ID = { "Wacom HID", .type = HID_GENERIC, .oVid = HID_ANY_ID, .oPid = HID_ANY_ID }; @@ -4888,6 +4931,8 @@ const struct hid_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x37A) }, { USB_DEVICE_WACOM(0x37B) }, { BT_DEVICE_WACOM(0x393) }, + { BT_DEVICE_WACOM(0x3c6) }, + { BT_DEVICE_WACOM(0x3c8) }, { USB_DEVICE_WACOM(0x4001) }, { USB_DEVICE_WACOM(0x4004) }, { USB_DEVICE_WACOM(0x5000) }, diff --git a/drivers/hid/wacom_wac.h b/drivers/hid/wacom_wac.h index e3835407e8d2399686121ce5f02afb708016c09b..8dea7cb298e693e5653fa9cb4d72ff8b67bd575c 100644 --- a/drivers/hid/wacom_wac.h +++ b/drivers/hid/wacom_wac.h @@ -300,6 +300,7 @@ struct hid_data { bool tipswitch; bool barrelswitch; bool barrelswitch2; + bool confidence; int x; int y; int pressure; diff --git a/drivers/hsi/hsi_core.c b/drivers/hsi/hsi_core.c index a5f92e2889cb81c58e66048b25d145502ea2a1bb..a330f58d45fc637d502a9efc331b9d9bb7948963 100644 --- a/drivers/hsi/hsi_core.c +++ b/drivers/hsi/hsi_core.c @@ -102,6 +102,7 @@ struct hsi_client *hsi_new_client(struct hsi_port *port, if (device_register(&cl->device) < 0) { pr_err("hsi: failed to register client: %s\n", info->name); put_device(&cl->device); + goto err; } return cl; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 40e2b9f91163c3415a1c95ed3cf23524aa1a369a..7845fa5de79e922e5f31f9c1d0943db5fe41d9fe 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -13,6 +13,7 @@ #define _HYPERV_VMBUS_H #include +#include #include #include #include diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a5a402e776c77f2640a1de5b09a13b3f0ff330e2..362da2a83b470bce767073e1a7d8b718575e96e0 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -1944,8 +1944,10 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) kobj->kset = dev->channels_kset; ret = kobject_init_and_add(kobj, &vmbus_chan_ktype, NULL, "%u", relid); - if (ret) + if (ret) { + kobject_put(kobj); return ret; + } ret = sysfs_create_group(kobj, &vmbus_chan_group); @@ -1954,6 +1956,7 @@ int vmbus_add_channel_kobj(struct hv_device *dev, struct vmbus_channel *channel) * The calling functions' error handling paths will cleanup the * empty channel directory. */ + kobject_put(kobj); dev_err(device, "Unable to set up channel sysfs files\n"); return ret; } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index a850e4f0e0bdee240f6415e314873751e80a37fd..0c2b032ee6176c755e319139dd9c612154eec9d8 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -1275,7 +1275,7 @@ config SENSORS_LM90 Maxim MAX6646, MAX6647, MAX6648, MAX6649, MAX6654, MAX6657, MAX6658, MAX6659, MAX6680, MAX6681, MAX6692, MAX6695, MAX6696, ON Semiconductor NCT1008, Winbond/Nuvoton W83L771W/G/AWG/ASG, - Philips SA56004, GMT G781, and Texas Instruments TMP451 + Philips SA56004, GMT G781, Texas Instruments TMP451 and TMP461 sensor chips. This driver can also be built as a module. If so, the module diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 63b74e781c5d955d5b5b7927dd331fb39087cdd9..10c7b6295b02e8b2819e2fa61cc09ce467d19e35 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -317,7 +317,7 @@ static int i8k_enable_fan_auto_mode(bool enable) } /* - * Set the fan speed (off, low, high). Returns the new fan status. + * Set the fan speed (off, low, high, ...). */ static int i8k_set_fan(int fan, int speed) { @@ -329,7 +329,7 @@ static int i8k_set_fan(int fan, int speed) speed = (speed < 0) ? 0 : ((speed > i8k_fan_max) ? i8k_fan_max : speed); regs.ebx = (fan & 0xff) | (speed << 8); - return i8k_smm(®s) ? : i8k_get_fan_status(fan); + return i8k_smm(®s); } static int i8k_get_temp_type(int sensor) @@ -443,7 +443,7 @@ static int i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg) { int val = 0; - int speed; + int speed, err; unsigned char buff[16]; int __user *argp = (int __user *)arg; @@ -504,7 +504,11 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg) if (copy_from_user(&speed, argp + 1, sizeof(int))) return -EFAULT; - val = i8k_set_fan(val, speed); + err = i8k_set_fan(val, speed); + if (err < 0) + return err; + + val = i8k_get_fan_status(val); break; default: @@ -603,15 +607,18 @@ static const struct proc_ops i8k_proc_ops = { .proc_ioctl = i8k_ioctl, }; +static struct proc_dir_entry *entry; + static void __init i8k_init_procfs(void) { /* Register the proc entry */ - proc_create("i8k", 0, NULL, &i8k_proc_ops); + entry = proc_create("i8k", 0, NULL, &i8k_proc_ops); } static void __exit i8k_exit_procfs(void) { - remove_proc_entry("i8k", NULL); + if (entry) + remove_proc_entry("i8k", NULL); } #else diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c index 6c684058bfdfc0c14f616e839517df95639b8772..d649fea8299948a183b582669df670c728877927 100644 --- a/drivers/hwmon/hwmon.c +++ b/drivers/hwmon/hwmon.c @@ -178,12 +178,14 @@ static int hwmon_thermal_add_sensor(struct device *dev, int index) tzd = devm_thermal_zone_of_sensor_register(dev, index, tdata, &hwmon_thermal_ops); - /* - * If CONFIG_THERMAL_OF is disabled, this returns -ENODEV, - * so ignore that error but forward any other error. - */ - if (IS_ERR(tzd) && (PTR_ERR(tzd) != -ENODEV)) - return PTR_ERR(tzd); + if (IS_ERR(tzd)) { + if (PTR_ERR(tzd) != -ENODEV) + return PTR_ERR(tzd); + dev_info(dev, "temp%d_input not attached to any thermal zone\n", + index + 1); + devm_kfree(dev, tdata); + return 0; + } err = devm_add_action(dev, hwmon_thermal_remove_sensor, &tdata->node); if (err) @@ -760,8 +762,10 @@ __hwmon_device_register(struct device *dev, const char *name, void *drvdata, dev_set_drvdata(hdev, drvdata); dev_set_name(hdev, HWMON_ID_FORMAT, id); err = device_register(hdev); - if (err) - goto free_hwmon; + if (err) { + put_device(hdev); + goto ida_remove; + } INIT_LIST_HEAD(&hwdev->tzdata); diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index ebbfd5f352c064eeb3cf071691c13919aaee8ec8..a7142c32889c056bc25572cce925afbbe0546bff 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -35,13 +35,14 @@ * explicitly as max6659, or if its address is not 0x4c. * These chips lack the remote temperature offset feature. * - * This driver also supports the MAX6654 chip made by Maxim. This chip can - * be at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is - * otherwise similar to MAX6657/MAX6658/MAX6659. Extended range is available - * by setting the configuration register accordingly, and is done during - * initialization. Extended precision is only available at conversion rates - * of 1 Hz and slower. Note that extended precision is not enabled by - * default, as this driver initializes all chips to 2 Hz by design. + * This driver also supports the MAX6654 chip made by Maxim. This chip can be + * at 9 different addresses, similar to MAX6680/MAX6681. The MAX6654 is similar + * to MAX6657/MAX6658/MAX6659, but does not support critical temperature + * limits. Extended range is available by setting the configuration register + * accordingly, and is done during initialization. Extended precision is only + * available at conversion rates of 1 Hz and slower. Note that extended + * precision is not enabled by default, as this driver initializes all chips + * to 2 Hz by design. * * This driver also supports the MAX6646, MAX6647, MAX6648, MAX6649 and * MAX6692 chips made by Maxim. These are again similar to the LM86, @@ -69,10 +70,10 @@ * This driver also supports the G781 from GMT. This device is compatible * with the ADM1032. * - * This driver also supports TMP451 from Texas Instruments. This device is - * supported in both compatibility and extended mode. It's mostly compatible - * with ADT7461 except for local temperature low byte register and max - * conversion rate. + * This driver also supports TMP451 and TMP461 from Texas Instruments. + * Those devices are supported in both compatibility and extended mode. + * They are mostly compatible with ADT7461 except for local temperature + * low byte register and max conversion rate. * * Since the LM90 was the first chipset supported by this driver, most * comments will refer to this chipset, but are actually general and @@ -112,7 +113,7 @@ static const unsigned short normal_i2c[] = { 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, - max6646, w83l771, max6696, sa56004, g781, tmp451, max6654 }; + max6646, w83l771, max6696, sa56004, g781, tmp451, tmp461, max6654 }; /* * The LM90 registers @@ -168,8 +169,12 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_MAX_CONVRATE_MS 16000 /* Maximum conversion rate in ms */ -/* TMP451 registers */ +/* TMP451/TMP461 registers */ #define TMP451_REG_R_LOCAL_TEMPL 0x15 +#define TMP451_REG_CONALERT 0x22 + +#define TMP461_REG_CHEN 0x16 +#define TMP461_REG_DFC 0x24 /* * Device flags @@ -182,7 +187,10 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_HAVE_EMERGENCY_ALARM (1 << 5)/* emergency alarm */ #define LM90_HAVE_TEMP3 (1 << 6) /* 3rd temperature sensor */ #define LM90_HAVE_BROKEN_ALERT (1 << 7) /* Broken alert */ -#define LM90_PAUSE_FOR_CONFIG (1 << 8) /* Pause conversion for config */ +#define LM90_HAVE_EXTENDED_TEMP (1 << 8) /* extended temperature support*/ +#define LM90_PAUSE_FOR_CONFIG (1 << 9) /* Pause conversion for config */ +#define LM90_HAVE_CRIT (1 << 10)/* Chip supports CRIT/OVERT register */ +#define LM90_HAVE_CRIT_ALRM_SWP (1 << 11)/* critical alarm bits swapped */ /* LM90 status */ #define LM90_STATUS_LTHRM (1 << 0) /* local THERM limit tripped */ @@ -192,6 +200,7 @@ enum chips { lm90, adm1032, lm99, lm86, max6657, max6659, adt7461, max6680, #define LM90_STATUS_RHIGH (1 << 4) /* remote high temp limit tripped */ #define LM90_STATUS_LLOW (1 << 5) /* local low temp limit tripped */ #define LM90_STATUS_LHIGH (1 << 6) /* local high temp limit tripped */ +#define LM90_STATUS_BUSY (1 << 7) /* conversion is ongoing */ #define MAX6696_STATUS2_R2THRM (1 << 1) /* remote2 THERM limit tripped */ #define MAX6696_STATUS2_R2OPEN (1 << 2) /* remote2 is an open circuit */ @@ -229,6 +238,7 @@ static const struct i2c_device_id lm90_id[] = { { "w83l771", w83l771 }, { "sa56004", sa56004 }, { "tmp451", tmp451 }, + { "tmp461", tmp461 }, { } }; MODULE_DEVICE_TABLE(i2c, lm90_id); @@ -326,6 +336,10 @@ static const struct of_device_id __maybe_unused lm90_of_match[] = { .compatible = "ti,tmp451", .data = (void *)tmp451 }, + { + .compatible = "ti,tmp461", + .data = (void *)tmp461 + }, { }, }; MODULE_DEVICE_TABLE(of, lm90_of_match); @@ -344,85 +358,99 @@ struct lm90_params { static const struct lm90_params lm90_params[] = { [adm1032] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 10, }, [adt7461] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP + | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 10, }, [g781] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, - .max_convrate = 8, + .max_convrate = 7, }, [lm86] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [lm90] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [lm99] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, }, [max6646] = { + .flags = LM90_HAVE_CRIT | LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6654] = { + .flags = LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 7, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6657] = { - .flags = LM90_PAUSE_FOR_CONFIG, + .flags = LM90_PAUSE_FOR_CONFIG | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6659] = { - .flags = LM90_HAVE_EMERGENCY, + .flags = LM90_HAVE_EMERGENCY | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [max6680] = { - .flags = LM90_HAVE_OFFSET, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_CRIT + | LM90_HAVE_CRIT_ALRM_SWP | LM90_HAVE_BROKEN_ALERT, .alert_alarms = 0x7c, .max_convrate = 7, }, [max6696] = { .flags = LM90_HAVE_EMERGENCY - | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3, + | LM90_HAVE_EMERGENCY_ALARM | LM90_HAVE_TEMP3 | LM90_HAVE_CRIT, .alert_alarms = 0x1c7c, .max_convrate = 6, .reg_local_ext = MAX6657_REG_R_LOCAL_TEMPL, }, [w83l771] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 8, }, [sa56004] = { - .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT, + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT | LM90_HAVE_CRIT, .alert_alarms = 0x7b, .max_convrate = 9, .reg_local_ext = SA56004_REG_R_LOCAL_TEMPL, }, [tmp451] = { .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT - | LM90_HAVE_BROKEN_ALERT, + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT, + .alert_alarms = 0x7c, + .max_convrate = 9, + .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL, + }, + [tmp461] = { + .flags = LM90_HAVE_OFFSET | LM90_HAVE_REM_LIMIT_EXT + | LM90_HAVE_BROKEN_ALERT | LM90_HAVE_EXTENDED_TEMP | LM90_HAVE_CRIT, .alert_alarms = 0x7c, .max_convrate = 9, .reg_local_ext = TMP451_REG_R_LOCAL_TEMPL, @@ -650,20 +678,22 @@ static int lm90_update_limits(struct device *dev) struct i2c_client *client = data->client; int val; - val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT); - if (val < 0) - return val; - data->temp8[LOCAL_CRIT] = val; + if (data->flags & LM90_HAVE_CRIT) { + val = lm90_read_reg(client, LM90_REG_R_LOCAL_CRIT); + if (val < 0) + return val; + data->temp8[LOCAL_CRIT] = val; - val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT); - if (val < 0) - return val; - data->temp8[REMOTE_CRIT] = val; + val = lm90_read_reg(client, LM90_REG_R_REMOTE_CRIT); + if (val < 0) + return val; + data->temp8[REMOTE_CRIT] = val; - val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST); - if (val < 0) - return val; - data->temp_hyst = val; + val = lm90_read_reg(client, LM90_REG_R_TCRIT_HYST); + if (val < 0) + return val; + data->temp_hyst = val; + } val = lm90_read_reg(client, LM90_REG_R_REMOTE_LOWH); if (val < 0) @@ -791,7 +821,7 @@ static int lm90_update_device(struct device *dev) val = lm90_read_reg(client, LM90_REG_R_STATUS); if (val < 0) return val; - data->alarms = val; /* lower 8 bit of alarms */ + data->alarms = val & ~LM90_STATUS_BUSY; if (data->kind == max6696) { val = lm90_select_remote_channel(data, 1); @@ -997,7 +1027,7 @@ static int lm90_get_temp11(struct lm90_data *data, int index) s16 temp11 = data->temp11[index]; int temp; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) temp = temp_from_u16_adt7461(data, temp11); else if (data->kind == max6646) temp = temp_from_u16(temp11); @@ -1031,7 +1061,7 @@ static int lm90_set_temp11(struct lm90_data *data, int index, long val) if (data->kind == lm99 && index <= 2) val -= 16000; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) data->temp11[index] = temp_to_u16_adt7461(data, val); else if (data->kind == max6646) data->temp11[index] = temp_to_u8(val) << 8; @@ -1058,7 +1088,7 @@ static int lm90_get_temp8(struct lm90_data *data, int index) s8 temp8 = data->temp8[index]; int temp; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) temp = temp_from_u8_adt7461(data, temp8); else if (data->kind == max6646) temp = temp_from_u8(temp8); @@ -1091,7 +1121,7 @@ static int lm90_set_temp8(struct lm90_data *data, int index, long val) if (data->kind == lm99 && index == 3) val -= 16000; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) data->temp8[index] = temp_to_u8_adt7461(data, val); else if (data->kind == max6646) data->temp8[index] = temp_to_u8(val); @@ -1109,7 +1139,7 @@ static int lm90_get_temphyst(struct lm90_data *data, int index) { int temp; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) temp = temp_from_u8_adt7461(data, data->temp8[index]); else if (data->kind == max6646) temp = temp_from_u8(data->temp8[index]); @@ -1129,7 +1159,7 @@ static int lm90_set_temphyst(struct lm90_data *data, long val) int temp; int err; - if (data->kind == adt7461 || data->kind == tmp451) + if (data->flags & LM90_HAVE_EXTENDED_TEMP) temp = temp_from_u8_adt7461(data, data->temp8[LOCAL_CRIT]); else if (data->kind == max6646) temp = temp_from_u8(data->temp8[LOCAL_CRIT]); @@ -1165,6 +1195,7 @@ static const u8 lm90_temp_emerg_index[3] = { static const u8 lm90_min_alarm_bits[3] = { 5, 3, 11 }; static const u8 lm90_max_alarm_bits[3] = { 6, 4, 12 }; static const u8 lm90_crit_alarm_bits[3] = { 0, 1, 9 }; +static const u8 lm90_crit_alarm_bits_swapped[3] = { 1, 0, 9 }; static const u8 lm90_emergency_alarm_bits[3] = { 15, 13, 14 }; static const u8 lm90_fault_bits[3] = { 0, 2, 10 }; @@ -1190,7 +1221,10 @@ static int lm90_temp_read(struct device *dev, u32 attr, int channel, long *val) *val = (data->alarms >> lm90_max_alarm_bits[channel]) & 1; break; case hwmon_temp_crit_alarm: - *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1; + if (data->flags & LM90_HAVE_CRIT_ALRM_SWP) + *val = (data->alarms >> lm90_crit_alarm_bits_swapped[channel]) & 1; + else + *val = (data->alarms >> lm90_crit_alarm_bits[channel]) & 1; break; case hwmon_temp_emergency_alarm: *val = (data->alarms >> lm90_emergency_alarm_bits[channel]) & 1; @@ -1438,12 +1472,11 @@ static int lm90_detect(struct i2c_client *client, if (man_id < 0 || chip_id < 0 || config1 < 0 || convrate < 0) return -ENODEV; - if (man_id == 0x01 || man_id == 0x5C || man_id == 0x41) { + if (man_id == 0x01 || man_id == 0x5C || man_id == 0xA1) { config2 = i2c_smbus_read_byte_data(client, LM90_REG_R_CONFIG2); if (config2 < 0) return -ENODEV; - } else - config2 = 0; /* Make compiler happy */ + } if ((address == 0x4C || address == 0x4D) && man_id == 0x01) { /* National Semiconductor */ @@ -1617,18 +1650,26 @@ static int lm90_detect(struct i2c_client *client, && convrate <= 0x08) name = "g781"; } else - if (address == 0x4C - && man_id == 0x55) { /* Texas Instruments */ - int local_ext; + if (man_id == 0x55 && chip_id == 0x00 && + (config1 & 0x1B) == 0x00 && convrate <= 0x09) { + int local_ext, conalert, chen, dfc; local_ext = i2c_smbus_read_byte_data(client, TMP451_REG_R_LOCAL_TEMPL); - - if (chip_id == 0x00 /* TMP451 */ - && (config1 & 0x1B) == 0x00 - && convrate <= 0x09 - && (local_ext & 0x0F) == 0x00) - name = "tmp451"; + conalert = i2c_smbus_read_byte_data(client, + TMP451_REG_CONALERT); + chen = i2c_smbus_read_byte_data(client, TMP461_REG_CHEN); + dfc = i2c_smbus_read_byte_data(client, TMP461_REG_DFC); + + if ((local_ext & 0x0F) == 0x00 && + (conalert & 0xf1) == 0x01 && + (chen & 0xfc) == 0x00 && + (dfc & 0xfc) == 0x00) { + if (address == 0x4c && !(chen & 0x03)) + name = "tmp451"; + else if (address >= 0x48 && address <= 0x4f) + name = "tmp461"; + } } if (!name) { /* identification failed */ @@ -1675,7 +1716,7 @@ static int lm90_init_client(struct i2c_client *client, struct lm90_data *data) lm90_set_convrate(client, data, 500); /* 500ms; 2Hz conversion rate */ /* Check Temperature Range Select */ - if (data->kind == adt7461 || data->kind == tmp451) { + if (data->flags & LM90_HAVE_EXTENDED_TEMP) { if (config & 0x04) data->flags |= LM90_FLAG_ADT7461_EXT; } @@ -1842,11 +1883,14 @@ static int lm90_probe(struct i2c_client *client) info->config = data->channel_config; data->channel_config[0] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | - HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | - HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM; + HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM; data->channel_config[1] = HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX | - HWMON_T_CRIT | HWMON_T_CRIT_HYST | HWMON_T_MIN_ALARM | - HWMON_T_MAX_ALARM | HWMON_T_CRIT_ALARM | HWMON_T_FAULT; + HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM | HWMON_T_FAULT; + + if (data->flags & LM90_HAVE_CRIT) { + data->channel_config[0] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST; + data->channel_config[1] |= HWMON_T_CRIT | HWMON_T_CRIT_ALARM | HWMON_T_CRIT_HYST; + } if (data->flags & LM90_HAVE_OFFSET) data->channel_config[1] |= HWMON_T_OFFSET; diff --git a/drivers/hwmon/ltc2947-core.c b/drivers/hwmon/ltc2947-core.c index bb3f7749a0b001cb21a668f62de984363671575b..5423466de697af5bf704217f26eaccea058f404d 100644 --- a/drivers/hwmon/ltc2947-core.c +++ b/drivers/hwmon/ltc2947-core.c @@ -989,8 +989,12 @@ static int ltc2947_setup(struct ltc2947_data *st) return ret; /* check external clock presence */ - extclk = devm_clk_get(st->dev, NULL); - if (!IS_ERR(extclk)) { + extclk = devm_clk_get_optional(st->dev, NULL); + if (IS_ERR(extclk)) + return dev_err_probe(st->dev, PTR_ERR(extclk), + "Failed to get external clock\n"); + + if (extclk) { unsigned long rate_hz; u8 pre = 0, div, tbctl; u64 aux; diff --git a/drivers/hwmon/mlxreg-fan.c b/drivers/hwmon/mlxreg-fan.c index ed8d59d4eecb361c288b0926f222ec1e380634ea..bd8f5a3aaad9cad5bab7dd4fbd6847399ccdae8f 100644 --- a/drivers/hwmon/mlxreg-fan.c +++ b/drivers/hwmon/mlxreg-fan.c @@ -291,8 +291,8 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, { struct mlxreg_fan *fan = cdev->devdata; unsigned long cur_state; + int i, config = 0; u32 regval; - int i; int err; /* @@ -305,6 +305,12 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, * overwritten. */ if (state >= MLXREG_FAN_SPEED_MIN && state <= MLXREG_FAN_SPEED_MAX) { + /* + * This is configuration change, which is only supported through sysfs. + * For configuration non-zero value is to be returned to avoid thermal + * statistics update. + */ + config = 1; state -= MLXREG_FAN_MAX_STATE; for (i = 0; i < state; i++) fan->cooling_levels[i] = state; @@ -319,7 +325,7 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, cur_state = MLXREG_FAN_PWM_DUTY2STATE(regval); if (state < cur_state) - return 0; + return config; state = cur_state; } @@ -335,7 +341,7 @@ static int mlxreg_fan_set_cur_state(struct thermal_cooling_device *cdev, dev_err(fan->dev, "Failed to write PWM duty\n"); return err; } - return 0; + return config; } static const struct thermal_cooling_device_ops mlxreg_fan_cooling_ops = { diff --git a/drivers/hwmon/mr75203.c b/drivers/hwmon/mr75203.c index 18da5a25e89abfb950fe228ebe243d98ec93abb6..046523d47c29b75dfb045e95ad6a8ea86a42752e 100644 --- a/drivers/hwmon/mr75203.c +++ b/drivers/hwmon/mr75203.c @@ -93,7 +93,7 @@ #define VM_CH_REQ BIT(21) #define IP_TMR 0x05 -#define POWER_DELAY_CYCLE_256 0x80 +#define POWER_DELAY_CYCLE_256 0x100 #define POWER_DELAY_CYCLE_64 0x40 #define PVT_POLL_DELAY_US 20 diff --git a/drivers/hwmon/pmbus/ibm-cffps.c b/drivers/hwmon/pmbus/ibm-cffps.c index 79bc2032dcb2a635c38a83e2ebed75dd49809417..da261d32450d0309bc792564d51e1873ea7837cf 100644 --- a/drivers/hwmon/pmbus/ibm-cffps.c +++ b/drivers/hwmon/pmbus/ibm-cffps.c @@ -171,8 +171,14 @@ static ssize_t ibm_cffps_debugfs_read(struct file *file, char __user *buf, cmd = CFFPS_SN_CMD; break; case CFFPS_DEBUGFS_MAX_POWER_OUT: - rc = i2c_smbus_read_word_swapped(psu->client, - CFFPS_MAX_POWER_OUT_CMD); + if (psu->version == cffps1) { + rc = i2c_smbus_read_word_swapped(psu->client, + CFFPS_MAX_POWER_OUT_CMD); + } else { + rc = i2c_smbus_read_word_data(psu->client, + CFFPS_MAX_POWER_OUT_CMD); + } + if (rc < 0) return rc; diff --git a/drivers/hwmon/pmbus/lm25066.c b/drivers/hwmon/pmbus/lm25066.c index 429172a42902c8181ffbe7e2b01875c9a3a2a449..17199a1104c7206d825a5e0f2eb20c0baa3bb933 100644 --- a/drivers/hwmon/pmbus/lm25066.c +++ b/drivers/hwmon/pmbus/lm25066.c @@ -51,26 +51,31 @@ struct __coeff { #define PSC_CURRENT_IN_L (PSC_NUM_CLASSES) #define PSC_POWER_L (PSC_NUM_CLASSES + 1) -static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = { +static struct __coeff lm25066_coeff[][PSC_NUM_CLASSES + 2] = { [lm25056] = { [PSC_VOLTAGE_IN] = { .m = 16296, + .b = 1343, .R = -2, }, [PSC_CURRENT_IN] = { .m = 13797, + .b = -1833, .R = -2, }, [PSC_CURRENT_IN_L] = { .m = 6726, + .b = -537, .R = -2, }, [PSC_POWER] = { .m = 5501, + .b = -2908, .R = -3, }, [PSC_POWER_L] = { .m = 26882, + .b = -5646, .R = -4, }, [PSC_TEMPERATURE] = { @@ -82,26 +87,32 @@ static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = { [lm25066] = { [PSC_VOLTAGE_IN] = { .m = 22070, + .b = -1800, .R = -2, }, [PSC_VOLTAGE_OUT] = { .m = 22070, + .b = -1800, .R = -2, }, [PSC_CURRENT_IN] = { .m = 13661, + .b = -5200, .R = -2, }, [PSC_CURRENT_IN_L] = { .m = 6852, + .b = -3100, .R = -2, }, [PSC_POWER] = { .m = 736, + .b = -3300, .R = -2, }, [PSC_POWER_L] = { .m = 369, + .b = -1900, .R = -2, }, [PSC_TEMPERATURE] = { @@ -111,26 +122,32 @@ static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = { [lm5064] = { [PSC_VOLTAGE_IN] = { .m = 4611, + .b = -642, .R = -2, }, [PSC_VOLTAGE_OUT] = { .m = 4621, + .b = 423, .R = -2, }, [PSC_CURRENT_IN] = { .m = 10742, + .b = 1552, .R = -2, }, [PSC_CURRENT_IN_L] = { .m = 5456, + .b = 2118, .R = -2, }, [PSC_POWER] = { .m = 1204, + .b = 8524, .R = -3, }, [PSC_POWER_L] = { .m = 612, + .b = 11202, .R = -3, }, [PSC_TEMPERATURE] = { @@ -140,26 +157,32 @@ static struct __coeff lm25066_coeff[6][PSC_NUM_CLASSES + 2] = { [lm5066] = { [PSC_VOLTAGE_IN] = { .m = 4587, + .b = -1200, .R = -2, }, [PSC_VOLTAGE_OUT] = { .m = 4587, + .b = -2400, .R = -2, }, [PSC_CURRENT_IN] = { .m = 10753, + .b = -1200, .R = -2, }, [PSC_CURRENT_IN_L] = { .m = 5405, + .b = -600, .R = -2, }, [PSC_POWER] = { .m = 1204, + .b = -6000, .R = -3, }, [PSC_POWER_L] = { .m = 605, + .b = -8000, .R = -3, }, [PSC_TEMPERATURE] = { diff --git a/drivers/hwmon/pmbus/mp2975.c b/drivers/hwmon/pmbus/mp2975.c index 1c3e2a9453b125848b39080ce43bf6ee04103fa3..a41fe06e0ad4ceb5006999ce70ef7f8b0b60b3a8 100644 --- a/drivers/hwmon/pmbus/mp2975.c +++ b/drivers/hwmon/pmbus/mp2975.c @@ -54,7 +54,7 @@ #define MP2975_RAIL2_FUNC (PMBUS_HAVE_VOUT | PMBUS_HAVE_STATUS_VOUT | \ PMBUS_HAVE_IOUT | PMBUS_HAVE_STATUS_IOUT | \ - PMBUS_PHASE_VIRTUAL) + PMBUS_HAVE_POUT | PMBUS_PHASE_VIRTUAL) struct mp2975_data { struct pmbus_driver_info info; diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c index b0e2820a2d578f62ca0db7ac565ca5404065c790..71798fde2ef0c93280698c355f190273b429f05b 100644 --- a/drivers/hwmon/pmbus/pmbus_core.c +++ b/drivers/hwmon/pmbus/pmbus_core.c @@ -898,6 +898,11 @@ static int pmbus_get_boolean(struct i2c_client *client, struct pmbus_boolean *b, pmbus_update_sensor_data(client, s2); regval = status & mask; + if (regval) { + ret = pmbus_write_byte_data(client, page, reg, regval); + if (ret) + goto unlock; + } if (s1 && s2) { s64 v1, v2; diff --git a/drivers/hwmon/tmp421.c b/drivers/hwmon/tmp421.c index ede66ea6a730dd82787c05cec909c920d4ba5d2c..b963a369c5ab3069aa55568f706d4dc1af4aeeeb 100644 --- a/drivers/hwmon/tmp421.c +++ b/drivers/hwmon/tmp421.c @@ -100,71 +100,81 @@ struct tmp421_data { s16 temp[4]; }; -static int temp_from_s16(s16 reg) +static int temp_from_raw(u16 reg, bool extended) { /* Mask out status bits */ int temp = reg & ~0xf; - return (temp * 1000 + 128) / 256; -} - -static int temp_from_u16(u16 reg) -{ - /* Mask out status bits */ - int temp = reg & ~0xf; - - /* Add offset for extended temperature range. */ - temp -= 64 * 256; + if (extended) + temp = temp - 64 * 256; + else + temp = (s16)temp; - return (temp * 1000 + 128) / 256; + return DIV_ROUND_CLOSEST(temp * 1000, 256); } -static struct tmp421_data *tmp421_update_device(struct device *dev) +static int tmp421_update_device(struct tmp421_data *data) { - struct tmp421_data *data = dev_get_drvdata(dev); struct i2c_client *client = data->client; + int ret = 0; int i; mutex_lock(&data->update_lock); if (time_after(jiffies, data->last_updated + (HZ / 2)) || !data->valid) { - data->config = i2c_smbus_read_byte_data(client, - TMP421_CONFIG_REG_1); + ret = i2c_smbus_read_byte_data(client, TMP421_CONFIG_REG_1); + if (ret < 0) + goto exit; + data->config = ret; for (i = 0; i < data->channels; i++) { - data->temp[i] = i2c_smbus_read_byte_data(client, - TMP421_TEMP_MSB[i]) << 8; - data->temp[i] |= i2c_smbus_read_byte_data(client, - TMP421_TEMP_LSB[i]); + ret = i2c_smbus_read_byte_data(client, TMP421_TEMP_MSB[i]); + if (ret < 0) + goto exit; + data->temp[i] = ret << 8; + + ret = i2c_smbus_read_byte_data(client, TMP421_TEMP_LSB[i]); + if (ret < 0) + goto exit; + data->temp[i] |= ret; } data->last_updated = jiffies; data->valid = 1; } +exit: mutex_unlock(&data->update_lock); - return data; + if (ret < 0) { + data->valid = 0; + return ret; + } + + return 0; } static int tmp421_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { - struct tmp421_data *tmp421 = tmp421_update_device(dev); + struct tmp421_data *tmp421 = dev_get_drvdata(dev); + int ret = 0; + + ret = tmp421_update_device(tmp421); + if (ret) + return ret; switch (attr) { case hwmon_temp_input: - if (tmp421->config & TMP421_CONFIG_RANGE) - *val = temp_from_u16(tmp421->temp[channel]); - else - *val = temp_from_s16(tmp421->temp[channel]); + *val = temp_from_raw(tmp421->temp[channel], + tmp421->config & TMP421_CONFIG_RANGE); return 0; case hwmon_temp_fault: /* - * The OPEN bit signals a fault. This is bit 0 of the temperature - * register (low byte). + * Any of OPEN or /PVLD bits indicate a hardware mulfunction + * and the conversion result may be incorrect */ - *val = tmp421->temp[channel] & 0x01; + *val = !!(tmp421->temp[channel] & 0x03); return 0; default: return -EOPNOTSUPP; @@ -177,9 +187,6 @@ static umode_t tmp421_is_visible(const void *data, enum hwmon_sensor_types type, { switch (attr) { case hwmon_temp_fault: - if (channel == 0) - return 0; - return 0444; case hwmon_temp_input: return 0444; default: diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c index 37b25a1474c4633ba40bf4a5fd1d435983357519..3c1be2c11fdf0252daf6bc66fc751874227229b9 100644 --- a/drivers/hwmon/w83791d.c +++ b/drivers/hwmon/w83791d.c @@ -273,9 +273,6 @@ struct w83791d_data { char valid; /* !=0 if following fields are valid */ unsigned long last_updated; /* In jiffies */ - /* array of 2 pointers to subclients */ - struct i2c_client *lm75[2]; - /* volts */ u8 in[NUMBER_OF_VIN]; /* Register value */ u8 in_max[NUMBER_OF_VIN]; /* Register value */ @@ -1257,7 +1254,6 @@ static const struct attribute_group w83791d_group_fanpwm45 = { static int w83791d_detect_subclients(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; - struct w83791d_data *data = i2c_get_clientdata(client); int address = client->addr; int i, id; u8 val; @@ -1280,22 +1276,19 @@ static int w83791d_detect_subclients(struct i2c_client *client) } val = w83791d_read(client, W83791D_REG_I2C_SUBADDR); - if (!(val & 0x08)) - data->lm75[0] = devm_i2c_new_dummy_device(&client->dev, adapter, - 0x48 + (val & 0x7)); - if (!(val & 0x80)) { - if (!IS_ERR(data->lm75[0]) && - ((val & 0x7) == ((val >> 4) & 0x7))) { - dev_err(&client->dev, - "duplicate addresses 0x%x, " - "use force_subclient\n", - data->lm75[0]->addr); - return -ENODEV; - } - data->lm75[1] = devm_i2c_new_dummy_device(&client->dev, adapter, - 0x48 + ((val >> 4) & 0x7)); + + if (!(val & 0x88) && (val & 0x7) == ((val >> 4) & 0x7)) { + dev_err(&client->dev, + "duplicate addresses 0x%x, use force_subclient\n", 0x48 + (val & 0x7)); + return -ENODEV; } + if (!(val & 0x08)) + devm_i2c_new_dummy_device(&client->dev, adapter, 0x48 + (val & 0x7)); + + if (!(val & 0x80)) + devm_i2c_new_dummy_device(&client->dev, adapter, 0x48 + ((val >> 4) & 0x7)); + return 0; } diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c index abd5c3a722b91a4aa9c6a1ebb2d24a376d00b801..1f175f3813506ddd074650a09eb5970b32ddfbc7 100644 --- a/drivers/hwmon/w83792d.c +++ b/drivers/hwmon/w83792d.c @@ -264,9 +264,6 @@ struct w83792d_data { char valid; /* !=0 if following fields are valid */ unsigned long last_updated; /* In jiffies */ - /* array of 2 pointers to subclients */ - struct i2c_client *lm75[2]; - u8 in[9]; /* Register value */ u8 in_max[9]; /* Register value */ u8 in_min[9]; /* Register value */ @@ -927,7 +924,6 @@ w83792d_detect_subclients(struct i2c_client *new_client) int address = new_client->addr; u8 val; struct i2c_adapter *adapter = new_client->adapter; - struct w83792d_data *data = i2c_get_clientdata(new_client); id = i2c_adapter_id(adapter); if (force_subclients[0] == id && force_subclients[1] == address) { @@ -946,21 +942,19 @@ w83792d_detect_subclients(struct i2c_client *new_client) } val = w83792d_read_value(new_client, W83792D_REG_I2C_SUBADDR); - if (!(val & 0x08)) - data->lm75[0] = devm_i2c_new_dummy_device(&new_client->dev, adapter, - 0x48 + (val & 0x7)); - if (!(val & 0x80)) { - if (!IS_ERR(data->lm75[0]) && - ((val & 0x7) == ((val >> 4) & 0x7))) { - dev_err(&new_client->dev, - "duplicate addresses 0x%x, use force_subclient\n", - data->lm75[0]->addr); - return -ENODEV; - } - data->lm75[1] = devm_i2c_new_dummy_device(&new_client->dev, adapter, - 0x48 + ((val >> 4) & 0x7)); + + if (!(val & 0x88) && (val & 0x7) == ((val >> 4) & 0x7)) { + dev_err(&new_client->dev, + "duplicate addresses 0x%x, use force_subclient\n", 0x48 + (val & 0x7)); + return -ENODEV; } + if (!(val & 0x08)) + devm_i2c_new_dummy_device(&new_client->dev, adapter, 0x48 + (val & 0x7)); + + if (!(val & 0x80)) + devm_i2c_new_dummy_device(&new_client->dev, adapter, 0x48 + ((val >> 4) & 0x7)); + return 0; } diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c index e7d0484eabe4c2a8b59470c69b8aac4def755c5a..1d2854de1cfc961b705516fc3f50b90865eac64e 100644 --- a/drivers/hwmon/w83793.c +++ b/drivers/hwmon/w83793.c @@ -202,7 +202,6 @@ static inline s8 TEMP_TO_REG(long val, s8 min, s8 max) } struct w83793_data { - struct i2c_client *lm75[2]; struct device *hwmon_dev; struct mutex update_lock; char valid; /* !=0 if following fields are valid */ @@ -1566,7 +1565,6 @@ w83793_detect_subclients(struct i2c_client *client) int address = client->addr; u8 tmp; struct i2c_adapter *adapter = client->adapter; - struct w83793_data *data = i2c_get_clientdata(client); id = i2c_adapter_id(adapter); if (force_subclients[0] == id && force_subclients[1] == address) { @@ -1586,21 +1584,19 @@ w83793_detect_subclients(struct i2c_client *client) } tmp = w83793_read_value(client, W83793_REG_I2C_SUBADDR); - if (!(tmp & 0x08)) - data->lm75[0] = devm_i2c_new_dummy_device(&client->dev, adapter, - 0x48 + (tmp & 0x7)); - if (!(tmp & 0x80)) { - if (!IS_ERR(data->lm75[0]) - && ((tmp & 0x7) == ((tmp >> 4) & 0x7))) { - dev_err(&client->dev, - "duplicate addresses 0x%x, " - "use force_subclients\n", data->lm75[0]->addr); - return -ENODEV; - } - data->lm75[1] = devm_i2c_new_dummy_device(&client->dev, adapter, - 0x48 + ((tmp >> 4) & 0x7)); + + if (!(tmp & 0x88) && (tmp & 0x7) == ((tmp >> 4) & 0x7)) { + dev_err(&client->dev, + "duplicate addresses 0x%x, use force_subclient\n", 0x48 + (tmp & 0x7)); + return -ENODEV; } + if (!(tmp & 0x08)) + devm_i2c_new_dummy_device(&client->dev, adapter, 0x48 + (tmp & 0x7)); + + if (!(tmp & 0x80)) + devm_i2c_new_dummy_device(&client->dev, adapter, 0x48 + ((tmp >> 4) & 0x7)); + return 0; } diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c index e2a3620cbf4891e41b1a23ca80b957c22cecb89c..8988b2ed2ea6f5d3f5fed1606d824d6bf6f1781f 100644 --- a/drivers/hwtracing/coresight/coresight-cti-core.c +++ b/drivers/hwtracing/coresight/coresight-cti-core.c @@ -175,7 +175,7 @@ static int cti_disable_hw(struct cti_drvdata *drvdata) coresight_disclaim_device_unlocked(csdev); CS_LOCK(drvdata->base); spin_unlock(&drvdata->spinlock); - pm_runtime_put(dev); + pm_runtime_put(dev->parent); return 0; /* not disabled this call */ diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index 6fee02bf7b7103ff1e166973f9b0e9a9e448a064..bfcf9e0415c267f8046a4e45e7f63e315e77f12a 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -39,6 +39,7 @@ #include "coresight-etm4x.h" #include "coresight-etm-perf.h" +#include "coresight-self-hosted-trace.h" static int boot_enable; module_param(boot_enable, int, 0444); @@ -236,6 +237,45 @@ struct etm4_enable_arg { int rc; }; +/* + * etm4x_prohibit_trace - Prohibit the CPU from tracing at all ELs. + * When the CPU supports FEAT_TRF, we could move the ETM to a trace + * prohibited state by filtering the Exception levels via TRFCR_EL1. + */ +static void etm4x_prohibit_trace(struct etmv4_drvdata *drvdata) +{ + /* If the CPU doesn't support FEAT_TRF, nothing to do */ + if (!drvdata->trfcr) + return; + cpu_prohibit_trace(); +} + +/* + * etm4x_allow_trace - Allow CPU tracing in the respective ELs, + * as configured by the drvdata->config.mode for the current + * session. Even though we have TRCVICTLR bits to filter the + * trace in the ELs, it doesn't prevent the ETM from generating + * a packet (e.g, TraceInfo) that might contain the addresses from + * the excluded levels. Thus we use the additional controls provided + * via the Trace Filtering controls (FEAT_TRF) to make sure no trace + * is generated for the excluded ELs. + */ +static void etm4x_allow_trace(struct etmv4_drvdata *drvdata) +{ + u64 trfcr = drvdata->trfcr; + + /* If the CPU doesn't support FEAT_TRF, nothing to do */ + if (!trfcr) + return; + + if (drvdata->config.mode & ETM_MODE_EXCL_KERN) + trfcr &= ~TRFCR_ELx_ExTRE; + if (drvdata->config.mode & ETM_MODE_EXCL_USER) + trfcr &= ~TRFCR_ELx_E0TRE; + + write_trfcr(trfcr); +} + #ifdef CONFIG_ETM4X_IMPDEF_FEATURE #define HISI_HIP08_AMBA_ID 0x000b6d01 @@ -440,6 +480,7 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) if (etm4x_is_ete(drvdata)) etm4x_relaxed_write32(csa, TRCRSR_TA, TRCRSR); + etm4x_allow_trace(drvdata); /* Enable the trace unit */ etm4x_relaxed_write32(csa, 1, TRCPRGCTLR); @@ -723,7 +764,6 @@ static int etm4_enable(struct coresight_device *csdev, static void etm4_disable_hw(void *info) { u32 control; - u64 trfcr; struct etmv4_drvdata *drvdata = info; struct etmv4_config *config = &drvdata->config; struct coresight_device *csdev = drvdata->csdev; @@ -750,12 +790,7 @@ static void etm4_disable_hw(void *info) * If the CPU supports v8.4 Trace filter Control, * set the ETM to trace prohibited region. */ - if (drvdata->trfc) { - trfcr = read_sysreg_s(SYS_TRFCR_EL1); - write_sysreg_s(trfcr & ~(TRFCR_ELx_ExTRE | TRFCR_ELx_E0TRE), - SYS_TRFCR_EL1); - isb(); - } + etm4x_prohibit_trace(drvdata); /* * Make sure everything completes before disabling, as recommended * by section 7.3.77 ("TRCVICTLR, ViewInst Main Control Register, @@ -771,9 +806,6 @@ static void etm4_disable_hw(void *info) if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1)) dev_err(etm_dev, "timeout while waiting for PM stable Trace Status\n"); - if (drvdata->trfc) - write_sysreg_s(trfcr, SYS_TRFCR_EL1); - /* read the status of the single shot comparators */ for (i = 0; i < drvdata->nr_ss_cmp; i++) { config->ss_status[i] = @@ -968,15 +1000,15 @@ static bool etm4_init_csdev_access(struct etmv4_drvdata *drvdata, return false; } -static void cpu_enable_tracing(struct etmv4_drvdata *drvdata) +static void cpu_detect_trace_filtering(struct etmv4_drvdata *drvdata) { u64 dfr0 = read_sysreg(id_aa64dfr0_el1); u64 trfcr; + drvdata->trfcr = 0; if (!cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRACE_FILT_SHIFT)) return; - drvdata->trfc = true; /* * If the CPU supports v8.4 SelfHosted Tracing, enable * tracing at the kernel EL and EL0, forcing to use the @@ -990,7 +1022,7 @@ static void cpu_enable_tracing(struct etmv4_drvdata *drvdata) if (is_kernel_in_hyp_mode()) trfcr |= TRFCR_EL2_CX; - write_sysreg_s(trfcr, SYS_TRFCR_EL1); + drvdata->trfcr = trfcr; } static void etm4_init_arch_data(void *info) @@ -1176,7 +1208,7 @@ static void etm4_init_arch_data(void *info) /* NUMCNTR, bits[30:28] number of counters available for tracing */ drvdata->nr_cntr = BMVAL(etmidr5, 28, 30); etm4_cs_lock(drvdata, csa); - cpu_enable_tracing(drvdata); + cpu_detect_trace_filtering(drvdata); } static inline u32 etm4_get_victlr_access_type(struct etmv4_config *config) @@ -1528,7 +1560,7 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata) drvdata->trcid = coresight_get_trace_id(drvdata->cpu); } -static int etm4_cpu_save(struct etmv4_drvdata *drvdata) +static int __etm4_cpu_save(struct etmv4_drvdata *drvdata) { int i, ret = 0; struct etmv4_save_state *state; @@ -1667,7 +1699,23 @@ out: return ret; } -static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) +static int etm4_cpu_save(struct etmv4_drvdata *drvdata) +{ + int ret = 0; + + /* Save the TRFCR irrespective of whether the ETM is ON */ + if (drvdata->trfcr) + drvdata->save_trfcr = read_trfcr(); + /* + * Save and restore the ETM Trace registers only if + * the ETM is active. + */ + if (local_read(&drvdata->mode) && drvdata->save_state) + ret = __etm4_cpu_save(drvdata); + return ret; +} + +static void __etm4_cpu_restore(struct etmv4_drvdata *drvdata) { int i; struct etmv4_save_state *state = drvdata->save_state; @@ -1763,6 +1811,14 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) etm4_cs_lock(drvdata, csa); } +static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) +{ + if (drvdata->trfcr) + write_trfcr(drvdata->save_trfcr); + if (drvdata->state_needs_restore) + __etm4_cpu_restore(drvdata); +} + static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd, void *v) { @@ -1774,23 +1830,17 @@ static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd, drvdata = etmdrvdata[cpu]; - if (!drvdata->save_state) - return NOTIFY_OK; - if (WARN_ON_ONCE(drvdata->cpu != cpu)) return NOTIFY_BAD; switch (cmd) { case CPU_PM_ENTER: - /* save the state if self-hosted coresight is in use */ - if (local_read(&drvdata->mode)) - if (etm4_cpu_save(drvdata)) - return NOTIFY_BAD; + if (etm4_cpu_save(drvdata)) + return NOTIFY_BAD; break; case CPU_PM_EXIT: case CPU_PM_ENTER_FAILED: - if (drvdata->state_needs_restore) - etm4_cpu_restore(drvdata); + etm4_cpu_restore(drvdata); break; default: return NOTIFY_DONE; diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h index e5b79bdb9851c7b49722ca7f01013f8182e8c786..3c4d69b096ca5296af04d8b611762def0f69b804 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x.h +++ b/drivers/hwtracing/coresight/coresight-etm4x.h @@ -919,8 +919,12 @@ struct etmv4_save_state { * @nooverflow: Indicate if overflow prevention is supported. * @atbtrig: If the implementation can support ATB triggers * @lpoverride: If the implementation can support low-power state over. - * @trfc: If the implementation supports Arm v8.4 trace filter controls. + * @trfcr: If the CPU supports FEAT_TRF, value of the TRFCR_ELx that + * allows tracing at all ELs. We don't want to compute this + * at runtime, due to the additional setting of TRFCR_CX when + * in EL2. Otherwise, 0. * @config: structure holding configuration parameters. + * @save_trfcr: Saved TRFCR_EL1 register during a CPU PM event. * @save_state: State to be preserved across power loss * @state_needs_restore: True when there is context to restore after PM exit * @skip_power_up: Indicates if an implementation can skip powering up @@ -971,8 +975,9 @@ struct etmv4_drvdata { bool nooverflow; bool atbtrig; bool lpoverride; - bool trfc; + u64 trfcr; struct etmv4_config config; + u64 save_trfcr; struct etmv4_save_state *save_state; bool state_needs_restore; bool skip_power_up; diff --git a/drivers/hwtracing/coresight/coresight-self-hosted-trace.h b/drivers/hwtracing/coresight/coresight-self-hosted-trace.h new file mode 100644 index 0000000000000000000000000000000000000000..23f05df3f1730716cdf39afa484e880f238e6c2e --- /dev/null +++ b/drivers/hwtracing/coresight/coresight-self-hosted-trace.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Arm v8 Self-Hosted trace support. + * + * Copyright (C) 2021 ARM Ltd. + */ + +#ifndef __CORESIGHT_SELF_HOSTED_TRACE_H +#define __CORESIGHT_SELF_HOSTED_TRACE_H + +#include + +static inline u64 read_trfcr(void) +{ + return read_sysreg_s(SYS_TRFCR_EL1); +} + +static inline void write_trfcr(u64 val) +{ + write_sysreg_s(val, SYS_TRFCR_EL1); + isb(); +} + +static inline void cpu_prohibit_trace(void) +{ + u64 trfcr = read_trfcr(); + + /* Prohibit tracing at EL0 & the kernel EL */ + write_trfcr(trfcr & ~(TRFCR_ELx_ExTRE | TRFCR_ELx_E0TRE)); +} +#endif /* __CORESIGHT_SELF_HOSTED_TRACE_H */ diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c index ea5027e397d0226da19dc444d78fd0b078439cae..ec092b605c6314472e9d4ca29a4802675afd6274 100644 --- a/drivers/hwtracing/coresight/coresight-tmc-etr.c +++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c @@ -609,8 +609,9 @@ static int tmc_etr_alloc_flat_buf(struct tmc_drvdata *drvdata, if (!flat_buf) return -ENOMEM; - flat_buf->vaddr = dma_alloc_coherent(real_dev, etr_buf->size, - &flat_buf->daddr, GFP_KERNEL); + flat_buf->vaddr = dma_alloc_noncoherent(real_dev, etr_buf->size, + &flat_buf->daddr, + DMA_FROM_DEVICE, GFP_KERNEL); if (!flat_buf->vaddr) { kfree(flat_buf); return -ENOMEM; @@ -631,14 +632,18 @@ static void tmc_etr_free_flat_buf(struct etr_buf *etr_buf) if (flat_buf && flat_buf->daddr) { struct device *real_dev = flat_buf->dev->parent; - dma_free_coherent(real_dev, flat_buf->size, - flat_buf->vaddr, flat_buf->daddr); + dma_free_noncoherent(real_dev, etr_buf->size, + flat_buf->vaddr, flat_buf->daddr, + DMA_FROM_DEVICE); } kfree(flat_buf); } static void tmc_etr_sync_flat_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) { + struct etr_flat_buf *flat_buf = etr_buf->private; + struct device *real_dev = flat_buf->dev->parent; + /* * Adjust the buffer to point to the beginning of the trace data * and update the available trace data. @@ -648,6 +653,19 @@ static void tmc_etr_sync_flat_buf(struct etr_buf *etr_buf, u64 rrp, u64 rwp) etr_buf->len = etr_buf->size; else etr_buf->len = rwp - rrp; + + /* + * The driver always starts tracing at the beginning of the buffer, + * the only reason why we would get a wrap around is when the buffer + * is full. Sync the entire buffer in one go for this case. + */ + if (etr_buf->offset + etr_buf->len > etr_buf->size) + dma_sync_single_for_cpu(real_dev, flat_buf->daddr, + etr_buf->size, DMA_FROM_DEVICE); + else + dma_sync_single_for_cpu(real_dev, + flat_buf->daddr + etr_buf->offset, + etr_buf->len, DMA_FROM_DEVICE); } static ssize_t tmc_etr_get_data_flat_buf(struct etr_buf *etr_buf, @@ -1563,6 +1581,14 @@ tmc_update_etr_buffer(struct coresight_device *csdev, */ if (etr_perf->snapshot) handle->head += size; + + /* + * Ensure that the AUX trace data is visible before the aux_head + * is updated via perf_aux_output_end(), as expected by the + * perf ring buffer. + */ + smp_wmb(); + out: /* * Don't set the TRUNCATED flag in snapshot mode because 1) the diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c index 176868496879748f94ef11488219ba95179ad78d..7dddb85b90591f521236933e2b32927ca841476c 100644 --- a/drivers/hwtracing/coresight/coresight-trbe.c +++ b/drivers/hwtracing/coresight/coresight-trbe.c @@ -366,7 +366,7 @@ static unsigned long __trbe_normal_offset(struct perf_output_handle *handle) static unsigned long trbe_normal_offset(struct perf_output_handle *handle) { - struct trbe_buf *buf = perf_get_aux(handle); + struct trbe_buf *buf = etm_perf_sink_config(handle); u64 limit = __trbe_normal_offset(handle); u64 head = PERF_IDX2OFF(handle->head, buf); @@ -869,6 +869,10 @@ static void arm_trbe_register_coresight_cpu(struct trbe_drvdata *drvdata, int cp if (WARN_ON(trbe_csdev)) return; + /* If the TRBE was not probed on the CPU, we shouldn't be here */ + if (WARN_ON(!cpudata->drvdata)) + return; + dev = &cpudata->drvdata->pdev->dev; desc.name = devm_kasprintf(dev, GFP_KERNEL, "trbe%d", cpu); if (!desc.name) @@ -950,7 +954,9 @@ static int arm_trbe_probe_coresight(struct trbe_drvdata *drvdata) return -ENOMEM; for_each_cpu(cpu, &drvdata->supported_cpus) { - smp_call_function_single(cpu, arm_trbe_probe_cpu, drvdata, 1); + /* If we fail to probe the CPU, let us defer it to hotplug callbacks */ + if (smp_call_function_single(cpu, arm_trbe_probe_cpu, drvdata, 1)) + continue; if (cpumask_test_cpu(cpu, &drvdata->supported_cpus)) arm_trbe_register_coresight_cpu(drvdata, cpu); if (cpumask_test_cpu(cpu, &drvdata->supported_cpus)) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 7e693dcbdd1961acf771fcc1b0f8199c8ff9ecb0..ea474b16e3aac3b1e935d306e2474ac090fbda4b 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -488,7 +488,7 @@ config I2C_BRCMSTB config I2C_CADENCE tristate "Cadence I2C Controller" - depends on ARCH_ZYNQ || ARM64 || XTENSA + depends on ARCH_ZYNQ || ARM64 || XTENSA || COMPILE_TEST help Say yes here to select Cadence I2C Host Controller. This controller is e.g. used by Xilinx Zynq. @@ -926,7 +926,7 @@ config I2C_QCOM_GENI config I2C_QUP tristate "Qualcomm QUP based I2C controller" - depends on ARCH_QCOM + depends on ARCH_QCOM || COMPILE_TEST help If you say yes to this option, support will be included for the built-in I2C interface on the Qualcomm SoCs. diff --git a/drivers/i2c/busses/i2c-bcm2835.c b/drivers/i2c/busses/i2c-bcm2835.c index 37443edbf75464083693878983790c93d3457709..ad3b124a2e3768c9eb132fd2208ce421ed1da631 100644 --- a/drivers/i2c/busses/i2c-bcm2835.c +++ b/drivers/i2c/busses/i2c-bcm2835.c @@ -23,6 +23,11 @@ #define BCM2835_I2C_FIFO 0x10 #define BCM2835_I2C_DIV 0x14 #define BCM2835_I2C_DEL 0x18 +/* + * 16-bit field for the number of SCL cycles to wait after rising SCL + * before deciding the slave is not responding. 0 disables the + * timeout detection. + */ #define BCM2835_I2C_CLKT 0x1c #define BCM2835_I2C_C_READ BIT(0) @@ -477,6 +482,12 @@ static int bcm2835_i2c_probe(struct platform_device *pdev) adap->dev.of_node = pdev->dev.of_node; adap->quirks = of_device_get_match_data(&pdev->dev); + /* + * Disable the hardware clock stretching timeout. SMBUS + * specifies a limit for how long the device can stretch the + * clock, but core I2C doesn't. + */ + bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_CLKT, 0); bcm2835_i2c_writel(i2c_dev, BCM2835_I2C_C, 0); ret = i2c_add_adapter(adap); diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index ba766d24219ef0ea5cbfc3c433c1f4115b53ded5..44e2466f3c6744c21190f73475d8dcce106e9ddc 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -674,7 +674,7 @@ static int brcmstb_i2c_probe(struct platform_device *pdev) /* set the data in/out register size for compatible SoCs */ if (of_device_is_compatible(dev->device->of_node, - "brcmstb,brcmper-i2c")) + "brcm,brcmper-i2c")) dev->data_regsz = sizeof(u8); else dev->data_regsz = sizeof(u32); diff --git a/drivers/i2c/busses/i2c-cbus-gpio.c b/drivers/i2c/busses/i2c-cbus-gpio.c index 72df563477b1c3e3d99feea2126bdc03fd408cd2..f8639a4457d23ae55eece7874c10e3a05562b58d 100644 --- a/drivers/i2c/busses/i2c-cbus-gpio.c +++ b/drivers/i2c/busses/i2c-cbus-gpio.c @@ -195,8 +195,9 @@ static u32 cbus_i2c_func(struct i2c_adapter *adapter) } static const struct i2c_algorithm cbus_i2c_algo = { - .smbus_xfer = cbus_i2c_smbus_xfer, - .functionality = cbus_i2c_func, + .smbus_xfer = cbus_i2c_smbus_xfer, + .smbus_xfer_atomic = cbus_i2c_smbus_xfer, + .functionality = cbus_i2c_func, }; static int cbus_i2c_remove(struct platform_device *pdev) diff --git a/drivers/i2c/busses/i2c-designware-pcidrv.c b/drivers/i2c/busses/i2c-designware-pcidrv.c index 55c83a7a24f368fa969618acfe813b2fb9e486e3..56c87ade0e89d29d877b97b2063097585dd596f1 100644 --- a/drivers/i2c/busses/i2c-designware-pcidrv.c +++ b/drivers/i2c/busses/i2c-designware-pcidrv.c @@ -37,10 +37,10 @@ enum dw_pci_ctl_id_t { }; struct dw_scl_sda_cfg { - u32 ss_hcnt; - u32 fs_hcnt; - u32 ss_lcnt; - u32 fs_lcnt; + u16 ss_hcnt; + u16 fs_hcnt; + u16 ss_lcnt; + u16 fs_lcnt; u32 sda_hold; }; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index eab6fd6b890ebe3e7750a2333f223c886e6d250f..5618c1ff34dc3a88c6e65fbdf179c435b7d9512f 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -797,6 +797,11 @@ static int i801_block_transaction(struct i801_priv *priv, int result = 0; unsigned char hostc; + if (read_write == I2C_SMBUS_READ && command == I2C_SMBUS_BLOCK_DATA) + data->block[0] = I2C_SMBUS_BLOCK_MAX; + else if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX) + return -EPROTO; + if (command == I2C_SMBUS_I2C_BLOCK_DATA) { if (read_write == I2C_SMBUS_WRITE) { /* set I2C_EN bit in configuration register */ @@ -810,16 +815,6 @@ static int i801_block_transaction(struct i801_priv *priv, } } - if (read_write == I2C_SMBUS_WRITE - || command == I2C_SMBUS_I2C_BLOCK_DATA) { - if (data->block[0] < 1) - data->block[0] = 1; - if (data->block[0] > I2C_SMBUS_BLOCK_MAX) - data->block[0] = I2C_SMBUS_BLOCK_MAX; - } else { - data->block[0] = 32; /* max for SMBus block reads */ - } - /* Experience has shown that the block buffer can only be used for SMBus (not I2C) block transactions, even though the datasheet doesn't mention this limitation. */ diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index af349661fd7694382a6d4221677715ade8e2b3d4..8de8296d258318437441ee7e8aa4529c15516c09 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -105,23 +105,30 @@ static irqreturn_t mpc_i2c_isr(int irq, void *dev_id) /* Sometimes 9th clock pulse isn't generated, and slave doesn't release * the bus, because it wants to send ACK. * Following sequence of enabling/disabling and sending start/stop generates - * the 9 pulses, so it's all OK. + * the 9 pulses, each with a START then ending with STOP, so it's all OK. */ static void mpc_i2c_fixup(struct mpc_i2c *i2c) { int k; - u32 delay_val = 1000000 / i2c->real_clk + 1; - - if (delay_val < 2) - delay_val = 2; + unsigned long flags; for (k = 9; k; k--) { writeccr(i2c, 0); - writeccr(i2c, CCR_MSTA | CCR_MTX | CCR_MEN); + writeb(0, i2c->base + MPC_I2C_SR); /* clear any status bits */ + writeccr(i2c, CCR_MEN | CCR_MSTA); /* START */ + readb(i2c->base + MPC_I2C_DR); /* init xfer */ + udelay(15); /* let it hit the bus */ + local_irq_save(flags); /* should not be delayed further */ + writeccr(i2c, CCR_MEN | CCR_MSTA | CCR_RSTA); /* delay SDA */ readb(i2c->base + MPC_I2C_DR); - writeccr(i2c, CCR_MEN); - udelay(delay_val << 1); + if (k != 1) + udelay(5); + local_irq_restore(flags); } + writeccr(i2c, CCR_MEN); /* Initiate STOP */ + readb(i2c->base + MPC_I2C_DR); + udelay(15); /* Let STOP propagate */ + writeccr(i2c, 0); } static int i2c_wait(struct mpc_i2c *i2c, unsigned timeout, int writing) diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index 1a5f1ccd1d2f73298a20c02b707749712fa3ba2c..265635db29aa59df334e6c26d4a990f997c29a98 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -41,6 +41,8 @@ #define I2C_HANDSHAKE_RST 0x0020 #define I2C_FIFO_ADDR_CLR 0x0001 #define I2C_DELAY_LEN 0x0002 +#define I2C_ST_START_CON 0x8001 +#define I2C_FS_START_CON 0x1800 #define I2C_TIME_CLR_VALUE 0x0000 #define I2C_TIME_DEFAULT_VALUE 0x0003 #define I2C_WRRD_TRANAC_VALUE 0x0002 @@ -193,7 +195,7 @@ static const u16 mt_i2c_regs_v2[] = { [OFFSET_CLOCK_DIV] = 0x48, [OFFSET_SOFTRESET] = 0x50, [OFFSET_SCL_MIS_COMP_POINT] = 0x90, - [OFFSET_DEBUGSTAT] = 0xe0, + [OFFSET_DEBUGSTAT] = 0xe4, [OFFSET_DEBUGCTRL] = 0xe8, [OFFSET_FIFO_STAT] = 0xf4, [OFFSET_FIFO_THRESH] = 0xf8, @@ -479,6 +481,7 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) { u16 control_reg; u16 intr_stat_reg; + u16 ext_conf_val; mtk_i2c_writew(i2c, I2C_CHN_CLR_FLAG, OFFSET_START); intr_stat_reg = mtk_i2c_readw(i2c, OFFSET_INTR_STAT); @@ -517,8 +520,13 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) if (i2c->dev_comp->ltiming_adjust) mtk_i2c_writew(i2c, i2c->ltiming_reg, OFFSET_LTIMING); + if (i2c->speed_hz <= I2C_MAX_STANDARD_MODE_FREQ) + ext_conf_val = I2C_ST_START_CON; + else + ext_conf_val = I2C_FS_START_CON; + if (i2c->dev_comp->timing_adjust) { - mtk_i2c_writew(i2c, i2c->ac_timing.ext, OFFSET_EXT_CONF); + ext_conf_val = i2c->ac_timing.ext; mtk_i2c_writew(i2c, i2c->ac_timing.inter_clk_div, OFFSET_CLOCK_DIV); mtk_i2c_writew(i2c, I2C_SCL_MIS_COMP_VALUE, @@ -543,6 +551,7 @@ static void mtk_i2c_init_hw(struct mtk_i2c *i2c) OFFSET_HS_STA_STO_AC_TIMING); } } + mtk_i2c_writew(i2c, ext_conf_val, OFFSET_EXT_CONF); /* If use i2c pin from PMIC mt6397 side, need set PATH_DIR first */ if (i2c->have_pmic) diff --git a/drivers/i2c/busses/i2c-qcom-cci.c b/drivers/i2c/busses/i2c-qcom-cci.c index 1c259b5188de8b2e2b7d6e202d088ee51c7f002c..09e599069a81d2133127c1ebba763bd84b174a4c 100644 --- a/drivers/i2c/busses/i2c-qcom-cci.c +++ b/drivers/i2c/busses/i2c-qcom-cci.c @@ -558,7 +558,7 @@ static int cci_probe(struct platform_device *pdev) cci->master[idx].adap.quirks = &cci->data->quirks; cci->master[idx].adap.algo = &cci_algo; cci->master[idx].adap.dev.parent = dev; - cci->master[idx].adap.dev.of_node = child; + cci->master[idx].adap.dev.of_node = of_node_get(child); cci->master[idx].master = idx; cci->master[idx].cci = cci; @@ -643,8 +643,10 @@ static int cci_probe(struct platform_device *pdev) continue; ret = i2c_add_adapter(&cci->master[i].adap); - if (ret < 0) + if (ret < 0) { + of_node_put(cci->master[i].adap.dev.of_node); goto error_i2c; + } } pm_runtime_set_autosuspend_delay(dev, MSEC_PER_SEC); @@ -655,9 +657,11 @@ static int cci_probe(struct platform_device *pdev) return 0; error_i2c: - for (; i >= 0; i--) { - if (cci->master[i].cci) + for (--i ; i >= 0; i--) { + if (cci->master[i].cci) { i2c_del_adapter(&cci->master[i].adap); + of_node_put(cci->master[i].adap.dev.of_node); + } } error: disable_irq(cci->irq); @@ -673,8 +677,10 @@ static int cci_remove(struct platform_device *pdev) int i; for (i = 0; i < cci->data->num_masters; i++) { - if (cci->master[i].cci) + if (cci->master[i].cci) { i2c_del_adapter(&cci->master[i].adap); + of_node_put(cci->master[i].adap.dev.of_node); + } cci_halt(cci, i); } diff --git a/drivers/i2c/busses/i2c-rk3x.c b/drivers/i2c/busses/i2c-rk3x.c index 819ab4ee517e13cf1367cbfb157a72add5129d6b..02ddb237f69afdfcc766f1703e9b81297882534a 100644 --- a/drivers/i2c/busses/i2c-rk3x.c +++ b/drivers/i2c/busses/i2c-rk3x.c @@ -423,8 +423,8 @@ static void rk3x_i2c_handle_read(struct rk3x_i2c *i2c, unsigned int ipd) if (!(ipd & REG_INT_MBRF)) return; - /* ack interrupt */ - i2c_writel(i2c, REG_INT_MBRF, REG_IPD); + /* ack interrupt (read also produces a spurious START flag, clear it too) */ + i2c_writel(i2c, REG_INT_MBRF | REG_INT_START, REG_IPD); /* Can only handle a maximum of 32 bytes at a time */ if (len > 32) diff --git a/drivers/i2c/busses/i2c-stm32f7.c b/drivers/i2c/busses/i2c-stm32f7.c index 1e800b65e20a0113e8b0e8d3d5d765b32a16347b..0f4c0282028b089316699846d3e115901e61d0ff 100644 --- a/drivers/i2c/busses/i2c-stm32f7.c +++ b/drivers/i2c/busses/i2c-stm32f7.c @@ -1472,6 +1472,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) { struct stm32f7_i2c_dev *i2c_dev = data; struct stm32f7_i2c_msg *f7_msg = &i2c_dev->f7_msg; + struct stm32_i2c_dma *dma = i2c_dev->dma; void __iomem *base = i2c_dev->base; u32 status, mask; int ret = IRQ_HANDLED; @@ -1497,6 +1498,10 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) dev_dbg(i2c_dev->dev, "<%s>: Receive NACK (addr %x)\n", __func__, f7_msg->addr); writel_relaxed(STM32F7_I2C_ICR_NACKCF, base + STM32F7_I2C_ICR); + if (i2c_dev->use_dma) { + stm32f7_i2c_disable_dma_req(i2c_dev); + dmaengine_terminate_all(dma->chan_using); + } f7_msg->result = -ENXIO; } @@ -1512,7 +1517,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) /* Clear STOP flag */ writel_relaxed(STM32F7_I2C_ICR_STOPCF, base + STM32F7_I2C_ICR); - if (i2c_dev->use_dma) { + if (i2c_dev->use_dma && !f7_msg->result) { ret = IRQ_WAKE_THREAD; } else { i2c_dev->master_mode = false; @@ -1525,7 +1530,7 @@ static irqreturn_t stm32f7_i2c_isr_event(int irq, void *data) if (f7_msg->stop) { mask = STM32F7_I2C_CR2_STOP; stm32f7_i2c_set_bits(base + STM32F7_I2C_CR2, mask); - } else if (i2c_dev->use_dma) { + } else if (i2c_dev->use_dma && !f7_msg->result) { ret = IRQ_WAKE_THREAD; } else if (f7_msg->smbus) { stm32f7_i2c_smbus_rep_start(i2c_dev); @@ -1665,12 +1670,23 @@ static int stm32f7_i2c_xfer(struct i2c_adapter *i2c_adap, time_left = wait_for_completion_timeout(&i2c_dev->complete, i2c_dev->adap.timeout); ret = f7_msg->result; + if (ret) { + /* + * It is possible that some unsent data have already been + * written into TXDR. To avoid sending old data in a + * further transfer, flush TXDR in case of any error + */ + writel_relaxed(STM32F7_I2C_ISR_TXE, + i2c_dev->base + STM32F7_I2C_ISR); + goto pm_free; + } if (!time_left) { dev_dbg(i2c_dev->dev, "Access to slave 0x%x timed out\n", i2c_dev->msg->addr); if (i2c_dev->use_dma) dmaengine_terminate_all(dma->chan_using); + stm32f7_i2c_wait_free_bus(i2c_dev); ret = -ETIMEDOUT; } @@ -1713,13 +1729,22 @@ static int stm32f7_i2c_smbus_xfer(struct i2c_adapter *adapter, u16 addr, timeout = wait_for_completion_timeout(&i2c_dev->complete, i2c_dev->adap.timeout); ret = f7_msg->result; - if (ret) + if (ret) { + /* + * It is possible that some unsent data have already been + * written into TXDR. To avoid sending old data in a + * further transfer, flush TXDR in case of any error + */ + writel_relaxed(STM32F7_I2C_ISR_TXE, + i2c_dev->base + STM32F7_I2C_ISR); goto pm_free; + } if (!timeout) { dev_dbg(dev, "Access to slave 0x%x timed out\n", f7_msg->addr); if (i2c_dev->use_dma) dmaengine_terminate_all(dma->chan_using); + stm32f7_i2c_wait_free_bus(i2c_dev); ret = -ETIMEDOUT; goto pm_free; } diff --git a/drivers/i2c/busses/i2c-xlr.c b/drivers/i2c/busses/i2c-xlr.c index 126d1393e548b40436bcbe1839375c4ebe6caa18..9ce20652d4942842ee9721c8989aa42db6b76374 100644 --- a/drivers/i2c/busses/i2c-xlr.c +++ b/drivers/i2c/busses/i2c-xlr.c @@ -431,11 +431,15 @@ static int xlr_i2c_probe(struct platform_device *pdev) i2c_set_adapdata(&priv->adap, priv); ret = i2c_add_numbered_adapter(&priv->adap); if (ret < 0) - return ret; + goto err_unprepare_clk; platform_set_drvdata(pdev, priv); dev_info(&priv->adap.dev, "Added I2C Bus.\n"); return 0; + +err_unprepare_clk: + clk_unprepare(clk); + return ret; } static int xlr_i2c_remove(struct platform_device *pdev) diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index 37c510d9347a7395aa5da26e4a384a7ed3f32714..4b136d87107430019aa8079fceb2c41d4bd87270 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -426,6 +426,7 @@ static int i2c_acpi_notify(struct notifier_block *nb, unsigned long value, break; i2c_acpi_register_device(adapter, adev, &info); + put_device(&adapter->dev); break; case ACPI_RECONFIG_DEVICE_REMOVE: if (!acpi_device_enumerated(adev)) diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index f358120d59b38e25ff19574dcae1a271798937f6..dafad891998ecf9072806000c22894aceae502a7 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -536,6 +536,9 @@ static long compat_i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned lo sizeof(rdwr_arg))) return -EFAULT; + if (!rdwr_arg.msgs || rdwr_arg.nmsgs == 0) + return -EINVAL; + if (rdwr_arg.nmsgs > I2C_RDWR_IOCTL_MAX_MSGS) return -EINVAL; diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c index 48435865fdaf34387b563b9ce025e00fddedcc0b..792526462f1c9c4275edda27526cc7eb555e97a0 100644 --- a/drivers/iio/accel/bmc150-accel-core.c +++ b/drivers/iio/accel/bmc150-accel-core.c @@ -1648,11 +1648,14 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(dev, "Unable to register iio device\n"); - goto err_trigger_unregister; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); err_trigger_unregister: bmc150_accel_unregister_triggers(data, BMC150_ACCEL_TRIGGERS - 1); err_buffer_cleanup: diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c index c99e90469a245cc3d8e65c347a33abc17a4aa6d3..89e0a89d95d6bdadc34dcda832345ae95e4c89d8 100644 --- a/drivers/iio/accel/kxcjk-1013.c +++ b/drivers/iio/accel/kxcjk-1013.c @@ -1429,14 +1429,16 @@ static int kxcjk1013_probe(struct i2c_client *client, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "unable to register iio device\n"); - goto err_buffer_cleanup; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); err_buffer_cleanup: - if (data->dready_trig) - iio_triggered_buffer_cleanup(indio_dev); + iio_triggered_buffer_cleanup(indio_dev); err_trigger_unregister: if (data->dready_trig) iio_trigger_unregister(data->dready_trig); @@ -1459,8 +1461,8 @@ static int kxcjk1013_remove(struct i2c_client *client) pm_runtime_set_suspended(&client->dev); pm_runtime_put_noidle(&client->dev); + iio_triggered_buffer_cleanup(indio_dev); if (data->dready_trig) { - iio_triggered_buffer_cleanup(indio_dev); iio_trigger_unregister(data->dready_trig); iio_trigger_unregister(data->motion_trig); } diff --git a/drivers/iio/accel/kxsd9.c b/drivers/iio/accel/kxsd9.c index 0e18b92e20992a69fca70ffe585236f78e6cff8c..a51568ba8b7d2be7b4657106590839b1d1ff842a 100644 --- a/drivers/iio/accel/kxsd9.c +++ b/drivers/iio/accel/kxsd9.c @@ -224,14 +224,14 @@ static irqreturn_t kxsd9_trigger_handler(int irq, void *p) hw_values.chan, sizeof(hw_values.chan)); if (ret) { - dev_err(st->dev, - "error reading data\n"); - return ret; + dev_err(st->dev, "error reading data: %d\n", ret); + goto out; } iio_push_to_buffers_with_timestamp(indio_dev, &hw_values, iio_get_time_ns(indio_dev)); +out: iio_trigger_notify_done(indio_dev->trig); return IRQ_HANDLED; diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c index bf1d2c8afdbd7182e5b2337a3a24ad93efcd2443..a7208704d31c90d15b7e40bff45e43ab7dd783ec 100644 --- a/drivers/iio/accel/mma8452.c +++ b/drivers/iio/accel/mma8452.c @@ -1473,7 +1473,7 @@ static int mma8452_trigger_setup(struct iio_dev *indio_dev) if (ret) return ret; - indio_dev->trig = trig; + indio_dev->trig = iio_trigger_get(trig); return 0; } diff --git a/drivers/iio/accel/mma9551.c b/drivers/iio/accel/mma9551.c index 08a2303cc9df3c08022bb32730faa41c01f49816..26421e8e8263958b1585d0b8d176a56bca0c4951 100644 --- a/drivers/iio/accel/mma9551.c +++ b/drivers/iio/accel/mma9551.c @@ -495,11 +495,14 @@ static int mma9551_probe(struct i2c_client *client, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "unable to register iio device\n"); - goto out_poweroff; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); out_poweroff: mma9551_set_device_state(client, false); diff --git a/drivers/iio/accel/mma9553.c b/drivers/iio/accel/mma9553.c index c15908faa38167b0d6ff68d53ceca8de0ed2d403..a23a7685d1f93c893da306c14760eeea03966e94 100644 --- a/drivers/iio/accel/mma9553.c +++ b/drivers/iio/accel/mma9553.c @@ -1134,12 +1134,15 @@ static int mma9553_probe(struct i2c_client *client, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "unable to register iio device\n"); - goto out_poweroff; + goto err_pm_cleanup; } dev_dbg(&indio_dev->dev, "Registered device %s\n", name); return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); out_poweroff: mma9551_set_device_state(client, false); return ret; diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c index 43c50167d220cecb17ea201dd0295bd3a1973fdd..bde0ca3ef7a4c82c32e49ef4029161753d46b168 100644 --- a/drivers/iio/accel/st_accel_core.c +++ b/drivers/iio/accel/st_accel_core.c @@ -1255,13 +1255,9 @@ int st_accel_common_probe(struct iio_dev *indio_dev) indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &accel_info; - err = st_sensors_power_enable(indio_dev); - if (err) - return err; - err = st_sensors_verify_id(indio_dev); if (err < 0) - goto st_accel_power_off; + return err; adata->num_data_channels = ST_ACCEL_NUMBER_DATA_CHANNELS; indio_dev->num_channels = ST_SENSORS_NUMBER_ALL_CHANNELS; @@ -1270,10 +1266,8 @@ int st_accel_common_probe(struct iio_dev *indio_dev) channels = devm_kmemdup(&indio_dev->dev, adata->sensor_settings->ch, channels_size, GFP_KERNEL); - if (!channels) { - err = -ENOMEM; - goto st_accel_power_off; - } + if (!channels) + return -ENOMEM; if (apply_acpi_orientation(indio_dev, channels)) dev_warn(&indio_dev->dev, @@ -1288,11 +1282,11 @@ int st_accel_common_probe(struct iio_dev *indio_dev) err = st_sensors_init_sensor(indio_dev, pdata); if (err < 0) - goto st_accel_power_off; + return err; err = st_accel_allocate_ring(indio_dev); if (err < 0) - goto st_accel_power_off; + return err; if (adata->irq > 0) { err = st_sensors_allocate_trigger(indio_dev, @@ -1315,9 +1309,6 @@ st_accel_device_register_error: st_sensors_deallocate_trigger(indio_dev); st_accel_probe_trigger_error: st_accel_deallocate_ring(indio_dev); -st_accel_power_off: - st_sensors_power_disable(indio_dev); - return err; } EXPORT_SYMBOL(st_accel_common_probe); @@ -1326,8 +1317,6 @@ void st_accel_common_remove(struct iio_dev *indio_dev) { struct st_sensor_data *adata = iio_priv(indio_dev); - st_sensors_power_disable(indio_dev); - iio_device_unregister(indio_dev); if (adata->irq > 0) st_sensors_deallocate_trigger(indio_dev); diff --git a/drivers/iio/accel/st_accel_i2c.c b/drivers/iio/accel/st_accel_i2c.c index 360e16f2cadb9669d482d2c0c54ab384bf9ae848..02c823b93ecd498d3f6a17022735cf6466558bbb 100644 --- a/drivers/iio/accel/st_accel_i2c.c +++ b/drivers/iio/accel/st_accel_i2c.c @@ -174,16 +174,29 @@ static int st_accel_i2c_probe(struct i2c_client *client) if (ret < 0) return ret; + ret = st_sensors_power_enable(indio_dev); + if (ret) + return ret; + ret = st_accel_common_probe(indio_dev); if (ret < 0) - return ret; + goto st_accel_power_off; return 0; + +st_accel_power_off: + st_sensors_power_disable(indio_dev); + + return ret; } static int st_accel_i2c_remove(struct i2c_client *client) { - st_accel_common_remove(i2c_get_clientdata(client)); + struct iio_dev *indio_dev = i2c_get_clientdata(client); + + st_accel_common_remove(indio_dev); + + st_sensors_power_disable(indio_dev); return 0; } diff --git a/drivers/iio/accel/st_accel_spi.c b/drivers/iio/accel/st_accel_spi.c index 568ff1bae0eee679f761dcc3e5f1e802da9ef74d..386ae18d5f2692e74d20703bd447047101963527 100644 --- a/drivers/iio/accel/st_accel_spi.c +++ b/drivers/iio/accel/st_accel_spi.c @@ -123,16 +123,29 @@ static int st_accel_spi_probe(struct spi_device *spi) if (err < 0) return err; + err = st_sensors_power_enable(indio_dev); + if (err) + return err; + err = st_accel_common_probe(indio_dev); if (err < 0) - return err; + goto st_accel_power_off; return 0; + +st_accel_power_off: + st_sensors_power_disable(indio_dev); + + return err; } static int st_accel_spi_remove(struct spi_device *spi) { - st_accel_common_remove(spi_get_drvdata(spi)); + struct iio_dev *indio_dev = spi_get_drvdata(spi); + + st_accel_common_remove(indio_dev); + + st_sensors_power_disable(indio_dev); return 0; } diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c index 9c2401c5848ece566c7bc8944d2699c7f4cd20fa..bd350099503765cc2213470bbb137057be211924 100644 --- a/drivers/iio/adc/ad7124.c +++ b/drivers/iio/adc/ad7124.c @@ -74,7 +74,7 @@ #define AD7124_CONFIG_REF_SEL(x) FIELD_PREP(AD7124_CONFIG_REF_SEL_MSK, x) #define AD7124_CONFIG_PGA_MSK GENMASK(2, 0) #define AD7124_CONFIG_PGA(x) FIELD_PREP(AD7124_CONFIG_PGA_MSK, x) -#define AD7124_CONFIG_IN_BUFF_MSK GENMASK(7, 6) +#define AD7124_CONFIG_IN_BUFF_MSK GENMASK(6, 5) #define AD7124_CONFIG_IN_BUFF(x) FIELD_PREP(AD7124_CONFIG_IN_BUFF_MSK, x) /* AD7124_FILTER_X */ diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c index 1141cc13a1249829ee9e83fc9718ff5a8976f1df..1b8baba9d4d64f14c214732be2f39d46e56d5a05 100644 --- a/drivers/iio/adc/ad7192.c +++ b/drivers/iio/adc/ad7192.c @@ -293,6 +293,7 @@ static const struct ad_sigma_delta_info ad7192_sigma_delta_info = { .has_registers = true, .addr_shift = 3, .read_mask = BIT(6), + .irq_flags = IRQF_TRIGGER_FALLING, }; static const struct ad_sd_calib_data ad7192_calib_arr[8] = { diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index c7e15c45140ad5f6f72fc558ac3d0a25344d0002..4afa50e5c058a68b2294fd5c38b735d3b3dcca7d 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -470,8 +470,8 @@ static irqreturn_t ad7768_trigger_handler(int irq, void *p) iio_push_to_buffers_with_timestamp(indio_dev, &st->data.scan, iio_get_time_ns(indio_dev)); - iio_trigger_notify_done(indio_dev->trig); err_unlock: + iio_trigger_notify_done(indio_dev->trig); mutex_unlock(&st->lock); return IRQ_HANDLED; diff --git a/drivers/iio/adc/ad7780.c b/drivers/iio/adc/ad7780.c index 42e7e8e595d18b6f82ab544491719821eca60586..c70048bc791bd7d3ddc23c9e70bcfbe494072006 100644 --- a/drivers/iio/adc/ad7780.c +++ b/drivers/iio/adc/ad7780.c @@ -203,7 +203,7 @@ static const struct ad_sigma_delta_info ad7780_sigma_delta_info = { .set_mode = ad7780_set_mode, .postprocess_sample = ad7780_postprocess_sample, .has_registers = false, - .irq_flags = IRQF_TRIGGER_LOW, + .irq_flags = IRQF_TRIGGER_FALLING, }; #define _AD7780_CHANNEL(_bits, _wordsize, _mask_all) \ diff --git a/drivers/iio/adc/ad7793.c b/drivers/iio/adc/ad7793.c index 440ef4c7be074c1c4b9f1d680147fd47703dd944..7c9c95c252cf859ad4988061dc700d57c584ef76 100644 --- a/drivers/iio/adc/ad7793.c +++ b/drivers/iio/adc/ad7793.c @@ -206,7 +206,7 @@ static const struct ad_sigma_delta_info ad7793_sigma_delta_info = { .has_registers = true, .addr_shift = 3, .read_mask = BIT(6), - .irq_flags = IRQF_TRIGGER_LOW, + .irq_flags = IRQF_TRIGGER_FALLING, }; static const struct ad_sd_calib_data ad7793_calib_arr[6] = { diff --git a/drivers/iio/adc/aspeed_adc.c b/drivers/iio/adc/aspeed_adc.c index 19efaa41bc344b6def9d03bc59efbe62c6e64d82..34ec0c28b2dff6b145445e1aabf5fc89c20ac363 100644 --- a/drivers/iio/adc/aspeed_adc.c +++ b/drivers/iio/adc/aspeed_adc.c @@ -183,6 +183,7 @@ static int aspeed_adc_probe(struct platform_device *pdev) data = iio_priv(indio_dev); data->dev = &pdev->dev; + platform_set_drvdata(pdev, indio_dev); data->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(data->base)) diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c index b8139c435a4b0dc5d16f88393400ede3a4012024..4ede7e766765e2f8a1cfa8af3efbcb67d0ca7646 100644 --- a/drivers/iio/adc/at91-sama5d2_adc.c +++ b/drivers/iio/adc/at91-sama5d2_adc.c @@ -1401,7 +1401,8 @@ static int at91_adc_read_info_raw(struct iio_dev *indio_dev, *val = st->conversion_value; ret = at91_adc_adjust_val_osr(st, val); if (chan->scan_type.sign == 's') - *val = sign_extend32(*val, 11); + *val = sign_extend32(*val, + chan->scan_type.realbits - 1); st->conversion_done = false; } diff --git a/drivers/iio/adc/axp20x_adc.c b/drivers/iio/adc/axp20x_adc.c index 3e0c0233b43156c899398c34b240001097d40058..df99f1365c398306524389f376002060247b6fb0 100644 --- a/drivers/iio/adc/axp20x_adc.c +++ b/drivers/iio/adc/axp20x_adc.c @@ -251,19 +251,8 @@ static int axp22x_adc_raw(struct iio_dev *indio_dev, struct iio_chan_spec const *chan, int *val) { struct axp20x_adc_iio *info = iio_priv(indio_dev); - int size; - /* - * N.B.: Unlike the Chinese datasheets tell, the charging current is - * stored on 12 bits, not 13 bits. Only discharging current is on 13 - * bits. - */ - if (chan->type == IIO_CURRENT && chan->channel == AXP22X_BATT_DISCHRG_I) - size = 13; - else - size = 12; - - *val = axp20x_read_variable_width(info->regmap, chan->address, size); + *val = axp20x_read_variable_width(info->regmap, chan->address, 12); if (*val < 0) return *val; @@ -386,9 +375,8 @@ static int axp22x_adc_scale(struct iio_chan_spec const *chan, int *val, return IIO_VAL_INT_PLUS_MICRO; case IIO_CURRENT: - *val = 0; - *val2 = 500000; - return IIO_VAL_INT_PLUS_MICRO; + *val = 1; + return IIO_VAL_INT; case IIO_TEMP: *val = 100; diff --git a/drivers/iio/adc/dln2-adc.c b/drivers/iio/adc/dln2-adc.c index 0d53ef18e0459ce204889a8dfb18e360e44b4854..58b3f6065db0d33e4a9323517d5742472a8da735 100644 --- a/drivers/iio/adc/dln2-adc.c +++ b/drivers/iio/adc/dln2-adc.c @@ -248,7 +248,6 @@ static int dln2_adc_set_chan_period(struct dln2_adc *dln2, static int dln2_adc_read(struct dln2_adc *dln2, unsigned int channel) { int ret, i; - struct iio_dev *indio_dev = platform_get_drvdata(dln2->pdev); u16 conflict; __le16 value; int olen = sizeof(value); @@ -257,13 +256,9 @@ static int dln2_adc_read(struct dln2_adc *dln2, unsigned int channel) .chan = channel, }; - ret = iio_device_claim_direct_mode(indio_dev); - if (ret < 0) - return ret; - ret = dln2_adc_set_chan_enabled(dln2, channel, true); if (ret < 0) - goto release_direct; + return ret; ret = dln2_adc_set_port_enabled(dln2, true, &conflict); if (ret < 0) { @@ -300,8 +295,6 @@ disable_port: dln2_adc_set_port_enabled(dln2, false, NULL); disable_chan: dln2_adc_set_chan_enabled(dln2, channel, false); -release_direct: - iio_device_release_direct_mode(indio_dev); return ret; } @@ -337,10 +330,16 @@ static int dln2_adc_read_raw(struct iio_dev *indio_dev, switch (mask) { case IIO_CHAN_INFO_RAW: + ret = iio_device_claim_direct_mode(indio_dev); + if (ret < 0) + return ret; + mutex_lock(&dln2->mutex); ret = dln2_adc_read(dln2, chan->channel); mutex_unlock(&dln2->mutex); + iio_device_release_direct_mode(indio_dev); + if (ret < 0) return ret; @@ -655,7 +654,11 @@ static int dln2_adc_probe(struct platform_device *pdev) return -ENOMEM; } iio_trigger_set_drvdata(dln2->trig, dln2); - devm_iio_trigger_register(dev, dln2->trig); + ret = devm_iio_trigger_register(dev, dln2->trig); + if (ret) { + dev_err(dev, "failed to register trigger: %d\n", ret); + return ret; + } iio_trigger_set_immutable(indio_dev, dln2->trig); ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL, diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c index ca1dff3924ff9c8d93802297f299c280a38d922c..a08efaaf1a814f4f7e797551eb5aa10a8fca0047 100644 --- a/drivers/iio/adc/max1027.c +++ b/drivers/iio/adc/max1027.c @@ -103,7 +103,7 @@ MODULE_DEVICE_TABLE(of, max1027_adc_dt_ids); .sign = 'u', \ .realbits = depth, \ .storagebits = 16, \ - .shift = 2, \ + .shift = (depth == 10) ? 2 : 0, \ .endianness = IIO_BE, \ }, \ } @@ -142,7 +142,6 @@ MODULE_DEVICE_TABLE(of, max1027_adc_dt_ids); MAX1027_V_CHAN(11, depth) #define MAX1X31_CHANNELS(depth) \ - MAX1X27_CHANNELS(depth), \ MAX1X29_CHANNELS(depth), \ MAX1027_V_CHAN(12, depth), \ MAX1027_V_CHAN(13, depth), \ diff --git a/drivers/iio/adc/men_z188_adc.c b/drivers/iio/adc/men_z188_adc.c index 42ea8bc7e78051c16bc8f1dcd7546393c99153d5..adc5ceaef8c93a373daef26dbaded4d4be1904ca 100644 --- a/drivers/iio/adc/men_z188_adc.c +++ b/drivers/iio/adc/men_z188_adc.c @@ -103,6 +103,7 @@ static int men_z188_probe(struct mcb_device *dev, struct z188_adc *adc; struct iio_dev *indio_dev; struct resource *mem; + int ret; indio_dev = devm_iio_device_alloc(&dev->dev, sizeof(struct z188_adc)); if (!indio_dev) @@ -128,8 +129,14 @@ static int men_z188_probe(struct mcb_device *dev, adc->mem = mem; mcb_set_drvdata(dev, indio_dev); - return iio_device_register(indio_dev); + ret = iio_device_register(indio_dev); + if (ret) + goto err_unmap; + + return 0; +err_unmap: + iounmap(adc->base); err: mcb_release_mem(mem); return -ENXIO; diff --git a/drivers/iio/adc/mt6577_auxadc.c b/drivers/iio/adc/mt6577_auxadc.c index 79c1dd68b9092c2449967f12837e0a3314114f70..d4fccd52ef08ba147a3fe3844ea2e31fa4f6fb0a 100644 --- a/drivers/iio/adc/mt6577_auxadc.c +++ b/drivers/iio/adc/mt6577_auxadc.c @@ -82,6 +82,10 @@ static const struct iio_chan_spec mt6577_auxadc_iio_channels[] = { MT6577_AUXADC_CHANNEL(15), }; +/* For Voltage calculation */ +#define VOLTAGE_FULL_RANGE 1500 /* VA voltage */ +#define AUXADC_PRECISE 4096 /* 12 bits */ + static int mt_auxadc_get_cali_data(int rawdata, bool enable_cali) { return rawdata; @@ -191,6 +195,10 @@ static int mt6577_auxadc_read_raw(struct iio_dev *indio_dev, } if (adc_dev->dev_comp->sample_data_cali) *val = mt_auxadc_get_cali_data(*val, true); + + /* Convert adc raw data to voltage: 0 - 1500 mV */ + *val = *val * VOLTAGE_FULL_RANGE / AUXADC_PRECISE; + return IIO_VAL_INT; default: diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c index 16c02c30dec7558e6d57e1ad6e5962fe057c9bea..9939dee01743382d227c9360ad3b2453a1bc7755 100644 --- a/drivers/iio/adc/stm32-adc.c +++ b/drivers/iio/adc/stm32-adc.c @@ -979,6 +979,7 @@ static void stm32h7_adc_unprepare(struct iio_dev *indio_dev) { struct stm32_adc *adc = iio_priv(indio_dev); + stm32_adc_writel(adc, STM32H7_ADC_PCSEL, 0); stm32h7_adc_disable(indio_dev); stm32h7_adc_enter_pwr_down(adc); } diff --git a/drivers/iio/adc/ti-adc081c.c b/drivers/iio/adc/ti-adc081c.c index b64718daa2017c91707cc30763ba85d692616e68..c79cd88cd42314a9a4ed658c382524e2df57f849 100644 --- a/drivers/iio/adc/ti-adc081c.c +++ b/drivers/iio/adc/ti-adc081c.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -151,13 +152,16 @@ static int adc081c_probe(struct i2c_client *client, { struct iio_dev *iio; struct adc081c *adc; - struct adcxx1c_model *model; + const struct adcxx1c_model *model; int err; if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA)) return -EOPNOTSUPP; - model = &adcxx1c_models[id->driver_data]; + if (dev_fwnode(&client->dev)) + model = device_get_match_data(&client->dev); + else + model = &adcxx1c_models[id->driver_data]; iio = devm_iio_device_alloc(&client->dev, sizeof(*adc)); if (!iio) @@ -224,10 +228,17 @@ static const struct i2c_device_id adc081c_id[] = { }; MODULE_DEVICE_TABLE(i2c, adc081c_id); +static const struct acpi_device_id adc081c_acpi_match[] = { + /* Used on some AAEON boards */ + { "ADC081C", (kernel_ulong_t)&adcxx1c_models[ADC081C] }, + { } +}; +MODULE_DEVICE_TABLE(acpi, adc081c_acpi_match); + static const struct of_device_id adc081c_of_match[] = { - { .compatible = "ti,adc081c" }, - { .compatible = "ti,adc101c" }, - { .compatible = "ti,adc121c" }, + { .compatible = "ti,adc081c", .data = &adcxx1c_models[ADC081C] }, + { .compatible = "ti,adc101c", .data = &adcxx1c_models[ADC101C] }, + { .compatible = "ti,adc121c", .data = &adcxx1c_models[ADC121C] }, { } }; MODULE_DEVICE_TABLE(of, adc081c_of_match); @@ -236,6 +247,7 @@ static struct i2c_driver adc081c_driver = { .driver = { .name = "adc081c", .of_match_table = adc081c_of_match, + .acpi_match_table = adc081c_acpi_match, }, .probe = adc081c_probe, .remove = adc081c_remove, diff --git a/drivers/iio/adc/ti-adc128s052.c b/drivers/iio/adc/ti-adc128s052.c index 3143f35a6509aa5b92b6aece0fa1de3ab71118d7..83c1ae07b3e9a1c7db56921f14c91c0f2c40c9d8 100644 --- a/drivers/iio/adc/ti-adc128s052.c +++ b/drivers/iio/adc/ti-adc128s052.c @@ -171,7 +171,13 @@ static int adc128_probe(struct spi_device *spi) mutex_init(&adc->lock); ret = iio_device_register(indio_dev); + if (ret) + goto err_disable_regulator; + return 0; + +err_disable_regulator: + regulator_disable(adc->reg); return ret; } diff --git a/drivers/iio/common/ssp_sensors/ssp_spi.c b/drivers/iio/common/ssp_sensors/ssp_spi.c index 4864c38b8d1c2d6f66c89fbe7a1407ebf9f4073f..769bd9280524a0d016eac827b200253844453c80 100644 --- a/drivers/iio/common/ssp_sensors/ssp_spi.c +++ b/drivers/iio/common/ssp_sensors/ssp_spi.c @@ -137,7 +137,7 @@ static int ssp_print_mcu_debug(char *data_frame, int *data_index, if (length > received_len - *data_index || length <= 0) { ssp_dbg("[SSP]: MSG From MCU-invalid debug length(%d/%d)\n", length, received_len); - return length ? length : -EPROTO; + return -EPROTO; } ssp_dbg("[SSP]: MSG From MCU - %s\n", &data_frame[*data_index]); @@ -273,6 +273,8 @@ static int ssp_parse_dataframe(struct ssp_data *data, char *dataframe, int len) for (idx = 0; idx < len;) { switch (dataframe[idx++]) { case SSP_MSG2AP_INST_BYPASS_DATA: + if (idx >= len) + return -EPROTO; sd = dataframe[idx++]; if (sd < 0 || sd >= SSP_SENSOR_MAX) { dev_err(SSP_DEV, @@ -282,10 +284,13 @@ static int ssp_parse_dataframe(struct ssp_data *data, char *dataframe, int len) if (indio_devs[sd]) { spd = iio_priv(indio_devs[sd]); - if (spd->process_data) + if (spd->process_data) { + if (idx >= len) + return -EPROTO; spd->process_data(indio_devs[sd], &dataframe[idx], data->timestamp); + } } else { dev_err(SSP_DEV, "no client for frame\n"); } @@ -293,6 +298,8 @@ static int ssp_parse_dataframe(struct ssp_data *data, char *dataframe, int len) idx += ssp_offset_map[sd]; break; case SSP_MSG2AP_INST_DEBUG_DATA: + if (idx >= len) + return -EPROTO; sd = ssp_print_mcu_debug(dataframe, &idx, len); if (sd) { dev_err(SSP_DEV, diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c index d87e21016863c5b2e2a31df75e4dff8edd8a6109..e86886ca5ee41ff26e6823011a52532466ad2fe4 100644 --- a/drivers/iio/dac/ad5446.c +++ b/drivers/iio/dac/ad5446.c @@ -531,8 +531,15 @@ static int ad5622_write(struct ad5446_state *st, unsigned val) { struct i2c_client *client = to_i2c_client(st->dev); __be16 data = cpu_to_be16(val); + int ret; + + ret = i2c_master_send(client, (char *)&data, sizeof(data)); + if (ret < 0) + return ret; + if (ret != sizeof(data)) + return -EIO; - return i2c_master_send(client, (char *)&data, sizeof(data)); + return 0; } /* diff --git a/drivers/iio/dac/ad5770r.c b/drivers/iio/dac/ad5770r.c index 42decba1463cc0c801cc7df2db2959724f04b639..56d8bd2dd92fd74b5ac944a9274caaee1e2b5218 100644 --- a/drivers/iio/dac/ad5770r.c +++ b/drivers/iio/dac/ad5770r.c @@ -522,7 +522,7 @@ static int ad5770r_channel_config(struct ad5770r_state *st) return -EINVAL; device_for_each_child_node(&st->spi->dev, child) { - ret = fwnode_property_read_u32(child, "num", &num); + ret = fwnode_property_read_u32(child, "reg", &num); if (ret) goto err_child_out; if (num >= AD5770R_MAX_CHANNELS) { diff --git a/drivers/iio/dac/ti-dac5571.c b/drivers/iio/dac/ti-dac5571.c index d3295767a079c83d210143c4c92252f41650a0f9..c0714cb1e164a7128183a252180c3b9baaccc289 100644 --- a/drivers/iio/dac/ti-dac5571.c +++ b/drivers/iio/dac/ti-dac5571.c @@ -350,6 +350,7 @@ static int dac5571_probe(struct i2c_client *client, data->dac5571_pwrdwn = dac5571_pwrdwn_quad; break; default: + ret = -EINVAL; goto err; } diff --git a/drivers/iio/gyro/adxrs290.c b/drivers/iio/gyro/adxrs290.c index ca6fc234076ee8e1198f65c2aabd3b3cfbe49c85..c7f9963391558191f63b9c0a1df6a8d9948e65a8 100644 --- a/drivers/iio/gyro/adxrs290.c +++ b/drivers/iio/gyro/adxrs290.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -124,7 +125,7 @@ static int adxrs290_get_rate_data(struct iio_dev *indio_dev, const u8 cmd, int * goto err_unlock; } - *val = temp; + *val = sign_extend32(temp, 15); err_unlock: mutex_unlock(&st->lock); @@ -146,7 +147,7 @@ static int adxrs290_get_temp_data(struct iio_dev *indio_dev, int *val) } /* extract lower 12 bits temperature reading */ - *val = temp & 0x0FFF; + *val = sign_extend32(temp, 11); err_unlock: mutex_unlock(&st->lock); diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c index 39fe0b1785920c5150005a7af38a702087cdc564..b6b90eebec0b99b290490ae5b7f46c713f06fac9 100644 --- a/drivers/iio/gyro/bmg160_core.c +++ b/drivers/iio/gyro/bmg160_core.c @@ -1170,11 +1170,14 @@ int bmg160_core_probe(struct device *dev, struct regmap *regmap, int irq, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(dev, "unable to register iio device\n"); - goto err_buffer_cleanup; + goto err_pm_cleanup; } return 0; +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); err_buffer_cleanup: iio_triggered_buffer_cleanup(indio_dev); err_trigger_unregister: diff --git a/drivers/iio/gyro/itg3200_buffer.c b/drivers/iio/gyro/itg3200_buffer.c index 1c3c1bd53374a4b45aa1917319e1182d5ef4d636..98b3f021f0bec1a85b0471f691a3d71cf5d9126d 100644 --- a/drivers/iio/gyro/itg3200_buffer.c +++ b/drivers/iio/gyro/itg3200_buffer.c @@ -61,9 +61,9 @@ static irqreturn_t itg3200_trigger_handler(int irq, void *p) iio_push_to_buffers_with_timestamp(indio_dev, &scan, pf->timestamp); +error_ret: iio_trigger_notify_done(indio_dev->trig); -error_ret: return IRQ_HANDLED; } diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c index c8aa051995d3bf6d4a8fb4301f6ae09ee4d4538b..8c87f85f20bd1f2187f67a1e4641de14f87bc9d9 100644 --- a/drivers/iio/gyro/st_gyro_core.c +++ b/drivers/iio/gyro/st_gyro_core.c @@ -466,13 +466,9 @@ int st_gyro_common_probe(struct iio_dev *indio_dev) indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &gyro_info; - err = st_sensors_power_enable(indio_dev); - if (err) - return err; - err = st_sensors_verify_id(indio_dev); if (err < 0) - goto st_gyro_power_off; + return err; gdata->num_data_channels = ST_GYRO_NUMBER_DATA_CHANNELS; indio_dev->channels = gdata->sensor_settings->ch; @@ -485,11 +481,11 @@ int st_gyro_common_probe(struct iio_dev *indio_dev) err = st_sensors_init_sensor(indio_dev, pdata); if (err < 0) - goto st_gyro_power_off; + return err; err = st_gyro_allocate_ring(indio_dev); if (err < 0) - goto st_gyro_power_off; + return err; if (gdata->irq > 0) { err = st_sensors_allocate_trigger(indio_dev, @@ -512,9 +508,6 @@ st_gyro_device_register_error: st_sensors_deallocate_trigger(indio_dev); st_gyro_probe_trigger_error: st_gyro_deallocate_ring(indio_dev); -st_gyro_power_off: - st_sensors_power_disable(indio_dev); - return err; } EXPORT_SYMBOL(st_gyro_common_probe); @@ -523,8 +516,6 @@ void st_gyro_common_remove(struct iio_dev *indio_dev) { struct st_sensor_data *gdata = iio_priv(indio_dev); - st_sensors_power_disable(indio_dev); - iio_device_unregister(indio_dev); if (gdata->irq > 0) st_sensors_deallocate_trigger(indio_dev); diff --git a/drivers/iio/gyro/st_gyro_i2c.c b/drivers/iio/gyro/st_gyro_i2c.c index 8190966e6ff0aa77f24ef8ac1d5da2a0b24b1034..3ed577977946528b9abba233eed01c5d6f56ec36 100644 --- a/drivers/iio/gyro/st_gyro_i2c.c +++ b/drivers/iio/gyro/st_gyro_i2c.c @@ -86,16 +86,29 @@ static int st_gyro_i2c_probe(struct i2c_client *client, if (err < 0) return err; + err = st_sensors_power_enable(indio_dev); + if (err) + return err; + err = st_gyro_common_probe(indio_dev); if (err < 0) - return err; + goto st_gyro_power_off; return 0; + +st_gyro_power_off: + st_sensors_power_disable(indio_dev); + + return err; } static int st_gyro_i2c_remove(struct i2c_client *client) { - st_gyro_common_remove(i2c_get_clientdata(client)); + struct iio_dev *indio_dev = i2c_get_clientdata(client); + + st_gyro_common_remove(indio_dev); + + st_sensors_power_disable(indio_dev); return 0; } diff --git a/drivers/iio/gyro/st_gyro_spi.c b/drivers/iio/gyro/st_gyro_spi.c index efb862763ca3d7b790de1b028e4b6243760ebd8f..c04bcf2518c1188ab89020022b90ad2c97004346 100644 --- a/drivers/iio/gyro/st_gyro_spi.c +++ b/drivers/iio/gyro/st_gyro_spi.c @@ -90,16 +90,29 @@ static int st_gyro_spi_probe(struct spi_device *spi) if (err < 0) return err; + err = st_sensors_power_enable(indio_dev); + if (err) + return err; + err = st_gyro_common_probe(indio_dev); if (err < 0) - return err; + goto st_gyro_power_off; return 0; + +st_gyro_power_off: + st_sensors_power_disable(indio_dev); + + return err; } static int st_gyro_spi_remove(struct spi_device *spi) { - st_gyro_common_remove(spi_get_drvdata(spi)); + struct iio_dev *indio_dev = spi_get_drvdata(spi); + + st_gyro_common_remove(indio_dev); + + st_sensors_power_disable(indio_dev); return 0; } diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c index f8b7837d8b8f62a236f1e7d389a70fdad065af21..715eef81bc24807f1d53afa766e6d8fd1d63011e 100644 --- a/drivers/iio/imu/adis.c +++ b/drivers/iio/imu/adis.c @@ -434,6 +434,8 @@ int __adis_initial_startup(struct adis *adis) if (ret) return ret; + adis_enable_irq(adis, false); + if (!adis->data->prod_id_reg) return 0; @@ -530,7 +532,7 @@ int adis_init(struct adis *adis, struct iio_dev *indio_dev, adis->current_page = 0; } - return adis_enable_irq(adis, false); + return 0; } EXPORT_SYMBOL_GPL(adis_init); diff --git a/drivers/iio/imu/kmx61.c b/drivers/iio/imu/kmx61.c index 61885e99d3fc14db2655e7ac955b1adb4dd7a618..89133315e6aaf07bf509916a857233e04275acc9 100644 --- a/drivers/iio/imu/kmx61.c +++ b/drivers/iio/imu/kmx61.c @@ -1392,7 +1392,7 @@ static int kmx61_probe(struct i2c_client *client, ret = iio_device_register(data->acc_indio_dev); if (ret < 0) { dev_err(&client->dev, "Failed to register acc iio device\n"); - goto err_buffer_cleanup_mag; + goto err_pm_cleanup; } ret = iio_device_register(data->mag_indio_dev); @@ -1405,6 +1405,9 @@ static int kmx61_probe(struct i2c_client *client, err_iio_unregister_acc: iio_device_unregister(data->acc_indio_dev); +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(&client->dev); + pm_runtime_disable(&client->dev); err_buffer_cleanup_mag: if (client->irq > 0) iio_triggered_buffer_cleanup(data->mag_indio_dev); diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c index 2ab1ac5a2412f9223ceaee8e099ffa2c9d7750c7..2c528425b03b4c0583c156a12cab7745dbae4d72 100644 --- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c +++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c @@ -1465,6 +1465,8 @@ st_lsm6dsx_set_odr(struct st_lsm6dsx_sensor *sensor, u32 req_odr) int err; switch (sensor->id) { + case ST_LSM6DSX_ID_GYRO: + break; case ST_LSM6DSX_ID_EXT0: case ST_LSM6DSX_ID_EXT1: case ST_LSM6DSX_ID_EXT2: @@ -1490,8 +1492,8 @@ st_lsm6dsx_set_odr(struct st_lsm6dsx_sensor *sensor, u32 req_odr) } break; } - default: - break; + default: /* should never occur */ + return -EINVAL; } if (req_odr > 0) { @@ -1556,8 +1558,12 @@ static int st_lsm6dsx_read_oneshot(struct st_lsm6dsx_sensor *sensor, if (err < 0) return err; + /* + * we need to wait for sensor settling time before + * reading data in order to avoid corrupted samples + */ delay = 1000000000 / sensor->odr; - usleep_range(delay, 2 * delay); + usleep_range(3 * delay, 4 * delay); err = st_lsm6dsx_read_locked(hw, addr, &data, sizeof(data)); if (err < 0) diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c index 583bb51f65a7525ce67f1b3b5f0c522bfbc54593..6bcc562d7857bba44eecc9636fedfe0f095cc60d 100644 --- a/drivers/iio/industrialio-trigger.c +++ b/drivers/iio/industrialio-trigger.c @@ -550,7 +550,6 @@ struct iio_trigger *viio_trigger_alloc(const char *fmt, va_list vargs) irq_modify_status(trig->subirq_base + i, IRQ_NOREQUEST | IRQ_NOAUTOEN, IRQ_NOPROBE); } - get_device(&trig->dev); return trig; diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c index 74ed2d88a3ed39ff10a5e1f22770fc6f17181df1..379236a806faceffc03c874fe92662f781d02741 100644 --- a/drivers/iio/light/ltr501.c +++ b/drivers/iio/light/ltr501.c @@ -1273,7 +1273,7 @@ static irqreturn_t ltr501_trigger_handler(int irq, void *p) ret = regmap_bulk_read(data->regmap, LTR501_ALS_DATA1, als_buf, sizeof(als_buf)); if (ret < 0) - return ret; + goto done; if (test_bit(0, indio_dev->active_scan_mask)) scan.channels[j++] = le16_to_cpu(als_buf[1]); if (test_bit(1, indio_dev->active_scan_mask)) diff --git a/drivers/iio/light/opt3001.c b/drivers/iio/light/opt3001.c index 2d48d61909a4de45c1e14c3b6aef2fc2159b560e..ff776259734adfee5138daedd83b8d2cc4cc0df4 100644 --- a/drivers/iio/light/opt3001.c +++ b/drivers/iio/light/opt3001.c @@ -276,6 +276,8 @@ static int opt3001_get_lux(struct opt3001 *opt, int *val, int *val2) ret = wait_event_timeout(opt->result_ready_queue, opt->result_ready, msecs_to_jiffies(OPT3001_RESULT_READY_LONG)); + if (ret == 0) + return -ETIMEDOUT; } else { /* Sleep for result ready time */ timeout = (opt->int_time == OPT3001_INT_TIME_SHORT) ? @@ -312,9 +314,7 @@ err: /* Disallow IRQ to access the device while lock is active */ opt->ok_to_ignore_lock = false; - if (ret == 0) - return -ETIMEDOUT; - else if (ret < 0) + if (ret < 0) return ret; if (opt->use_irq) { diff --git a/drivers/iio/light/stk3310.c b/drivers/iio/light/stk3310.c index a2827d03ab0fb4282e4a4bae22aea10be39abd72..6654573a887c95a435ab13c2398eb2ab7f279c0a 100644 --- a/drivers/iio/light/stk3310.c +++ b/drivers/iio/light/stk3310.c @@ -546,9 +546,8 @@ static irqreturn_t stk3310_irq_event_handler(int irq, void *private) mutex_lock(&data->lock); ret = regmap_field_read(data->reg_flag_nf, &dir); if (ret < 0) { - dev_err(&data->client->dev, "register read failed\n"); - mutex_unlock(&data->lock); - return ret; + dev_err(&data->client->dev, "register read failed: %d\n", ret); + goto out; } event = IIO_UNMOD_EVENT_CODE(IIO_PROXIMITY, 1, IIO_EV_TYPE_THRESH, @@ -560,6 +559,7 @@ static irqreturn_t stk3310_irq_event_handler(int irq, void *private) ret = regmap_field_write(data->reg_flag_psint, 0); if (ret < 0) dev_err(&data->client->dev, "failed to reset interrupts\n"); +out: mutex_unlock(&data->lock); return IRQ_HANDLED; diff --git a/drivers/iio/magnetometer/bmc150_magn.c b/drivers/iio/magnetometer/bmc150_magn.c index 8eacfaf584cfd0690fb4020a6ff74cca8bb490eb..620537d0104d4b756fcaa17f8e1c93bb390ca444 100644 --- a/drivers/iio/magnetometer/bmc150_magn.c +++ b/drivers/iio/magnetometer/bmc150_magn.c @@ -941,13 +941,14 @@ int bmc150_magn_probe(struct device *dev, struct regmap *regmap, ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(dev, "unable to register iio device\n"); - goto err_disable_runtime_pm; + goto err_pm_cleanup; } dev_dbg(dev, "Registered device %s\n", name); return 0; -err_disable_runtime_pm: +err_pm_cleanup: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); err_buffer_cleanup: iio_triggered_buffer_cleanup(indio_dev); diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c index 79de721e60159cdc6d7b1271e1abac2dd492364a..0fc38f17dbe049f5eb4ae8504b3ffea0440f097d 100644 --- a/drivers/iio/magnetometer/st_magn_core.c +++ b/drivers/iio/magnetometer/st_magn_core.c @@ -494,13 +494,9 @@ int st_magn_common_probe(struct iio_dev *indio_dev) indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &magn_info; - err = st_sensors_power_enable(indio_dev); - if (err) - return err; - err = st_sensors_verify_id(indio_dev); if (err < 0) - goto st_magn_power_off; + return err; mdata->num_data_channels = ST_MAGN_NUMBER_DATA_CHANNELS; indio_dev->channels = mdata->sensor_settings->ch; @@ -511,11 +507,11 @@ int st_magn_common_probe(struct iio_dev *indio_dev) err = st_sensors_init_sensor(indio_dev, NULL); if (err < 0) - goto st_magn_power_off; + return err; err = st_magn_allocate_ring(indio_dev); if (err < 0) - goto st_magn_power_off; + return err; if (mdata->irq > 0) { err = st_sensors_allocate_trigger(indio_dev, @@ -538,9 +534,6 @@ st_magn_device_register_error: st_sensors_deallocate_trigger(indio_dev); st_magn_probe_trigger_error: st_magn_deallocate_ring(indio_dev); -st_magn_power_off: - st_sensors_power_disable(indio_dev); - return err; } EXPORT_SYMBOL(st_magn_common_probe); @@ -549,8 +542,6 @@ void st_magn_common_remove(struct iio_dev *indio_dev) { struct st_sensor_data *mdata = iio_priv(indio_dev); - st_sensors_power_disable(indio_dev); - iio_device_unregister(indio_dev); if (mdata->irq > 0) st_sensors_deallocate_trigger(indio_dev); diff --git a/drivers/iio/magnetometer/st_magn_i2c.c b/drivers/iio/magnetometer/st_magn_i2c.c index c6bb4ce7759435be2af4454702259775d266a286..4b6a251dd44efd405c1ff854ec01edb1a1565505 100644 --- a/drivers/iio/magnetometer/st_magn_i2c.c +++ b/drivers/iio/magnetometer/st_magn_i2c.c @@ -78,18 +78,30 @@ static int st_magn_i2c_probe(struct i2c_client *client, if (err < 0) return err; + err = st_sensors_power_enable(indio_dev); + if (err) + return err; + err = st_magn_common_probe(indio_dev); if (err < 0) - return err; + goto st_magn_power_off; return 0; + +st_magn_power_off: + st_sensors_power_disable(indio_dev); + + return err; } static int st_magn_i2c_remove(struct i2c_client *client) { struct iio_dev *indio_dev = i2c_get_clientdata(client); + st_magn_common_remove(indio_dev); + st_sensors_power_disable(indio_dev); + return 0; } diff --git a/drivers/iio/magnetometer/st_magn_spi.c b/drivers/iio/magnetometer/st_magn_spi.c index 3d08d74c367da58d9db3fcebd90a101225a39a19..501eff32df783afd663dbba4b065c09c91536a59 100644 --- a/drivers/iio/magnetometer/st_magn_spi.c +++ b/drivers/iio/magnetometer/st_magn_spi.c @@ -72,18 +72,30 @@ static int st_magn_spi_probe(struct spi_device *spi) if (err < 0) return err; + err = st_sensors_power_enable(indio_dev); + if (err) + return err; + err = st_magn_common_probe(indio_dev); if (err < 0) - return err; + goto st_magn_power_off; return 0; + +st_magn_power_off: + st_sensors_power_disable(indio_dev); + + return err; } static int st_magn_spi_remove(struct spi_device *spi) { struct iio_dev *indio_dev = spi_get_drvdata(spi); + st_magn_common_remove(indio_dev); + st_sensors_power_disable(indio_dev); + return 0; } diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c index 789a2928504a729c73dd1f2d26a06756909cd7ef..7912b5a6839555ed8e7bca80e7776bb126f46699 100644 --- a/drivers/iio/pressure/st_pressure_core.c +++ b/drivers/iio/pressure/st_pressure_core.c @@ -689,13 +689,9 @@ int st_press_common_probe(struct iio_dev *indio_dev) indio_dev->modes = INDIO_DIRECT_MODE; indio_dev->info = &press_info; - err = st_sensors_power_enable(indio_dev); - if (err) - return err; - err = st_sensors_verify_id(indio_dev); if (err < 0) - goto st_press_power_off; + return err; /* * Skip timestamping channel while declaring available channels to @@ -718,11 +714,11 @@ int st_press_common_probe(struct iio_dev *indio_dev) err = st_sensors_init_sensor(indio_dev, pdata); if (err < 0) - goto st_press_power_off; + return err; err = st_press_allocate_ring(indio_dev); if (err < 0) - goto st_press_power_off; + return err; if (press_data->irq > 0) { err = st_sensors_allocate_trigger(indio_dev, @@ -745,9 +741,6 @@ st_press_device_register_error: st_sensors_deallocate_trigger(indio_dev); st_press_probe_trigger_error: st_press_deallocate_ring(indio_dev); -st_press_power_off: - st_sensors_power_disable(indio_dev); - return err; } EXPORT_SYMBOL(st_press_common_probe); @@ -756,8 +749,6 @@ void st_press_common_remove(struct iio_dev *indio_dev) { struct st_sensor_data *press_data = iio_priv(indio_dev); - st_sensors_power_disable(indio_dev); - iio_device_unregister(indio_dev); if (press_data->irq > 0) st_sensors_deallocate_trigger(indio_dev); diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c index 09c6903f99b872111296ff7f3c5bc88bc314149a..8c26ff61e56addfff6fa891ab8cce1a113d50a8e 100644 --- a/drivers/iio/pressure/st_pressure_i2c.c +++ b/drivers/iio/pressure/st_pressure_i2c.c @@ -98,16 +98,29 @@ static int st_press_i2c_probe(struct i2c_client *client, if (ret < 0) return ret; + ret = st_sensors_power_enable(indio_dev); + if (ret) + return ret; + ret = st_press_common_probe(indio_dev); if (ret < 0) - return ret; + goto st_press_power_off; return 0; + +st_press_power_off: + st_sensors_power_disable(indio_dev); + + return ret; } static int st_press_i2c_remove(struct i2c_client *client) { - st_press_common_remove(i2c_get_clientdata(client)); + struct iio_dev *indio_dev = i2c_get_clientdata(client); + + st_press_common_remove(indio_dev); + + st_sensors_power_disable(indio_dev); return 0; } diff --git a/drivers/iio/pressure/st_pressure_spi.c b/drivers/iio/pressure/st_pressure_spi.c index b5ee3ec2764ff0b4dd19ecb0ea2807d986ab63fa..8cf8cd3b4554a06e963b46be618ac74f34bdf743 100644 --- a/drivers/iio/pressure/st_pressure_spi.c +++ b/drivers/iio/pressure/st_pressure_spi.c @@ -82,16 +82,29 @@ static int st_press_spi_probe(struct spi_device *spi) if (err < 0) return err; + err = st_sensors_power_enable(indio_dev); + if (err) + return err; + err = st_press_common_probe(indio_dev); if (err < 0) - return err; + goto st_press_power_off; return 0; + +st_press_power_off: + st_sensors_power_disable(indio_dev); + + return err; } static int st_press_spi_remove(struct spi_device *spi) { - st_press_common_remove(spi_get_drvdata(spi)); + struct iio_dev *indio_dev = spi_get_drvdata(spi); + + st_press_common_remove(indio_dev); + + st_sensors_power_disable(indio_dev); return 0; } diff --git a/drivers/iio/trigger/stm32-timer-trigger.c b/drivers/iio/trigger/stm32-timer-trigger.c index 3aa9e8bba005fb00732ed65791fc45db49477b23..e38671b41df74834c81f7b397112a86c18fa31ea 100644 --- a/drivers/iio/trigger/stm32-timer-trigger.c +++ b/drivers/iio/trigger/stm32-timer-trigger.c @@ -912,6 +912,6 @@ static struct platform_driver stm32_timer_trigger_driver = { }; module_platform_driver(stm32_timer_trigger_driver); -MODULE_ALIAS("platform: stm32-timer-trigger"); +MODULE_ALIAS("platform:stm32-timer-trigger"); MODULE_DESCRIPTION("STMicroelectronics STM32 Timer Trigger driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 34b94e52539054be1b3f6b13654db3b935204f89..fbb0efbe25f84fee2f3e6defcb727fd48201c43e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -68,8 +68,8 @@ static const char * const cma_events[] = { [RDMA_CM_EVENT_TIMEWAIT_EXIT] = "timewait exit", }; -static void cma_set_mgid(struct rdma_id_private *id_priv, struct sockaddr *addr, - union ib_gid *mgid); +static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, + enum ib_gid_type gid_type); const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { @@ -775,6 +775,7 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) unsigned int p; u16 pkey, index; enum ib_port_state port_state; + int ret; int i; cma_dev = NULL; @@ -793,9 +794,14 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv) if (ib_get_cached_port_state(cur_dev->device, p, &port_state)) continue; - for (i = 0; !rdma_query_gid(cur_dev->device, - p, i, &gid); - i++) { + + for (i = 0; i < cur_dev->device->port_data[p].immutable.gid_tbl_len; + ++i) { + ret = rdma_query_gid(cur_dev->device, p, i, + &gid); + if (ret) + continue; + if (!memcmp(&gid, dgid, sizeof(gid))) { cma_dev = cur_dev; sgid = gid; @@ -1750,15 +1756,16 @@ static void cma_cancel_route(struct rdma_id_private *id_priv) } } -static void cma_cancel_listens(struct rdma_id_private *id_priv) +static void _cma_cancel_listens(struct rdma_id_private *id_priv) { struct rdma_id_private *dev_id_priv; + lockdep_assert_held(&lock); + /* * Remove from listen_any_list to prevent added devices from spawning * additional listen requests. */ - mutex_lock(&lock); list_del(&id_priv->list); while (!list_empty(&id_priv->listen_list)) { @@ -1772,6 +1779,12 @@ static void cma_cancel_listens(struct rdma_id_private *id_priv) rdma_destroy_id(&dev_id_priv->id); mutex_lock(&lock); } +} + +static void cma_cancel_listens(struct rdma_id_private *id_priv) +{ + mutex_lock(&lock); + _cma_cancel_listens(id_priv); mutex_unlock(&lock); } @@ -1814,6 +1827,8 @@ static void cma_release_port(struct rdma_id_private *id_priv) static void destroy_mc(struct rdma_id_private *id_priv, struct cma_multicast *mc) { + bool send_only = mc->join_state == BIT(SENDONLY_FULLMEMBER_JOIN); + if (rdma_cap_ib_mcast(id_priv->id.device, id_priv->id.port_num)) ib_sa_free_multicast(mc->sa_mc); @@ -1825,14 +1840,19 @@ static void destroy_mc(struct rdma_id_private *id_priv, if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (ndev) { + if (ndev && !send_only) { + enum ib_gid_type gid_type; union ib_gid mgid; - cma_set_mgid(id_priv, (struct sockaddr *)&mc->addr, - &mgid); + gid_type = id_priv->cma_dev->default_gid_type + [id_priv->id.port_num - + rdma_start_port( + id_priv->cma_dev->device)]; + cma_iboe_set_mgid((struct sockaddr *)&mc->addr, &mgid, + gid_type); cma_igmp_send(ndev, &mgid, false); - dev_put(ndev); } + dev_put(ndev); cancel_work_sync(&mc->iboe_join.work); } @@ -2577,7 +2597,7 @@ static int cma_listen_on_all(struct rdma_id_private *id_priv) return 0; err_listen: - list_del(&id_priv->list); + _cma_cancel_listens(id_priv); mutex_unlock(&lock); if (to_destroy) rdma_destroy_id(&to_destroy->id); @@ -3301,22 +3321,30 @@ err: static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, const struct sockaddr *dst_addr) { - if (!src_addr || !src_addr->sa_family) { - src_addr = (struct sockaddr *) &id->route.addr.src_addr; - src_addr->sa_family = dst_addr->sa_family; - if (IS_ENABLED(CONFIG_IPV6) && - dst_addr->sa_family == AF_INET6) { - struct sockaddr_in6 *src_addr6 = (struct sockaddr_in6 *) src_addr; - struct sockaddr_in6 *dst_addr6 = (struct sockaddr_in6 *) dst_addr; - src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; - if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) - id->route.addr.dev_addr.bound_dev_if = dst_addr6->sin6_scope_id; - } else if (dst_addr->sa_family == AF_IB) { - ((struct sockaddr_ib *) src_addr)->sib_pkey = - ((struct sockaddr_ib *) dst_addr)->sib_pkey; - } + struct sockaddr_storage zero_sock = {}; + + if (src_addr && src_addr->sa_family) + return rdma_bind_addr(id, src_addr); + + /* + * When the src_addr is not specified, automatically supply an any addr + */ + zero_sock.ss_family = dst_addr->sa_family; + if (IS_ENABLED(CONFIG_IPV6) && dst_addr->sa_family == AF_INET6) { + struct sockaddr_in6 *src_addr6 = + (struct sockaddr_in6 *)&zero_sock; + struct sockaddr_in6 *dst_addr6 = + (struct sockaddr_in6 *)dst_addr; + + src_addr6->sin6_scope_id = dst_addr6->sin6_scope_id; + if (ipv6_addr_type(&dst_addr6->sin6_addr) & IPV6_ADDR_LINKLOCAL) + id->route.addr.dev_addr.bound_dev_if = + dst_addr6->sin6_scope_id; + } else if (dst_addr->sa_family == AF_IB) { + ((struct sockaddr_ib *)&zero_sock)->sib_pkey = + ((struct sockaddr_ib *)dst_addr)->sib_pkey; } - return rdma_bind_addr(id, src_addr); + return rdma_bind_addr(id, (struct sockaddr *)&zero_sock); } /* @@ -3732,9 +3760,13 @@ int rdma_listen(struct rdma_cm_id *id, int backlog) int ret; if (!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, RDMA_CM_LISTEN)) { + struct sockaddr_in any_in = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), + }; + /* For a well behaved ULP state will be RDMA_CM_IDLE */ - id->route.addr.src_addr.ss_family = AF_INET; - ret = rdma_bind_addr(id, cma_src_addr(id_priv)); + ret = rdma_bind_addr(id, (struct sockaddr *)&any_in); if (ret) return ret; if (WARN_ON(!cma_comp_exch(id_priv, RDMA_CM_ADDR_BOUND, diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index 76b9c436edcd2f1ffaf7175d09133a26b0b93141..aa526c5ca0cf3789892cf82310e9708d810c1898 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2411,7 +2411,8 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, ++i) { ret = rdma_query_gid(device, port, i, &tmp_gid); if (ret) - return ret; + continue; + if (!memcmp(&tmp_gid, gid, sizeof *gid)) { *port_num = port; if (index) diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 8c930bf1df894b711ea0d43739642f6d5d6f5278..de88f472eaad2219d75d3a0e4fa26f6250ead7de 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -760,8 +760,9 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, /* Construct the family header first */ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); - memcpy(header->device_name, dev_name(&query->port->agent->device->dev), - LS_DEVICE_NAME_MAX); + strscpy_pad(header->device_name, + dev_name(&query->port->agent->device->dev), + LS_DEVICE_NAME_MAX); header->port_num = query->port->port_num; if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2cc785c1970b4cce98f669b10be85b9470290dba..d12018c4c86e968f01ee8240c7f870d8f5f40167 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -95,6 +95,7 @@ struct ucma_context { u64 uid; struct list_head list; + struct list_head mc_list; struct work_struct close_work; }; @@ -105,6 +106,7 @@ struct ucma_multicast { u64 uid; u8 join_state; + struct list_head list; struct sockaddr_storage addr; }; @@ -198,6 +200,7 @@ static struct ucma_context *ucma_alloc_ctx(struct ucma_file *file) INIT_WORK(&ctx->close_work, ucma_close_id); init_completion(&ctx->comp); + INIT_LIST_HEAD(&ctx->mc_list); /* So list_del() will work if we don't do ucma_finish_ctx() */ INIT_LIST_HEAD(&ctx->list); ctx->file = file; @@ -484,19 +487,19 @@ err1: static void ucma_cleanup_multicast(struct ucma_context *ctx) { - struct ucma_multicast *mc; - unsigned long index; + struct ucma_multicast *mc, *tmp; - xa_for_each(&multicast_table, index, mc) { - if (mc->ctx != ctx) - continue; + xa_lock(&multicast_table); + list_for_each_entry_safe(mc, tmp, &ctx->mc_list, list) { + list_del(&mc->list); /* * At this point mc->ctx->ref is 0 so the mc cannot leave the * lock on the reader and this is enough serialization */ - xa_erase(&multicast_table, index); + __xa_erase(&multicast_table, mc->id); kfree(mc); } + xa_unlock(&multicast_table); } static void ucma_cleanup_mc_events(struct ucma_multicast *mc) @@ -1469,12 +1472,16 @@ static ssize_t ucma_process_join(struct ucma_file *file, mc->uid = cmd->uid; memcpy(&mc->addr, addr, cmd->addr_size); - if (xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, + xa_lock(&multicast_table); + if (__xa_alloc(&multicast_table, &mc->id, NULL, xa_limit_32b, GFP_KERNEL)) { ret = -ENOMEM; goto err_free_mc; } + list_add_tail(&mc->list, &ctx->mc_list); + xa_unlock(&multicast_table); + mutex_lock(&ctx->mutex); ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, join_state, mc); @@ -1500,8 +1507,11 @@ err_leave_multicast: mutex_unlock(&ctx->mutex); ucma_cleanup_mc_events(mc); err_xa_erase: - xa_erase(&multicast_table, mc->id); + xa_lock(&multicast_table); + list_del(&mc->list); + __xa_erase(&multicast_table, mc->id); err_free_mc: + xa_unlock(&multicast_table); kfree(mc); err_put_ctx: ucma_put_ctx(ctx); @@ -1569,15 +1579,17 @@ static ssize_t ucma_leave_multicast(struct ucma_file *file, mc = ERR_PTR(-EINVAL); else if (!refcount_inc_not_zero(&mc->ctx->ref)) mc = ERR_PTR(-ENXIO); - else - __xa_erase(&multicast_table, mc->id); - xa_unlock(&multicast_table); if (IS_ERR(mc)) { + xa_unlock(&multicast_table); ret = PTR_ERR(mc); goto out; } + list_del(&mc->list); + __xa_erase(&multicast_table, mc->id); + xa_unlock(&multicast_table); + mutex_lock(&mc->ctx->mutex); rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); mutex_unlock(&mc->ctx->mutex); diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c index b8d715c68ca44e3c41bf87f1547384b40c82d4a4..11a0806469162640d32c094f55c1ae2d3913b20f 100644 --- a/drivers/infiniband/core/uverbs_marshall.c +++ b/drivers/infiniband/core/uverbs_marshall.c @@ -66,7 +66,7 @@ void ib_copy_ah_attr_to_user(struct ib_device *device, struct rdma_ah_attr *src = ah_attr; struct rdma_ah_attr conv_ah; - memset(&dst->grh.reserved, 0, sizeof(dst->grh.reserved)); + memset(&dst->grh, 0, sizeof(dst->grh)); if ((ah_attr->type == RDMA_AH_ATTR_TYPE_OPA) && (rdma_ah_get_dlid(ah_attr) > be16_to_cpu(IB_LID_PERMISSIVE)) && diff --git a/drivers/infiniband/core/uverbs_uapi.c b/drivers/infiniband/core/uverbs_uapi.c index 5addc8fae3f3bd9083d2e59fac0662f9f0296aeb..91dbcb3c252d67c8f8e6a8d4e2ddceffd26bfc9f 100644 --- a/drivers/infiniband/core/uverbs_uapi.c +++ b/drivers/infiniband/core/uverbs_uapi.c @@ -450,6 +450,9 @@ static int uapi_finalize(struct uverbs_api *uapi) uapi->num_write_ex = max_write_ex + 1; data = kmalloc_array(uapi->num_write + uapi->num_write_ex, sizeof(*uapi->write_methods), GFP_KERNEL); + if (!data) + return -ENOMEM; + for (i = 0; i != uapi->num_write + uapi->num_write_ex; i++) data[i] = &uapi->notsupp_method; uapi->write_methods = data; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 441952a5eca4a9d1e8d2d91eecc53016ed71e1e7..10d77f50f818bac2b053a44e839b7e1b56ba232d 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -3368,8 +3368,11 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp, struct ib_wc *wc, struct bnxt_qplib_cqe *cqe) { + struct bnxt_re_dev *rdev; + u16 vlan_id = 0; u8 nw_type; + rdev = qp->rdev; wc->opcode = IB_WC_RECV; wc->status = __rc_to_ib_wc_status(cqe->status); @@ -3381,9 +3384,12 @@ static void bnxt_re_process_res_ud_wc(struct bnxt_re_qp *qp, memcpy(wc->smac, cqe->smac, ETH_ALEN); wc->wc_flags |= IB_WC_WITH_SMAC; if (cqe->flags & CQ_RES_UD_FLAGS_META_FORMAT_VLAN) { - wc->vlan_id = (cqe->cfa_meta & 0xFFF); - if (wc->vlan_id < 0x1000) - wc->wc_flags |= IB_WC_WITH_VLAN; + vlan_id = (cqe->cfa_meta & 0xFFF); + } + /* Mark only if vlan_id is non zero */ + if (vlan_id && bnxt_re_check_if_vlan_valid(rdev, vlan_id)) { + wc->vlan_id = vlan_id; + wc->wc_flags |= IB_WC_WITH_VLAN; } nw_type = (cqe->flags & CQ_RES_UD_FLAGS_ROCE_IP_VER_MASK) >> CQ_RES_UD_FLAGS_ROCE_IP_VER_SFT; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index d4d4959c2434ce47e2416c3eb859a6c3b867be16..bd153aa7e9ab3e4fbab961c8cfe4c74812ebb04e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -707,12 +707,13 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, int rc = 0; RCFW_CMD_PREP(req, QUERY_SRQ, cmd_flags); - req.srq_cid = cpu_to_le32(srq->id); /* Configure the request */ sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); if (!sbuf) return -ENOMEM; + req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; + req.srq_cid = cpu_to_le32(srq->id); sb = sbuf->sb; rc = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void *)&resp, (void *)sbuf, 0); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index 441eb421e5e59a7d088611b52ad28c3bc44c8c94..5759027914b01a5badc44866f89ab0d43d51cfa4 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -614,8 +614,6 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res, if (!cmdq->cmdq_bitmap) goto fail; - cmdq->bmap_size = bmap_size; - /* Allocate one extra to hold the QP1 entries */ rcfw->qp_tbl_size = qp_tbl_sz + 1; rcfw->qp_tbl = kcalloc(rcfw->qp_tbl_size, sizeof(struct bnxt_qplib_qp_node), @@ -663,8 +661,8 @@ void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw) iounmap(cmdq->cmdq_mbox.reg.bar_reg); iounmap(creq->creq_db.reg.bar_reg); - indx = find_first_bit(cmdq->cmdq_bitmap, cmdq->bmap_size); - if (indx != cmdq->bmap_size) + indx = find_first_bit(cmdq->cmdq_bitmap, rcfw->cmdq_depth); + if (indx != rcfw->cmdq_depth) dev_err(&rcfw->pdev->dev, "disabling RCFW with pending cmd-bit %lx\n", indx); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h index 5f2f0a5a3560ff89902a04d3390f303c161e7e4a..6953f4e53dd2090e7b4e55e18b6572a8b9eb7971 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.h @@ -150,7 +150,6 @@ struct bnxt_qplib_cmdq_ctx { wait_queue_head_t waitq; unsigned long flags; unsigned long *cmdq_bitmap; - u32 bmap_size; u32 seq_num; }; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 861e19fdfeb469fff7d75143cfbc80738ae70ecf..12e5461581cb455be2d8f19c26e46568fcc6cd7f 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2469,6 +2469,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, memset(attr, 0, sizeof(*attr)); memset(init_attr, 0, sizeof(*init_attr)); attr->qp_state = to_ib_qp_state(qhp->attr.state); + attr->cur_qp_state = to_ib_qp_state(qhp->attr.state); init_attr->cap.max_send_wr = qhp->attr.sq_num_entries; init_attr->cap.max_recv_wr = qhp->attr.rq_num_entries; init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index c87b94ea2939743cd4dad351b0c787699b4b7f89..88476a1a601a4f8e6e701ba59c8a316db92da148 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -8456,6 +8456,8 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd) */ static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd) { + if (!rcd->rcvhdrq) + return; clear_recv_intr(rcd); if (check_packet_present(rcd)) force_recv_intr(rcd); diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index a40701a6e1b6485065514cf6ce9094b4513b46a6..808571855ed157fec8466bb3c5823d916a13ffae 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -1053,6 +1053,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) struct hfi1_packet packet; int skip_pkt = 0; + if (!rcd->rcvhdrq) + return RCV_PKT_OK; /* Control context will always use the slow path interrupt handler */ needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index b6e453e9ba23640e0f49bb3feacc87cee675d48e..fa2cd76747ff40e3c54e0645b4413be012f2a515 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -154,7 +154,6 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd, rcd->fast_handler = get_dma_rtail_setting(rcd) ? handle_receive_interrupt_dma_rtail : handle_receive_interrupt_nodma_rtail; - rcd->slow_handler = handle_receive_interrupt; hfi1_set_seq_cnt(rcd, 1); @@ -375,6 +374,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa, rcd->numa_id = numa; rcd->rcv_array_groups = dd->rcv_entries.ngroups; rcd->rhf_rcv_function_map = normal_rhf_rcv_functions; + rcd->slow_handler = handle_receive_interrupt; + rcd->do_interrupt = rcd->slow_handler; rcd->msix_intr = CCE_NUM_MSIX_VECTORS; mutex_init(&rcd->exp_mutex); @@ -915,18 +916,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) if (ret) goto done; - /* allocate dummy tail memory for all receive contexts */ - dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev, - sizeof(u64), - &dd->rcvhdrtail_dummy_dma, - GFP_KERNEL); - - if (!dd->rcvhdrtail_dummy_kvaddr) { - dd_dev_err(dd, "cannot allocate dummy tail memory\n"); - ret = -ENOMEM; - goto done; - } - /* dd->rcd can be NULL if early initialization failed */ for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) { /* @@ -939,8 +928,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) if (!rcd) continue; - rcd->do_interrupt = &handle_receive_interrupt; - lastfail = hfi1_create_rcvhdrq(dd, rcd); if (!lastfail) lastfail = hfi1_setup_eagerbufs(rcd); @@ -1161,7 +1148,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) rcd->egrbufs.rcvtids = NULL; for (e = 0; e < rcd->egrbufs.alloced; e++) { - if (rcd->egrbufs.buffers[e].dma) + if (rcd->egrbufs.buffers[e].addr) dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[e].len, rcd->egrbufs.buffers[e].addr, @@ -1242,6 +1229,11 @@ void hfi1_free_devdata(struct hfi1_devdata *dd) dd->tx_opstats = NULL; kfree(dd->comp_vect); dd->comp_vect = NULL; + if (dd->rcvhdrtail_dummy_kvaddr) + dma_free_coherent(&dd->pcidev->dev, sizeof(u64), + (void *)dd->rcvhdrtail_dummy_kvaddr, + dd->rcvhdrtail_dummy_dma); + dd->rcvhdrtail_dummy_kvaddr = NULL; sdma_clean(dd, dd->num_sdma); rvt_dealloc_device(&dd->verbs_dev.rdi); } @@ -1339,6 +1331,15 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, goto bail; } + /* allocate dummy tail memory for all receive contexts */ + dd->rcvhdrtail_dummy_kvaddr = + dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64), + &dd->rcvhdrtail_dummy_dma, GFP_KERNEL); + if (!dd->rcvhdrtail_dummy_kvaddr) { + ret = -ENOMEM; + goto bail; + } + atomic_set(&dd->ipoib_rsm_usr_num, 0); return dd; @@ -1546,13 +1547,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd) free_credit_return(dd); - if (dd->rcvhdrtail_dummy_kvaddr) { - dma_free_coherent(&dd->pcidev->dev, sizeof(u64), - (void *)dd->rcvhdrtail_dummy_kvaddr, - dd->rcvhdrtail_dummy_dma); - dd->rcvhdrtail_dummy_kvaddr = NULL; - } - /* * Free any resources still in use (usually just kernel contexts) * at unload; we do for ctxtcnt, because that's what we allocate. diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c index 9f71b9d706bd9efdf8b7b8c885788aabf155db0c..22299b0b7df0ed54c6d05e365e7d18c4c10a3fa1 100644 --- a/drivers/infiniband/hw/hfi1/ipoib_main.c +++ b/drivers/infiniband/hw/hfi1/ipoib_main.c @@ -185,12 +185,6 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev) free_percpu(priv->netstats); } -static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev) -{ - hfi1_ipoib_netdev_dtor(dev); - free_netdev(dev); -} - static void hfi1_ipoib_set_id(struct net_device *dev, int id) { struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev); @@ -227,24 +221,23 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device, priv->port_num = port_num; priv->netdev_ops = netdev->netdev_ops; - netdev->netdev_ops = &hfi1_ipoib_netdev_ops; - ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey); rc = hfi1_ipoib_txreq_init(priv); if (rc) { dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc); - hfi1_ipoib_free_rdma_netdev(netdev); return rc; } rc = hfi1_ipoib_rxq_init(netdev); if (rc) { dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc); - hfi1_ipoib_free_rdma_netdev(netdev); + hfi1_ipoib_txreq_deinit(priv); return rc; } + netdev->netdev_ops = &hfi1_ipoib_netdev_ops; + netdev->priv_destructor = hfi1_ipoib_netdev_dtor; netdev->needs_free_netdev = true; diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index ff864f6f02667bf33e9b697c5d2c9d6298755d3d..1cd8f80f097a77408a97feed8d6858619ffa08fd 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -920,6 +920,7 @@ void sc_disable(struct send_context *sc) { u64 reg; struct pio_buf *pbuf; + LIST_HEAD(wake_list); if (!sc) return; @@ -954,19 +955,21 @@ void sc_disable(struct send_context *sc) spin_unlock(&sc->release_lock); write_seqlock(&sc->waitlock); - while (!list_empty(&sc->piowait)) { + if (!list_empty(&sc->piowait)) + list_move(&sc->piowait, &wake_list); + write_sequnlock(&sc->waitlock); + while (!list_empty(&wake_list)) { struct iowait *wait; struct rvt_qp *qp; struct hfi1_qp_priv *priv; - wait = list_first_entry(&sc->piowait, struct iowait, list); + wait = list_first_entry(&wake_list, struct iowait, list); qp = iowait_to_qp(wait); priv = qp->priv; list_del_init(&priv->s_iowait.list); priv->s_iowait.lock = NULL; hfi1_qp_wakeup(qp, RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN); } - write_sequnlock(&sc->waitlock); spin_unlock_irq(&sc->alloc_lock); } diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index ac6f87137b6374cc74fc197165e50a880ad901c1..0b73dc7847aae5be1a6001b83275e74e8e634fb5 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -880,8 +880,8 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, if (current->nr_cpus_allowed != 1) goto out; - cpu_id = smp_processor_id(); rcu_read_lock(); + cpu_id = smp_processor_id(); rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id, sdma_rht_params); diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index a6b23dec1adcf677437bc9ef4b03a9c1689cee16..5b2baf89d1109fafc01d2b02ee09fc57c3d86ff6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -240,7 +240,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, end = start + buf_cnt; if (end > buf->npages) { dev_err(hr_dev->dev, - "Failed to check kmem bufs, end %d + %d total %d!\n", + "failed to check kmem bufs, end %d + %d total %u!\n", start, buf_cnt, buf->npages); return -EINVAL; } @@ -262,7 +262,7 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, u64 addr; if (page_shift < HNS_HW_PAGE_SHIFT) { - dev_err(hr_dev->dev, "Failed to check umem page shift %d!\n", + dev_err(hr_dev->dev, "failed to check umem page shift %u!\n", page_shift); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index da346129f6e9ee96cbc267ea23a716e5150402d0..8a6bded9c11cbb6be85a6e4075bd7db1c566266d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -50,29 +50,29 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) ret = hns_roce_mtr_find(hr_dev, &hr_cq->mtr, 0, mtts, ARRAY_SIZE(mtts), &dma_handle); - if (ret < 1) { - ibdev_err(ibdev, "Failed to find CQ mtr\n"); + if (!ret) { + ibdev_err(ibdev, "failed to find CQ mtr, ret = %d.\n", ret); return -EINVAL; } cq_table = &hr_dev->cq_table; ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ bitmap, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ bitmap, ret = %d.\n", ret); return ret; } /* Get CQC memory HEM(Hardware Entry Memory) table */ ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); if (ret) { - ibdev_err(ibdev, "Failed to get CQ(0x%lx) context, err %d\n", + ibdev_err(ibdev, "failed to get CQ(0x%lx) context, ret = %d.\n", hr_cq->cqn, ret); goto err_out; } ret = xa_err(xa_store(&cq_table->array, hr_cq->cqn, hr_cq, GFP_KERNEL)); if (ret) { - ibdev_err(ibdev, "Failed to xa_store CQ\n"); + ibdev_err(ibdev, "failed to xa_store CQ, ret = %d.\n", ret); goto err_put; } @@ -91,7 +91,7 @@ static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { ibdev_err(ibdev, - "Failed to send create cmd for CQ(0x%lx), err %d\n", + "failed to send create cmd for CQ(0x%lx), ret = %d.\n", hr_cq->cqn, ret); goto err_xa; } @@ -147,7 +147,7 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_attr buf_attr = {}; - int err; + int ret; buf_attr.page_shift = hr_dev->caps.cqe_buf_pg_sz + HNS_HW_PAGE_SHIFT; buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; @@ -155,13 +155,13 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, buf_attr.region_count = 1; buf_attr.fixed_page = true; - err = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, + ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); - if (err) - ibdev_err(ibdev, "Failed to alloc CQ mtr, err %d\n", err); + if (ret) + ibdev_err(ibdev, "failed to alloc CQ mtr, ret = %d.\n", ret); - return err; + return ret; } static void free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) @@ -252,13 +252,13 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, int ret; if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { - ibdev_err(ibdev, "Failed to check CQ count %d max=%d\n", + ibdev_err(ibdev, "failed to check CQ count %u, max = %u.\n", cq_entries, hr_dev->caps.max_cqes); return -EINVAL; } if (vector >= hr_dev->caps.num_comp_vectors) { - ibdev_err(ibdev, "Failed to check CQ vector=%d max=%d\n", + ibdev_err(ibdev, "failed to check CQ vector = %d, max = %d.\n", vector, hr_dev->caps.num_comp_vectors); return -EINVAL; } @@ -276,7 +276,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, ret = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "Failed to copy CQ udata, err %d\n", + ibdev_err(ibdev, "failed to copy CQ udata, ret = %d.\n", ret); return ret; } @@ -286,19 +286,20 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, ret = alloc_cq_buf(hr_dev, hr_cq, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ buf, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ buf, ret = %d.\n", ret); return ret; } ret = alloc_cq_db(hr_dev, hr_cq, udata, ucmd.db_addr, &resp); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ db, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc CQ db, ret = %d.\n", ret); goto err_cq_buf; } ret = alloc_cqc(hr_dev, hr_cq); if (ret) { - ibdev_err(ibdev, "Failed to alloc CQ context, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc CQ context, ret = %d.\n", ret); goto err_cq_db; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 66f9f036ef946597d6459076bac41f86533037a0..c880a8be7e3cddc60b6707783eda6a7a650b3464 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -184,7 +184,7 @@ static int get_hem_table_config(struct hns_roce_dev *hr_dev, mhop->hop_num = hr_dev->caps.srqc_hop_num; break; default: - dev_err(dev, "Table %d not support multi-hop addressing!\n", + dev_err(dev, "table %u not support multi-hop addressing!\n", type); return -EINVAL; } @@ -232,8 +232,8 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, mhop->l0_idx = table_idx; break; default: - dev_err(dev, "Table %d not support hop_num = %d!\n", - table->type, mhop->hop_num); + dev_err(dev, "table %u not support hop_num = %u!\n", + table->type, mhop->hop_num); return -EINVAL; } if (mhop->l0_idx >= mhop->ba_l0_num) @@ -438,13 +438,13 @@ static int calc_hem_config(struct hns_roce_dev *hr_dev, index->buf = l0_idx; break; default: - ibdev_err(ibdev, "Table %d not support mhop.hop_num = %d!\n", + ibdev_err(ibdev, "table %u not support mhop.hop_num = %u!\n", table->type, mhop->hop_num); return -EINVAL; } if (unlikely(index->buf >= table->num_hem)) { - ibdev_err(ibdev, "Table %d exceed hem limt idx %llu,max %lu!\n", + ibdev_err(ibdev, "table %u exceed hem limt idx %llu, max %lu!\n", table->type, index->buf, table->num_hem); return -EINVAL; } @@ -714,15 +714,15 @@ static void clear_mhop_hem(struct hns_roce_dev *hr_dev, step_idx = hop_num; if (hr_dev->hw->clear_hem(hr_dev, table, obj, step_idx)) - ibdev_warn(ibdev, "Clear hop%d HEM failed.\n", hop_num); + ibdev_warn(ibdev, "failed to clear hop%u HEM.\n", hop_num); if (index->inited & HEM_INDEX_L1) if (hr_dev->hw->clear_hem(hr_dev, table, obj, 1)) - ibdev_warn(ibdev, "Clear HEM step 1 failed.\n"); + ibdev_warn(ibdev, "failed to clear HEM step 1.\n"); if (index->inited & HEM_INDEX_L0) if (hr_dev->hw->clear_hem(hr_dev, table, obj, 0)) - ibdev_warn(ibdev, "Clear HEM step 0 failed.\n"); + ibdev_warn(ibdev, "failed to clear HEM step 0.\n"); } } @@ -1234,7 +1234,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, } if (offset < r->offset) { - dev_err(hr_dev->dev, "invalid offset %d,min %d!\n", + dev_err(hr_dev->dev, "invalid offset %d, min %u!\n", offset, r->offset); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ebcf26dec1e30fbca4ebd411ff368256c5f385d0..abe882ec1bae7ac20a70537c94e05f823a5d823e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -361,7 +362,7 @@ static int check_send_valid(struct hns_roce_dev *hr_dev, } else if (unlikely(hr_qp->state == IB_QPS_RESET || hr_qp->state == IB_QPS_INIT || hr_qp->state == IB_QPS_RTR)) { - ibdev_err(ibdev, "failed to post WQE, QP state %d!\n", + ibdev_err(ibdev, "failed to post WQE, QP state %hhu!\n", hr_qp->state); return -EINVAL; } else if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN)) { @@ -665,7 +666,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, wqe_idx = (qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1); if (unlikely(wr->num_sge > qp->sq.max_gs)) { - ibdev_err(ibdev, "num_sge=%d > qp->sq.max_gs=%d\n", + ibdev_err(ibdev, "num_sge = %d > qp->sq.max_gs = %u.\n", wr->num_sge, qp->sq.max_gs); ret = -EINVAL; *bad_wr = wr; @@ -750,7 +751,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { - ibdev_err(ibdev, "rq:num_sge=%d >= qp->sq.max_gs=%d\n", + ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", wr->num_sge, hr_qp->rq.max_gs); ret = -EINVAL; *bad_wr = wr; @@ -964,9 +965,14 @@ static int hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, unsigned long instance_stage, unsigned long reset_stage) { +#define HW_RESET_TIMEOUT_US 1000000 +#define HW_RESET_SLEEP_US 1000 + struct hns_roce_v2_priv *priv = hr_dev->priv; struct hnae3_handle *handle = priv->handle; const struct hnae3_ae_ops *ops = handle->ae_algo->ops; + unsigned long val; + int ret; /* When hardware reset is detected, we should stop sending mailbox&cmq& * doorbell to hardware. If now in .init_instance() function, we should @@ -978,7 +984,11 @@ static int hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev, * again. */ hr_dev->dis_db = true; - if (!ops->get_hw_reset_stat(handle)) + + ret = read_poll_timeout(ops->ae_dev_reset_cnt, val, + val > hr_dev->reset_cnt, HW_RESET_SLEEP_US, + HW_RESET_TIMEOUT_US, false, handle); + if (!ret) hr_dev->is_reset = true; if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT || @@ -1920,8 +1930,8 @@ static void calc_pg_sz(int obj_num, int obj_size, int hop_num, int ctx_bt_num, obj_per_chunk = ctx_bt_num * obj_per_chunk_default; break; default: - pr_err("Table %d not support hop_num = %d!\n", hem_type, - hop_num); + pr_err("table %u not support hop_num = %u!\n", hem_type, + hop_num); return; } @@ -3562,7 +3572,7 @@ static int get_op_for_set_hem(struct hns_roce_dev *hr_dev, u32 type, break; default: dev_warn(hr_dev->dev, - "Table %d not to be written by mailbox!\n", type); + "table %u not to be written by mailbox!\n", type); return -EINVAL; } @@ -3681,7 +3691,7 @@ static int hns_roce_v2_clear_hem(struct hns_roce_dev *hr_dev, op = HNS_ROCE_CMD_DESTROY_SRQC_BT0; break; default: - dev_warn(dev, "Table %d not to be destroyed by mailbox!\n", + dev_warn(dev, "table %u not to be destroyed by mailbox!\n", table->type); return 0; } @@ -4318,7 +4328,7 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, ret = config_qp_sq_buf(hr_dev, hr_qp, context, qpc_mask); if (ret) { - ibdev_err(ibdev, "failed to config sq buf, ret %d\n", ret); + ibdev_err(ibdev, "failed to config sq buf, ret = %d.\n", ret); return ret; } @@ -4804,7 +4814,7 @@ static int hns_roce_v2_modify_qp(struct ib_qp *ibqp, /* SW pass context to HW */ ret = hns_roce_v2_qp_modify(hr_dev, context, qpc_mask, hr_qp); if (ret) { - ibdev_err(ibdev, "failed to modify QP, ret = %d\n", ret); + ibdev_err(ibdev, "failed to modify QP, ret = %d.\n", ret); goto out; } @@ -4897,7 +4907,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, ret = hns_roce_v2_query_qpc(hr_dev, hr_qp, &context); if (ret) { - ibdev_err(ibdev, "failed to query QPC, ret = %d\n", ret); + ibdev_err(ibdev, "failed to query QPC, ret = %d.\n", ret); ret = -EINVAL; goto out; } @@ -5018,7 +5028,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hr_qp->state, IB_QPS_RESET); if (ret) ibdev_err(ibdev, - "failed to modify QP to RST, ret = %d\n", + "failed to modify QP to RST, ret = %d.\n", ret); } @@ -5057,7 +5067,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) ibdev_err(&hr_dev->ib_dev, - "failed to destroy QP 0x%06lx, ret = %d\n", + "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", hr_qp->qpn, ret); hns_roce_qp_destroy(hr_dev, hr_qp, udata); @@ -5080,7 +5090,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_RESET_SCCC, false); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d\n", ret); + ibdev_err(ibdev, "failed to reset SCC ctx, ret = %d.\n", ret); goto out; } @@ -5090,7 +5100,7 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev, clr->qpn = cpu_to_le32(hr_qp->qpn); ret = hns_roce_cmq_send(hr_dev, &desc, 1); if (ret) { - ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d\n", ret); + ibdev_err(ibdev, "failed to clear SCC ctx, ret = %d.\n", ret); goto out; } @@ -5339,7 +5349,7 @@ static int hns_roce_v2_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) ibdev_err(&hr_dev->ib_dev, - "failed to process cmd when modifying CQ, ret = %d\n", + "failed to process cmd when modifying CQ, ret = %d.\n", ret); return ret; @@ -6342,10 +6352,8 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) if (!hr_dev) return 0; - hr_dev->is_reset = true; hr_dev->active = false; hr_dev->dis_db = true; - hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN; return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index ba65823a5c0bbf4497fc283a4328b0cd2de215cd..1e8b3e4ef1b179b6bfb8bba9f9ef318456092ddb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -279,6 +279,9 @@ static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device, static int hns_roce_query_pkey(struct ib_device *ib_dev, u8 port, u16 index, u16 *pkey) { + if (index > 0) + return -EINVAL; + *pkey = PKEY_ID; return 0; @@ -356,7 +359,7 @@ static int hns_roce_mmap(struct ib_ucontext *context, return rdma_user_mmap_io(context, vma, to_hr_ucontext(context)->uar.pfn, PAGE_SIZE, - pgprot_noncached(vma->vm_page_prot), + pgprot_device(vma->vm_page_prot), NULL); /* vm_pgoff: 1 -- TPTR */ diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 7f81a695e9af93a053433d4fd6acfeaeb03789ac..027ec8413ac25ccba513c4fc9cd6e25ff49e0abc 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -185,14 +185,14 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, else ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr); if (ret) { - dev_err(dev, "Write mtpt fail!\n"); + dev_err(dev, "failed to write mtpt, ret = %d.\n", ret); goto err_page; } ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx & (hr_dev->caps.num_mtpts - 1)); if (ret) { - dev_err(dev, "CREATE_MPT failed (%d)\n", ret); + dev_err(dev, "failed to create mpt, ret = %d.\n", ret); goto err_page; } @@ -495,7 +495,7 @@ int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page); if (ret < 1) { - ibdev_err(ibdev, "failed to store sg pages %d %d, cnt = %d.\n", + ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n", mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret); goto err_page_list; } @@ -862,7 +862,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, if (r->offset + r->count > page_cnt) { err = -EINVAL; ibdev_err(ibdev, - "Failed to check mtr%d end %d + %d, max %d\n", + "failed to check mtr%u end %u + %u, max %u.\n", i, r->offset, r->count, page_cnt); return err; } @@ -870,7 +870,7 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); if (err) { ibdev_err(ibdev, - "Failed to map mtr%d offset %d, err %d\n", + "failed to map mtr%u offset %u, ret = %d.\n", i, r->offset, err); return err; } diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index f78fa1d3d8075f5ce9af28ac8177b48e3581e632..012a769d6a6a8f26c67906376b1072fd036f82b4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -65,7 +65,7 @@ int hns_roce_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); if (ret) { - ibdev_err(ib_dev, "failed to alloc pd, ret = %d\n", ret); + ibdev_err(ib_dev, "failed to alloc pd, ret = %d.\n", ret); return ret; } diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 7ce9ad8aee1ec84959f7c07b00a3f8da445c08fa..291e06d631505578922dbd1e6221a5f06c9e5940 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -452,12 +452,12 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, /* Sanity check SQ size before proceeding */ if (ucmd->log_sq_stride > max_sq_stride || ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ stride size\n"); + ibdev_err(&hr_dev->ib_dev, "failed to check SQ stride size.\n"); return -EINVAL; } if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { - ibdev_err(&hr_dev->ib_dev, "Failed to check SQ SGE size %d\n", + ibdev_err(&hr_dev->ib_dev, "failed to check SQ SGE size %u.\n", cap->max_send_sge); return -EINVAL; } @@ -563,7 +563,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); if (cnt > hr_dev->caps.max_wqes) { - ibdev_err(ibdev, "failed to check WQE num, WQE num = %d.\n", + ibdev_err(ibdev, "failed to check WQE num, WQE num = %u.\n", cnt); return -EINVAL; } @@ -736,7 +736,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, &hr_qp->sdb); if (ret) { ibdev_err(ibdev, - "Failed to map user SQ doorbell\n"); + "failed to map user SQ doorbell, ret = %d.\n", + ret); goto err_out; } hr_qp->en_flags |= HNS_ROCE_QP_CAP_SQ_RECORD_DB; @@ -747,7 +748,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, &hr_qp->rdb); if (ret) { ibdev_err(ibdev, - "Failed to map user RQ doorbell\n"); + "failed to map user RQ doorbell, ret = %d.\n", + ret); goto err_sdb; } hr_qp->en_flags |= HNS_ROCE_QP_CAP_RQ_RECORD_DB; @@ -763,7 +765,8 @@ static int alloc_qp_db(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ret = hns_roce_alloc_db(hr_dev, &hr_qp->rdb, 0); if (ret) { ibdev_err(ibdev, - "Failed to alloc kernel RQ doorbell\n"); + "failed to alloc kernel RQ doorbell, ret = %d.\n", + ret); goto err_out; } *hr_qp->rdb.db_record = 0; @@ -806,14 +809,14 @@ static int alloc_kernel_wrid(struct hns_roce_dev *hr_dev, sq_wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(sq_wrid)) { - ibdev_err(ibdev, "Failed to alloc SQ wrid\n"); + ibdev_err(ibdev, "failed to alloc SQ wrid.\n"); return -ENOMEM; } if (hr_qp->rq.wqe_cnt) { rq_wrid = kcalloc(hr_qp->rq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(rq_wrid)) { - ibdev_err(ibdev, "Failed to alloc RQ wrid\n"); + ibdev_err(ibdev, "failed to alloc RQ wrid.\n"); ret = -ENOMEM; goto err_sq; } @@ -873,7 +876,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); if (ret) - ibdev_err(ibdev, "Failed to set user SQ size\n"); + ibdev_err(ibdev, + "failed to set user SQ size, ret = %d.\n", + ret); } else { if (init_attr->create_flags & IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { @@ -888,7 +893,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) - ibdev_err(ibdev, "Failed to set kernel SQ size\n"); + ibdev_err(ibdev, + "failed to set kernel SQ size, ret = %d.\n", + ret); } return ret; @@ -914,45 +921,48 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { - ibdev_err(ibdev, "Failed to set QP param\n"); + ibdev_err(ibdev, "failed to set QP param, ret = %d.\n", ret); return ret; } if (!udata) { ret = alloc_kernel_wrid(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc wrid\n"); + ibdev_err(ibdev, "failed to alloc wrid, ret = %d.\n", + ret); return ret; } } ret = alloc_qp_db(hr_dev, hr_qp, init_attr, udata, &ucmd, &resp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP doorbell\n"); + ibdev_err(ibdev, "failed to alloc QP doorbell, ret = %d.\n", + ret); goto err_wrid; } ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP buffer\n"); + ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret); goto err_db; } ret = alloc_qpn(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QPN\n"); + ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret); goto err_buf; } ret = alloc_qpc(hr_dev, hr_qp); if (ret) { - ibdev_err(ibdev, "Failed to alloc QP context\n"); + ibdev_err(ibdev, "failed to alloc QP context, ret = %d.\n", + ret); goto err_qpn; } ret = hns_roce_qp_store(hr_dev, hr_qp, init_attr); if (ret) { - ibdev_err(ibdev, "Failed to store QP\n"); + ibdev_err(ibdev, "failed to store QP, ret = %d.\n", ret); goto err_qpc; } @@ -1098,9 +1108,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, if ((attr_mask & IB_QP_PORT) && (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) { - ibdev_err(&hr_dev->ib_dev, - "attr port_num invalid.attr->port_num=%d\n", - attr->port_num); + ibdev_err(&hr_dev->ib_dev, "invalid attr, port_num = %u.\n", + attr->port_num); return -EINVAL; } @@ -1108,8 +1117,8 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) { ibdev_err(&hr_dev->ib_dev, - "attr pkey_index invalid.attr->pkey_index=%d\n", - attr->pkey_index); + "invalid attr, pkey_index = %u.\n", + attr->pkey_index); return -EINVAL; } } @@ -1117,16 +1126,16 @@ static int hns_roce_check_qp_attr(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) { ibdev_err(&hr_dev->ib_dev, - "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n", - attr->max_rd_atomic); + "invalid attr, max_rd_atomic = %u.\n", + attr->max_rd_atomic); return -EINVAL; } if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) { ibdev_err(&hr_dev->ib_dev, - "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n", - attr->max_dest_rd_atomic); + "invalid attr, max_dest_rd_atomic = %u.\n", + attr->max_dest_rd_atomic); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 75d74f4bb52c94bbe00e32ce02d869f0e60b25e8..08df97e0a6654eea6a2aaa10b1123f514d775fae 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -93,7 +93,8 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe, ARRAY_SIZE(mtts_wqe), &dma_handle_wqe); if (ret < 1) { - ibdev_err(ibdev, "Failed to find mtr for SRQ WQE\n"); + ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n", + ret); return -ENOBUFS; } @@ -101,32 +102,34 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx, ARRAY_SIZE(mtts_idx), &dma_handle_idx); if (ret < 1) { - ibdev_err(ibdev, "Failed to find mtr for SRQ idx\n"); + ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n", + ret); return -ENOBUFS; } ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ number, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ number, ret = %d.\n", ret); return -ENOMEM; } ret = hns_roce_table_get(hr_dev, &srq_table->table, srq->srqn); if (ret) { - ibdev_err(ibdev, "Failed to get SRQC table, err %d\n", ret); + ibdev_err(ibdev, "failed to get SRQC table, ret = %d.\n", ret); goto err_out; } ret = xa_err(xa_store(&srq_table->xa, srq->srqn, srq, GFP_KERNEL)); if (ret) { - ibdev_err(ibdev, "Failed to store SRQC, err %d\n", ret); + ibdev_err(ibdev, "failed to store SRQC, ret = %d.\n", ret); goto err_put; } mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); if (IS_ERR_OR_NULL(mailbox)) { ret = -ENOMEM; - ibdev_err(ibdev, "Failed to alloc mailbox for SRQC\n"); + ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n"); goto err_xa; } @@ -137,7 +140,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn); hns_roce_free_cmd_mailbox(hr_dev, mailbox); if (ret) { - ibdev_err(ibdev, "Failed to config SRQC, err %d\n", ret); + ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret); goto err_xa; } @@ -198,7 +201,8 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hr_dev->caps.srqwqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) - ibdev_err(ibdev, "Failed to alloc SRQ buf mtr, err %d\n", err); + ibdev_err(ibdev, + "failed to alloc SRQ buf mtr, ret = %d.\n", err); return err; } @@ -229,14 +233,15 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT, udata, addr); if (err) { - ibdev_err(ibdev, "Failed to alloc SRQ idx mtr, err %d\n", err); + ibdev_err(ibdev, + "failed to alloc SRQ idx mtr, ret = %d.\n", err); return err; } if (!udata) { idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL); if (!idx_que->bitmap) { - ibdev_err(ibdev, "Failed to alloc SRQ idx bitmap\n"); + ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n"); err = -ENOMEM; goto err_idx_mtr; } @@ -272,7 +277,7 @@ static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) static void free_srq_wrid(struct hns_roce_srq *srq) { - kfree(srq->wrid); + kvfree(srq->wrid); srq->wrid = NULL; } @@ -303,7 +308,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = ib_copy_from_udata(&ucmd, udata, min(udata->inlen, sizeof(ucmd))); if (ret) { - ibdev_err(ibdev, "Failed to copy SRQ udata, err %d\n", + ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n", ret); return ret; } @@ -311,20 +316,21 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ buffer, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ buffer, ret = %d.\n", ret); return ret; } ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ idx, err %d\n", ret); + ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret); goto err_buf_alloc; } if (!udata) { ret = alloc_srq_wrid(hr_dev, srq); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ wrid, err %d\n", + ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n", ret); goto err_idx_alloc; } @@ -336,7 +342,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0); if (ret) { - ibdev_err(ibdev, "Failed to alloc SRQ context, err %d\n", ret); + ibdev_err(ibdev, + "failed to alloc SRQ context, ret = %d.\n", ret); goto err_wrid_alloc; } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 7b11aff8a5ea7cb0d37f1d09149468b10b4882d6..05c7200751e50a834ac948a9c690b0a9b7cf68f5 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -3273,7 +3273,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: ew = kmalloc(sizeof *ew, GFP_ATOMIC); if (!ew) - break; + return; INIT_WORK(&ew->work, handle_port_mgmt_change_event); memcpy(&ew->ib_eqe, eqe, sizeof *eqe); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 6bc0818f4b2c6f6b8ebecdc85b3bddd8348f934b..c6a815a705fef9e42319e9e1d8da617c90a2a36f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1099,8 +1099,10 @@ static int create_qp_common(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) qp->flags |= MLX4_IB_QP_NETIF; - else + else { + err = -EINVAL; goto err; + } } err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 011477356a1de52ddd461dfc90b7b8cd8ade98a5..7a2bec0ac005547545e4d49e489abb20a65bc966 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -4216,6 +4216,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, mtu, attr->path_mtu); MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index); MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit); + if (attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) + MLX5_SET(dctc, dctc, eth_prio, attr->ah_attr.sl & 0x7); err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in, MLX5_ST_SZ_BYTES(create_dct_in), out, diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index cdfb7732dff3e6134a06f69a28c0b3ac1d32690a..eeb87f31cd2523f71bf5f141fa7f7c5d8ab9a909 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1918,6 +1918,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, /* db offset was calculated in copy_qp_uresp, now set in the user q */ if (qedr_qp_has_sq(qp)) { qp->usq.db_addr = ctx->dpi_addr + uresp.sq_db_offset; + qp->sq.max_wr = attrs->cap.max_send_wr; rc = qedr_db_recovery_add(dev, qp->usq.db_addr, &qp->usq.db_rec_data->db_data, DB_REC_WIDTH_32B, @@ -1928,6 +1929,7 @@ static int qedr_create_user_qp(struct qedr_dev *dev, if (qedr_qp_has_rq(qp)) { qp->urq.db_addr = ctx->dpi_addr + uresp.rq_db_offset; + qp->rq.max_wr = attrs->cap.max_recv_wr; rc = qedr_db_recovery_add(dev, qp->urq.db_addr, &qp->urq.db_rec_data->db_data, DB_REC_WIDTH_32B, @@ -2744,15 +2746,18 @@ int qedr_query_qp(struct ib_qp *ibqp, int rc = 0; memset(¶ms, 0, sizeof(params)); - - rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms); - if (rc) - goto err; - memset(qp_attr, 0, sizeof(*qp_attr)); memset(qp_init_attr, 0, sizeof(*qp_init_attr)); - qp_attr->qp_state = qedr_get_ibqp_state(params.state); + if (qp->qp_type != IB_QPT_GSI) { + rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms); + if (rc) + goto err; + qp_attr->qp_state = qedr_get_ibqp_state(params.state); + } else { + qp_attr->qp_state = qedr_get_ibqp_state(QED_ROCE_QP_STATE_RTS); + } + qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state); qp_attr->path_mtu = ib_mtu_int_to_enum(params.mtu); qp_attr->path_mig_state = IB_MIG_MIGRATED; diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index a67599b5a550afd794f1e54c8f02cefdb3c8e35c..bf2f30d67949dc386404a632c1ec28834d7269ff 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -602,7 +602,7 @@ done: /* * How many pages in this iovec element? */ -static int qib_user_sdma_num_pages(const struct iovec *iov) +static size_t qib_user_sdma_num_pages(const struct iovec *iov) { const unsigned long addr = (unsigned long) iov->iov_base; const unsigned long len = iov->iov_len; @@ -658,7 +658,7 @@ static void qib_user_sdma_free_pkt_frag(struct device *dev, static int qib_user_sdma_pin_pages(const struct qib_devdata *dd, struct qib_user_sdma_queue *pq, struct qib_user_sdma_pkt *pkt, - unsigned long addr, int tlen, int npages) + unsigned long addr, int tlen, size_t npages) { struct page *pages[8]; int i, j; @@ -722,7 +722,7 @@ static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd, unsigned long idx; for (idx = 0; idx < niov; idx++) { - const int npages = qib_user_sdma_num_pages(iov + idx); + const size_t npages = qib_user_sdma_num_pages(iov + idx); const unsigned long addr = (unsigned long) iov[idx].iov_base; ret = qib_user_sdma_pin_pages(dd, pq, pkt, addr, @@ -824,8 +824,8 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, unsigned pktnw; unsigned pktnwc; int nfrags = 0; - int npages = 0; - int bytes_togo = 0; + size_t npages = 0; + size_t bytes_togo = 0; int tiddma = 0; int cfur; @@ -885,7 +885,11 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, npages += qib_user_sdma_num_pages(&iov[idx]); - bytes_togo += slen; + if (check_add_overflow(bytes_togo, slen, &bytes_togo) || + bytes_togo > type_max(typeof(pkt->bytes_togo))) { + ret = -EINVAL; + goto free_pbc; + } pktnwc += slen >> 2; idx++; nfrags++; @@ -904,8 +908,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, } if (frag_size) { - int tidsmsize, n; - size_t pktsize; + size_t tidsmsize, n, pktsize, sz, addrlimit; n = npages*((2*PAGE_SIZE/frag_size)+1); pktsize = struct_size(pkt, addr, n); @@ -923,14 +926,24 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, else tidsmsize = 0; - pkt = kmalloc(pktsize+tidsmsize, GFP_KERNEL); + if (check_add_overflow(pktsize, tidsmsize, &sz)) { + ret = -EINVAL; + goto free_pbc; + } + pkt = kmalloc(sz, GFP_KERNEL); if (!pkt) { ret = -ENOMEM; goto free_pbc; } pkt->largepkt = 1; pkt->frag_size = frag_size; - pkt->addrlimit = n + ARRAY_SIZE(pkt->addr); + if (check_add_overflow(n, ARRAY_SIZE(pkt->addr), + &addrlimit) || + addrlimit > type_max(typeof(pkt->addrlimit))) { + ret = -EINVAL; + goto free_pkt; + } + pkt->addrlimit = addrlimit; if (tiddma) { char *tidsm = (char *)pkt + pktsize; diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index ee48befc8978619b477cfa7325d66be1f622773f..09f0dbf941c067d3431ef8deec1f1a86c6a96de0 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -3124,6 +3124,8 @@ do_write: case IB_WR_ATOMIC_FETCH_AND_ADD: if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) goto inv_err; + if (unlikely(wqe->atomic_wr.remote_addr & (sizeof(u64) - 1))) + goto inv_err; if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64), wqe->atomic_wr.remote_addr, wqe->atomic_wr.rkey, diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index 0cb4b01fd9101e235bdb39052e5dc17c7b1ecd83..66ffb516bdaf0c1625b50d15d8686b0e9d9527c7 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -110,7 +110,7 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { } }, [IB_OPCODE_RC_SEND_MIDDLE] = { - .name = "IB_OPCODE_RC_SEND_MIDDLE]", + .name = "IB_OPCODE_RC_SEND_MIDDLE", .mask = RXE_PAYLOAD_MASK | RXE_REQ_MASK | RXE_SEND_MASK | RXE_MIDDLE_MASK, .length = RXE_BTH_BYTES, diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 25ab50d9b7c283f9e928ed47eaffab7bbf7ab41e..f9fb56ec6dfda6f679b18450a9cd2a172cda64cb 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -108,7 +108,7 @@ enum rxe_device_param { /* default/initial rxe port parameters */ enum rxe_port_param { RXE_PORT_GID_TBL_LEN = 1024, - RXE_PORT_PORT_CAP_FLAGS = RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP, + RXE_PORT_PORT_CAP_FLAGS = IB_PORT_CM_SUP, RXE_PORT_MAX_MSG_SZ = 0x800000, RXE_PORT_BAD_PKEY_CNTR = 0, RXE_PORT_QKEY_VIOL_CNTR = 0, diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 368959ae9a8cc382bb0bf89f66bce731ddbf3f24..df03d84c6868ace3d5fe68c47e62cdd33e8b44d2 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -644,14 +644,9 @@ static inline struct siw_sqe *orq_get_current(struct siw_qp *qp) return &qp->orq[qp->orq_get % qp->attrs.orq_size]; } -static inline struct siw_sqe *orq_get_tail(struct siw_qp *qp) -{ - return &qp->orq[qp->orq_put % qp->attrs.orq_size]; -} - static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) { - struct siw_sqe *orq_e = orq_get_tail(qp); + struct siw_sqe *orq_e = &qp->orq[qp->orq_put % qp->attrs.orq_size]; if (READ_ONCE(orq_e->flags) == 0) return orq_e; diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 60116f20653c776c3de38cbb3c954d9bdffbb112..875ea6f1b04a29034b6b41f8bfedaefc01af28c6 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -1153,11 +1153,12 @@ static int siw_check_tx_fence(struct siw_qp *qp) spin_lock_irqsave(&qp->orq_lock, flags); - rreq = orq_get_current(qp); - /* free current orq entry */ + rreq = orq_get_current(qp); WRITE_ONCE(rreq->flags, 0); + qp->orq_get++; + if (qp->tx_ctx.orq_fence) { if (unlikely(tx_waiting->wr_status != SIW_WR_QUEUED)) { pr_warn("siw: [QP %u]: fence resume: bad status %d\n", @@ -1165,10 +1166,12 @@ static int siw_check_tx_fence(struct siw_qp *qp) rv = -EPROTO; goto out; } - /* resume SQ processing */ + /* resume SQ processing, if possible */ if (tx_waiting->sqe.opcode == SIW_OP_READ || tx_waiting->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - rreq = orq_get_tail(qp); + + /* SQ processing was stopped because of a full ORQ */ + rreq = orq_get_free(qp); if (unlikely(!rreq)) { pr_warn("siw: [QP %u]: no ORQE\n", qp_id(qp)); rv = -EPROTO; @@ -1181,15 +1184,14 @@ static int siw_check_tx_fence(struct siw_qp *qp) resume_tx = 1; } else if (siw_orq_empty(qp)) { + /* + * SQ processing was stopped by fenced work request. + * Resume since all previous Read's are now completed. + */ qp->tx_ctx.orq_fence = 0; resume_tx = 1; - } else { - pr_warn("siw: [QP %u]: fence resume: orq idx: %d:%d\n", - qp_id(qp), qp->orq_get, qp->orq_put); - rv = -EPROTO; } } - qp->orq_get++; out: spin_unlock_irqrestore(&qp->orq_lock, flags); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 46fad202a380e6043a0d94066827ccf099e953a3..13634eda833de3d2729fba2b9ada55c721276da9 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1328,6 +1328,12 @@ out_err: static void free_permits(struct rtrs_clt *clt) { + if (clt->permits_map) { + size_t sz = clt->queue_depth; + + wait_event(clt->permits_wait, + find_first_bit(clt->permits_map, sz) >= sz); + } kfree(clt->permits_map); clt->permits_map = NULL; kfree(clt->permits); @@ -2540,6 +2546,8 @@ static void rtrs_clt_dev_release(struct device *dev) { struct rtrs_clt *clt = container_of(dev, struct rtrs_clt, dev); + mutex_destroy(&clt->paths_ev_mutex); + mutex_destroy(&clt->paths_mutex); kfree(clt); } @@ -2571,6 +2579,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, return ERR_PTR(-ENOMEM); } + clt->dev.class = rtrs_clt_dev_class; + clt->dev.release = rtrs_clt_dev_release; uuid_gen(&clt->paths_uuid); INIT_LIST_HEAD_RCU(&clt->paths_list); clt->paths_num = paths_num; @@ -2588,64 +2598,51 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, init_waitqueue_head(&clt->permits_wait); mutex_init(&clt->paths_ev_mutex); mutex_init(&clt->paths_mutex); + device_initialize(&clt->dev); - clt->dev.class = rtrs_clt_dev_class; - clt->dev.release = rtrs_clt_dev_release; err = dev_set_name(&clt->dev, "%s", sessname); if (err) - goto err; + goto err_put; + /* * Suppress user space notification until * sysfs files are created */ dev_set_uevent_suppress(&clt->dev, true); - err = device_register(&clt->dev); - if (err) { - put_device(&clt->dev); - goto err; - } + err = device_add(&clt->dev); + if (err) + goto err_put; clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj); if (!clt->kobj_paths) { err = -ENOMEM; - goto err_dev; + goto err_del; } err = rtrs_clt_create_sysfs_root_files(clt); if (err) { kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); - goto err_dev; + goto err_del; } dev_set_uevent_suppress(&clt->dev, false); kobject_uevent(&clt->dev.kobj, KOBJ_ADD); return clt; -err_dev: - device_unregister(&clt->dev); -err: +err_del: + device_del(&clt->dev); +err_put: free_percpu(clt->pcpu_path); - kfree(clt); + put_device(&clt->dev); return ERR_PTR(err); } -static void wait_for_inflight_permits(struct rtrs_clt *clt) -{ - if (clt->permits_map) { - size_t sz = clt->queue_depth; - - wait_event(clt->permits_wait, - find_first_bit(clt->permits_map, sz) >= sz); - } -} - static void free_clt(struct rtrs_clt *clt) { - wait_for_inflight_permits(clt); - free_permits(clt); free_percpu(clt->pcpu_path); - mutex_destroy(&clt->paths_ev_mutex); - mutex_destroy(&clt->paths_mutex); - /* release callback will free clt in last put */ + + /* + * release callback will free clt and destroy mutexes in last put + */ device_unregister(&clt->dev); } @@ -2761,6 +2758,7 @@ void rtrs_clt_close(struct rtrs_clt *clt) rtrs_clt_destroy_sess_files(sess, NULL); kobject_put(&sess->kobj); } + free_permits(clt); free_clt(clt); } EXPORT_SYMBOL(rtrs_clt_close); diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 86d5c4c92b363c4fc6fb9832ca7da626496a7200..b4ccb333a834201e0e67cac4337383293cacd780 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -4045,9 +4045,11 @@ static void srp_remove_one(struct ib_device *device, void *client_data) spin_unlock(&host->target_lock); /* - * Wait for tl_err and target port removal tasks. + * srp_queue_remove_work() queues a call to + * srp_remove_target(). The latter function cancels + * target->tl_err_work so waiting for the remove works to + * finish is sufficient. */ - flush_workqueue(system_long_wq); flush_workqueue(srp_remove_wq); kfree(host); diff --git a/drivers/input/input.c b/drivers/input/input.c index 3cfd2c18eebd9d6a546c0d259204dc559c8460a1..ff9dc37eff3454662d1fdede756fe74cf06444e0 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -2179,6 +2179,12 @@ int input_register_device(struct input_dev *dev) /* KEY_RESERVED is not supposed to be transmitted to userspace. */ __clear_bit(KEY_RESERVED, dev->keybit); + /* Buttonpads should not map BTN_RIGHT and/or BTN_MIDDLE. */ + if (test_bit(INPUT_PROP_BUTTONPAD, dev->propbit)) { + __clear_bit(BTN_RIGHT, dev->keybit); + __clear_bit(BTN_MIDDLE, dev->keybit); + } + /* Make sure that bitmasks not mentioned in dev->evbit are clean. */ input_cleanse_bitmasks(dev); diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c index 6c554c11a7ac3f35a33b318f7f6bef589a8c6b81..ea58805c480fa91c9d2dfdfec4d857200986021f 100644 --- a/drivers/input/joystick/iforce/iforce-usb.c +++ b/drivers/input/joystick/iforce/iforce-usb.c @@ -92,7 +92,7 @@ static int iforce_usb_get_id(struct iforce *iforce, u8 id, id, USB_TYPE_VENDOR | USB_DIR_IN | USB_RECIP_INTERFACE, - 0, 0, buf, IFORCE_MAX_LENGTH, HZ); + 0, 0, buf, IFORCE_MAX_LENGTH, 1000); if (status < 0) { dev_err(&iforce_usb->intf->dev, "usb_submit_urb failed: %d\n", status); diff --git a/drivers/input/joystick/spaceball.c b/drivers/input/joystick/spaceball.c index 429411c6c0a8ef18876196a39eed0e09bc265fc8..a85a4f33aea8ca11eb294a117ed79f335a5c145c 100644 --- a/drivers/input/joystick/spaceball.c +++ b/drivers/input/joystick/spaceball.c @@ -19,6 +19,7 @@ #include #include #include +#include #define DRIVER_DESC "SpaceTec SpaceBall 2003/3003/4000 FLX driver" @@ -75,9 +76,15 @@ static void spaceball_process_packet(struct spaceball* spaceball) case 'D': /* Ball data */ if (spaceball->idx != 15) return; - for (i = 0; i < 6; i++) + /* + * Skip first three bytes; read six axes worth of data. + * Axis values are signed 16-bit big-endian. + */ + data += 3; + for (i = 0; i < ARRAY_SIZE(spaceball_axes); i++) { input_report_abs(dev, spaceball_axes[i], - (__s16)((data[2 * i + 3] << 8) | data[2 * i + 2])); + (__s16)get_unaligned_be16(&data[i * 2])); + } break; case 'K': /* Button data */ diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index e5f1e3cf9179f7a9dc084a7a7014bad71bfa8008..ba101afcfc27fa37fb839e5538921d091535e1fd 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -331,6 +331,7 @@ static const struct xpad_device { { 0x24c6, 0x5b03, "Thrustmaster Ferrari 458 Racing Wheel", 0, XTYPE_XBOX360 }, { 0x24c6, 0x5d04, "Razer Sabertooth", 0, XTYPE_XBOX360 }, { 0x24c6, 0xfafe, "Rock Candy Gamepad for Xbox 360", 0, XTYPE_XBOX360 }, + { 0x3285, 0x0607, "Nacon GC-100", 0, XTYPE_XBOX360 }, { 0x3767, 0x0101, "Fanatec Speedster 3 Forceshock Wheel", 0, XTYPE_XBOX }, { 0xffff, 0xffff, "Chinese-made Xbox Controller", 0, XTYPE_XBOX }, { 0x0000, 0x0000, "Generic X-Box pad", 0, XTYPE_UNKNOWN } @@ -447,6 +448,7 @@ static const struct usb_device_id xpad_table[] = { XPAD_XBOXONE_VENDOR(0x24c6), /* PowerA Controllers */ XPAD_XBOXONE_VENDOR(0x2e24), /* Hyperkin Duke X-Box One pad */ XPAD_XBOX360_VENDOR(0x2f24), /* GameSir Controllers */ + XPAD_XBOX360_VENDOR(0x3285), /* Nacon GC-100 */ { } }; diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 9f60f1559e49926b161b2a5b472e30445f843c59..3f7a5ff17a9a3c23405b19aafbce729d5972fa42 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -556,7 +556,7 @@ config KEYBOARD_PMIC8XXX config KEYBOARD_SAMSUNG tristate "Samsung keypad support" - depends on HAVE_CLK + depends on HAS_IOMEM && HAVE_CLK select INPUT_MATRIXKMAP help Say Y here if you want to use the keypad on your Samsung mobile diff --git a/drivers/input/keyboard/snvs_pwrkey.c b/drivers/input/keyboard/snvs_pwrkey.c index 2f5e3ab5ed638c9f7d0691f40528b766cf794134..65286762b02ab918403d91de894e849820010ec1 100644 --- a/drivers/input/keyboard/snvs_pwrkey.c +++ b/drivers/input/keyboard/snvs_pwrkey.c @@ -3,6 +3,7 @@ // Driver for the IMX SNVS ON/OFF Power Key // Copyright (C) 2015 Freescale Semiconductor, Inc. All Rights Reserved. +#include #include #include #include @@ -99,6 +100,11 @@ static irqreturn_t imx_snvs_pwrkey_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +static void imx_snvs_pwrkey_disable_clk(void *data) +{ + clk_disable_unprepare(data); +} + static void imx_snvs_pwrkey_act(void *pdata) { struct pwrkey_drv_data *pd = pdata; @@ -111,6 +117,7 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) struct pwrkey_drv_data *pdata; struct input_dev *input; struct device_node *np; + struct clk *clk; int error; u32 vid; @@ -134,6 +141,28 @@ static int imx_snvs_pwrkey_probe(struct platform_device *pdev) dev_warn(&pdev->dev, "KEY_POWER without setting in dts\n"); } + clk = devm_clk_get_optional(&pdev->dev, NULL); + if (IS_ERR(clk)) { + dev_err(&pdev->dev, "Failed to get snvs clock (%pe)\n", clk); + return PTR_ERR(clk); + } + + error = clk_prepare_enable(clk); + if (error) { + dev_err(&pdev->dev, "Failed to enable snvs clock (%pe)\n", + ERR_PTR(error)); + return error; + } + + error = devm_add_action_or_reset(&pdev->dev, + imx_snvs_pwrkey_disable_clk, clk); + if (error) { + dev_err(&pdev->dev, + "Failed to register clock cleanup handler (%pe)\n", + ERR_PTR(error)); + return error; + } + pdata->wakeup = of_property_read_bool(np, "wakeup-source"); pdata->irq = platform_get_irq(pdev, 0); diff --git a/drivers/input/mouse/appletouch.c b/drivers/input/mouse/appletouch.c index bfa26651c0be7c8a5c2173e868a111ece3cb80ba..627048bc6a12e0ed3c2e62a36ca52159ca331efc 100644 --- a/drivers/input/mouse/appletouch.c +++ b/drivers/input/mouse/appletouch.c @@ -916,6 +916,8 @@ static int atp_probe(struct usb_interface *iface, set_bit(BTN_TOOL_TRIPLETAP, input_dev->keybit); set_bit(BTN_LEFT, input_dev->keybit); + INIT_WORK(&dev->work, atp_reinit); + error = input_register_device(dev->input); if (error) goto err_free_buffer; @@ -923,8 +925,6 @@ static int atp_probe(struct usb_interface *iface, /* save our data pointer in this interface device */ usb_set_intfdata(iface, dev); - INIT_WORK(&dev->work, atp_reinit); - return 0; err_free_buffer: diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 11a9ee32c98cc89ab58d981162c1c7f0f1fd7219..6f59c8b245f240e19922c05e870ae7b735144f4c 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -153,55 +153,21 @@ static int elan_get_fwinfo(u16 ic_type, u8 iap_version, u16 *validpage_count, return 0; } -static int elan_enable_power(struct elan_tp_data *data) +static int elan_set_power(struct elan_tp_data *data, bool on) { int repeat = ETP_RETRY_COUNT; int error; - error = regulator_enable(data->vcc); - if (error) { - dev_err(&data->client->dev, - "failed to enable regulator: %d\n", error); - return error; - } - do { - error = data->ops->power_control(data->client, true); + error = data->ops->power_control(data->client, on); if (error >= 0) return 0; msleep(30); } while (--repeat > 0); - dev_err(&data->client->dev, "failed to enable power: %d\n", error); - return error; -} - -static int elan_disable_power(struct elan_tp_data *data) -{ - int repeat = ETP_RETRY_COUNT; - int error; - - do { - error = data->ops->power_control(data->client, false); - if (!error) { - error = regulator_disable(data->vcc); - if (error) { - dev_err(&data->client->dev, - "failed to disable regulator: %d\n", - error); - /* Attempt to power the chip back up */ - data->ops->power_control(data->client, true); - break; - } - - return 0; - } - - msleep(30); - } while (--repeat > 0); - - dev_err(&data->client->dev, "failed to disable power: %d\n", error); + dev_err(&data->client->dev, "failed to set power %s: %d\n", + on ? "on" : "off", error); return error; } @@ -1361,9 +1327,19 @@ static int __maybe_unused elan_suspend(struct device *dev) /* Enable wake from IRQ */ data->irq_wake = (enable_irq_wake(client->irq) == 0); } else { - ret = elan_disable_power(data); + ret = elan_set_power(data, false); + if (ret) + goto err; + + ret = regulator_disable(data->vcc); + if (ret) { + dev_err(dev, "error %d disabling regulator\n", ret); + /* Attempt to power the chip back up */ + elan_set_power(data, true); + } } +err: mutex_unlock(&data->sysfs_mutex); return ret; } @@ -1374,12 +1350,18 @@ static int __maybe_unused elan_resume(struct device *dev) struct elan_tp_data *data = i2c_get_clientdata(client); int error; - if (device_may_wakeup(dev) && data->irq_wake) { + if (!device_may_wakeup(dev)) { + error = regulator_enable(data->vcc); + if (error) { + dev_err(dev, "error %d enabling regulator\n", error); + goto err; + } + } else if (data->irq_wake) { disable_irq_wake(client->irq); data->irq_wake = false; } - error = elan_enable_power(data); + error = elan_set_power(data, true); if (error) { dev_err(dev, "power up when resuming failed: %d\n", error); goto err; diff --git a/drivers/input/mouse/elantech.c b/drivers/input/mouse/elantech.c index e0e53a9a816fc6265386595c8dd5f79cf95b2d6f..2e53ea261e014705c0620e9e2634f6a0f69ee035 100644 --- a/drivers/input/mouse/elantech.c +++ b/drivers/input/mouse/elantech.c @@ -517,6 +517,19 @@ static void elantech_report_trackpoint(struct psmouse *psmouse, case 0x16008020U: case 0x26800010U: case 0x36808000U: + + /* + * This firmware misreport coordinates for trackpoint + * occasionally. Discard packets outside of [-127, 127] range + * to prevent cursor jumps. + */ + if (packet[4] == 0x80 || packet[5] == 0x80 || + packet[1] >> 7 == packet[4] >> 7 || + packet[2] >> 7 == packet[5] >> 7) { + elantech_debug("discarding packet [%6ph]\n", packet); + break; + + } x = packet[4] - (int)((packet[1]^0x80) << 1); y = (int)((packet[2]^0x80) << 1) - packet[5]; @@ -1575,7 +1588,13 @@ static const struct dmi_system_id no_hw_res_dmi_table[] = { */ static int elantech_change_report_id(struct psmouse *psmouse) { - unsigned char param[2] = { 0x10, 0x03 }; + /* + * NOTE: the code is expecting to receive param[] as an array of 3 + * items (see __ps2_command()), even if in this case only 2 are + * actually needed. Make sure the array size is 3 to avoid potential + * stack out-of-bound accesses. + */ + unsigned char param[3] = { 0x10, 0x03 }; if (elantech_write_reg_params(psmouse, 0x7, param) || elantech_read_reg_params(psmouse, 0x7, param) || diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index a5a0035536462aa5e34fd7fa310f85b0be16aae9..148a7c5fd0e22be932e922c6ba641b6df361e7b2 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -272,6 +272,13 @@ static const struct dmi_system_id __initconst i8042_dmi_nomux_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LifeBook S6230"), }, }, + { + /* Fujitsu Lifebook T725 laptop */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T725"), + }, + }, { /* Fujitsu Lifebook U745 */ .matches = { @@ -840,6 +847,13 @@ static const struct dmi_system_id __initconst i8042_dmi_notimeout_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK AH544"), }, }, + { + /* Fujitsu Lifebook T725 laptop */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK T725"), + }, + }, { /* Fujitsu U574 laptop */ /* https://bugzilla.kernel.org/show_bug.cgi?id=69731 */ @@ -981,6 +995,24 @@ static const struct dmi_system_id __initconst i8042_dmi_kbdreset_table[] = { { } }; +static const struct dmi_system_id i8042_dmi_probe_defer_table[] __initconst = { + { + /* ASUS ZenBook UX425UA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX425UA"), + }, + }, + { + /* ASUS ZenBook UM325UA */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "ZenBook UX325UA_UM325UA"), + }, + }, + { } +}; + #endif /* CONFIG_X86 */ #ifdef CONFIG_PNP @@ -1301,6 +1333,9 @@ static int __init i8042_platform_init(void) if (dmi_check_system(i8042_dmi_kbdreset_table)) i8042_kbdreset = true; + if (dmi_check_system(i8042_dmi_probe_defer_table)) + i8042_probe_defer = true; + /* * A20 was already enabled during early kernel init. But some buggy * BIOSes (in MSI Laptops) require A20 to be enabled using 8042 to diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index abae23af0791e1183b519423774e39f9156d75f9..a9f68f535b727bc56ced6311a91a6144082e72fe 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -45,6 +45,10 @@ static bool i8042_unlock; module_param_named(unlock, i8042_unlock, bool, 0); MODULE_PARM_DESC(unlock, "Ignore keyboard lock."); +static bool i8042_probe_defer; +module_param_named(probe_defer, i8042_probe_defer, bool, 0); +MODULE_PARM_DESC(probe_defer, "Allow deferred probing."); + enum i8042_controller_reset_mode { I8042_RESET_NEVER, I8042_RESET_ALWAYS, @@ -711,7 +715,7 @@ static int i8042_set_mux_mode(bool multiplex, unsigned char *mux_version) * LCS/Telegraphics. */ -static int __init i8042_check_mux(void) +static int i8042_check_mux(void) { unsigned char mux_version; @@ -740,10 +744,10 @@ static int __init i8042_check_mux(void) /* * The following is used to test AUX IRQ delivery. */ -static struct completion i8042_aux_irq_delivered __initdata; -static bool i8042_irq_being_tested __initdata; +static struct completion i8042_aux_irq_delivered; +static bool i8042_irq_being_tested; -static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id) +static irqreturn_t i8042_aux_test_irq(int irq, void *dev_id) { unsigned long flags; unsigned char str, data; @@ -770,7 +774,7 @@ static irqreturn_t __init i8042_aux_test_irq(int irq, void *dev_id) * verifies success by readinng CTR. Used when testing for presence of AUX * port. */ -static int __init i8042_toggle_aux(bool on) +static int i8042_toggle_aux(bool on) { unsigned char param; int i; @@ -798,7 +802,7 @@ static int __init i8042_toggle_aux(bool on) * the presence of an AUX interface. */ -static int __init i8042_check_aux(void) +static int i8042_check_aux(void) { int retval = -1; bool irq_registered = false; @@ -1005,7 +1009,7 @@ static int i8042_controller_init(void) if (i8042_command(&ctr[n++ % 2], I8042_CMD_CTL_RCTR)) { pr_err("Can't read CTR while initializing i8042\n"); - return -EIO; + return i8042_probe_defer ? -EPROBE_DEFER : -EIO; } } while (n < 2 || ctr[0] != ctr[1]); @@ -1320,7 +1324,7 @@ static void i8042_shutdown(struct platform_device *dev) i8042_controller_reset(false); } -static int __init i8042_create_kbd_port(void) +static int i8042_create_kbd_port(void) { struct serio *serio; struct i8042_port *port = &i8042_ports[I8042_KBD_PORT_NO]; @@ -1349,7 +1353,7 @@ static int __init i8042_create_kbd_port(void) return 0; } -static int __init i8042_create_aux_port(int idx) +static int i8042_create_aux_port(int idx) { struct serio *serio; int port_no = idx < 0 ? I8042_AUX_PORT_NO : I8042_MUX_PORT_NO + idx; @@ -1386,13 +1390,13 @@ static int __init i8042_create_aux_port(int idx) return 0; } -static void __init i8042_free_kbd_port(void) +static void i8042_free_kbd_port(void) { kfree(i8042_ports[I8042_KBD_PORT_NO].serio); i8042_ports[I8042_KBD_PORT_NO].serio = NULL; } -static void __init i8042_free_aux_ports(void) +static void i8042_free_aux_ports(void) { int i; @@ -1402,7 +1406,7 @@ static void __init i8042_free_aux_ports(void) } } -static void __init i8042_register_ports(void) +static void i8042_register_ports(void) { int i; @@ -1443,7 +1447,7 @@ static void i8042_free_irqs(void) i8042_aux_irq_registered = i8042_kbd_irq_registered = false; } -static int __init i8042_setup_aux(void) +static int i8042_setup_aux(void) { int (*aux_enable)(void); int error; @@ -1485,7 +1489,7 @@ static int __init i8042_setup_aux(void) return error; } -static int __init i8042_setup_kbd(void) +static int i8042_setup_kbd(void) { int error; @@ -1535,7 +1539,7 @@ static int i8042_kbd_bind_notifier(struct notifier_block *nb, return 0; } -static int __init i8042_probe(struct platform_device *dev) +static int i8042_probe(struct platform_device *dev) { int error; @@ -1600,6 +1604,7 @@ static struct platform_driver i8042_driver = { .pm = &i8042_pm_ops, #endif }, + .probe = i8042_probe, .remove = i8042_remove, .shutdown = i8042_shutdown, }; @@ -1610,7 +1615,6 @@ static struct notifier_block i8042_kbd_bind_notifier_block = { static int __init i8042_init(void) { - struct platform_device *pdev; int err; dbg_init(); @@ -1626,17 +1630,29 @@ static int __init i8042_init(void) /* Set this before creating the dev to allow i8042_command to work right away */ i8042_present = true; - pdev = platform_create_bundle(&i8042_driver, i8042_probe, NULL, 0, NULL, 0); - if (IS_ERR(pdev)) { - err = PTR_ERR(pdev); + err = platform_driver_register(&i8042_driver); + if (err) goto err_platform_exit; + + i8042_platform_device = platform_device_alloc("i8042", -1); + if (!i8042_platform_device) { + err = -ENOMEM; + goto err_unregister_driver; } + err = platform_device_add(i8042_platform_device); + if (err) + goto err_free_device; + bus_register_notifier(&serio_bus, &i8042_kbd_bind_notifier_block); panic_blink = i8042_panic_blink; return 0; +err_free_device: + platform_device_put(i8042_platform_device); +err_unregister_driver: + platform_driver_unregister(&i8042_driver); err_platform_exit: i8042_platform_exit(); return err; diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index b6f75367a284ab351b4f4d20475d463ef19dd6fc..8df402a1ed446cd518e7e14e649f9b0596dbc08c 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -1839,7 +1839,7 @@ static int mxt_read_info_block(struct mxt_data *data) if (error) { dev_err(&client->dev, "Error %d parsing object table\n", error); mxt_free_object_table(data); - goto err_free_mem; + return error; } data->object_table = (struct mxt_object *)(id_buf + MXT_OBJECT_START); diff --git a/drivers/input/touchscreen/elants_i2c.c b/drivers/input/touchscreen/elants_i2c.c index 03a48253594480942d72710a3ecea520a9c5d137..c09aefa2661dd058386ba7e7bcfd98b48a809815 100644 --- a/drivers/input/touchscreen/elants_i2c.c +++ b/drivers/input/touchscreen/elants_i2c.c @@ -109,6 +109,19 @@ #define ELAN_POWERON_DELAY_USEC 500 #define ELAN_RESET_DELAY_MSEC 20 +/* FW boot code version */ +#define BC_VER_H_BYTE_FOR_EKTH3900x1_I2C 0x72 +#define BC_VER_H_BYTE_FOR_EKTH3900x2_I2C 0x82 +#define BC_VER_H_BYTE_FOR_EKTH3900x3_I2C 0x92 +#define BC_VER_H_BYTE_FOR_EKTH5312x1_I2C 0x6D +#define BC_VER_H_BYTE_FOR_EKTH5312x2_I2C 0x6E +#define BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C 0x77 +#define BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C 0x78 +#define BC_VER_H_BYTE_FOR_EKTH5312x1_I2C_USB 0x67 +#define BC_VER_H_BYTE_FOR_EKTH5312x2_I2C_USB 0x68 +#define BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C_USB 0x74 +#define BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C_USB 0x75 + enum elants_state { ELAN_STATE_NORMAL, ELAN_WAIT_QUEUE_HEADER, @@ -663,6 +676,37 @@ static int elants_i2c_validate_remark_id(struct elants_data *ts, return 0; } +static bool elants_i2c_should_check_remark_id(struct elants_data *ts) +{ + struct i2c_client *client = ts->client; + const u8 bootcode_version = ts->iap_version; + bool check; + + /* I2C eKTH3900 and eKTH5312 are NOT support Remark ID */ + if ((bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH3900x3_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x1_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312x2_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx1_I2C_USB) || + (bootcode_version == BC_VER_H_BYTE_FOR_EKTH5312cx2_I2C_USB)) { + dev_dbg(&client->dev, + "eKTH3900/eKTH5312(0x%02x) are not support remark id\n", + bootcode_version); + check = false; + } else if (bootcode_version >= 0x60) { + check = true; + } else { + check = false; + } + + return check; +} + static int elants_i2c_do_update_firmware(struct i2c_client *client, const struct firmware *fw, bool force) @@ -676,7 +720,7 @@ static int elants_i2c_do_update_firmware(struct i2c_client *client, u16 send_id; int page, n_fw_pages; int error; - bool check_remark_id = ts->iap_version >= 0x60; + bool check_remark_id = elants_i2c_should_check_remark_id(ts); /* Recovery mode detection! */ if (force) { diff --git a/drivers/input/touchscreen/goodix.c b/drivers/input/touchscreen/goodix.c index a06385c55af2a55d2c1e7dddaa87c476cee13ffa..5fc789f717c8a8b16d2f16a9fc10390bb9453847 100644 --- a/drivers/input/touchscreen/goodix.c +++ b/drivers/input/touchscreen/goodix.c @@ -162,6 +162,7 @@ static const struct goodix_chip_id goodix_chip_ids[] = { { .id = "911", .data = >911_chip_data }, { .id = "9271", .data = >911_chip_data }, { .id = "9110", .data = >911_chip_data }, + { .id = "9111", .data = >911_chip_data }, { .id = "927", .data = >911_chip_data }, { .id = "928", .data = >911_chip_data }, diff --git a/drivers/input/touchscreen/of_touchscreen.c b/drivers/input/touchscreen/of_touchscreen.c index 97342e14b4f18c83d2e67218d892cb13a88799f8..8719a8b0e8682b7bc840e7e21233b8ae509c2f54 100644 --- a/drivers/input/touchscreen/of_touchscreen.c +++ b/drivers/input/touchscreen/of_touchscreen.c @@ -79,27 +79,27 @@ void touchscreen_parse_properties(struct input_dev *input, bool multitouch, data_present = touchscreen_get_prop_u32(dev, "touchscreen-min-x", input_abs_get_min(input, axis_x), - &minimum) | - touchscreen_get_prop_u32(dev, "touchscreen-size-x", - input_abs_get_max(input, - axis_x) + 1, - &maximum) | - touchscreen_get_prop_u32(dev, "touchscreen-fuzz-x", - input_abs_get_fuzz(input, axis_x), - &fuzz); + &minimum); + data_present |= touchscreen_get_prop_u32(dev, "touchscreen-size-x", + input_abs_get_max(input, + axis_x) + 1, + &maximum); + data_present |= touchscreen_get_prop_u32(dev, "touchscreen-fuzz-x", + input_abs_get_fuzz(input, axis_x), + &fuzz); if (data_present) touchscreen_set_params(input, axis_x, minimum, maximum - 1, fuzz); data_present = touchscreen_get_prop_u32(dev, "touchscreen-min-y", input_abs_get_min(input, axis_y), - &minimum) | - touchscreen_get_prop_u32(dev, "touchscreen-size-y", - input_abs_get_max(input, - axis_y) + 1, - &maximum) | - touchscreen_get_prop_u32(dev, "touchscreen-fuzz-y", - input_abs_get_fuzz(input, axis_y), - &fuzz); + &minimum); + data_present |= touchscreen_get_prop_u32(dev, "touchscreen-size-y", + input_abs_get_max(input, + axis_y) + 1, + &maximum); + data_present |= touchscreen_get_prop_u32(dev, "touchscreen-fuzz-y", + input_abs_get_fuzz(input, axis_y), + &fuzz); if (data_present) touchscreen_set_params(input, axis_y, minimum, maximum - 1, fuzz); @@ -107,11 +107,11 @@ void touchscreen_parse_properties(struct input_dev *input, bool multitouch, data_present = touchscreen_get_prop_u32(dev, "touchscreen-max-pressure", input_abs_get_max(input, axis), - &maximum) | - touchscreen_get_prop_u32(dev, - "touchscreen-fuzz-pressure", - input_abs_get_fuzz(input, axis), - &fuzz); + &maximum); + data_present |= touchscreen_get_prop_u32(dev, + "touchscreen-fuzz-pressure", + input_abs_get_fuzz(input, axis), + &fuzz); if (data_present) touchscreen_set_params(input, axis, 0, maximum, fuzz); diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c index fd8b4e9f08a21f07792f8b2b145f2f43857313e9..6df6f07f1ac66a036737e1957a5ed8007cd7f2bd 100644 --- a/drivers/input/touchscreen/zinitix.c +++ b/drivers/input/touchscreen/zinitix.c @@ -488,6 +488,15 @@ static int zinitix_ts_probe(struct i2c_client *client) return error; } + error = devm_request_threaded_irq(&client->dev, client->irq, + NULL, zinitix_ts_irq_handler, + IRQF_ONESHOT, + client->name, bt541); + if (error) { + dev_err(&client->dev, "Failed to request IRQ: %d\n", error); + return error; + } + error = zinitix_init_input_dev(bt541); if (error) { dev_err(&client->dev, @@ -514,13 +523,6 @@ static int zinitix_ts_probe(struct i2c_client *client) } irq_set_status_flags(client->irq, IRQ_NOAUTOEN); - error = devm_request_threaded_irq(&client->dev, client->irq, - NULL, zinitix_ts_irq_handler, - IRQF_ONESHOT, client->name, bt541); - if (error) { - dev_err(&client->dev, "Failed to request IRQ: %d\n", error); - return error; - } return 0; } diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index b4adab69856323b3f9158720dac5dc673249974e..0c40d22409f232f46becc79a25891f1cb42df70c 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -17,6 +17,7 @@ extern int amd_iommu_init_passthrough(void); extern irqreturn_t amd_iommu_int_thread(int irq, void *data); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_apply_erratum_63(u16 devid); +extern void amd_iommu_restart_event_logging(struct amd_iommu *iommu); extern void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu); extern int amd_iommu_init_devices(void); extern void amd_iommu_uninit_devices(void); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 33446c9d3bac81cc4d7c3c66165be776193376e5..690c5976575c68ea0398ebb8ae900b160d3ef2bb 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -109,6 +109,7 @@ #define PASID_MASK 0x0000ffff /* MMIO status bits */ +#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK (1 << 0) #define MMIO_STATUS_EVT_INT_MASK (1 << 1) #define MMIO_STATUS_COM_WAIT_INT_MASK (1 << 2) #define MMIO_STATUS_PPR_INT_MASK (1 << 6) diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 28de889aa516408a9b398449beda05706b0a8749..6eaefc9e7b3d60f6ef2bb0ab629466cc66c3f3d6 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -655,6 +656,16 @@ static int __init alloc_command_buffer(struct amd_iommu *iommu) return iommu->cmd_buf ? 0 : -ENOMEM; } +/* + * This function restarts event logging in case the IOMMU experienced + * an event log buffer overflow. + */ +void amd_iommu_restart_event_logging(struct amd_iommu *iommu) +{ + iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); + iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); +} + /* * This function resets the command buffer if the IOMMU stopped fetching * commands from it. @@ -805,16 +816,27 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) { #ifdef CONFIG_IRQ_REMAP u32 status, i; + u64 entry; if (!iommu->ga_log) return -EINVAL; - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - /* Check if already running */ - if (status & (MMIO_STATUS_GALOG_RUN_MASK)) + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (WARN_ON(status & (MMIO_STATUS_GALOG_RUN_MASK))) return 0; + entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; + memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, + &entry, sizeof(entry)); + entry = (iommu_virt_to_phys(iommu->ga_log_tail) & + (BIT_ULL(52)-1)) & ~7ULL; + memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, + &entry, sizeof(entry)); + writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); + writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); + + iommu_feature_enable(iommu, CONTROL_GAINT_EN); iommu_feature_enable(iommu, CONTROL_GALOG_EN); @@ -822,19 +844,18 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & (MMIO_STATUS_GALOG_RUN_MASK)) break; + udelay(10); } - if (i >= LOOP_TIMEOUT) + if (WARN_ON(i >= LOOP_TIMEOUT)) return -EINVAL; #endif /* CONFIG_IRQ_REMAP */ return 0; } -#ifdef CONFIG_IRQ_REMAP static int iommu_init_ga_log(struct amd_iommu *iommu) { - u64 entry; - +#ifdef CONFIG_IRQ_REMAP if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir)) return 0; @@ -848,32 +869,13 @@ static int iommu_init_ga_log(struct amd_iommu *iommu) if (!iommu->ga_log_tail) goto err_out; - entry = iommu_virt_to_phys(iommu->ga_log) | GA_LOG_SIZE_512; - memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_BASE_OFFSET, - &entry, sizeof(entry)); - entry = (iommu_virt_to_phys(iommu->ga_log_tail) & - (BIT_ULL(52)-1)) & ~7ULL; - memcpy_toio(iommu->mmio_base + MMIO_GA_LOG_TAIL_OFFSET, - &entry, sizeof(entry)); - writel(0x00, iommu->mmio_base + MMIO_GA_HEAD_OFFSET); - writel(0x00, iommu->mmio_base + MMIO_GA_TAIL_OFFSET); - return 0; err_out: free_ga_log(iommu); return -EINVAL; -} -#endif /* CONFIG_IRQ_REMAP */ - -static int iommu_init_ga(struct amd_iommu *iommu) -{ - int ret = 0; - -#ifdef CONFIG_IRQ_REMAP - ret = iommu_init_ga_log(iommu); +#else + return 0; #endif /* CONFIG_IRQ_REMAP */ - - return ret; } static int __init alloc_cwwb_sem(struct amd_iommu *iommu) @@ -1860,7 +1862,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) return -ENOMEM; - ret = iommu_init_ga(iommu); + ret = iommu_init_ga_log(iommu); if (ret) return ret; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 5f1195791cb18a30ba695bd983f919e5e0cf2e97..200cf5da5e0ad1c9ec3acf34fe06566d5e1f14dd 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -813,7 +813,8 @@ amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { } #endif /* !CONFIG_IRQ_REMAP */ #define AMD_IOMMU_INT_MASK \ - (MMIO_STATUS_EVT_INT_MASK | \ + (MMIO_STATUS_EVT_OVERFLOW_INT_MASK | \ + MMIO_STATUS_EVT_INT_MASK | \ MMIO_STATUS_PPR_INT_MASK | \ MMIO_STATUS_GALOG_INT_MASK) @@ -823,7 +824,7 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data) u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); while (status & AMD_IOMMU_INT_MASK) { - /* Enable EVT and PPR and GA interrupts again */ + /* Enable interrupt sources again */ writel(AMD_IOMMU_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET); @@ -844,6 +845,11 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data) } #endif + if (status & MMIO_STATUS_EVT_OVERFLOW_INT_MASK) { + pr_info_ratelimited("IOMMU event log overflow\n"); + amd_iommu_restart_event_logging(iommu); + } + /* * Hardware bug: ERBT1312 * When re-enabling interrupt (by writing 1 diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 5ecc0bc608ec6fac11e12bdea8da9a9ad9af1907..fb61bdca4c2c1455f375fc01e01d054d60908275 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -927,10 +927,8 @@ static int __init amd_iommu_v2_init(void) { int ret; - pr_info("AMD IOMMUv2 driver by Joerg Roedel \n"); - if (!amd_iommu_v2_supported()) { - pr_info("AMD IOMMUv2 functionality not available on this system\n"); + pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n"); /* * Load anyway to provide the symbols to other modules * which may use AMD IOMMUv2 optionally. @@ -947,6 +945,8 @@ static int __init amd_iommu_v2_init(void) amd_iommu_register_ppr_notifier(&ppr_nb); + pr_info("AMD IOMMUv2 loaded and initialized\n"); + return 0; out: diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index aedaae4630bc8117ed9432daf2d7ecaffd28c773..b853888774e65511ae957b542e30139059c09c3e 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -576,9 +576,8 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) fn, &intel_ir_domain_ops, iommu); if (!iommu->ir_domain) { - irq_domain_free_fwnode(fn); pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); - goto out_free_bitmap; + goto out_free_fwnode; } iommu->ir_msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain, @@ -602,7 +601,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (dmar_enable_qi(iommu)) { pr_err("Failed to enable queued invalidation\n"); - goto out_free_bitmap; + goto out_free_ir_domain; } } @@ -626,6 +625,14 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) return 0; +out_free_ir_domain: + if (iommu->ir_msi_domain) + irq_domain_remove(iommu->ir_msi_domain); + iommu->ir_msi_domain = NULL; + irq_domain_remove(iommu->ir_domain); + iommu->ir_domain = NULL; +out_free_fwnode: + irq_domain_free_fwnode(fn); out_free_bitmap: bitmap_free(bitmap); out_free_pages: diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 6ed8f98ece579a0e742206d0bfe931992b23d734..b07695805c6c4c1a06e5faa7aaaabd2575b43616 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -246,13 +246,17 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size)); else if (lvl == 2) table = kmem_cache_zalloc(data->l2_tables, gfp); + + if (!table) + return NULL; + phys = virt_to_phys(table); if (phys != (arm_v7s_iopte)phys) { /* Doesn't fit in PTE */ dev_err(dev, "Page table does not fit in PTE: %pa", &phys); goto out_free; } - if (table && !cfg->coherent_walk) { + if (!cfg->coherent_walk) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) goto out_free; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index ec31966b4f6fba6bc498df408585ece02416d095..e086b1a7603514b34d9eb0f51b9226aca4a0a536 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -316,11 +316,12 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, arm_lpae_iopte *ptep, arm_lpae_iopte curr, - struct io_pgtable_cfg *cfg) + struct arm_lpae_io_pgtable *data) { arm_lpae_iopte old, new; + struct io_pgtable_cfg *cfg = &data->iop.cfg; - new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE; + new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE; if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) new |= ARM_LPAE_PTE_NSTABLE; @@ -381,7 +382,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, if (!cptep) return -ENOMEM; - pte = arm_lpae_install_table(cptep, ptep, 0, cfg); + pte = arm_lpae_install_table(cptep, ptep, 0, data); if (pte) __arm_lpae_free_pages(cptep, tblsz, cfg); } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { @@ -595,7 +596,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, __arm_lpae_init_pte(data, blk_paddr, pte, lvl, 1, &tablep[i]); } - pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); + pte = arm_lpae_install_table(tablep, ptep, blk_pte, data); if (pte != blk_pte) { __arm_lpae_free_pages(tablep, tablesz, cfg); /* diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d41031198d776508222ea9678700776f8e217bea..1d320eec94aae1365e4b593112e11eaf8b8522b5 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -186,9 +186,14 @@ static struct dev_iommu *dev_iommu_get(struct device *dev) static void dev_iommu_free(struct device *dev) { - iommu_fwspec_free(dev); - kfree(dev->iommu); + struct dev_iommu *param = dev->iommu; + dev->iommu = NULL; + if (param->fwspec) { + fwnode_handle_put(param->fwspec->iommu_fwnode); + kfree(param->fwspec); + } + kfree(param); } static int __iommu_probe_device(struct device *dev, struct list_head *group_list) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 2adc7882a02363d00e567ca5e205f288cd7bd5df..53994947ecf758ba6eee552b7777038dfa72c94d 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -65,8 +65,7 @@ static void free_iova_flush_queue(struct iova_domain *iovad) if (!has_iova_flush_queue(iovad)) return; - if (timer_pending(&iovad->fq_timer)) - del_timer(&iovad->fq_timer); + del_timer_sync(&iovad->fq_timer); fq_destroy_all_entries(iovad); @@ -140,10 +139,11 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) cached_iova = rb_entry(iovad->cached32_node, struct iova, node); if (free == cached_iova || (free->pfn_hi < iovad->dma_32bit_pfn && - free->pfn_lo >= cached_iova->pfn_lo)) { + free->pfn_lo >= cached_iova->pfn_lo)) iovad->cached32_node = rb_next(&free->node); + + if (free->pfn_lo < iovad->dma_32bit_pfn) iovad->max32_alloc_size = iovad->dma_32bit_pfn; - } cached_iova = rb_entry(iovad->cached_node, struct iova, node); if (free->pfn_lo >= cached_iova->pfn_lo) diff --git a/drivers/ipack/devices/ipoctal.c b/drivers/ipack/devices/ipoctal.c index d480a514c983792082e4715c7a9990812db372ba..1f7512c991a3228e0517ea5bf3db820f07cd3712 100644 --- a/drivers/ipack/devices/ipoctal.c +++ b/drivers/ipack/devices/ipoctal.c @@ -35,6 +35,7 @@ struct ipoctal_channel { unsigned int pointer_read; unsigned int pointer_write; struct tty_port tty_port; + bool tty_registered; union scc2698_channel __iomem *regs; union scc2698_block __iomem *block_regs; unsigned int board_id; @@ -83,22 +84,34 @@ static int ipoctal_port_activate(struct tty_port *port, struct tty_struct *tty) return 0; } -static int ipoctal_open(struct tty_struct *tty, struct file *file) +static int ipoctal_install(struct tty_driver *driver, struct tty_struct *tty) { struct ipoctal_channel *channel = dev_get_drvdata(tty->dev); struct ipoctal *ipoctal = chan_to_ipoctal(channel, tty->index); - int err; - - tty->driver_data = channel; + int res; if (!ipack_get_carrier(ipoctal->dev)) return -EBUSY; - err = tty_port_open(&channel->tty_port, tty, file); - if (err) - ipack_put_carrier(ipoctal->dev); + res = tty_standard_install(driver, tty); + if (res) + goto err_put_carrier; + + tty->driver_data = channel; + + return 0; + +err_put_carrier: + ipack_put_carrier(ipoctal->dev); + + return res; +} + +static int ipoctal_open(struct tty_struct *tty, struct file *file) +{ + struct ipoctal_channel *channel = tty->driver_data; - return err; + return tty_port_open(&channel->tty_port, tty, file); } static void ipoctal_reset_stats(struct ipoctal_stats *stats) @@ -266,7 +279,6 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, int res; int i; struct tty_driver *tty; - char name[20]; struct ipoctal_channel *channel; struct ipack_region *region; void __iomem *addr; @@ -357,8 +369,11 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, /* Fill struct tty_driver with ipoctal data */ tty->owner = THIS_MODULE; tty->driver_name = KBUILD_MODNAME; - sprintf(name, KBUILD_MODNAME ".%d.%d.", bus_nr, slot); - tty->name = name; + tty->name = kasprintf(GFP_KERNEL, KBUILD_MODNAME ".%d.%d.", bus_nr, slot); + if (!tty->name) { + res = -ENOMEM; + goto err_put_driver; + } tty->major = 0; tty->minor_start = 0; @@ -374,8 +389,7 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, res = tty_register_driver(tty); if (res) { dev_err(&ipoctal->dev->dev, "Can't register tty driver.\n"); - put_tty_driver(tty); - return res; + goto err_free_name; } /* Save struct tty_driver for use it when uninstalling the device */ @@ -386,7 +400,9 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, channel = &ipoctal->channel[i]; tty_port_init(&channel->tty_port); - tty_port_alloc_xmit_buf(&channel->tty_port); + res = tty_port_alloc_xmit_buf(&channel->tty_port); + if (res) + continue; channel->tty_port.ops = &ipoctal_tty_port_ops; ipoctal_reset_stats(&channel->stats); @@ -394,13 +410,15 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, spin_lock_init(&channel->lock); channel->pointer_read = 0; channel->pointer_write = 0; - tty_dev = tty_port_register_device(&channel->tty_port, tty, i, NULL); + tty_dev = tty_port_register_device_attr(&channel->tty_port, tty, + i, NULL, channel, NULL); if (IS_ERR(tty_dev)) { dev_err(&ipoctal->dev->dev, "Failed to register tty device.\n"); + tty_port_free_xmit_buf(&channel->tty_port); tty_port_destroy(&channel->tty_port); continue; } - dev_set_drvdata(tty_dev, channel); + channel->tty_registered = true; } /* @@ -412,6 +430,13 @@ static int ipoctal_inst_slot(struct ipoctal *ipoctal, unsigned int bus_nr, ipoctal_irq_handler, ipoctal); return 0; + +err_free_name: + kfree(tty->name); +err_put_driver: + put_tty_driver(tty); + + return res; } static inline int ipoctal_copy_write_buffer(struct ipoctal_channel *channel, @@ -652,6 +677,7 @@ static void ipoctal_cleanup(struct tty_struct *tty) static const struct tty_operations ipoctal_fops = { .ioctl = NULL, + .install = ipoctal_install, .open = ipoctal_open, .close = ipoctal_close, .write = ipoctal_write_tty, @@ -694,12 +720,17 @@ static void __ipoctal_remove(struct ipoctal *ipoctal) for (i = 0; i < NR_CHANNELS; i++) { struct ipoctal_channel *channel = &ipoctal->channel[i]; + + if (!channel->tty_registered) + continue; + tty_unregister_device(ipoctal->tty_drv, i); tty_port_free_xmit_buf(&channel->tty_port); tty_port_destroy(&channel->tty_port); } tty_unregister_driver(ipoctal->tty_drv); + kfree(ipoctal->tty_drv->name); put_tty_driver(ipoctal->tty_drv); kfree(ipoctal); } diff --git a/drivers/irqchip/irq-armada-370-xp.c b/drivers/irqchip/irq-armada-370-xp.c index d7eb2e93db8f525a6af7950a08da705a34940ab2..84f2741aaac6a6ff84439b1a21080beda21fb74c 100644 --- a/drivers/irqchip/irq-armada-370-xp.c +++ b/drivers/irqchip/irq-armada-370-xp.c @@ -232,16 +232,12 @@ static int armada_370_xp_msi_alloc(struct irq_domain *domain, unsigned int virq, int hwirq, i; mutex_lock(&msi_used_lock); + hwirq = bitmap_find_free_region(msi_used, PCI_MSI_DOORBELL_NR, + order_base_2(nr_irqs)); + mutex_unlock(&msi_used_lock); - hwirq = bitmap_find_next_zero_area(msi_used, PCI_MSI_DOORBELL_NR, - 0, nr_irqs, 0); - if (hwirq >= PCI_MSI_DOORBELL_NR) { - mutex_unlock(&msi_used_lock); + if (hwirq < 0) return -ENOSPC; - } - - bitmap_set(msi_used, hwirq, nr_irqs); - mutex_unlock(&msi_used_lock); for (i = 0; i < nr_irqs; i++) { irq_domain_set_info(domain, virq + i, hwirq + i, @@ -250,7 +246,7 @@ static int armada_370_xp_msi_alloc(struct irq_domain *domain, unsigned int virq, NULL, NULL); } - return hwirq; + return 0; } static void armada_370_xp_msi_free(struct irq_domain *domain, @@ -259,7 +255,7 @@ static void armada_370_xp_msi_free(struct irq_domain *domain, struct irq_data *d = irq_domain_get_irq_data(domain, virq); mutex_lock(&msi_used_lock); - bitmap_clear(msi_used, d->hwirq, nr_irqs); + bitmap_release_region(msi_used, d->hwirq, order_base_2(nr_irqs)); mutex_unlock(&msi_used_lock); } diff --git a/drivers/irqchip/irq-aspeed-scu-ic.c b/drivers/irqchip/irq-aspeed-scu-ic.c index c90a3346b9857bb5386cd1f632fea193b36a1b77..0f0aac7cc1140763f9b1c9289e87b3aaa7dc8b6f 100644 --- a/drivers/irqchip/irq-aspeed-scu-ic.c +++ b/drivers/irqchip/irq-aspeed-scu-ic.c @@ -78,8 +78,8 @@ static void aspeed_scu_ic_irq_handler(struct irq_desc *desc) bit - scu_ic->irq_shift); generic_handle_irq(irq); - regmap_update_bits(scu_ic->scu, scu_ic->reg, mask, - BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT)); + regmap_write_bits(scu_ic->scu, scu_ic->reg, mask, + BIT(bit + ASPEED_SCU_IC_STATUS_SHIFT)); } chained_irq_exit(chip, desc); diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c index e3483789f4df3622b3b312c511e61b66b808a190..1bd0621c4ce2aefd157db645ddf84bfb0b78cb02 100644 --- a/drivers/irqchip/irq-bcm6345-l1.c +++ b/drivers/irqchip/irq-bcm6345-l1.c @@ -140,7 +140,7 @@ static void bcm6345_l1_irq_handle(struct irq_desc *desc) for_each_set_bit(hwirq, &pending, IRQS_PER_WORD) { irq = irq_linear_revmap(intc->domain, base + hwirq); if (irq) - do_IRQ(irq); + generic_handle_irq(irq); else spurious_interrupt(); } diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 24f87ec604ee439a86d8f3b814ba198fdda4b3f4..e9a4fb326d24065f34294a2a5599c924d74fc585 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,7 @@ #include #include #include +#include #include #include @@ -742,7 +744,7 @@ static struct its_collection *its_build_invall_cmd(struct its_node *its, its_fixup_cmd(cmd); - return NULL; + return desc->its_invall_cmd.col; } static struct its_vpe *its_build_vinvall_cmd(struct its_node *its, @@ -2166,6 +2168,7 @@ static void gic_reset_prop_table(void *va) /* Make sure the GIC will observe the written configuration */ gic_flush_dcache_to_poc(va, LPI_PROPBASE_SZ); + set_memory_decrypted((unsigned long)va, LPI_PROPBASE_SZ >> PAGE_SHIFT); } static struct page *its_allocate_prop_table(gfp_t gfp_flags) @@ -2183,8 +2186,10 @@ static struct page *its_allocate_prop_table(gfp_t gfp_flags) static void its_free_prop_table(struct page *prop_page) { - free_pages((unsigned long)page_address(prop_page), - get_order(LPI_PROPBASE_SZ)); + unsigned long va = (unsigned long)page_address(prop_page); + + set_memory_encrypted(va, LPI_PROPBASE_SZ >> PAGE_SHIFT); + free_pages(va, get_order(LPI_PROPBASE_SZ)); } static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size) @@ -2377,6 +2382,8 @@ retry_baser: return -ENXIO; } + set_memory_decrypted((unsigned long)base, + PAGE_ORDER_TO_SIZE(order) >> PAGE_SHIFT); baser->order = order; baser->base = base; baser->psz = psz; @@ -2512,8 +2519,12 @@ static void its_free_tables(struct its_node *its) for (i = 0; i < GITS_BASER_NR_REGS; i++) { if (its->tables[i].base) { - free_pages((unsigned long)its->tables[i].base, - its->tables[i].order); + unsigned long base = (unsigned long)its->tables[i].base; + u32 order = its->tables[i].order; + u32 npages = PAGE_ORDER_TO_SIZE(order) >> PAGE_SHIFT; + + set_memory_encrypted(base, npages); + free_pages(base, order); its->tables[i].base = NULL; } } @@ -2934,6 +2945,7 @@ static int its_alloc_collections(struct its_node *its) static struct page *its_allocate_pending_table(gfp_t gfp_flags) { struct page *pend_page; + void *va; pend_page = alloc_pages(gfp_flags | __GFP_ZERO, get_order(LPI_PENDBASE_SZ)); @@ -2941,14 +2953,19 @@ static struct page *its_allocate_pending_table(gfp_t gfp_flags) return NULL; /* Make sure the GIC will observe the zero-ed page */ - gic_flush_dcache_to_poc(page_address(pend_page), LPI_PENDBASE_SZ); + va = page_address(pend_page); + gic_flush_dcache_to_poc(va, LPI_PENDBASE_SZ); + set_memory_decrypted((unsigned long)va, LPI_PENDBASE_SZ >> PAGE_SHIFT); return pend_page; } static void its_free_pending_table(struct page *pt) { - free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ)); + unsigned long va = (unsigned long)page_address(pt); + + set_memory_encrypted(va, LPI_PENDBASE_SZ >> PAGE_SHIFT); + free_pages(va, get_order(LPI_PENDBASE_SZ)); } /* @@ -3268,14 +3285,20 @@ static bool its_alloc_table_entry(struct its_node *its, /* Allocate memory for 2nd level table */ if (!table[idx]) { + void *l2addr; + page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, get_order(baser->psz)); if (!page) return false; + l2addr = page_address(page); + set_memory_decrypted((unsigned long)l2addr, + baser->psz >> PAGE_SHIFT); + /* Flush Lvl2 table to PoC if hw doesn't support coherency */ if (!(baser->val & GITS_BASER_SHAREABILITY_MASK)) - gic_flush_dcache_to_poc(page_address(page), baser->psz); + gic_flush_dcache_to_poc(l2addr, baser->psz); table[idx] = cpu_to_le64(page_to_phys(page) | GITS_BASER_VALID); @@ -5042,6 +5065,8 @@ static int __init its_probe_one(struct resource *res, its->fwnode_handle = handle; its->get_msi_base = its_irq_get_msi_base; its->msi_domain_flags = IRQ_DOMAIN_FLAG_MSI_REMAP; + set_memory_decrypted((unsigned long)its->cmd_base, + ITS_CMD_QUEUE_SZ >> PAGE_SHIFT); its_enable_quirks(its); @@ -5098,6 +5123,8 @@ static int __init its_probe_one(struct resource *res, out_free_tables: its_free_tables(its); out_free_cmd: + set_memory_encrypted((unsigned long)its->cmd_base, + ITS_CMD_QUEUE_SZ >> PAGE_SHIFT); free_pages((unsigned long)its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); out_unmap_sgir: if (its->sgir_base) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 8013d898a20f123e21dd65f9531e674518c4ccde..66d98d76936ca0aa0b23c76599bb804098f313cc 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -912,6 +912,22 @@ static int __gic_update_rdist_properties(struct redist_region *region, { u64 typer = gic_read_typer(ptr + GICR_TYPER); + /* Boot-time cleanip */ + if ((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)) { + u64 val; + + /* Deactivate any present vPE */ + val = gicr_read_vpendbaser(ptr + SZ_128K + GICR_VPENDBASER); + if (val & GICR_VPENDBASER_Valid) + gicr_write_vpendbaser(GICR_VPENDBASER_PendingLast, + ptr + SZ_128K + GICR_VPENDBASER); + + /* Mark the VPE table as invalid */ + val = gicr_read_vpropbaser(ptr + SZ_128K + GICR_VPROPBASER); + val &= ~GICR_VPROPBASER_4_1_VALID; + gicr_write_vpropbaser(val, ptr + SZ_128K + GICR_VPROPBASER); + } + gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS); /* RVPEID implies some form of DirectLPI, no matter what the doc says... :-/ */ diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index e35f453a200197854d6722ae535b02741364898f..a31eb0e810c1a8e2194e1fb02a37dc2768adafd8 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -107,6 +107,8 @@ static DEFINE_RAW_SPINLOCK(cpu_map_lock); #endif +static DEFINE_STATIC_KEY_FALSE(needs_rmw_access); + /* * The GIC mapping of CPU interfaces does not necessarily match * the logical CPU numbering. Let's use a mapping as returned @@ -777,6 +779,25 @@ static int gic_pm_init(struct gic_chip_data *gic) #endif #ifdef CONFIG_SMP +static void rmw_writeb(u8 bval, void __iomem *addr) +{ + static DEFINE_RAW_SPINLOCK(rmw_lock); + unsigned long offset = (unsigned long)addr & 3UL; + unsigned long shift = offset * 8; + unsigned long flags; + u32 val; + + raw_spin_lock_irqsave(&rmw_lock, flags); + + addr -= offset; + val = readl_relaxed(addr); + val &= ~GENMASK(shift + 7, shift); + val |= bval << shift; + writel_relaxed(val, addr); + + raw_spin_unlock_irqrestore(&rmw_lock, flags); +} + static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force) { @@ -791,7 +812,10 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, if (cpu >= NR_GIC_CPU_IF || cpu >= nr_cpu_ids) return -EINVAL; - writeb_relaxed(gic_cpu_map[cpu], reg); + if (static_branch_unlikely(&needs_rmw_access)) + rmw_writeb(gic_cpu_map[cpu], reg); + else + writeb_relaxed(gic_cpu_map[cpu], reg); irq_data_update_effective_affinity(d, cpumask_of(cpu)); return IRQ_SET_MASK_OK_DONE; @@ -1384,6 +1408,30 @@ static bool gic_check_eoimode(struct device_node *node, void __iomem **base) return true; } +static bool gic_enable_rmw_access(void *data) +{ + /* + * The EMEV2 class of machines has a broken interconnect, and + * locks up on accesses that are less than 32bit. So far, only + * the affinity setting requires it. + */ + if (of_machine_is_compatible("renesas,emev2")) { + static_branch_enable(&needs_rmw_access); + return true; + } + + return false; +} + +static const struct gic_quirk gic_quirks[] = { + { + .desc = "broken byte access", + .compatible = "arm,pl390", + .init = gic_enable_rmw_access, + }, + { }, +}; + static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node) { if (!gic || !node) @@ -1400,6 +1448,8 @@ static int gic_of_setup(struct gic_chip_data *gic, struct device_node *node) if (of_property_read_u32(node, "cpu-offset", &gic->percpu_offset)) gic->percpu_offset = 0; + gic_enable_of_quirks(node, gic_quirks, gic); + return 0; error: diff --git a/drivers/irqchip/irq-nvic.c b/drivers/irqchip/irq-nvic.c index f747e2209ea995c29803c5c8dbf55a1fe46a47d6..21cb31ff2bbf25777d34c6b12ed86c6458724461 100644 --- a/drivers/irqchip/irq-nvic.c +++ b/drivers/irqchip/irq-nvic.c @@ -26,7 +26,7 @@ #define NVIC_ISER 0x000 #define NVIC_ICER 0x080 -#define NVIC_IPR 0x300 +#define NVIC_IPR 0x400 #define NVIC_MAX_BANKS 16 /* diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 6f432d2a5cebdc388f5c7d88e1c807f4d83fc28f..bd99ee0ae433d9d0a014831c61c3f7119409d326 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -163,7 +163,13 @@ static void plic_irq_eoi(struct irq_data *d) { struct plic_handler *handler = this_cpu_ptr(&plic_handlers); - writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + if (irqd_irq_masked(d)) { + plic_irq_unmask(d); + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + plic_irq_mask(d); + } else { + writel(d->hwirq, handler->hart_base + CONTEXT_CLAIM); + } } static struct irq_chip plic_chip = { @@ -394,3 +400,4 @@ out_free_priv: IRQCHIP_DECLARE(sifive_plic, "sifive,plic-1.0.0", plic_init); IRQCHIP_DECLARE(riscv_plic0, "riscv,plic0", plic_init); /* for legacy systems */ +IRQCHIP_DECLARE(thead_c900_plic, "thead,c900-plic", plic_init); /* for firmware driver */ diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index cb0afe8971623668b232173e38dc7aad63d887c4..7313454e403a630d64196a07bfc805891753f00f 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -480,6 +480,11 @@ int detach_capi_ctr(struct capi_ctr *ctr) ctr_down(ctr, CAPI_CTR_DETACHED); + if (ctr->cnr < 1 || ctr->cnr - 1 >= CAPI_MAXCONTR) { + err = -EINVAL; + goto unlock_out; + } + if (capi_controller[ctr->cnr - 1] != ctr) { err = -EINVAL; goto unlock_out; diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index e501cb03f211dee02a0f32be36a7017edfaef970..af17459c1a5c02d847d801be97f72605c1870e51 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -1994,25 +1994,29 @@ setup_hw(struct hfc_pci *hc) pci_set_master(hc->pdev); if (!hc->irq) { printk(KERN_WARNING "HFC-PCI: No IRQ for PCI card found\n"); - return 1; + return -EINVAL; } hc->hw.pci_io = (char __iomem *)(unsigned long)hc->pdev->resource[1].start; if (!hc->hw.pci_io) { printk(KERN_WARNING "HFC-PCI: No IO-Mem for PCI card found\n"); - return 1; + return -ENOMEM; } /* Allocate memory for FIFOS */ /* the memory needs to be on a 32k boundary within the first 4G */ - dma_set_mask(&hc->pdev->dev, 0xFFFF8000); + if (dma_set_mask(&hc->pdev->dev, 0xFFFF8000)) { + printk(KERN_WARNING + "HFC-PCI: No usable DMA configuration!\n"); + return -EIO; + } buffer = dma_alloc_coherent(&hc->pdev->dev, 0x8000, &hc->hw.dmahandle, GFP_KERNEL); /* We silently assume the address is okay if nonzero */ if (!buffer) { printk(KERN_WARNING "HFC-PCI: Error allocating memory for FIFO!\n"); - return 1; + return -ENOMEM; } hc->hw.fifos = buffer; pci_write_config_dword(hc->pdev, 0x80, hc->hw.dmahandle); @@ -2022,7 +2026,7 @@ setup_hw(struct hfc_pci *hc) "HFC-PCI: Error in ioremap for PCI!\n"); dma_free_coherent(&hc->pdev->dev, 0x8000, hc->hw.fifos, hc->hw.dmahandle); - return 1; + return -ENOMEM; } printk(KERN_INFO diff --git a/drivers/isdn/hardware/mISDN/netjet.c b/drivers/isdn/hardware/mISDN/netjet.c index 2a1ddd47a0968da8db09c9800b5219547ca5e43f..a52f275f826348475b0f49a241ee73b2c6553fca 100644 --- a/drivers/isdn/hardware/mISDN/netjet.c +++ b/drivers/isdn/hardware/mISDN/netjet.c @@ -949,8 +949,8 @@ nj_release(struct tiger_hw *card) nj_disable_hwirq(card); mode_tiger(&card->bc[0], ISDN_P_NONE); mode_tiger(&card->bc[1], ISDN_P_NONE); - card->isac.release(&card->isac); spin_unlock_irqrestore(&card->lock, flags); + card->isac.release(&card->isac); release_region(card->base, card->base_s); card->base_s = 0; } diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c index 55891e4204460c641a649327c10d41a5213f16a8..a41b4b264594121e45b338d151c59fd4937c16c4 100644 --- a/drivers/isdn/mISDN/core.c +++ b/drivers/isdn/mISDN/core.c @@ -381,7 +381,7 @@ mISDNInit(void) err = mISDN_inittimer(&debug); if (err) goto error2; - err = l1_init(&debug); + err = Isdnl1_Init(&debug); if (err) goto error3; err = Isdnl2_Init(&debug); @@ -395,7 +395,7 @@ mISDNInit(void) error5: Isdnl2_cleanup(); error4: - l1_cleanup(); + Isdnl1_cleanup(); error3: mISDN_timer_cleanup(); error2: @@ -408,7 +408,7 @@ static void mISDN_cleanup(void) { misdn_sock_cleanup(); Isdnl2_cleanup(); - l1_cleanup(); + Isdnl1_cleanup(); mISDN_timer_cleanup(); class_unregister(&mISDN_class); diff --git a/drivers/isdn/mISDN/core.h b/drivers/isdn/mISDN/core.h index 23b44d3033279b07a5fc721715e405e8f1cff11d..42599f49c189df0990e47dc043416d48cc1cab67 100644 --- a/drivers/isdn/mISDN/core.h +++ b/drivers/isdn/mISDN/core.h @@ -60,8 +60,8 @@ struct Bprotocol *get_Bprotocol4id(u_int); extern int mISDN_inittimer(u_int *); extern void mISDN_timer_cleanup(void); -extern int l1_init(u_int *); -extern void l1_cleanup(void); +extern int Isdnl1_Init(u_int *); +extern void Isdnl1_cleanup(void); extern int Isdnl2_Init(u_int *); extern void Isdnl2_cleanup(void); diff --git a/drivers/isdn/mISDN/dsp_pipeline.c b/drivers/isdn/mISDN/dsp_pipeline.c index 40588692cec74ed5861b32c193f238d320e8e24b..c3b2c99b5cd5ceaf12c9fc7dcd929840ec6b3870 100644 --- a/drivers/isdn/mISDN/dsp_pipeline.c +++ b/drivers/isdn/mISDN/dsp_pipeline.c @@ -17,9 +17,6 @@ #include "dsp.h" #include "dsp_hwec.h" -/* uncomment for debugging */ -/*#define PIPELINE_DEBUG*/ - struct dsp_pipeline_entry { struct mISDN_dsp_element *elem; void *p; @@ -104,10 +101,6 @@ int mISDN_dsp_element_register(struct mISDN_dsp_element *elem) } } -#ifdef PIPELINE_DEBUG - printk(KERN_DEBUG "%s: %s registered\n", __func__, elem->name); -#endif - return 0; err2: @@ -129,10 +122,6 @@ void mISDN_dsp_element_unregister(struct mISDN_dsp_element *elem) list_for_each_entry_safe(entry, n, &dsp_elements, list) if (entry->elem == elem) { device_unregister(&entry->dev); -#ifdef PIPELINE_DEBUG - printk(KERN_DEBUG "%s: %s unregistered\n", - __func__, elem->name); -#endif return; } printk(KERN_ERR "%s: element %s not in list.\n", __func__, elem->name); @@ -145,10 +134,6 @@ int dsp_pipeline_module_init(void) if (IS_ERR(elements_class)) return PTR_ERR(elements_class); -#ifdef PIPELINE_DEBUG - printk(KERN_DEBUG "%s: dsp pipeline module initialized\n", __func__); -#endif - dsp_hwec_init(); return 0; @@ -168,10 +153,6 @@ void dsp_pipeline_module_exit(void) __func__, entry->elem->name); kfree(entry); } - -#ifdef PIPELINE_DEBUG - printk(KERN_DEBUG "%s: dsp pipeline module exited\n", __func__); -#endif } int dsp_pipeline_init(struct dsp_pipeline *pipeline) @@ -181,10 +162,6 @@ int dsp_pipeline_init(struct dsp_pipeline *pipeline) INIT_LIST_HEAD(&pipeline->list); -#ifdef PIPELINE_DEBUG - printk(KERN_DEBUG "%s: dsp pipeline ready\n", __func__); -#endif - return 0; } @@ -210,16 +187,12 @@ void dsp_pipeline_destroy(struct dsp_pipeline *pipeline) return; _dsp_pipeline_destroy(pipeline); - -#ifdef PIPELINE_DEBUG - printk(KERN_DEBUG "%s: dsp pipeline destroyed\n", __func__); -#endif } int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg) { - int incomplete = 0, found = 0; - char *dup, *tok, *name, *args; + int found = 0; + char *dup, *next, *tok, *name, *args; struct dsp_element_entry *entry, *n; struct dsp_pipeline_entry *pipeline_entry; struct mISDN_dsp_element *elem; @@ -230,10 +203,10 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg) if (!list_empty(&pipeline->list)) _dsp_pipeline_destroy(pipeline); - dup = kstrdup(cfg, GFP_ATOMIC); + dup = next = kstrdup(cfg, GFP_ATOMIC); if (!dup) return 0; - while ((tok = strsep(&dup, "|"))) { + while ((tok = strsep(&next, "|"))) { if (!strlen(tok)) continue; name = strsep(&tok, "("); @@ -251,7 +224,6 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg) printk(KERN_ERR "%s: failed to add " "entry to pipeline: %s (out of " "memory)\n", __func__, elem->name); - incomplete = 1; goto _out; } pipeline_entry->elem = elem; @@ -268,20 +240,12 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg) if (pipeline_entry->p) { list_add_tail(&pipeline_entry-> list, &pipeline->list); -#ifdef PIPELINE_DEBUG - printk(KERN_DEBUG "%s: created " - "instance of %s%s%s\n", - __func__, name, args ? - " with args " : "", args ? - args : ""); -#endif } else { printk(KERN_ERR "%s: failed " "to add entry to pipeline: " "%s (new() returned NULL)\n", __func__, elem->name); kfree(pipeline_entry); - incomplete = 1; } } found = 1; @@ -290,11 +254,9 @@ int dsp_pipeline_build(struct dsp_pipeline *pipeline, const char *cfg) if (found) found = 0; - else { + else printk(KERN_ERR "%s: element not found, skipping: " "%s\n", __func__, name); - incomplete = 1; - } } _out: @@ -303,10 +265,6 @@ _out: else pipeline->inuse = 0; -#ifdef PIPELINE_DEBUG - printk(KERN_DEBUG "%s: dsp pipeline built%s: %s\n", - __func__, incomplete ? " incomplete" : "", cfg); -#endif kfree(dup); return 0; } diff --git a/drivers/isdn/mISDN/layer1.c b/drivers/isdn/mISDN/layer1.c index 98a3bc6c17009485d9a6322982f48d3db24d36bf..7b31c25a550e3f9da3438e9c201dbdff6159c4bb 100644 --- a/drivers/isdn/mISDN/layer1.c +++ b/drivers/isdn/mISDN/layer1.c @@ -398,7 +398,7 @@ create_l1(struct dchannel *dch, dchannel_l1callback *dcb) { EXPORT_SYMBOL(create_l1); int -l1_init(u_int *deb) +Isdnl1_Init(u_int *deb) { debug = deb; l1fsm_s.state_count = L1S_STATE_COUNT; @@ -409,7 +409,7 @@ l1_init(u_int *deb) } void -l1_cleanup(void) +Isdnl1_cleanup(void) { mISDN_FsmFree(&l1fsm_s); } diff --git a/drivers/md/dm-bow.c b/drivers/md/dm-bow.c index cfd1fa63ff9715a15a835a68ccdca6b3b4f3b8fa..2e95d614969202038dbca09fccbc1f8be4e70cbb 100644 --- a/drivers/md/dm-bow.c +++ b/drivers/md/dm-bow.c @@ -599,6 +599,7 @@ static void dm_bow_dtr(struct dm_target *ti) struct bow_context *bc = (struct bow_context *) ti->private; struct kobject *kobj; + mutex_lock(&bc->ranges_lock); while (rb_first(&bc->ranges)) { struct bow_range *br = container_of(rb_first(&bc->ranges), struct bow_range, node); @@ -606,6 +607,8 @@ static void dm_bow_dtr(struct dm_target *ti) rb_erase(&br->node, &bc->ranges); kfree(br); } + mutex_unlock(&bc->ranges_lock); + if (bc->workqueue) destroy_workqueue(bc->workqueue); if (bc->bufio) @@ -1181,6 +1184,7 @@ static void dm_bow_tablestatus(struct dm_target *ti, char *result, return; } + mutex_lock(&bc->ranges_lock); for (i = rb_first(&bc->ranges); i; i = rb_next(i)) { struct bow_range *br = container_of(i, struct bow_range, node); @@ -1188,11 +1192,11 @@ static void dm_bow_tablestatus(struct dm_target *ti, char *result, readable_type[br->type], (unsigned long long)br->sector); if (result >= end) - return; + goto unlock; result += scnprintf(result, end - result, "\n"); if (result >= end) - return; + goto unlock; if (br->type == TRIMMED) ++trimmed_range_count; @@ -1214,19 +1218,22 @@ static void dm_bow_tablestatus(struct dm_target *ti, char *result, if (!rb_next(i)) { scnprintf(result, end - result, "\nERROR: Last range not of type TOP"); - return; + goto unlock; } if (br->sector > range_top(br)) { scnprintf(result, end - result, "\nERROR: sectors out of order"); - return; + goto unlock; } } if (trimmed_range_count != trimmed_list_length) scnprintf(result, end - result, "\nERROR: not all trimmed ranges in trimmed list"); + +unlock: + mutex_unlock(&bc->ranges_lock); } static void dm_bow_status(struct dm_target *ti, status_type_t type, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 4312007d2d34b8307059dd0c33c98ceab5e1d86d..2d3cda0acacb6a5c3d916b602d19268dd08ecc38 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -341,8 +341,8 @@ static void do_region(int op, int op_flags, unsigned region, num_bvecs = 1; break; default: - num_bvecs = min_t(int, BIO_MAX_PAGES, - dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT))); + num_bvecs = bio_max_segs(dm_sector_div_up(remaining, + (PAGE_SIZE >> SECTOR_SHIFT))); } bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios); diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index e3d35c6c9f714d49d6187fbdc2eac1a9df40e29a..57882654ffee79eee87de2b582ccaf124a559000 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -264,15 +264,14 @@ static int write_inline_data(struct log_writes_c *lc, void *entry, size_t entrylen, void *data, size_t datalen, sector_t sector) { - int num_pages, bio_pages, pg_datalen, pg_sectorlen, i; + int bio_pages, pg_datalen, pg_sectorlen, i; struct page *page; struct bio *bio; size_t ret; void *ptr; while (datalen) { - num_pages = ALIGN(datalen, PAGE_SIZE) >> PAGE_SHIFT; - bio_pages = min(num_pages, BIO_MAX_PAGES); + bio_pages = bio_max_segs(DIV_ROUND_UP(datalen, PAGE_SIZE)); atomic_inc(&lc->io_blocks); @@ -364,7 +363,7 @@ static int log_one_block(struct log_writes_c *lc, goto out; atomic_inc(&lc->io_blocks); - bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES)); + bio = bio_alloc(GFP_KERNEL, bio_max_segs(block->vec_cnt)); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; @@ -386,7 +385,8 @@ static int log_one_block(struct log_writes_c *lc, if (ret != block->vecs[i].bv_len) { atomic_inc(&lc->io_blocks); submit_bio(bio); - bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES)); + bio = bio_alloc(GFP_KERNEL, + bio_max_segs(block->vec_cnt - i)); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 808a98ef624c305c53815efb82809f27f0810fb9..9d5c6dd5b7564f91b40e50409bb43c1532cb0715 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -475,6 +475,7 @@ static int verity_verify_io(struct dm_verity_io *io) struct bvec_iter start; unsigned b; struct crypto_wait wait; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size); for (b = 0; b < io->n_blocks; b++) { int r; @@ -529,9 +530,17 @@ static int verity_verify_io(struct dm_verity_io *io) else if (verity_fec_decode(v, io, DM_VERITY_BLOCK_TYPE_DATA, cur_block, NULL, &start) == 0) continue; - else if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, + else { + if (bio->bi_status) { + /* + * Error correction failed; Just return error + */ + return -EIO; + } + if (verity_handle_err(v, DM_VERITY_BLOCK_TYPE_DATA, cur_block)) - return -EIO; + return -EIO; + } } return 0; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 08b0d3a846484e23a80704c0f5a3a27834b4201e..9fd176e225aff4f192e3b4d5d8323b324ce21fa2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -608,18 +608,17 @@ static void start_io_acct(struct dm_io *io) false, 0, &io->stats_aux); } -static void end_io_acct(struct dm_io *io) +static void end_io_acct(struct mapped_device *md, struct bio *bio, + unsigned long start_time, struct dm_stats_aux *stats_aux) { - struct mapped_device *md = io->md; - struct bio *bio = io->orig_bio; - unsigned long duration = jiffies - io->start_time; + unsigned long duration = jiffies - start_time; - bio_end_io_acct(bio, io->start_time); + bio_end_io_acct(bio, start_time); if (unlikely(dm_stats_used(&md->stats))) dm_stats_account_io(&md->stats, bio_data_dir(bio), bio->bi_iter.bi_sector, bio_sectors(bio), - true, duration, &io->stats_aux); + true, duration, stats_aux); /* nudge anyone waiting on suspend queue */ if (unlikely(wq_has_sleeper(&md->wait))) @@ -904,6 +903,8 @@ static void dec_pending(struct dm_io *io, blk_status_t error) blk_status_t io_error; struct bio *bio; struct mapped_device *md = io->md; + unsigned long start_time = 0; + struct dm_stats_aux stats_aux; /* Push-back supersedes any I/O errors */ if (unlikely(error)) { @@ -930,8 +931,10 @@ static void dec_pending(struct dm_io *io, blk_status_t error) io_error = io->status; bio = io->orig_bio; - end_io_acct(io); + start_time = io->start_time; + stats_aux = io->stats_aux; free_io(md, io); + end_io_acct(md, bio, start_time, &stats_aux); if (io_error == BLK_STS_DM_REQUEUE) return; @@ -1910,8 +1913,10 @@ static struct mapped_device *alloc_dev(int minor) if (IS_ENABLED(CONFIG_DAX_DRIVER)) { md->dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops, 0); - if (IS_ERR(md->dax_dev)) + if (IS_ERR(md->dax_dev)) { + md->dax_dev = NULL; goto bad; + } } add_disk_no_queue_reg(md->disk); diff --git a/drivers/md/md.c b/drivers/md/md.c index f16f190546ef3ae38e63d15da7b1aa34f953dc80..cc3876500c4b276b39eda733401c895e29e48253 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -459,34 +459,12 @@ check_suspended: } EXPORT_SYMBOL(md_handle_request); -struct md_io { - struct mddev *mddev; - bio_end_io_t *orig_bi_end_io; - void *orig_bi_private; - unsigned long start_time; - struct hd_struct *part; -}; - -static void md_end_io(struct bio *bio) -{ - struct md_io *md_io = bio->bi_private; - struct mddev *mddev = md_io->mddev; - - part_end_io_acct(md_io->part, bio, md_io->start_time); - - bio->bi_end_io = md_io->orig_bi_end_io; - bio->bi_private = md_io->orig_bi_private; - - mempool_free(md_io, &mddev->md_io_pool); - - if (bio->bi_end_io) - bio->bi_end_io(bio); -} - static blk_qc_t md_submit_bio(struct bio *bio) { const int rw = bio_data_dir(bio); + const int sgrp = op_stat_group(bio_op(bio)); struct mddev *mddev = bio->bi_disk->private_data; + unsigned int sectors; if (mddev == NULL || mddev->pers == NULL) { bio_io_error(bio); @@ -507,26 +485,21 @@ static blk_qc_t md_submit_bio(struct bio *bio) return BLK_QC_T_NONE; } - if (bio->bi_end_io != md_end_io) { - struct md_io *md_io; - - md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO); - md_io->mddev = mddev; - md_io->orig_bi_end_io = bio->bi_end_io; - md_io->orig_bi_private = bio->bi_private; - - bio->bi_end_io = md_end_io; - bio->bi_private = md_io; - - md_io->start_time = part_start_io_acct(mddev->gendisk, - &md_io->part, bio); - } - + /* + * save the sectors now since our bio can + * go away inside make_request + */ + sectors = bio_sectors(bio); /* bio could be mergeable after passing to underlayer */ bio->bi_opf &= ~REQ_NOMERGE; md_handle_request(mddev, bio); + part_stat_lock(); + part_stat_inc(&mddev->gendisk->part0, ios[sgrp]); + part_stat_add(&mddev->gendisk->part0, sectors[sgrp], sectors); + part_stat_unlock(); + return BLK_QC_T_NONE; } @@ -2252,6 +2225,7 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) if (!num_sectors || num_sectors > max_sectors) num_sectors = max_sectors; + rdev->sb_start = sb_start; } sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); @@ -3024,7 +2998,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) * -write_error - clears WriteErrorSeen * {,-}failfast - set/clear FailFast */ + + struct mddev *mddev = rdev->mddev; int err = -EINVAL; + bool need_update_sb = false; + if (cmd_match(buf, "faulty") && rdev->mddev->pers) { md_error(rdev->mddev, rdev); if (test_bit(Faulty, &rdev->flags)) @@ -3039,7 +3017,6 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) if (rdev->raid_disk >= 0) err = -EBUSY; else { - struct mddev *mddev = rdev->mddev; err = 0; if (mddev_is_clustered(mddev)) err = md_cluster_ops->remove_disk(mddev, rdev); @@ -3056,10 +3033,12 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) } else if (cmd_match(buf, "writemostly")) { set_bit(WriteMostly, &rdev->flags); mddev_create_serial_pool(rdev->mddev, rdev, false); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "-writemostly")) { mddev_destroy_serial_pool(rdev->mddev, rdev, false); clear_bit(WriteMostly, &rdev->flags); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "blocked")) { set_bit(Blocked, &rdev->flags); @@ -3085,9 +3064,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) err = 0; } else if (cmd_match(buf, "failfast")) { set_bit(FailFast, &rdev->flags); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "-failfast")) { clear_bit(FailFast, &rdev->flags); + need_update_sb = true; err = 0; } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags)) { @@ -3166,6 +3147,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) clear_bit(ExternalBbl, &rdev->flags); err = 0; } + if (need_update_sb) + md_update_sb(mddev, 1); if (!err) sysfs_notify_dirent_safe(rdev->sysfs_state); return err ? err : len; @@ -5626,7 +5609,6 @@ static void md_free(struct kobject *ko) bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); - mempool_exit(&mddev->md_io_pool); kfree(mddev); } @@ -5722,11 +5704,6 @@ static int md_alloc(dev_t dev, char *name) */ mddev->hold_active = UNTIL_STOP; - error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE, - sizeof(struct md_io)); - if (error) - goto abort; - error = -ENOMEM; mddev->queue = blk_alloc_queue(NUMA_NO_NODE); if (!mddev->queue) diff --git a/drivers/md/md.h b/drivers/md/md.h index 2175a5ac4f7c6891239c7b2292b996489deb0f93..c94811cf26004f78437e5c2017c55ab677337471 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -487,7 +487,6 @@ struct mddev { struct bio_set sync_set; /* for sync operations like * metadata and bitmap writes */ - mempool_t md_io_pool; /* Generic flush handling. * The last to finish preflush schedules a worker to submit diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index 9e4d1212f4c16caefd33b51752b8bdeda0018423..63f2baed3c8a6e8d5f4d85abf2dd39e5951ea70a 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -423,9 +423,9 @@ static int rebalance_children(struct shadow_spine *s, memcpy(n, dm_block_data(child), dm_bm_block_size(dm_tm_get_bm(info->tm))); - dm_tm_unlock(info->tm, child); dm_tm_dec(info->tm, dm_block_location(child)); + dm_tm_unlock(info->tm, child); return 0; } diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index ef6e78d45d5b8bfe8e63200611b97131bbebd45e..ee3e63aa864bfd9ffbf79282018025e781505dd8 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -83,14 +83,16 @@ void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, } static int insert_at(size_t value_size, struct btree_node *node, unsigned index, - uint64_t key, void *value) - __dm_written_to_disk(value) + uint64_t key, void *value) + __dm_written_to_disk(value) { uint32_t nr_entries = le32_to_cpu(node->header.nr_entries); + uint32_t max_entries = le32_to_cpu(node->header.max_entries); __le64 key_le = cpu_to_le64(key); if (index > nr_entries || - index >= le32_to_cpu(node->header.max_entries)) { + index >= max_entries || + nr_entries >= max_entries) { DMERR("too many entries in btree node for insert"); __dm_unbless_for_disk(value); return -ENOMEM; diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index a213bf11738fbbb10448d09abfb8d532679f011e..85853ab6297171c755114929712322fc682170c3 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -281,6 +281,11 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result) struct disk_index_entry ie_disk; struct dm_block *blk; + if (b >= ll->nr_blocks) { + DMERR_LIMIT("metadata block out of bounds"); + return -EINVAL; + } + b = do_div(index, ll->entries_per_block); r = ll->load_ie(ll, index, &ie_disk); if (r < 0) diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig index 118e7ddbfd8bdb7ce168eeb79b70f4f0acffb23b..04e8120c6140af70b0335ea5d8e06b765271173d 100644 --- a/drivers/media/Kconfig +++ b/drivers/media/Kconfig @@ -135,10 +135,10 @@ config MEDIA_TEST_SUPPORT bool "Test drivers" default y if !MEDIA_SUPPORT_FILTER help - Those drivers should not be used on production Kernels, but - can be useful on debug ones. It enables several dummy drivers - that simulate a real hardware. Very useful to test userspace - applications and to validate if the subsystem core is doesn't + These drivers should not be used on production kernels, but + can be useful on debug ones. This option enables several dummy drivers + that simulate real hardware. Very useful to test userspace + applications and to validate if the subsystem core doesn't have regressions. Say Y if you want to use some virtual test driver. diff --git a/drivers/media/cec/core/cec-adap.c b/drivers/media/cec/core/cec-adap.c index d5d5d28d0b36a5a0e9facb3acb9dab0a48fdcac1..2e5698fbc3a876d1a311e662122e42bc8e12e32f 100644 --- a/drivers/media/cec/core/cec-adap.c +++ b/drivers/media/cec/core/cec-adap.c @@ -1199,6 +1199,7 @@ void cec_received_msg_ts(struct cec_adapter *adap, if (abort) dst->rx_status |= CEC_RX_STATUS_FEATURE_ABORT; msg->flags = dst->flags; + msg->sequence = dst->sequence; /* Remove it from the wait_queue */ list_del_init(&data->list); diff --git a/drivers/media/cec/core/cec-pin.c b/drivers/media/cec/core/cec-pin.c index f006bd8eec63c0808f8dd09fb3ced709ae3642a5..f8452a1f9fc6c13bd14a22ec1b2679d8b3b78712 100644 --- a/drivers/media/cec/core/cec-pin.c +++ b/drivers/media/cec/core/cec-pin.c @@ -1033,6 +1033,7 @@ static int cec_pin_thread_func(void *_adap) { struct cec_adapter *adap = _adap; struct cec_pin *pin = adap->pin; + bool irq_enabled = false; for (;;) { wait_event_interruptible(pin->kthread_waitq, @@ -1060,6 +1061,7 @@ static int cec_pin_thread_func(void *_adap) ns_to_ktime(pin->work_rx_msg.rx_ts)); msg->len = 0; } + if (pin->work_tx_status) { unsigned int tx_status = pin->work_tx_status; @@ -1083,27 +1085,39 @@ static int cec_pin_thread_func(void *_adap) switch (atomic_xchg(&pin->work_irq_change, CEC_PIN_IRQ_UNCHANGED)) { case CEC_PIN_IRQ_DISABLE: - pin->ops->disable_irq(adap); + if (irq_enabled) { + pin->ops->disable_irq(adap); + irq_enabled = false; + } cec_pin_high(pin); cec_pin_to_idle(pin); hrtimer_start(&pin->timer, ns_to_ktime(0), HRTIMER_MODE_REL); break; case CEC_PIN_IRQ_ENABLE: + if (irq_enabled) + break; pin->enable_irq_failed = !pin->ops->enable_irq(adap); if (pin->enable_irq_failed) { cec_pin_to_idle(pin); hrtimer_start(&pin->timer, ns_to_ktime(0), HRTIMER_MODE_REL); + } else { + irq_enabled = true; } break; default: break; } - if (kthread_should_stop()) break; } + if (pin->ops->disable_irq && irq_enabled) + pin->ops->disable_irq(adap); + hrtimer_cancel(&pin->timer); + cec_pin_read(pin); + cec_pin_to_idle(pin); + pin->state = CEC_ST_OFF; return 0; } @@ -1130,13 +1144,7 @@ static int cec_pin_adap_enable(struct cec_adapter *adap, bool enable) hrtimer_start(&pin->timer, ns_to_ktime(0), HRTIMER_MODE_REL); } else { - if (pin->ops->disable_irq) - pin->ops->disable_irq(adap); - hrtimer_cancel(&pin->timer); kthread_stop(pin->kthread); - cec_pin_read(pin); - cec_pin_to_idle(pin); - pin->state = CEC_ST_OFF; } return 0; } @@ -1157,11 +1165,8 @@ void cec_pin_start_timer(struct cec_pin *pin) if (pin->state != CEC_ST_RX_IRQ) return; - atomic_set(&pin->work_irq_change, CEC_PIN_IRQ_UNCHANGED); - pin->ops->disable_irq(pin->adap); - cec_pin_high(pin); - cec_pin_to_idle(pin); - hrtimer_start(&pin->timer, ns_to_ktime(0), HRTIMER_MODE_REL); + atomic_set(&pin->work_irq_change, CEC_PIN_IRQ_DISABLE); + wake_up_interruptible(&pin->kthread_waitq); } static int cec_pin_adap_transmit(struct cec_adapter *adap, u8 attempts, diff --git a/drivers/media/common/saa7146/saa7146_fops.c b/drivers/media/common/saa7146/saa7146_fops.c index d6531874faa650f0e8367ac8d37e0490e299b7db..8047e305f3d014c768c91e83e3ff491409ea8460 100644 --- a/drivers/media/common/saa7146/saa7146_fops.c +++ b/drivers/media/common/saa7146/saa7146_fops.c @@ -523,7 +523,7 @@ int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv) ERR("out of memory. aborting.\n"); kfree(vv); v4l2_ctrl_handler_free(hdl); - return -1; + return -ENOMEM; } saa7146_video_uops.init(dev,vv); diff --git a/drivers/media/common/videobuf2/videobuf2-dma-contig.c b/drivers/media/common/videobuf2/videobuf2-dma-contig.c index 2f3a5996d3fc90a62e01f1e6c62876ace0c75ddb..fe626109ef4db62b381d5c0959fa0a693bea5d5e 100644 --- a/drivers/media/common/videobuf2/videobuf2-dma-contig.c +++ b/drivers/media/common/videobuf2/videobuf2-dma-contig.c @@ -150,7 +150,7 @@ static void *vb2_dc_alloc(struct device *dev, unsigned long attrs, buf->cookie = dma_alloc_attrs(dev, size, &buf->dma_addr, GFP_KERNEL | gfp_flags, buf->attrs); if (!buf->cookie) { - dev_err(dev, "dma_alloc_coherent of size %ld failed\n", size); + dev_err(dev, "dma_alloc_coherent of size %lu failed\n", size); kfree(buf); return ERR_PTR(-ENOMEM); } @@ -196,9 +196,9 @@ static int vb2_dc_mmap(void *buf_priv, struct vm_area_struct *vma) vma->vm_ops->open(vma); - pr_debug("%s: mapped dma addr 0x%08lx at 0x%08lx, size %ld\n", - __func__, (unsigned long)buf->dma_addr, vma->vm_start, - buf->size); + pr_debug("%s: mapped dma addr 0x%08lx at 0x%08lx, size %lu\n", + __func__, (unsigned long)buf->dma_addr, vma->vm_start, + buf->size); return 0; } diff --git a/drivers/media/dvb-core/dmxdev.c b/drivers/media/dvb-core/dmxdev.c index f14a872d126872c3fc695cbd3f8460c32d317b14..e58cb8434dafeea3323ea892028be6ffde3ca155 100644 --- a/drivers/media/dvb-core/dmxdev.c +++ b/drivers/media/dvb-core/dmxdev.c @@ -1413,7 +1413,7 @@ static const struct dvb_device dvbdev_dvr = { }; int dvb_dmxdev_init(struct dmxdev *dmxdev, struct dvb_adapter *dvb_adapter) { - int i; + int i, ret; if (dmxdev->demux->open(dmxdev->demux) < 0) return -EUSERS; @@ -1432,14 +1432,26 @@ int dvb_dmxdev_init(struct dmxdev *dmxdev, struct dvb_adapter *dvb_adapter) DMXDEV_STATE_FREE); } - dvb_register_device(dvb_adapter, &dmxdev->dvbdev, &dvbdev_demux, dmxdev, + ret = dvb_register_device(dvb_adapter, &dmxdev->dvbdev, &dvbdev_demux, dmxdev, DVB_DEVICE_DEMUX, dmxdev->filternum); - dvb_register_device(dvb_adapter, &dmxdev->dvr_dvbdev, &dvbdev_dvr, + if (ret < 0) + goto err_register_dvbdev; + + ret = dvb_register_device(dvb_adapter, &dmxdev->dvr_dvbdev, &dvbdev_dvr, dmxdev, DVB_DEVICE_DVR, dmxdev->filternum); + if (ret < 0) + goto err_register_dvr_dvbdev; dvb_ringbuffer_init(&dmxdev->dvr_buffer, NULL, 8192); return 0; + +err_register_dvr_dvbdev: + dvb_unregister_device(dmxdev->dvbdev); +err_register_dvbdev: + vfree(dmxdev->filter); + dmxdev->filter = NULL; + return ret; } EXPORT_SYMBOL(dvb_dmxdev_init); diff --git a/drivers/media/dvb-frontends/dib8000.c b/drivers/media/dvb-frontends/dib8000.c index bb02354a48b81b4c6c6ff3c23558eb1df31afc34..d67f2dd997d06188bba43fb266829ffd298e8a82 100644 --- a/drivers/media/dvb-frontends/dib8000.c +++ b/drivers/media/dvb-frontends/dib8000.c @@ -4473,8 +4473,10 @@ static struct dvb_frontend *dib8000_init(struct i2c_adapter *i2c_adap, u8 i2c_ad state->timf_default = cfg->pll->timf; - if (dib8000_identify(&state->i2c) == 0) + if (dib8000_identify(&state->i2c) == 0) { + kfree(fe); goto error; + } dibx000_init_i2c_master(&state->i2c_master, DIB8000, state->i2c.adap, state->i2c.addr); diff --git a/drivers/media/dvb-frontends/mn88443x.c b/drivers/media/dvb-frontends/mn88443x.c index e4528784f8477fe897c51a18fc7f5bcb9b9196e4..fff212c0bf3b5a4b36fc6b15b84018f27f1dfac8 100644 --- a/drivers/media/dvb-frontends/mn88443x.c +++ b/drivers/media/dvb-frontends/mn88443x.c @@ -204,11 +204,18 @@ struct mn88443x_priv { struct regmap *regmap_t; }; -static void mn88443x_cmn_power_on(struct mn88443x_priv *chip) +static int mn88443x_cmn_power_on(struct mn88443x_priv *chip) { + struct device *dev = &chip->client_s->dev; struct regmap *r_t = chip->regmap_t; + int ret; - clk_prepare_enable(chip->mclk); + ret = clk_prepare_enable(chip->mclk); + if (ret) { + dev_err(dev, "Failed to prepare and enable mclk: %d\n", + ret); + return ret; + } gpiod_set_value_cansleep(chip->reset_gpio, 1); usleep_range(100, 1000); @@ -222,6 +229,8 @@ static void mn88443x_cmn_power_on(struct mn88443x_priv *chip) } else { regmap_write(r_t, HIZSET3, 0x8f); } + + return 0; } static void mn88443x_cmn_power_off(struct mn88443x_priv *chip) @@ -738,7 +747,10 @@ static int mn88443x_probe(struct i2c_client *client, chip->fe.demodulator_priv = chip; i2c_set_clientdata(client, chip); - mn88443x_cmn_power_on(chip); + ret = mn88443x_cmn_power_on(chip); + if (ret) + goto err_i2c_t; + mn88443x_s_sleep(chip); mn88443x_t_sleep(chip); diff --git a/drivers/media/firewire/firedtv-avc.c b/drivers/media/firewire/firedtv-avc.c index 2bf9467b917d156ce9a074c33503a75fd091770b..71991f8638e6b8e58d145f1e455deeb8c43d3de2 100644 --- a/drivers/media/firewire/firedtv-avc.c +++ b/drivers/media/firewire/firedtv-avc.c @@ -1165,7 +1165,11 @@ int avc_ca_pmt(struct firedtv *fdtv, char *msg, int length) read_pos += program_info_length; write_pos += program_info_length; } - while (read_pos < length) { + while (read_pos + 4 < length) { + if (write_pos + 4 >= sizeof(c->operand) - 4) { + ret = -EINVAL; + goto out; + } c->operand[write_pos++] = msg[read_pos++]; c->operand[write_pos++] = msg[read_pos++]; c->operand[write_pos++] = msg[read_pos++]; @@ -1177,13 +1181,17 @@ int avc_ca_pmt(struct firedtv *fdtv, char *msg, int length) c->operand[write_pos++] = es_info_length >> 8; c->operand[write_pos++] = es_info_length & 0xff; if (es_info_length > 0) { + if (read_pos >= length) { + ret = -EINVAL; + goto out; + } pmt_cmd_id = msg[read_pos++]; if (pmt_cmd_id != 1 && pmt_cmd_id != 4) dev_err(fdtv->device, "invalid pmt_cmd_id %d at stream level\n", pmt_cmd_id); - if (es_info_length > sizeof(c->operand) - 4 - - write_pos) { + if (es_info_length > sizeof(c->operand) - 4 - write_pos || + es_info_length > length - read_pos) { ret = -EINVAL; goto out; } diff --git a/drivers/media/firewire/firedtv-ci.c b/drivers/media/firewire/firedtv-ci.c index 9363d005e2b612da8f6e7f4cdb96020c730879cb..e0d57e09dab0ccdaa225feb518095d03a42896ab 100644 --- a/drivers/media/firewire/firedtv-ci.c +++ b/drivers/media/firewire/firedtv-ci.c @@ -134,6 +134,8 @@ static int fdtv_ca_pmt(struct firedtv *fdtv, void *arg) } else { data_length = msg->msg[3]; } + if (data_length > sizeof(msg->msg) - data_pos) + return -EINVAL; return avc_ca_pmt(fdtv, &msg->msg[data_pos], data_length); } diff --git a/drivers/media/i2c/ir-kbd-i2c.c b/drivers/media/i2c/ir-kbd-i2c.c index 92376592455eebf2f8c0eb56a23e991e8944c255..56674173524fdb25775f812761e140253d723fb5 100644 --- a/drivers/media/i2c/ir-kbd-i2c.c +++ b/drivers/media/i2c/ir-kbd-i2c.c @@ -791,6 +791,7 @@ static int ir_probe(struct i2c_client *client, const struct i2c_device_id *id) rc_proto = RC_PROTO_BIT_RC5 | RC_PROTO_BIT_RC6_MCE | RC_PROTO_BIT_RC6_6A_32; ir_codes = RC_MAP_HAUPPAUGE; + ir->polling_interval = 125; probe_tx = true; break; } diff --git a/drivers/media/i2c/mt9p031.c b/drivers/media/i2c/mt9p031.c index dc23b9ed510a4c29571bbb77e64e37a952db895d..18440c5104ad917f3fbab1615fc59fe91f7fb5d4 100644 --- a/drivers/media/i2c/mt9p031.c +++ b/drivers/media/i2c/mt9p031.c @@ -78,7 +78,9 @@ #define MT9P031_PIXEL_CLOCK_INVERT (1 << 15) #define MT9P031_PIXEL_CLOCK_SHIFT(n) ((n) << 8) #define MT9P031_PIXEL_CLOCK_DIVIDE(n) ((n) << 0) -#define MT9P031_FRAME_RESTART 0x0b +#define MT9P031_RESTART 0x0b +#define MT9P031_FRAME_PAUSE_RESTART (1 << 1) +#define MT9P031_FRAME_RESTART (1 << 0) #define MT9P031_SHUTTER_DELAY 0x0c #define MT9P031_RST 0x0d #define MT9P031_RST_ENABLE 1 @@ -445,9 +447,23 @@ static int mt9p031_set_params(struct mt9p031 *mt9p031) static int mt9p031_s_stream(struct v4l2_subdev *subdev, int enable) { struct mt9p031 *mt9p031 = to_mt9p031(subdev); + struct i2c_client *client = v4l2_get_subdevdata(subdev); + int val; int ret; if (!enable) { + /* enable pause restart */ + val = MT9P031_FRAME_PAUSE_RESTART; + ret = mt9p031_write(client, MT9P031_RESTART, val); + if (ret < 0) + return ret; + + /* enable restart + keep pause restart set */ + val |= MT9P031_FRAME_RESTART; + ret = mt9p031_write(client, MT9P031_RESTART, val); + if (ret < 0) + return ret; + /* Stop sensor readout */ ret = mt9p031_set_output_control(mt9p031, MT9P031_OUTPUT_CONTROL_CEN, 0); @@ -467,6 +483,16 @@ static int mt9p031_s_stream(struct v4l2_subdev *subdev, int enable) if (ret < 0) return ret; + /* + * - clear pause restart + * - don't clear restart as clearing restart manually can cause + * undefined behavior + */ + val = MT9P031_FRAME_RESTART; + ret = mt9p031_write(client, MT9P031_RESTART, val); + if (ret < 0) + return ret; + return mt9p031_pll_enable(mt9p031); } diff --git a/drivers/media/i2c/tda1997x.c b/drivers/media/i2c/tda1997x.c index 17cc69c3227f8655200beaeaa92b465830b53e88..8476330964fc7ab8764711a095a6f3dee977b17c 100644 --- a/drivers/media/i2c/tda1997x.c +++ b/drivers/media/i2c/tda1997x.c @@ -1247,13 +1247,13 @@ tda1997x_parse_infoframe(struct tda1997x_state *state, u16 addr) { struct v4l2_subdev *sd = &state->sd; union hdmi_infoframe frame; - u8 buffer[40]; + u8 buffer[40] = { 0 }; u8 reg; int len, err; /* read data */ len = io_readn(sd, addr, sizeof(buffer), buffer); - err = hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)); + err = hdmi_infoframe_unpack(&frame, buffer, len); if (err) { v4l_err(state->client, "failed parsing %d byte infoframe: 0x%04x/0x%02x\n", @@ -1927,13 +1927,13 @@ static int tda1997x_log_infoframe(struct v4l2_subdev *sd, int addr) { struct tda1997x_state *state = to_state(sd); union hdmi_infoframe frame; - u8 buffer[40]; + u8 buffer[40] = { 0 }; int len, err; /* read data */ len = io_readn(sd, addr, sizeof(buffer), buffer); v4l2_dbg(1, debug, sd, "infoframe: addr=%d len=%d\n", addr, len); - err = hdmi_infoframe_unpack(&frame, buffer, sizeof(buffer)); + err = hdmi_infoframe_unpack(&frame, buffer, len); if (err) { v4l_err(state->client, "failed parsing %d byte infoframe: 0x%04x/0x%02x\n", diff --git a/drivers/media/pci/b2c2/flexcop-pci.c b/drivers/media/pci/b2c2/flexcop-pci.c index a9d9520a94c6d87437aa6e839099e569b28f91c8..c9e6c7d6637682cbf53c651a604b79cd9e0824df 100644 --- a/drivers/media/pci/b2c2/flexcop-pci.c +++ b/drivers/media/pci/b2c2/flexcop-pci.c @@ -185,6 +185,8 @@ static irqreturn_t flexcop_pci_isr(int irq, void *dev_id) dma_addr_t cur_addr = fc->read_ibi_reg(fc,dma1_008).dma_0x8.dma_cur_addr << 2; u32 cur_pos = cur_addr - fc_pci->dma[0].dma_addr0; + if (cur_pos > fc_pci->dma[0].size * 2) + goto error; deb_irq("%u irq: %08x cur_addr: %llx: cur_pos: %08x, last_cur_pos: %08x ", jiffies_to_usecs(jiffies - fc_pci->last_irq), @@ -225,6 +227,7 @@ static irqreturn_t flexcop_pci_isr(int irq, void *dev_id) ret = IRQ_NONE; } +error: spin_unlock_irqrestore(&fc_pci->irq_lock, flags); return ret; } diff --git a/drivers/media/pci/cx23885/cx23885-alsa.c b/drivers/media/pci/cx23885/cx23885-alsa.c index 13689c5dd47ffc622f352543c3e819378513f6fa..9154031c087d449b92a8b5009ddfa416a608fc99 100644 --- a/drivers/media/pci/cx23885/cx23885-alsa.c +++ b/drivers/media/pci/cx23885/cx23885-alsa.c @@ -550,7 +550,7 @@ struct cx23885_audio_dev *cx23885_audio_register(struct cx23885_dev *dev) SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1, THIS_MODULE, sizeof(struct cx23885_audio_dev), &card); if (err < 0) - goto error; + goto error_msg; chip = (struct cx23885_audio_dev *) card->private_data; chip->dev = dev; @@ -576,6 +576,7 @@ struct cx23885_audio_dev *cx23885_audio_register(struct cx23885_dev *dev) error: snd_card_free(card); +error_msg: pr_err("%s(): Failed to register analog audio adapter\n", __func__); diff --git a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c index 6f3125c2d09767c28f470fec210fce71ba07cf3e..77bae146855135693faf8bb358b55845cd7aff78 100644 --- a/drivers/media/pci/netup_unidvb/netup_unidvb_core.c +++ b/drivers/media/pci/netup_unidvb/netup_unidvb_core.c @@ -258,19 +258,24 @@ static irqreturn_t netup_unidvb_isr(int irq, void *dev_id) if ((reg40 & AVL_IRQ_ASSERTED) != 0) { /* IRQ is being signaled */ reg_isr = readw(ndev->bmmio0 + REG_ISR); - if (reg_isr & NETUP_UNIDVB_IRQ_I2C0) { - iret = netup_i2c_interrupt(&ndev->i2c[0]); - } else if (reg_isr & NETUP_UNIDVB_IRQ_I2C1) { - iret = netup_i2c_interrupt(&ndev->i2c[1]); - } else if (reg_isr & NETUP_UNIDVB_IRQ_SPI) { + if (reg_isr & NETUP_UNIDVB_IRQ_SPI) iret = netup_spi_interrupt(ndev->spi); - } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA1) { - iret = netup_dma_interrupt(&ndev->dma[0]); - } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA2) { - iret = netup_dma_interrupt(&ndev->dma[1]); - } else if (reg_isr & NETUP_UNIDVB_IRQ_CI) { - iret = netup_ci_interrupt(ndev); + else if (!ndev->old_fw) { + if (reg_isr & NETUP_UNIDVB_IRQ_I2C0) { + iret = netup_i2c_interrupt(&ndev->i2c[0]); + } else if (reg_isr & NETUP_UNIDVB_IRQ_I2C1) { + iret = netup_i2c_interrupt(&ndev->i2c[1]); + } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA1) { + iret = netup_dma_interrupt(&ndev->dma[0]); + } else if (reg_isr & NETUP_UNIDVB_IRQ_DMA2) { + iret = netup_dma_interrupt(&ndev->dma[1]); + } else if (reg_isr & NETUP_UNIDVB_IRQ_CI) { + iret = netup_ci_interrupt(ndev); + } else { + goto err; + } } else { +err: dev_err(&pci_dev->dev, "%s(): unknown interrupt 0x%x\n", __func__, reg_isr); diff --git a/drivers/media/pci/saa7146/hexium_gemini.c b/drivers/media/pci/saa7146/hexium_gemini.c index 2214c74bbbf1548b12b8122827a25c0b61a45756..3947701cd6c7e76835d3d8e58321a5a75c7d9c46 100644 --- a/drivers/media/pci/saa7146/hexium_gemini.c +++ b/drivers/media/pci/saa7146/hexium_gemini.c @@ -284,7 +284,12 @@ static int hexium_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_d hexium_set_input(hexium, 0); hexium->cur_input = 0; - saa7146_vv_init(dev, &vv_data); + ret = saa7146_vv_init(dev, &vv_data); + if (ret) { + i2c_del_adapter(&hexium->i2c_adapter); + kfree(hexium); + return ret; + } vv_data.vid_ops.vidioc_enum_input = vidioc_enum_input; vv_data.vid_ops.vidioc_g_input = vidioc_g_input; diff --git a/drivers/media/pci/saa7146/hexium_orion.c b/drivers/media/pci/saa7146/hexium_orion.c index 39d14c179d229da31c1d76a7c067b56a24c2883d..2eb4bee16b71f28175854cc636472c903f0f4228 100644 --- a/drivers/media/pci/saa7146/hexium_orion.c +++ b/drivers/media/pci/saa7146/hexium_orion.c @@ -355,10 +355,16 @@ static struct saa7146_ext_vv vv_data; static int hexium_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_data *info) { struct hexium *hexium = (struct hexium *) dev->ext_priv; + int ret; DEB_EE("\n"); - saa7146_vv_init(dev, &vv_data); + ret = saa7146_vv_init(dev, &vv_data); + if (ret) { + pr_err("Error in saa7146_vv_init()\n"); + return ret; + } + vv_data.vid_ops.vidioc_enum_input = vidioc_enum_input; vv_data.vid_ops.vidioc_g_input = vidioc_g_input; vv_data.vid_ops.vidioc_s_input = vidioc_s_input; diff --git a/drivers/media/pci/saa7146/mxb.c b/drivers/media/pci/saa7146/mxb.c index 73fc901ecf3db5ecd7312bb29af338886717244a..bf0b9b0914cd53cddf78c56eb4db2099302f7742 100644 --- a/drivers/media/pci/saa7146/mxb.c +++ b/drivers/media/pci/saa7146/mxb.c @@ -683,10 +683,16 @@ static struct saa7146_ext_vv vv_data; static int mxb_attach(struct saa7146_dev *dev, struct saa7146_pci_extension_data *info) { struct mxb *mxb; + int ret; DEB_EE("dev:%p\n", dev); - saa7146_vv_init(dev, &vv_data); + ret = saa7146_vv_init(dev, &vv_data); + if (ret) { + ERR("Error in saa7146_vv_init()"); + return ret; + } + if (mxb_probe(dev)) { saa7146_vv_release(dev); return -1; diff --git a/drivers/media/platform/aspeed-video.c b/drivers/media/platform/aspeed-video.c index 7bb6babdcade049bf7e6a806d19026a687f15a31..debc7509c173c41e218bbebbebeeeb9f4fc7ca1e 100644 --- a/drivers/media/platform/aspeed-video.c +++ b/drivers/media/platform/aspeed-video.c @@ -500,6 +500,10 @@ static void aspeed_video_enable_mode_detect(struct aspeed_video *video) aspeed_video_update(video, VE_INTERRUPT_CTRL, 0, VE_INTERRUPT_MODE_DETECT); + /* Disable mode detect in order to re-trigger */ + aspeed_video_update(video, VE_SEQ_CTRL, + VE_SEQ_CTRL_TRIG_MODE_DET, 0); + /* Trigger mode detect */ aspeed_video_update(video, VE_SEQ_CTRL, 0, VE_SEQ_CTRL_TRIG_MODE_DET); } @@ -552,6 +556,8 @@ static void aspeed_video_irq_res_change(struct aspeed_video *video, ulong delay) set_bit(VIDEO_RES_CHANGE, &video->flags); clear_bit(VIDEO_FRAME_INPRG, &video->flags); + video->v4l2_input_status = V4L2_IN_ST_NO_SIGNAL; + aspeed_video_off(video); aspeed_video_bufs_done(video, VB2_BUF_STATE_ERROR); @@ -786,10 +792,6 @@ static void aspeed_video_get_resolution(struct aspeed_video *video) return; } - /* Disable mode detect in order to re-trigger */ - aspeed_video_update(video, VE_SEQ_CTRL, - VE_SEQ_CTRL_TRIG_MODE_DET, 0); - aspeed_video_check_and_set_polarity(video); aspeed_video_enable_mode_detect(video); @@ -1337,7 +1339,6 @@ static void aspeed_video_resolution_work(struct work_struct *work) struct delayed_work *dwork = to_delayed_work(work); struct aspeed_video *video = container_of(dwork, struct aspeed_video, res_work); - u32 input_status = video->v4l2_input_status; aspeed_video_on(video); @@ -1350,8 +1351,7 @@ static void aspeed_video_resolution_work(struct work_struct *work) aspeed_video_get_resolution(video); if (video->detected_timings.width != video->active_timings.width || - video->detected_timings.height != video->active_timings.height || - input_status != video->v4l2_input_status) { + video->detected_timings.height != video->active_timings.height) { static const struct v4l2_event ev = { .type = V4L2_EVENT_SOURCE_CHANGE, .u.src_change.changes = V4L2_EVENT_SRC_CH_RESOLUTION, diff --git a/drivers/media/platform/coda/coda-common.c b/drivers/media/platform/coda/coda-common.c index 87a2c706f7477db4003ebcfe0c6578a34a4677b0..1eed69d29149f34c43fd155ff957c157623deac4 100644 --- a/drivers/media/platform/coda/coda-common.c +++ b/drivers/media/platform/coda/coda-common.c @@ -1537,11 +1537,13 @@ static void coda_pic_run_work(struct work_struct *work) if (!wait_for_completion_timeout(&ctx->completion, msecs_to_jiffies(1000))) { - dev_err(dev->dev, "CODA PIC_RUN timeout\n"); + if (ctx->use_bit) { + dev_err(dev->dev, "CODA PIC_RUN timeout\n"); - ctx->hold = true; + ctx->hold = true; - coda_hw_reset(ctx); + coda_hw_reset(ctx); + } if (ctx->ops->run_timeout) ctx->ops->run_timeout(ctx); diff --git a/drivers/media/platform/coda/coda-jpeg.c b/drivers/media/platform/coda/coda-jpeg.c index b11cfbe166dd3141568c10a16d02406385fa8f80..a72f4655e5ad56312d8fe4c113452f5b3baebe86 100644 --- a/drivers/media/platform/coda/coda-jpeg.c +++ b/drivers/media/platform/coda/coda-jpeg.c @@ -1127,7 +1127,8 @@ static int coda9_jpeg_prepare_encode(struct coda_ctx *ctx) coda_write(dev, 0, CODA9_REG_JPEG_GBU_BT_PTR); coda_write(dev, 0, CODA9_REG_JPEG_GBU_WD_PTR); coda_write(dev, 0, CODA9_REG_JPEG_GBU_BBSR); - coda_write(dev, 0, CODA9_REG_JPEG_BBC_STRM_CTRL); + coda_write(dev, BIT(31) | ((end_addr - start_addr - header_len) / 256), + CODA9_REG_JPEG_BBC_STRM_CTRL); coda_write(dev, 0, CODA9_REG_JPEG_GBU_CTRL); coda_write(dev, 0, CODA9_REG_JPEG_GBU_FF_RPTR); coda_write(dev, 127, CODA9_REG_JPEG_GBU_BBER); @@ -1257,6 +1258,23 @@ static void coda9_jpeg_finish_encode(struct coda_ctx *ctx) coda_hw_reset(ctx); } +static void coda9_jpeg_encode_timeout(struct coda_ctx *ctx) +{ + struct coda_dev *dev = ctx->dev; + u32 end_addr, wr_ptr; + + /* Handle missing BBC overflow interrupt via timeout */ + end_addr = coda_read(dev, CODA9_REG_JPEG_BBC_END_ADDR); + wr_ptr = coda_read(dev, CODA9_REG_JPEG_BBC_WR_PTR); + if (wr_ptr >= end_addr - 256) { + v4l2_err(&dev->v4l2_dev, "JPEG too large for capture buffer\n"); + coda9_jpeg_finish_encode(ctx); + return; + } + + coda_hw_reset(ctx); +} + static void coda9_jpeg_release(struct coda_ctx *ctx) { int i; @@ -1276,6 +1294,7 @@ const struct coda_context_ops coda9_jpeg_encode_ops = { .start_streaming = coda9_jpeg_start_encoding, .prepare_run = coda9_jpeg_prepare_encode, .finish_run = coda9_jpeg_finish_encode, + .run_timeout = coda9_jpeg_encode_timeout, .release = coda9_jpeg_release, }; diff --git a/drivers/media/platform/coda/imx-vdoa.c b/drivers/media/platform/coda/imx-vdoa.c index 8bc0d83718193f3028693cd566ab69133a3f274d..dd6e2e320264ef3eae9b70ea25691b6fb5c0396f 100644 --- a/drivers/media/platform/coda/imx-vdoa.c +++ b/drivers/media/platform/coda/imx-vdoa.c @@ -287,7 +287,11 @@ static int vdoa_probe(struct platform_device *pdev) struct resource *res; int ret; - dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(&pdev->dev, "DMA enable failed\n"); + return ret; + } vdoa = devm_kzalloc(&pdev->dev, sizeof(*vdoa), GFP_KERNEL); if (!vdoa) diff --git a/drivers/media/platform/imx-pxp.c b/drivers/media/platform/imx-pxp.c index 08d76eb05ed1a446d9827c13aa4a20d999fbb353..62356adebc39ed15c6855c92729ca102937ad943 100644 --- a/drivers/media/platform/imx-pxp.c +++ b/drivers/media/platform/imx-pxp.c @@ -1664,6 +1664,8 @@ static int pxp_probe(struct platform_device *pdev) if (irq < 0) return irq; + spin_lock_init(&dev->irqlock); + ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, pxp_irq_handler, IRQF_ONESHOT, dev_name(&pdev->dev), dev); if (ret < 0) { @@ -1681,8 +1683,6 @@ static int pxp_probe(struct platform_device *pdev) goto err_clk; } - spin_lock_init(&dev->irqlock); - ret = v4l2_device_register(&pdev->dev, &dev->v4l2_dev); if (ret) goto err_clk; diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c index 219c2c5b78efc3c1f7520d1989ffaa3f7d05cbed..5f93bc670edb20e56eef04967e881c07666f7d9d 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c @@ -237,11 +237,11 @@ static int fops_vcodec_release(struct file *file) mtk_v4l2_debug(1, "[%d] encoder", ctx->id); mutex_lock(&dev->dev_mutex); + v4l2_m2m_ctx_release(ctx->m2m_ctx); mtk_vcodec_enc_release(ctx); v4l2_fh_del(&ctx->fh); v4l2_fh_exit(&ctx->fh); v4l2_ctrl_handler_free(&ctx->ctrl_hdl); - v4l2_m2m_ctx_release(ctx->m2m_ctx); list_del_init(&ctx->list); kfree(ctx); diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c index 36cb9b6131f7e2ecc0a79ce731e3fff090684d7d..c62eb212cca92d7fed0ccd30e5b986c891bd9d03 100644 --- a/drivers/media/platform/mtk-vpu/mtk_vpu.c +++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c @@ -820,7 +820,8 @@ static int mtk_vpu_probe(struct platform_device *pdev) vpu->wdt.wq = create_singlethread_workqueue("vpu_wdt"); if (!vpu->wdt.wq) { dev_err(dev, "initialize wdt workqueue failed\n"); - return -ENOMEM; + ret = -ENOMEM; + goto clk_unprepare; } INIT_WORK(&vpu->wdt.ws, vpu_wdt_reset_func); mutex_init(&vpu->vpu_mutex); @@ -914,6 +915,8 @@ disable_vpu_clk: vpu_clock_disable(vpu); workqueue_destroy: destroy_workqueue(vpu->wdt.wq); +clk_unprepare: + clk_unprepare(vpu->clk); return ret; } diff --git a/drivers/media/platform/qcom/venus/core.c b/drivers/media/platform/qcom/venus/core.c index 58ddebbb84468a08363da2f45e72bfa03abe280f..62d11c6e41d60d2b18b068260160d2e4afc51189 100644 --- a/drivers/media/platform/qcom/venus/core.c +++ b/drivers/media/platform/qcom/venus/core.c @@ -222,7 +222,6 @@ static int venus_probe(struct platform_device *pdev) return -ENOMEM; core->dev = dev; - platform_set_drvdata(pdev, core); r = platform_get_resource(pdev, IORESOURCE_MEM, 0); core->base = devm_ioremap_resource(dev, r); @@ -252,7 +251,7 @@ static int venus_probe(struct platform_device *pdev) return -ENODEV; if (core->pm_ops->core_get) { - ret = core->pm_ops->core_get(dev); + ret = core->pm_ops->core_get(core); if (ret) return ret; } @@ -277,6 +276,12 @@ static int venus_probe(struct platform_device *pdev) if (ret) goto err_core_put; + ret = v4l2_device_register(dev, &core->v4l2_dev); + if (ret) + goto err_core_deinit; + + platform_set_drvdata(pdev, core); + pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); @@ -289,11 +294,11 @@ static int venus_probe(struct platform_device *pdev) ret = venus_firmware_init(core); if (ret) - goto err_runtime_disable; + goto err_of_depopulate; ret = venus_boot(core); if (ret) - goto err_runtime_disable; + goto err_firmware_deinit; ret = hfi_core_resume(core, true); if (ret) @@ -311,10 +316,6 @@ static int venus_probe(struct platform_device *pdev) if (ret) goto err_venus_shutdown; - ret = v4l2_device_register(dev, &core->v4l2_dev); - if (ret) - goto err_core_deinit; - ret = pm_runtime_put_sync(dev); if (ret) { pm_runtime_get_noresume(dev); @@ -327,18 +328,22 @@ static int venus_probe(struct platform_device *pdev) err_dev_unregister: v4l2_device_unregister(&core->v4l2_dev); -err_core_deinit: - hfi_core_deinit(core, false); err_venus_shutdown: venus_shutdown(core); +err_firmware_deinit: + venus_firmware_deinit(core); +err_of_depopulate: + of_platform_depopulate(dev); err_runtime_disable: pm_runtime_put_noidle(dev); pm_runtime_set_suspended(dev); pm_runtime_disable(dev); hfi_destroy(core); +err_core_deinit: + hfi_core_deinit(core, false); err_core_put: if (core->pm_ops->core_put) - core->pm_ops->core_put(dev); + core->pm_ops->core_put(core); return ret; } @@ -364,11 +369,12 @@ static int venus_remove(struct platform_device *pdev) pm_runtime_disable(dev); if (pm_ops->core_put) - pm_ops->core_put(dev); + pm_ops->core_put(core); + + v4l2_device_unregister(&core->v4l2_dev); hfi_destroy(core); - v4l2_device_unregister(&core->v4l2_dev); mutex_destroy(&core->pm_lock); mutex_destroy(&core->lock); venus_dbgfs_deinit(core); @@ -387,7 +393,7 @@ static __maybe_unused int venus_runtime_suspend(struct device *dev) return ret; if (pm_ops->core_power) { - ret = pm_ops->core_power(dev, POWER_OFF); + ret = pm_ops->core_power(core, POWER_OFF); if (ret) return ret; } @@ -405,7 +411,8 @@ static __maybe_unused int venus_runtime_suspend(struct device *dev) err_video_path: icc_set_bw(core->cpucfg_path, kbps_to_icc(1000), 0); err_cpucfg_path: - pm_ops->core_power(dev, POWER_ON); + if (pm_ops->core_power) + pm_ops->core_power(core, POWER_ON); return ret; } @@ -425,7 +432,7 @@ static __maybe_unused int venus_runtime_resume(struct device *dev) return ret; if (pm_ops->core_power) { - ret = pm_ops->core_power(dev, POWER_ON); + ret = pm_ops->core_power(core, POWER_ON); if (ret) return ret; } diff --git a/drivers/media/platform/qcom/venus/core.h b/drivers/media/platform/qcom/venus/core.h index 05c9fbd51f0c057586afbe588541c4a93c9e3487..f2a0ef9ee884e488ea7ac3eb6418e1d73b408a82 100644 --- a/drivers/media/platform/qcom/venus/core.h +++ b/drivers/media/platform/qcom/venus/core.h @@ -123,7 +123,6 @@ struct venus_caps { * @clks: an array of struct clk pointers * @vcodec0_clks: an array of vcodec0 struct clk pointers * @vcodec1_clks: an array of vcodec1 struct clk pointers - * @pd_dl_venus: pmdomain device-link for venus domain * @pmdomains: an array of pmdomains struct device pointers * @vdev_dec: a reference to video device structure for decoder instances * @vdev_enc: a reference to video device structure for encoder instances @@ -161,7 +160,6 @@ struct venus_core { struct icc_path *cpucfg_path; struct opp_table *opp_table; bool has_opp_table; - struct device_link *pd_dl_venus; struct device *pmdomains[VIDC_PMDOMAINS_NUM_MAX]; struct device_link *opp_dl_venus; struct device *opp_pmdomain; diff --git a/drivers/media/platform/qcom/venus/pm_helpers.c b/drivers/media/platform/qcom/venus/pm_helpers.c index 2946547a0df4a445cfd105847849bf076584f8d2..710f9a2b132b07ca89ca33cc983854be37a9f811 100644 --- a/drivers/media/platform/qcom/venus/pm_helpers.c +++ b/drivers/media/platform/qcom/venus/pm_helpers.c @@ -147,14 +147,12 @@ static u32 load_per_type(struct venus_core *core, u32 session_type) struct venus_inst *inst = NULL; u32 mbs_per_sec = 0; - mutex_lock(&core->lock); list_for_each_entry(inst, &core->instances, list) { if (inst->session_type != session_type) continue; mbs_per_sec += load_per_instance(inst); } - mutex_unlock(&core->lock); return mbs_per_sec; } @@ -203,14 +201,12 @@ static int load_scale_bw(struct venus_core *core) struct venus_inst *inst = NULL; u32 mbs_per_sec, avg, peak, total_avg = 0, total_peak = 0; - mutex_lock(&core->lock); list_for_each_entry(inst, &core->instances, list) { mbs_per_sec = load_per_instance(inst); mbs_to_bw(inst, mbs_per_sec, &avg, &peak); total_avg += avg; total_peak += peak; } - mutex_unlock(&core->lock); /* * keep minimum bandwidth vote for "video-mem" path, @@ -237,8 +233,9 @@ static int load_scale_v1(struct venus_inst *inst) struct device *dev = core->dev; u32 mbs_per_sec; unsigned int i; - int ret; + int ret = 0; + mutex_lock(&core->lock); mbs_per_sec = load_per_type(core, VIDC_SESSION_TYPE_ENC) + load_per_type(core, VIDC_SESSION_TYPE_DEC); @@ -263,29 +260,28 @@ set_freq: if (ret) { dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret); - return ret; + goto exit; } ret = load_scale_bw(core); if (ret) { dev_err(dev, "failed to set bandwidth (%d)\n", ret); - return ret; + goto exit; } - return 0; +exit: + mutex_unlock(&core->lock); + return ret; } -static int core_get_v1(struct device *dev) +static int core_get_v1(struct venus_core *core) { - struct venus_core *core = dev_get_drvdata(dev); - return core_clks_get(core); } -static int core_power_v1(struct device *dev, int on) +static int core_power_v1(struct venus_core *core, int on) { - struct venus_core *core = dev_get_drvdata(dev); int ret = 0; if (on == POWER_ON) @@ -752,12 +748,12 @@ static int venc_power_v4(struct device *dev, int on) return ret; } -static int vcodec_domains_get(struct device *dev) +static int vcodec_domains_get(struct venus_core *core) { int ret; struct opp_table *opp_table; struct device **opp_virt_dev; - struct venus_core *core = dev_get_drvdata(dev); + struct device *dev = core->dev; const struct venus_resources *res = core->res; struct device *pd; unsigned int i; @@ -773,13 +769,6 @@ static int vcodec_domains_get(struct device *dev) core->pmdomains[i] = pd; } - core->pd_dl_venus = device_link_add(dev, core->pmdomains[0], - DL_FLAG_PM_RUNTIME | - DL_FLAG_STATELESS | - DL_FLAG_RPM_ACTIVE); - if (!core->pd_dl_venus) - return -ENODEV; - skip_pmdomains: if (!core->has_opp_table) return 0; @@ -806,29 +795,23 @@ skip_pmdomains: opp_dl_add_err: dev_pm_opp_detach_genpd(core->opp_table); opp_attach_err: - if (core->pd_dl_venus) { - device_link_del(core->pd_dl_venus); - for (i = 0; i < res->vcodec_pmdomains_num; i++) { - if (IS_ERR_OR_NULL(core->pmdomains[i])) - continue; - dev_pm_domain_detach(core->pmdomains[i], true); - } + for (i = 0; i < res->vcodec_pmdomains_num; i++) { + if (IS_ERR_OR_NULL(core->pmdomains[i])) + continue; + dev_pm_domain_detach(core->pmdomains[i], true); } + return ret; } -static void vcodec_domains_put(struct device *dev) +static void vcodec_domains_put(struct venus_core *core) { - struct venus_core *core = dev_get_drvdata(dev); const struct venus_resources *res = core->res; unsigned int i; if (!res->vcodec_pmdomains_num) goto skip_pmdomains; - if (core->pd_dl_venus) - device_link_del(core->pd_dl_venus); - for (i = 0; i < res->vcodec_pmdomains_num; i++) { if (IS_ERR_OR_NULL(core->pmdomains[i])) continue; @@ -845,9 +828,9 @@ skip_pmdomains: dev_pm_opp_detach_genpd(core->opp_table); } -static int core_get_v4(struct device *dev) +static int core_get_v4(struct venus_core *core) { - struct venus_core *core = dev_get_drvdata(dev); + struct device *dev = core->dev; const struct venus_resources *res = core->res; int ret; @@ -886,7 +869,7 @@ static int core_get_v4(struct device *dev) } } - ret = vcodec_domains_get(dev); + ret = vcodec_domains_get(core); if (ret) { if (core->has_opp_table) dev_pm_opp_of_remove_table(dev); @@ -897,14 +880,14 @@ static int core_get_v4(struct device *dev) return 0; } -static void core_put_v4(struct device *dev) +static void core_put_v4(struct venus_core *core) { - struct venus_core *core = dev_get_drvdata(dev); + struct device *dev = core->dev; if (legacy_binding) return; - vcodec_domains_put(dev); + vcodec_domains_put(core); if (core->has_opp_table) dev_pm_opp_of_remove_table(dev); @@ -913,19 +896,33 @@ static void core_put_v4(struct device *dev) } -static int core_power_v4(struct device *dev, int on) +static int core_power_v4(struct venus_core *core, int on) { - struct venus_core *core = dev_get_drvdata(dev); + struct device *dev = core->dev; + struct device *pmctrl = core->pmdomains[0]; int ret = 0; if (on == POWER_ON) { + if (pmctrl) { + ret = pm_runtime_get_sync(pmctrl); + if (ret < 0) { + pm_runtime_put_noidle(pmctrl); + return ret; + } + } + ret = core_clks_enable(core); + if (ret < 0 && pmctrl) + pm_runtime_put_sync(pmctrl); } else { /* Drop the performance state vote */ if (core->opp_pmdomain) dev_pm_opp_set_rate(dev, 0); core_clks_disable(core); + + if (pmctrl) + pm_runtime_put_sync(pmctrl); } return ret; @@ -962,13 +959,13 @@ static int load_scale_v4(struct venus_inst *inst) struct device *dev = core->dev; unsigned long freq = 0, freq_core1 = 0, freq_core2 = 0; unsigned long filled_len = 0; - int i, ret; + int i, ret = 0; for (i = 0; i < inst->num_input_bufs; i++) filled_len = max(filled_len, inst->payloads[i]); if (inst->session_type == VIDC_SESSION_TYPE_DEC && !filled_len) - return 0; + return ret; freq = calculate_inst_freq(inst, filled_len); inst->clk_data.freq = freq; @@ -984,7 +981,6 @@ static int load_scale_v4(struct venus_inst *inst) freq_core2 += inst->clk_data.freq; } } - mutex_unlock(&core->lock); freq = max(freq_core1, freq_core2); @@ -1008,17 +1004,19 @@ set_freq: if (ret) { dev_err(dev, "failed to set clock rate %lu (%d)\n", freq, ret); - return ret; + goto exit; } ret = load_scale_bw(core); if (ret) { dev_err(dev, "failed to set bandwidth (%d)\n", ret); - return ret; + goto exit; } - return 0; +exit: + mutex_unlock(&core->lock); + return ret; } static const struct venus_pm_ops pm_ops_v4 = { diff --git a/drivers/media/platform/qcom/venus/pm_helpers.h b/drivers/media/platform/qcom/venus/pm_helpers.h index aa2f6afa2354432fc0f58b6735f45356214f3579..a492c50c5543c5036a13cef7a72891b788b7e1c9 100644 --- a/drivers/media/platform/qcom/venus/pm_helpers.h +++ b/drivers/media/platform/qcom/venus/pm_helpers.h @@ -4,14 +4,15 @@ #define __VENUS_PM_HELPERS_H__ struct device; +struct venus_core; #define POWER_ON 1 #define POWER_OFF 0 struct venus_pm_ops { - int (*core_get)(struct device *dev); - void (*core_put)(struct device *dev); - int (*core_power)(struct device *dev, int on); + int (*core_get)(struct venus_core *core); + void (*core_put)(struct venus_core *core); + int (*core_power)(struct venus_core *core, int on); int (*vdec_get)(struct device *dev); void (*vdec_put)(struct device *dev); diff --git a/drivers/media/platform/rcar-vin/rcar-csi2.c b/drivers/media/platform/rcar-vin/rcar-csi2.c index 79f229756805eb873c08042822e1af6d28f1b9b8..5e8e48a721a049c9faabaa8c3e4d6770f99e6eaa 100644 --- a/drivers/media/platform/rcar-vin/rcar-csi2.c +++ b/drivers/media/platform/rcar-vin/rcar-csi2.c @@ -436,16 +436,23 @@ static int rcsi2_wait_phy_start(struct rcar_csi2 *priv, static int rcsi2_set_phypll(struct rcar_csi2 *priv, unsigned int mbps) { const struct rcsi2_mbps_reg *hsfreq; + const struct rcsi2_mbps_reg *hsfreq_prev = NULL; - for (hsfreq = priv->info->hsfreqrange; hsfreq->mbps != 0; hsfreq++) + for (hsfreq = priv->info->hsfreqrange; hsfreq->mbps != 0; hsfreq++) { if (hsfreq->mbps >= mbps) break; + hsfreq_prev = hsfreq; + } if (!hsfreq->mbps) { dev_err(priv->dev, "Unsupported PHY speed (%u Mbps)", mbps); return -ERANGE; } + if (hsfreq_prev && + ((mbps - hsfreq_prev->mbps) <= (hsfreq->mbps - mbps))) + hsfreq = hsfreq_prev; + rcsi2_write(priv, PHYPLL_REG, PHYPLL_HSFREQRANGE(hsfreq->reg)); return 0; @@ -544,6 +551,8 @@ static int rcsi2_start_receiver(struct rcar_csi2 *priv) /* Code is validated in set_fmt. */ format = rcsi2_code_to_fmt(priv->mf.code); + if (!format) + return -EINVAL; /* * Enable all supported CSI-2 channels with virtual channel and @@ -967,10 +976,17 @@ static int rcsi2_phtw_write_mbps(struct rcar_csi2 *priv, unsigned int mbps, const struct rcsi2_mbps_reg *values, u16 code) { const struct rcsi2_mbps_reg *value; + const struct rcsi2_mbps_reg *prev_value = NULL; - for (value = values; value->mbps; value++) + for (value = values; value->mbps; value++) { if (value->mbps >= mbps) break; + prev_value = value; + } + + if (prev_value && + ((mbps - prev_value->mbps) <= (value->mbps - mbps))) + value = prev_value; if (!value->mbps) { dev_err(priv->dev, "Unsupported PHY speed (%u Mbps)", mbps); diff --git a/drivers/media/platform/rcar-vin/rcar-v4l2.c b/drivers/media/platform/rcar-vin/rcar-v4l2.c index 3e7a3ae2a6b97045bdf9d653e5560802e02f8422..0bbe6f9f92062fdbd5bfe3fa43533a03959f1002 100644 --- a/drivers/media/platform/rcar-vin/rcar-v4l2.c +++ b/drivers/media/platform/rcar-vin/rcar-v4l2.c @@ -175,20 +175,27 @@ static void rvin_format_align(struct rvin_dev *vin, struct v4l2_pix_format *pix) break; } - /* HW limit width to a multiple of 32 (2^5) for NV12/16 else 2 (2^1) */ + /* Hardware limits width alignment based on format. */ switch (pix->pixelformat) { + /* Multiple of 32 (2^5) for NV12/16. */ case V4L2_PIX_FMT_NV12: case V4L2_PIX_FMT_NV16: walign = 5; break; - default: + /* Multiple of 2 (2^1) for YUV. */ + case V4L2_PIX_FMT_YUYV: + case V4L2_PIX_FMT_UYVY: walign = 1; break; + /* No multiple for RGB. */ + default: + walign = 0; + break; } /* Limit to VIN capabilities */ - v4l_bound_align_image(&pix->width, 2, vin->info->max_width, walign, - &pix->height, 4, vin->info->max_height, 2, 0); + v4l_bound_align_image(&pix->width, 5, vin->info->max_width, walign, + &pix->height, 2, vin->info->max_height, 0, 0); pix->bytesperline = rvin_format_bytesperline(vin, pix); pix->sizeimage = rvin_format_sizeimage(pix); diff --git a/drivers/media/platform/s5p-mfc/s5p_mfc.c b/drivers/media/platform/s5p-mfc/s5p_mfc.c index eba2b9f040df0b43ff0c4b7d57127c885369a43a..f336a954327320f9b16f52c85335cdc810220f8b 100644 --- a/drivers/media/platform/s5p-mfc/s5p_mfc.c +++ b/drivers/media/platform/s5p-mfc/s5p_mfc.c @@ -1283,11 +1283,15 @@ static int s5p_mfc_probe(struct platform_device *pdev) spin_lock_init(&dev->condlock); dev->plat_dev = pdev; if (!dev->plat_dev) { - dev_err(&pdev->dev, "No platform data specified\n"); + mfc_err("No platform data specified\n"); return -ENODEV; } dev->variant = of_device_get_match_data(&pdev->dev); + if (!dev->variant) { + dev_err(&pdev->dev, "Failed to get device MFC hardware variant information\n"); + return -ENOENT; + } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); dev->regs_base = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/media/platform/stm32/stm32-dcmi.c b/drivers/media/platform/stm32/stm32-dcmi.c index fd1c41cba52fc8a5160195004194e26a3bb691dc..233e4d3feacd91fb4e4154da849de6538802f80b 100644 --- a/drivers/media/platform/stm32/stm32-dcmi.c +++ b/drivers/media/platform/stm32/stm32-dcmi.c @@ -135,6 +135,7 @@ struct stm32_dcmi { int sequence; struct list_head buffers; struct dcmi_buf *active; + int irq; struct v4l2_device v4l2_dev; struct video_device *vdev; @@ -1720,6 +1721,14 @@ static int dcmi_graph_notify_complete(struct v4l2_async_notifier *notifier) return ret; } + ret = devm_request_threaded_irq(dcmi->dev, dcmi->irq, dcmi_irq_callback, + dcmi_irq_thread, IRQF_ONESHOT, + dev_name(dcmi->dev), dcmi); + if (ret) { + dev_err(dcmi->dev, "Unable to request irq %d\n", dcmi->irq); + return ret; + } + return 0; } @@ -1881,6 +1890,8 @@ static int dcmi_probe(struct platform_device *pdev) if (irq <= 0) return irq ? irq : -ENXIO; + dcmi->irq = irq; + dcmi->res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!dcmi->res) { dev_err(&pdev->dev, "Could not get resource\n"); @@ -1893,14 +1904,6 @@ static int dcmi_probe(struct platform_device *pdev) return PTR_ERR(dcmi->regs); } - ret = devm_request_threaded_irq(&pdev->dev, irq, dcmi_irq_callback, - dcmi_irq_thread, IRQF_ONESHOT, - dev_name(&pdev->dev), dcmi); - if (ret) { - dev_err(&pdev->dev, "Unable to request irq %d\n", irq); - return ret; - } - mclk = devm_clk_get(&pdev->dev, "mclk"); if (IS_ERR(mclk)) { if (PTR_ERR(mclk) != -EPROBE_DEFER) diff --git a/drivers/media/radio/radio-wl1273.c b/drivers/media/radio/radio-wl1273.c index 1123768731676a0aaf0c8d2183f3607d5301ef5e..484046471c03f8c1b1defe24006793e538221e4d 100644 --- a/drivers/media/radio/radio-wl1273.c +++ b/drivers/media/radio/radio-wl1273.c @@ -1279,7 +1279,7 @@ static int wl1273_fm_vidioc_querycap(struct file *file, void *priv, strscpy(capability->driver, WL1273_FM_DRIVER_NAME, sizeof(capability->driver)); - strscpy(capability->card, "Texas Instruments Wl1273 FM Radio", + strscpy(capability->card, "TI Wl1273 FM Radio", sizeof(capability->card)); strscpy(capability->bus_info, radio->bus_type, sizeof(capability->bus_info)); diff --git a/drivers/media/radio/si470x/radio-si470x-i2c.c b/drivers/media/radio/si470x/radio-si470x-i2c.c index f491420d7b538a3f67dd9598e950438b6b71cb13..76d39e2e877068e0a8fbe65c42f8de585cc95d9f 100644 --- a/drivers/media/radio/si470x/radio-si470x-i2c.c +++ b/drivers/media/radio/si470x/radio-si470x-i2c.c @@ -11,7 +11,7 @@ /* driver definitions */ #define DRIVER_AUTHOR "Joonyoung Shim "; -#define DRIVER_CARD "Silicon Labs Si470x FM Radio Receiver" +#define DRIVER_CARD "Silicon Labs Si470x FM Radio" #define DRIVER_DESC "I2C radio driver for Si470x FM Radio Receivers" #define DRIVER_VERSION "1.0.2" @@ -368,7 +368,7 @@ static int si470x_i2c_probe(struct i2c_client *client) if (radio->hdl.error) { retval = radio->hdl.error; dev_err(&client->dev, "couldn't register control\n"); - goto err_dev; + goto err_all; } /* video device initialization */ @@ -463,7 +463,6 @@ static int si470x_i2c_probe(struct i2c_client *client) return 0; err_all: v4l2_ctrl_handler_free(&radio->hdl); -err_dev: v4l2_device_unregister(&radio->v4l2_dev); err_initial: return retval; diff --git a/drivers/media/radio/si470x/radio-si470x-usb.c b/drivers/media/radio/si470x/radio-si470x-usb.c index fedff68d8c496876a82e583d22ad2ce37a17885d..3f8634a46573010e428ba0e61ae80004a90d55a4 100644 --- a/drivers/media/radio/si470x/radio-si470x-usb.c +++ b/drivers/media/radio/si470x/radio-si470x-usb.c @@ -16,7 +16,7 @@ /* driver definitions */ #define DRIVER_AUTHOR "Tobias Lorenz " -#define DRIVER_CARD "Silicon Labs Si470x FM Radio Receiver" +#define DRIVER_CARD "Silicon Labs Si470x FM Radio" #define DRIVER_DESC "USB radio driver for Si470x FM Radio Receivers" #define DRIVER_VERSION "1.0.10" diff --git a/drivers/media/rc/igorplugusb.c b/drivers/media/rc/igorplugusb.c index effaa5751d6c98099cd6cb745ddbcb40411eb889..3e9988ee785f0cf4f6a3804c253a8defb53f3586 100644 --- a/drivers/media/rc/igorplugusb.c +++ b/drivers/media/rc/igorplugusb.c @@ -64,9 +64,11 @@ static void igorplugusb_irdata(struct igorplugusb *ir, unsigned len) if (start >= len) { dev_err(ir->dev, "receive overflow invalid: %u", overflow); } else { - if (overflow > 0) + if (overflow > 0) { dev_warn(ir->dev, "receive overflow, at least %u lost", overflow); + ir_raw_event_reset(ir->rc); + } do { rawir.duration = ir->buf_in[i] * 85; diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index 3e729a17b35ff9dab6651c150d2230d35f4c62df..1aa7989e756ccd967ca376ce0201911f3f5c4708 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -24,6 +24,7 @@ static const u8 COMMAND_VERSION[] = { 'v' }; // End transmit and repeat reset command so we exit sump mode static const u8 COMMAND_RESET[] = { 0xff, 0xff, 0, 0, 0, 0, 0 }; static const u8 COMMAND_SMODE_ENTER[] = { 's' }; +static const u8 COMMAND_SMODE_EXIT[] = { 0 }; static const u8 COMMAND_TXSTART[] = { 0x26, 0x24, 0x25, 0x03 }; #define REPLY_XMITCOUNT 't' @@ -315,6 +316,24 @@ static int irtoy_tx(struct rc_dev *rc, uint *txbuf, uint count) irtoy->tx_len = size; irtoy->emitted = 0; + // There is an issue where if the unit is receiving IR while the + // first TXSTART command is sent, the device might end up hanging + // with its led on. It does not respond to any command when this + // happens. To work around this, re-enter sample mode. + err = irtoy_command(irtoy, COMMAND_SMODE_EXIT, + sizeof(COMMAND_SMODE_EXIT), STATE_RESET); + if (err) { + dev_err(irtoy->dev, "exit sample mode: %d\n", err); + return err; + } + + err = irtoy_command(irtoy, COMMAND_SMODE_ENTER, + sizeof(COMMAND_SMODE_ENTER), STATE_COMMAND); + if (err) { + dev_err(irtoy->dev, "enter sample mode: %d\n", err); + return err; + } + err = irtoy_command(irtoy, COMMAND_TXSTART, sizeof(COMMAND_TXSTART), STATE_TX); kfree(buf); diff --git a/drivers/media/rc/ite-cir.c b/drivers/media/rc/ite-cir.c index e5c4a6941d26b8bee703c13de783cd8e7f41a923..fbe794ca1eef16468693c71f2a25ab7e7a42a3a1 100644 --- a/drivers/media/rc/ite-cir.c +++ b/drivers/media/rc/ite-cir.c @@ -283,7 +283,7 @@ static irqreturn_t ite_cir_isr(int irq, void *data) } /* check for the receive interrupt */ - if (iflags & ITE_IRQ_RX_FIFO) { + if (iflags & (ITE_IRQ_RX_FIFO | ITE_IRQ_RX_FIFO_OVERRUN)) { /* read the FIFO bytes */ rx_bytes = dev->params.get_rx_bytes(dev, rx_buf, diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index 5642595a057ec422c1bca563674a2a94ec982049..dbb5a4f44bda50631da1a72b95d2e3ca79e402ba 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -1386,6 +1386,7 @@ static void mceusb_dev_recv(struct urb *urb) case -ECONNRESET: case -ENOENT: case -EILSEQ: + case -EPROTO: case -ESHUTDOWN: usb_unlink_urb(urb); return; @@ -1429,7 +1430,7 @@ static void mceusb_gen1_init(struct mceusb_dev *ir) */ ret = usb_control_msg(ir->usbdev, usb_rcvctrlpipe(ir->usbdev, 0), USB_REQ_SET_ADDRESS, USB_TYPE_VENDOR, 0, 0, - data, USB_CTRL_MSG_SZ, HZ * 3); + data, USB_CTRL_MSG_SZ, 3000); dev_dbg(dev, "set address - ret = %d", ret); dev_dbg(dev, "set address - data[0] = %d, data[1] = %d", data[0], data[1]); @@ -1437,20 +1438,20 @@ static void mceusb_gen1_init(struct mceusb_dev *ir) /* set feature: bit rate 38400 bps */ ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), USB_REQ_SET_FEATURE, USB_TYPE_VENDOR, - 0xc04e, 0x0000, NULL, 0, HZ * 3); + 0xc04e, 0x0000, NULL, 0, 3000); dev_dbg(dev, "set feature - ret = %d", ret); /* bRequest 4: set char length to 8 bits */ ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), 4, USB_TYPE_VENDOR, - 0x0808, 0x0000, NULL, 0, HZ * 3); + 0x0808, 0x0000, NULL, 0, 3000); dev_dbg(dev, "set char length - retB = %d", ret); /* bRequest 2: set handshaking to use DTR/DSR */ ret = usb_control_msg(ir->usbdev, usb_sndctrlpipe(ir->usbdev, 0), 2, USB_TYPE_VENDOR, - 0x0000, 0x0100, NULL, 0, HZ * 3); + 0x0000, 0x0100, NULL, 0, 3000); dev_dbg(dev, "set handshake - retC = %d", ret); /* device resume */ diff --git a/drivers/media/rc/redrat3.c b/drivers/media/rc/redrat3.c index 2cf3377ec63a7c0b8b61c17e0ce68d61ffecdae8..a61f9820ade951c943580c909d64380b32ec9779 100644 --- a/drivers/media/rc/redrat3.c +++ b/drivers/media/rc/redrat3.c @@ -404,7 +404,7 @@ static int redrat3_send_cmd(int cmd, struct redrat3_dev *rr3) udev = rr3->udev; res = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), cmd, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0x0000, 0x0000, data, sizeof(u8), HZ * 10); + 0x0000, 0x0000, data, sizeof(u8), 10000); if (res < 0) { dev_err(rr3->dev, "%s: Error sending rr3 cmd res %d, data %d", @@ -480,7 +480,7 @@ static u32 redrat3_get_timeout(struct redrat3_dev *rr3) pipe = usb_rcvctrlpipe(rr3->udev, 0); ret = usb_control_msg(rr3->udev, pipe, RR3_GET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - RR3_IR_IO_SIG_TIMEOUT, 0, tmp, len, HZ * 5); + RR3_IR_IO_SIG_TIMEOUT, 0, tmp, len, 5000); if (ret != len) dev_warn(rr3->dev, "Failed to read timeout from hardware\n"); else { @@ -510,7 +510,7 @@ static int redrat3_set_timeout(struct rc_dev *rc_dev, unsigned int timeoutus) ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RR3_SET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, RR3_IR_IO_SIG_TIMEOUT, 0, timeout, sizeof(*timeout), - HZ * 25); + 25000); dev_dbg(dev, "set ir parm timeout %d ret 0x%02x\n", be32_to_cpu(*timeout), ret); @@ -542,32 +542,32 @@ static void redrat3_reset(struct redrat3_dev *rr3) *val = 0x01; rc = usb_control_msg(udev, rxpipe, RR3_RESET, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - RR3_CPUCS_REG_ADDR, 0, val, len, HZ * 25); + RR3_CPUCS_REG_ADDR, 0, val, len, 25000); dev_dbg(dev, "reset returned 0x%02x\n", rc); *val = length_fuzz; rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - RR3_IR_IO_LENGTH_FUZZ, 0, val, len, HZ * 25); + RR3_IR_IO_LENGTH_FUZZ, 0, val, len, 25000); dev_dbg(dev, "set ir parm len fuzz %d rc 0x%02x\n", *val, rc); *val = (65536 - (minimum_pause * 2000)) / 256; rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - RR3_IR_IO_MIN_PAUSE, 0, val, len, HZ * 25); + RR3_IR_IO_MIN_PAUSE, 0, val, len, 25000); dev_dbg(dev, "set ir parm min pause %d rc 0x%02x\n", *val, rc); *val = periods_measure_carrier; rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - RR3_IR_IO_PERIODS_MF, 0, val, len, HZ * 25); + RR3_IR_IO_PERIODS_MF, 0, val, len, 25000); dev_dbg(dev, "set ir parm periods measure carrier %d rc 0x%02x", *val, rc); *val = RR3_DRIVER_MAXLENS; rc = usb_control_msg(udev, txpipe, RR3_SET_IR_PARAM, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - RR3_IR_IO_MAX_LENGTHS, 0, val, len, HZ * 25); + RR3_IR_IO_MAX_LENGTHS, 0, val, len, 25000); dev_dbg(dev, "set ir parm max lens %d rc 0x%02x\n", *val, rc); kfree(val); @@ -585,7 +585,7 @@ static void redrat3_get_firmware_rev(struct redrat3_dev *rr3) rc = usb_control_msg(rr3->udev, usb_rcvctrlpipe(rr3->udev, 0), RR3_FW_VERSION, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0, 0, buffer, RR3_FW_VERSION_LEN, HZ * 5); + 0, 0, buffer, RR3_FW_VERSION_LEN, 5000); if (rc >= 0) dev_info(rr3->dev, "Firmware rev: %s", buffer); @@ -825,14 +825,14 @@ static int redrat3_transmit_ir(struct rc_dev *rcdev, unsigned *txbuf, pipe = usb_sndbulkpipe(rr3->udev, rr3->ep_out->bEndpointAddress); ret = usb_bulk_msg(rr3->udev, pipe, irdata, - sendbuf_len, &ret_len, 10 * HZ); + sendbuf_len, &ret_len, 10000); dev_dbg(dev, "sent %d bytes, (ret %d)\n", ret_len, ret); /* now tell the hardware to transmit what we sent it */ pipe = usb_rcvctrlpipe(rr3->udev, 0); ret = usb_control_msg(rr3->udev, pipe, RR3_TX_SEND_SIGNAL, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0, 0, irdata, 2, HZ * 10); + 0, 0, irdata, 2, 10000); if (ret < 0) dev_err(dev, "Error: control msg send failed, rc %d\n", ret); diff --git a/drivers/media/spi/cxd2880-spi.c b/drivers/media/spi/cxd2880-spi.c index 93194f03764d220670096255230f2906b5112ab0..11273be702b6ef3d43eb92df26f0d7b4fef201a7 100644 --- a/drivers/media/spi/cxd2880-spi.c +++ b/drivers/media/spi/cxd2880-spi.c @@ -618,7 +618,7 @@ fail_frontend: fail_attach: dvb_unregister_adapter(&dvb_spi->adapter); fail_adapter: - if (!dvb_spi->vcc_supply) + if (dvb_spi->vcc_supply) regulator_disable(dvb_spi->vcc_supply); fail_regulator: kfree(dvb_spi); diff --git a/drivers/media/tuners/msi001.c b/drivers/media/tuners/msi001.c index 78e6fd600d8ef77ec60fd1e55aa96feaaf90827f..44247049a31903f5cbb9159d9be8ae934d5fb9ae 100644 --- a/drivers/media/tuners/msi001.c +++ b/drivers/media/tuners/msi001.c @@ -442,6 +442,13 @@ static int msi001_probe(struct spi_device *spi) V4L2_CID_RF_TUNER_BANDWIDTH_AUTO, 0, 1, 1, 1); dev->bandwidth = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_BANDWIDTH, 200000, 8000000, 1, 200000); + if (dev->hdl.error) { + ret = dev->hdl.error; + dev_err(&spi->dev, "Could not initialize controls\n"); + /* control init failed, free handler */ + goto err_ctrl_handler_free; + } + v4l2_ctrl_auto_cluster(2, &dev->bandwidth_auto, 0, false); dev->lna_gain = v4l2_ctrl_new_std(&dev->hdl, &msi001_ctrl_ops, V4L2_CID_RF_TUNER_LNA_GAIN, 0, 1, 1, 1); diff --git a/drivers/media/tuners/si2157.c b/drivers/media/tuners/si2157.c index fefb2625f6558e340488586395607781f2becf2c..75ddf7ed1faff25f255d45b58ef5ee1e68f59109 100644 --- a/drivers/media/tuners/si2157.c +++ b/drivers/media/tuners/si2157.c @@ -90,7 +90,7 @@ static int si2157_init(struct dvb_frontend *fe) dev_dbg(&client->dev, "\n"); /* Try to get Xtal trim property, to verify tuner still running */ - memcpy(cmd.args, "\x15\x00\x04\x02", 4); + memcpy(cmd.args, "\x15\x00\x02\x04", 4); cmd.wlen = 4; cmd.rlen = 4; ret = si2157_cmd_execute(client, &cmd); diff --git a/drivers/media/usb/b2c2/flexcop-usb.c b/drivers/media/usb/b2c2/flexcop-usb.c index e731243267e4904b8ceed06f004a0e62352e44cf..a2563c25408086d013c4a7a9306d913343aa1eda 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.c +++ b/drivers/media/usb/b2c2/flexcop-usb.c @@ -87,7 +87,7 @@ static int flexcop_usb_readwrite_dw(struct flexcop_device *fc, u16 wRegOffsPCI, 0, fc_usb->data, sizeof(u32), - B2C2_WAIT_FOR_OPERATION_RDW * HZ); + B2C2_WAIT_FOR_OPERATION_RDW); if (ret != sizeof(u32)) { err("error while %s dword from %d (%d).", read ? "reading" : @@ -155,7 +155,7 @@ static int flexcop_usb_v8_memory_req(struct flexcop_usb *fc_usb, wIndex, fc_usb->data, buflen, - nWaitTime * HZ); + nWaitTime); if (ret != buflen) ret = -EIO; @@ -249,13 +249,13 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c, /* DKT 020208 - add this to support special case of DiSEqC */ case USB_FUNC_I2C_CHECKWRITE: pipe = B2C2_USB_CTRL_PIPE_OUT; - nWaitTime = 2; + nWaitTime = 2000; request_type |= USB_DIR_OUT; break; case USB_FUNC_I2C_READ: case USB_FUNC_I2C_REPEATREAD: pipe = B2C2_USB_CTRL_PIPE_IN; - nWaitTime = 2; + nWaitTime = 2000; request_type |= USB_DIR_IN; break; default: @@ -282,7 +282,7 @@ static int flexcop_usb_i2c_req(struct flexcop_i2c_adapter *i2c, wIndex, fc_usb->data, buflen, - nWaitTime * HZ); + nWaitTime); if (ret != buflen) ret = -EIO; diff --git a/drivers/media/usb/b2c2/flexcop-usb.h b/drivers/media/usb/b2c2/flexcop-usb.h index 2f230bf72252be0d022e003e032784b0b5a9dce1..c7cca1a5ee59d0e420180a5d1c8325cf5351b21b 100644 --- a/drivers/media/usb/b2c2/flexcop-usb.h +++ b/drivers/media/usb/b2c2/flexcop-usb.h @@ -91,13 +91,13 @@ typedef enum { UTILITY_SRAM_TESTVERIFY = 0x16, } flexcop_usb_utility_function_t; -#define B2C2_WAIT_FOR_OPERATION_RW (1*HZ) -#define B2C2_WAIT_FOR_OPERATION_RDW (3*HZ) -#define B2C2_WAIT_FOR_OPERATION_WDW (1*HZ) +#define B2C2_WAIT_FOR_OPERATION_RW 1000 +#define B2C2_WAIT_FOR_OPERATION_RDW 3000 +#define B2C2_WAIT_FOR_OPERATION_WDW 1000 -#define B2C2_WAIT_FOR_OPERATION_V8READ (3*HZ) -#define B2C2_WAIT_FOR_OPERATION_V8WRITE (3*HZ) -#define B2C2_WAIT_FOR_OPERATION_V8FLASH (3*HZ) +#define B2C2_WAIT_FOR_OPERATION_V8READ 3000 +#define B2C2_WAIT_FOR_OPERATION_V8WRITE 3000 +#define B2C2_WAIT_FOR_OPERATION_V8FLASH 3000 typedef enum { V8_MEMORY_PAGE_DVB_CI = 0x20, diff --git a/drivers/media/usb/cpia2/cpia2_usb.c b/drivers/media/usb/cpia2/cpia2_usb.c index 76aac06f9fb8e2cb5b85ada5a77be10900e37dfc..cba03b286473858851310c678f047ffc772f6e8f 100644 --- a/drivers/media/usb/cpia2/cpia2_usb.c +++ b/drivers/media/usb/cpia2/cpia2_usb.c @@ -550,7 +550,7 @@ static int write_packet(struct usb_device *udev, 0, /* index */ buf, /* buffer */ size, - HZ); + 1000); kfree(buf); return ret; @@ -582,7 +582,7 @@ static int read_packet(struct usb_device *udev, 0, /* index */ buf, /* buffer */ size, - HZ); + 1000); if (ret >= 0) memcpy(registers, buf, size); diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf.c b/drivers/media/usb/dvb-usb-v2/mxl111sf.c index 7865fa0a8295716502317f2e0a8deada3552ba0d..cd5861a30b6f853a4eca87234624425087a3d236 100644 --- a/drivers/media/usb/dvb-usb-v2/mxl111sf.c +++ b/drivers/media/usb/dvb-usb-v2/mxl111sf.c @@ -931,8 +931,6 @@ static int mxl111sf_init(struct dvb_usb_device *d) .len = sizeof(eeprom), .buf = eeprom }, }; - mutex_init(&state->msg_lock); - ret = get_chip_info(state); if (mxl_fail(ret)) pr_err("failed to get chip info during probe"); @@ -1074,6 +1072,14 @@ static int mxl111sf_get_stream_config_dvbt(struct dvb_frontend *fe, return 0; } +static int mxl111sf_probe(struct dvb_usb_device *dev) +{ + struct mxl111sf_state *state = d_to_priv(dev); + + mutex_init(&state->msg_lock); + return 0; +} + static struct dvb_usb_device_properties mxl111sf_props_dvbt = { .driver_name = KBUILD_MODNAME, .owner = THIS_MODULE, @@ -1083,6 +1089,7 @@ static struct dvb_usb_device_properties mxl111sf_props_dvbt = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_dvbt, .tuner_attach = mxl111sf_attach_tuner, @@ -1124,6 +1131,7 @@ static struct dvb_usb_device_properties mxl111sf_props_atsc = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_atsc, .tuner_attach = mxl111sf_attach_tuner, @@ -1165,6 +1173,7 @@ static struct dvb_usb_device_properties mxl111sf_props_mh = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mh, .tuner_attach = mxl111sf_attach_tuner, @@ -1233,6 +1242,7 @@ static struct dvb_usb_device_properties mxl111sf_props_atsc_mh = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_atsc_mh, .tuner_attach = mxl111sf_attach_tuner, @@ -1311,6 +1321,7 @@ static struct dvb_usb_device_properties mxl111sf_props_mercury = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mercury, .tuner_attach = mxl111sf_attach_tuner, @@ -1381,6 +1392,7 @@ static struct dvb_usb_device_properties mxl111sf_props_mercury_mh = { .generic_bulk_ctrl_endpoint = 0x02, .generic_bulk_ctrl_endpoint_response = 0x81, + .probe = mxl111sf_probe, .i2c_algo = &mxl111sf_i2c_algo, .frontend_attach = mxl111sf_frontend_attach_mercury_mh, .tuner_attach = mxl111sf_attach_tuner, diff --git a/drivers/media/usb/dvb-usb/az6027.c b/drivers/media/usb/dvb-usb/az6027.c index 1c39b61cde29b18d2d298154ba8ac78bc2e6668c..86788771175b7994cb322129b683dac5feb63055 100644 --- a/drivers/media/usb/dvb-usb/az6027.c +++ b/drivers/media/usb/dvb-usb/az6027.c @@ -391,6 +391,7 @@ static struct rc_map_table rc_map_az6027_table[] = { /* remote control stuff (does not work with my box) */ static int az6027_rc_query(struct dvb_usb_device *d, u32 *event, int *state) { + *state = REMOTE_NO_KEY_PRESSED; return 0; } diff --git a/drivers/media/usb/dvb-usb/dib0700_core.c b/drivers/media/usb/dvb-usb/dib0700_core.c index 70219b3e85666de2772509149396b85b4fa95a5c..7ea8f68b0f458418276bffd1c595cf8c2e094dc3 100644 --- a/drivers/media/usb/dvb-usb/dib0700_core.c +++ b/drivers/media/usb/dvb-usb/dib0700_core.c @@ -618,8 +618,6 @@ int dib0700_streaming_ctrl(struct dvb_usb_adapter *adap, int onoff) deb_info("the endpoint number (%i) is not correct, use the adapter id instead", adap->fe_adap[0].stream.props.endpoint); if (onoff) st->channel_state |= 1 << (adap->id); - else - st->channel_state |= 1 << ~(adap->id); } else { if (onoff) st->channel_state |= 1 << (adap->fe_adap[0].stream.props.endpoint-2); diff --git a/drivers/media/usb/dvb-usb/dibusb-common.c b/drivers/media/usb/dvb-usb/dibusb-common.c index 02b51d1a1b67c9fb3eda5a9dd84faf5ecd6f08f0..aff60c10cb0b2c9e92c899c8504538a97012d615 100644 --- a/drivers/media/usb/dvb-usb/dibusb-common.c +++ b/drivers/media/usb/dvb-usb/dibusb-common.c @@ -223,7 +223,7 @@ int dibusb_read_eeprom_byte(struct dvb_usb_device *d, u8 offs, u8 *val) u8 *buf; int rc; - buf = kmalloc(2, GFP_KERNEL); + buf = kzalloc(2, GFP_KERNEL); if (!buf) return -ENOMEM; diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c index a27a6844032528437561278a5816d0fe6513a4e7..aa929db56db1f4721799aae7b3da1afc9e95215a 100644 --- a/drivers/media/usb/dvb-usb/dw2102.c +++ b/drivers/media/usb/dvb-usb/dw2102.c @@ -2148,46 +2148,153 @@ static struct dvb_usb_device_properties s6x0_properties = { } }; -static const struct dvb_usb_device_description d1100 = { - "Prof 1100 USB ", - {&dw2102_table[PROF_1100], NULL}, - {NULL}, -}; +static struct dvb_usb_device_properties p1100_properties = { + .caps = DVB_USB_IS_AN_I2C_ADAPTER, + .usb_ctrl = DEVICE_SPECIFIC, + .size_of_priv = sizeof(struct dw2102_state), + .firmware = P1100_FIRMWARE, + .no_reconnect = 1, -static const struct dvb_usb_device_description d660 = { - "TeVii S660 USB", - {&dw2102_table[TEVII_S660], NULL}, - {NULL}, -}; + .i2c_algo = &s6x0_i2c_algo, + .rc.core = { + .rc_interval = 150, + .rc_codes = RC_MAP_TBS_NEC, + .module_name = "dw2102", + .allowed_protos = RC_PROTO_BIT_NEC, + .rc_query = prof_rc_query, + }, -static const struct dvb_usb_device_description d480_1 = { - "TeVii S480.1 USB", - {&dw2102_table[TEVII_S480_1], NULL}, - {NULL}, + .generic_bulk_ctrl_endpoint = 0x81, + .num_adapters = 1, + .download_firmware = dw2102_load_firmware, + .read_mac_address = s6x0_read_mac_address, + .adapter = { + { + .num_frontends = 1, + .fe = {{ + .frontend_attach = stv0288_frontend_attach, + .stream = { + .type = USB_BULK, + .count = 8, + .endpoint = 0x82, + .u = { + .bulk = { + .buffersize = 4096, + } + } + }, + } }, + } + }, + .num_device_descs = 1, + .devices = { + {"Prof 1100 USB ", + {&dw2102_table[PROF_1100], NULL}, + {NULL}, + }, + } }; -static const struct dvb_usb_device_description d480_2 = { - "TeVii S480.2 USB", - {&dw2102_table[TEVII_S480_2], NULL}, - {NULL}, -}; +static struct dvb_usb_device_properties s660_properties = { + .caps = DVB_USB_IS_AN_I2C_ADAPTER, + .usb_ctrl = DEVICE_SPECIFIC, + .size_of_priv = sizeof(struct dw2102_state), + .firmware = S660_FIRMWARE, + .no_reconnect = 1, -static const struct dvb_usb_device_description d7500 = { - "Prof 7500 USB DVB-S2", - {&dw2102_table[PROF_7500], NULL}, - {NULL}, -}; + .i2c_algo = &s6x0_i2c_algo, + .rc.core = { + .rc_interval = 150, + .rc_codes = RC_MAP_TEVII_NEC, + .module_name = "dw2102", + .allowed_protos = RC_PROTO_BIT_NEC, + .rc_query = dw2102_rc_query, + }, -static const struct dvb_usb_device_description d421 = { - "TeVii S421 PCI", - {&dw2102_table[TEVII_S421], NULL}, - {NULL}, + .generic_bulk_ctrl_endpoint = 0x81, + .num_adapters = 1, + .download_firmware = dw2102_load_firmware, + .read_mac_address = s6x0_read_mac_address, + .adapter = { + { + .num_frontends = 1, + .fe = {{ + .frontend_attach = ds3000_frontend_attach, + .stream = { + .type = USB_BULK, + .count = 8, + .endpoint = 0x82, + .u = { + .bulk = { + .buffersize = 4096, + } + } + }, + } }, + } + }, + .num_device_descs = 3, + .devices = { + {"TeVii S660 USB", + {&dw2102_table[TEVII_S660], NULL}, + {NULL}, + }, + {"TeVii S480.1 USB", + {&dw2102_table[TEVII_S480_1], NULL}, + {NULL}, + }, + {"TeVii S480.2 USB", + {&dw2102_table[TEVII_S480_2], NULL}, + {NULL}, + }, + } }; -static const struct dvb_usb_device_description d632 = { - "TeVii S632 USB", - {&dw2102_table[TEVII_S632], NULL}, - {NULL}, +static struct dvb_usb_device_properties p7500_properties = { + .caps = DVB_USB_IS_AN_I2C_ADAPTER, + .usb_ctrl = DEVICE_SPECIFIC, + .size_of_priv = sizeof(struct dw2102_state), + .firmware = P7500_FIRMWARE, + .no_reconnect = 1, + + .i2c_algo = &s6x0_i2c_algo, + .rc.core = { + .rc_interval = 150, + .rc_codes = RC_MAP_TBS_NEC, + .module_name = "dw2102", + .allowed_protos = RC_PROTO_BIT_NEC, + .rc_query = prof_rc_query, + }, + + .generic_bulk_ctrl_endpoint = 0x81, + .num_adapters = 1, + .download_firmware = dw2102_load_firmware, + .read_mac_address = s6x0_read_mac_address, + .adapter = { + { + .num_frontends = 1, + .fe = {{ + .frontend_attach = prof_7500_frontend_attach, + .stream = { + .type = USB_BULK, + .count = 8, + .endpoint = 0x82, + .u = { + .bulk = { + .buffersize = 4096, + } + } + }, + } }, + } + }, + .num_device_descs = 1, + .devices = { + {"Prof 7500 USB DVB-S2", + {&dw2102_table[PROF_7500], NULL}, + {NULL}, + }, + } }; static struct dvb_usb_device_properties su3000_properties = { @@ -2267,6 +2374,59 @@ static struct dvb_usb_device_properties su3000_properties = { } }; +static struct dvb_usb_device_properties s421_properties = { + .caps = DVB_USB_IS_AN_I2C_ADAPTER, + .usb_ctrl = DEVICE_SPECIFIC, + .size_of_priv = sizeof(struct dw2102_state), + .power_ctrl = su3000_power_ctrl, + .num_adapters = 1, + .identify_state = su3000_identify_state, + .i2c_algo = &su3000_i2c_algo, + + .rc.core = { + .rc_interval = 150, + .rc_codes = RC_MAP_SU3000, + .module_name = "dw2102", + .allowed_protos = RC_PROTO_BIT_RC5, + .rc_query = su3000_rc_query, + }, + + .read_mac_address = su3000_read_mac_address, + + .generic_bulk_ctrl_endpoint = 0x01, + + .adapter = { + { + .num_frontends = 1, + .fe = {{ + .streaming_ctrl = su3000_streaming_ctrl, + .frontend_attach = m88rs2000_frontend_attach, + .stream = { + .type = USB_BULK, + .count = 8, + .endpoint = 0x82, + .u = { + .bulk = { + .buffersize = 4096, + } + } + } + } }, + } + }, + .num_device_descs = 2, + .devices = { + { "TeVii S421 PCI", + { &dw2102_table[TEVII_S421], NULL }, + { NULL }, + }, + { "TeVii S632 USB", + { &dw2102_table[TEVII_S632], NULL }, + { NULL }, + }, + } +}; + static struct dvb_usb_device_properties t220_properties = { .caps = DVB_USB_IS_AN_I2C_ADAPTER, .usb_ctrl = DEVICE_SPECIFIC, @@ -2384,101 +2544,33 @@ static struct dvb_usb_device_properties tt_s2_4600_properties = { static int dw2102_probe(struct usb_interface *intf, const struct usb_device_id *id) { - int retval = -ENOMEM; - struct dvb_usb_device_properties *p1100; - struct dvb_usb_device_properties *s660; - struct dvb_usb_device_properties *p7500; - struct dvb_usb_device_properties *s421; - - p1100 = kmemdup(&s6x0_properties, - sizeof(struct dvb_usb_device_properties), GFP_KERNEL); - if (!p1100) - goto err0; - - /* copy default structure */ - /* fill only different fields */ - p1100->firmware = P1100_FIRMWARE; - p1100->devices[0] = d1100; - p1100->rc.core.rc_query = prof_rc_query; - p1100->rc.core.rc_codes = RC_MAP_TBS_NEC; - p1100->adapter->fe[0].frontend_attach = stv0288_frontend_attach; - - s660 = kmemdup(&s6x0_properties, - sizeof(struct dvb_usb_device_properties), GFP_KERNEL); - if (!s660) - goto err1; - - s660->firmware = S660_FIRMWARE; - s660->num_device_descs = 3; - s660->devices[0] = d660; - s660->devices[1] = d480_1; - s660->devices[2] = d480_2; - s660->adapter->fe[0].frontend_attach = ds3000_frontend_attach; - - p7500 = kmemdup(&s6x0_properties, - sizeof(struct dvb_usb_device_properties), GFP_KERNEL); - if (!p7500) - goto err2; - - p7500->firmware = P7500_FIRMWARE; - p7500->devices[0] = d7500; - p7500->rc.core.rc_query = prof_rc_query; - p7500->rc.core.rc_codes = RC_MAP_TBS_NEC; - p7500->adapter->fe[0].frontend_attach = prof_7500_frontend_attach; - - - s421 = kmemdup(&su3000_properties, - sizeof(struct dvb_usb_device_properties), GFP_KERNEL); - if (!s421) - goto err3; - - s421->num_device_descs = 2; - s421->devices[0] = d421; - s421->devices[1] = d632; - s421->adapter->fe[0].frontend_attach = m88rs2000_frontend_attach; - - if (0 == dvb_usb_device_init(intf, &dw2102_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &dw2104_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &dw3101_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &s6x0_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, p1100, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, s660, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, p7500, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, s421, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &su3000_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &t220_properties, - THIS_MODULE, NULL, adapter_nr) || - 0 == dvb_usb_device_init(intf, &tt_s2_4600_properties, - THIS_MODULE, NULL, adapter_nr)) { - - /* clean up copied properties */ - kfree(s421); - kfree(p7500); - kfree(s660); - kfree(p1100); + if (!(dvb_usb_device_init(intf, &dw2102_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &dw2104_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &dw3101_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &s6x0_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &p1100_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &s660_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &p7500_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &s421_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &su3000_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &t220_properties, + THIS_MODULE, NULL, adapter_nr) && + dvb_usb_device_init(intf, &tt_s2_4600_properties, + THIS_MODULE, NULL, adapter_nr))) { return 0; } - retval = -ENODEV; - kfree(s421); -err3: - kfree(p7500); -err2: - kfree(s660); -err1: - kfree(p1100); -err0: - return retval; + return -ENODEV; } static void dw2102_disconnect(struct usb_interface *intf) diff --git a/drivers/media/usb/dvb-usb/m920x.c b/drivers/media/usb/dvb-usb/m920x.c index 4bb5b82599a790c615422fe360ce20f4bdf37a9d..691e05833db1959e27858445e7e3594f5b4df7fc 100644 --- a/drivers/media/usb/dvb-usb/m920x.c +++ b/drivers/media/usb/dvb-usb/m920x.c @@ -274,6 +274,13 @@ static int m920x_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int nu /* Should check for ack here, if we knew how. */ } if (msg[i].flags & I2C_M_RD) { + char *read = kmalloc(1, GFP_KERNEL); + if (!read) { + ret = -ENOMEM; + kfree(read); + goto unlock; + } + for (j = 0; j < msg[i].len; j++) { /* Last byte of transaction? * Send STOP, otherwise send ACK. */ @@ -281,9 +288,12 @@ static int m920x_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msg[], int nu if ((ret = m920x_read(d->udev, M9206_I2C, 0x0, 0x20 | stop, - &msg[i].buf[j], 1)) != 0) + read, 1)) != 0) goto unlock; + msg[i].buf[j] = read[0]; } + + kfree(read); } else { for (j = 0; j < msg[i].len; j++) { /* Last byte of transaction? Then send STOP. */ diff --git a/drivers/media/usb/em28xx/em28xx-cards.c b/drivers/media/usb/em28xx/em28xx-cards.c index 5144888ae36f7d6df3648190b4fe90a878fd9d8a..87e375562dbb2b1df019c6ea305a222a298b01df 100644 --- a/drivers/media/usb/em28xx/em28xx-cards.c +++ b/drivers/media/usb/em28xx/em28xx-cards.c @@ -3575,8 +3575,10 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev, if (dev->is_audio_only) { retval = em28xx_audio_setup(dev); - if (retval) - return -ENODEV; + if (retval) { + retval = -ENODEV; + goto err_deinit_media; + } em28xx_init_extension(dev); return 0; @@ -3595,7 +3597,7 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev, dev_err(&dev->intf->dev, "%s: em28xx_i2c_register bus 0 - error [%d]!\n", __func__, retval); - return retval; + goto err_deinit_media; } /* register i2c bus 1 */ @@ -3611,9 +3613,7 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev, "%s: em28xx_i2c_register bus 1 - error [%d]!\n", __func__, retval); - em28xx_i2c_unregister(dev, 0); - - return retval; + goto err_unreg_i2c; } } @@ -3621,6 +3621,12 @@ static int em28xx_init_dev(struct em28xx *dev, struct usb_device *udev, em28xx_card_setup(dev); return 0; + +err_unreg_i2c: + em28xx_i2c_unregister(dev, 0); +err_deinit_media: + em28xx_unregister_media_device(dev); + return retval; } static int em28xx_duplicate_dev(struct em28xx *dev) @@ -4089,8 +4095,11 @@ static void em28xx_usb_disconnect(struct usb_interface *intf) em28xx_close_extension(dev); - if (dev->dev_next) + if (dev->dev_next) { + em28xx_close_extension(dev->dev_next); em28xx_release_resources(dev->dev_next); + } + em28xx_release_resources(dev); if (dev->dev_next) { diff --git a/drivers/media/usb/em28xx/em28xx-core.c b/drivers/media/usb/em28xx/em28xx-core.c index 3daa64bb1e1d9f0368c0ee87dbb6429e433c80a6..308bc029099d9da5de834f76e8e7b1b8fc4831f5 100644 --- a/drivers/media/usb/em28xx/em28xx-core.c +++ b/drivers/media/usb/em28xx/em28xx-core.c @@ -89,7 +89,7 @@ int em28xx_read_reg_req_len(struct em28xx *dev, u8 req, u16 reg, mutex_lock(&dev->ctrl_urb_lock); ret = usb_control_msg(udev, pipe, req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0000, reg, dev->urb_buf, len, HZ); + 0x0000, reg, dev->urb_buf, len, 1000); if (ret < 0) { em28xx_regdbg("(pipe 0x%08x): IN: %02x %02x %02x %02x %02x %02x %02x %02x failed with error %i\n", pipe, @@ -158,7 +158,7 @@ int em28xx_write_regs_req(struct em28xx *dev, u8 req, u16 reg, char *buf, memcpy(dev->urb_buf, buf, len); ret = usb_control_msg(udev, pipe, req, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0000, reg, dev->urb_buf, len, HZ); + 0x0000, reg, dev->urb_buf, len, 1000); mutex_unlock(&dev->ctrl_urb_lock); if (ret < 0) { @@ -1152,8 +1152,9 @@ int em28xx_suspend_extension(struct em28xx *dev) dev_info(&dev->intf->dev, "Suspending extensions\n"); mutex_lock(&em28xx_devlist_mutex); list_for_each_entry(ops, &em28xx_extension_devlist, next) { - if (ops->suspend) - ops->suspend(dev); + if (!ops->suspend) + continue; + ops->suspend(dev); if (dev->dev_next) ops->suspend(dev->dev_next); } diff --git a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c index d38dee1792e41f21b2f21758fe393e92b5a061d4..3915d551d59e7b2f564a2d222d5ee9d21cf1028b 100644 --- a/drivers/media/usb/pvrusb2/pvrusb2-hdw.c +++ b/drivers/media/usb/pvrusb2/pvrusb2-hdw.c @@ -1467,7 +1467,7 @@ static int pvr2_upload_firmware1(struct pvr2_hdw *hdw) for (address = 0; address < fwsize; address += 0x800) { memcpy(fw_ptr, fw_entry->data + address, 0x800); ret += usb_control_msg(hdw->usb_dev, pipe, 0xa0, 0x40, address, - 0, fw_ptr, 0x800, HZ); + 0, fw_ptr, 0x800, 1000); } trace_firmware("Upload done, releasing device's CPU"); @@ -1605,7 +1605,7 @@ int pvr2_upload_firmware2(struct pvr2_hdw *hdw) ((u32 *)fw_ptr)[icnt] = swab32(((u32 *)fw_ptr)[icnt]); ret |= usb_bulk_msg(hdw->usb_dev, pipe, fw_ptr,bcnt, - &actual_length, HZ); + &actual_length, 1000); ret |= (actual_length != bcnt); if (ret) break; fw_done += bcnt; @@ -3438,7 +3438,7 @@ void pvr2_hdw_cpufw_set_enabled(struct pvr2_hdw *hdw, 0xa0,0xc0, address,0, hdw->fw_buffer+address, - 0x800,HZ); + 0x800,1000); if (ret < 0) break; } @@ -3977,7 +3977,7 @@ void pvr2_hdw_cpureset_assert(struct pvr2_hdw *hdw,int val) /* Write the CPUCS register on the 8051. The lsb of the register is the reset bit; a 1 asserts reset while a 0 clears it. */ pipe = usb_sndctrlpipe(hdw->usb_dev, 0); - ret = usb_control_msg(hdw->usb_dev,pipe,0xa0,0x40,0xe600,0,da,1,HZ); + ret = usb_control_msg(hdw->usb_dev,pipe,0xa0,0x40,0xe600,0,da,1,1000); if (ret < 0) { pvr2_trace(PVR2_TRACE_ERROR_LEGS, "cpureset_assert(%d) error=%d",val,ret); diff --git a/drivers/media/usb/s2255/s2255drv.c b/drivers/media/usb/s2255/s2255drv.c index 4af55e2478be19ef0cd85c723bdb8e3596455426..cb15eb32d2a6be0bb6a360e842bfa134c7a73ba5 100644 --- a/drivers/media/usb/s2255/s2255drv.c +++ b/drivers/media/usb/s2255/s2255drv.c @@ -1884,7 +1884,7 @@ static long s2255_vendor_req(struct s2255_dev *dev, unsigned char Request, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, Value, Index, buf, - TransferBufferLength, HZ * 5); + TransferBufferLength, USB_CTRL_SET_TIMEOUT); if (r >= 0) memcpy(TransferBuffer, buf, TransferBufferLength); @@ -1893,7 +1893,7 @@ static long s2255_vendor_req(struct s2255_dev *dev, unsigned char Request, r = usb_control_msg(dev->udev, usb_sndctrlpipe(dev->udev, 0), Request, USB_TYPE_VENDOR | USB_RECIP_DEVICE, Value, Index, buf, - TransferBufferLength, HZ * 5); + TransferBufferLength, USB_CTRL_SET_TIMEOUT); } kfree(buf); return r; diff --git a/drivers/media/usb/stk1160/stk1160-core.c b/drivers/media/usb/stk1160/stk1160-core.c index b4f8bc5db138917baa34d827cc209554c15ef43e..4e1698f7881876010ff05f18b39610d863fccaa6 100644 --- a/drivers/media/usb/stk1160/stk1160-core.c +++ b/drivers/media/usb/stk1160/stk1160-core.c @@ -65,7 +65,7 @@ int stk1160_read_reg(struct stk1160 *dev, u16 reg, u8 *value) return -ENOMEM; ret = usb_control_msg(dev->udev, pipe, 0x00, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x00, reg, buf, sizeof(u8), HZ); + 0x00, reg, buf, sizeof(u8), 1000); if (ret < 0) { stk1160_err("read failed on reg 0x%x (%d)\n", reg, ret); @@ -85,7 +85,7 @@ int stk1160_write_reg(struct stk1160 *dev, u16 reg, u16 value) ret = usb_control_msg(dev->udev, pipe, 0x01, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, reg, NULL, 0, HZ); + value, reg, NULL, 0, 1000); if (ret < 0) { stk1160_err("write failed on reg 0x%x (%d)\n", reg, ret); diff --git a/drivers/media/usb/tm6000/tm6000-video.c b/drivers/media/usb/tm6000/tm6000-video.c index 2df736c029d6e65bdfeb89faf5a29a36f7edb090..01071e6cd75747932a10fbc7b7cf515d88cfab5a 100644 --- a/drivers/media/usb/tm6000/tm6000-video.c +++ b/drivers/media/usb/tm6000/tm6000-video.c @@ -854,8 +854,7 @@ static int vidioc_querycap(struct file *file, void *priv, struct tm6000_core *dev = ((struct tm6000_fh *)priv)->dev; strscpy(cap->driver, "tm6000", sizeof(cap->driver)); - strscpy(cap->card, "Trident TVMaster TM5600/6000/6010", - sizeof(cap->card)); + strscpy(cap->card, "Trident TM5600/6000/6010", sizeof(cap->card)); usb_make_path(dev->udev, cap->bus_info, sizeof(cap->bus_info)); cap->capabilities = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_READWRITE | V4L2_CAP_DEVICE_CAPS; diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c index 5f0e2fa69da5c6e8da2d78e16b3d62ffceb8689a..753b8a99e08fcbf1b34be02aeef6a02682c8130b 100644 --- a/drivers/media/usb/uvc/uvc_v4l2.c +++ b/drivers/media/usb/uvc/uvc_v4l2.c @@ -471,10 +471,13 @@ static int uvc_v4l2_set_streamparm(struct uvc_streaming *stream, uvc_simplify_fraction(&timeperframe.numerator, &timeperframe.denominator, 8, 333); - if (parm->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) + if (parm->type == V4L2_BUF_TYPE_VIDEO_CAPTURE) { parm->parm.capture.timeperframe = timeperframe; - else + parm->parm.capture.capability = V4L2_CAP_TIMEPERFRAME; + } else { parm->parm.output.timeperframe = timeperframe; + parm->parm.output.capability = V4L2_CAP_TIMEPERFRAME; + } return 0; } diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c index 5878c783348625651b2833bd818cd3869d3f437c..f6373d678d256361b14024406688680af7d3a91e 100644 --- a/drivers/media/usb/uvc/uvc_video.c +++ b/drivers/media/usb/uvc/uvc_video.c @@ -112,6 +112,11 @@ int uvc_query_ctrl(struct uvc_device *dev, u8 query, u8 unit, case 5: /* Invalid unit */ case 6: /* Invalid control */ case 7: /* Invalid Request */ + /* + * The firmware has not properly implemented + * the control or there has been a HW error. + */ + return -EIO; case 8: /* Invalid value within range */ return -EINVAL; default: /* reserved or unknown */ @@ -1910,6 +1915,10 @@ static int uvc_video_start_transfer(struct uvc_streaming *stream, if (ep == NULL) return -EIO; + /* Reject broken descriptors. */ + if (usb_endpoint_maxp(&ep->desc) == 0) + return -EIO; + ret = uvc_init_video_bulk(stream, ep, gfp_flags); } diff --git a/drivers/media/usb/uvc/uvcvideo.h b/drivers/media/usb/uvc/uvcvideo.h index a3dfacf069c44dede79b361f4c4e0d21002932b7..c884020b287843270872f88b36e0856ff0f18f0d 100644 --- a/drivers/media/usb/uvc/uvcvideo.h +++ b/drivers/media/usb/uvc/uvcvideo.h @@ -183,7 +183,7 @@ /* Maximum status buffer size in bytes of interrupt URB. */ #define UVC_MAX_STATUS_SIZE 16 -#define UVC_CTRL_CONTROL_TIMEOUT 500 +#define UVC_CTRL_CONTROL_TIMEOUT 5000 #define UVC_CTRL_STREAMING_TIMEOUT 5000 /* Maximum allowed number of control mappings per device */ diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index b0101e08fcdc40cfc5c4579af2d48b1319043a9b..b98800201e21b6ebbe58a369cf07442554390a6c 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -918,7 +918,7 @@ static void v4l_print_default(const void *arg, bool write_only) pr_cont("driver-specific ioctl\n"); } -static int check_ext_ctrls(struct v4l2_ext_controls *c, int allow_priv) +static bool check_ext_ctrls(struct v4l2_ext_controls *c, unsigned long ioctl) { __u32 i; @@ -927,23 +927,41 @@ static int check_ext_ctrls(struct v4l2_ext_controls *c, int allow_priv) for (i = 0; i < c->count; i++) c->controls[i].reserved2[0] = 0; - /* V4L2_CID_PRIVATE_BASE cannot be used as control class - when using extended controls. - Only when passed in through VIDIOC_G_CTRL and VIDIOC_S_CTRL - is it allowed for backwards compatibility. - */ - if (!allow_priv && c->which == V4L2_CID_PRIVATE_BASE) - return 0; - if (!c->which) - return 1; + switch (c->which) { + case V4L2_CID_PRIVATE_BASE: + /* + * V4L2_CID_PRIVATE_BASE cannot be used as control class + * when using extended controls. + * Only when passed in through VIDIOC_G_CTRL and VIDIOC_S_CTRL + * is it allowed for backwards compatibility. + */ + if (ioctl == VIDIOC_G_CTRL || ioctl == VIDIOC_S_CTRL) + return false; + break; + case V4L2_CTRL_WHICH_DEF_VAL: + /* Default value cannot be changed */ + if (ioctl == VIDIOC_S_EXT_CTRLS || + ioctl == VIDIOC_TRY_EXT_CTRLS) { + c->error_idx = c->count; + return false; + } + return true; + case V4L2_CTRL_WHICH_CUR_VAL: + return true; + case V4L2_CTRL_WHICH_REQUEST_VAL: + c->error_idx = c->count; + return false; + } + /* Check that all controls are from the same control class. */ for (i = 0; i < c->count; i++) { if (V4L2_CTRL_ID2WHICH(c->controls[i].id) != c->which) { - c->error_idx = i; - return 0; + c->error_idx = ioctl == VIDIOC_TRY_EXT_CTRLS ? i : + c->count; + return false; } } - return 1; + return true; } static int check_fmt(struct file *file, enum v4l2_buf_type type) @@ -2123,6 +2141,7 @@ static int v4l_prepare_buf(const struct v4l2_ioctl_ops *ops, static int v4l_g_parm(const struct v4l2_ioctl_ops *ops, struct file *file, void *fh, void *arg) { + struct video_device *vfd = video_devdata(file); struct v4l2_streamparm *p = arg; v4l2_std_id std; int ret = check_fmt(file, p->type); @@ -2134,7 +2153,8 @@ static int v4l_g_parm(const struct v4l2_ioctl_ops *ops, if (p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE && p->type != V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) return -EINVAL; - p->parm.capture.readbuffers = 2; + if (vfd->device_caps & V4L2_CAP_READWRITE) + p->parm.capture.readbuffers = 2; ret = ops->vidioc_g_std(file, fh, &std); if (ret == 0) v4l2_video_std_frame_period(std, &p->parm.capture.timeperframe); @@ -2240,7 +2260,7 @@ static int v4l_g_ctrl(const struct v4l2_ioctl_ops *ops, ctrls.controls = &ctrl; ctrl.id = p->id; ctrl.value = p->value; - if (check_ext_ctrls(&ctrls, 1)) { + if (check_ext_ctrls(&ctrls, VIDIOC_G_CTRL)) { int ret = ops->vidioc_g_ext_ctrls(file, fh, &ctrls); if (ret == 0) @@ -2259,6 +2279,7 @@ static int v4l_s_ctrl(const struct v4l2_ioctl_ops *ops, test_bit(V4L2_FL_USES_V4L2_FH, &vfd->flags) ? fh : NULL; struct v4l2_ext_controls ctrls; struct v4l2_ext_control ctrl; + int ret; if (vfh && vfh->ctrl_handler) return v4l2_s_ctrl(vfh, vfh->ctrl_handler, p); @@ -2274,9 +2295,11 @@ static int v4l_s_ctrl(const struct v4l2_ioctl_ops *ops, ctrls.controls = &ctrl; ctrl.id = p->id; ctrl.value = p->value; - if (check_ext_ctrls(&ctrls, 1)) - return ops->vidioc_s_ext_ctrls(file, fh, &ctrls); - return -EINVAL; + if (!check_ext_ctrls(&ctrls, VIDIOC_S_CTRL)) + return -EINVAL; + ret = ops->vidioc_s_ext_ctrls(file, fh, &ctrls); + p->value = ctrl.value; + return ret; } static int v4l_g_ext_ctrls(const struct v4l2_ioctl_ops *ops, @@ -2296,8 +2319,8 @@ static int v4l_g_ext_ctrls(const struct v4l2_ioctl_ops *ops, vfd, vfd->v4l2_dev->mdev, p); if (ops->vidioc_g_ext_ctrls == NULL) return -ENOTTY; - return check_ext_ctrls(p, 0) ? ops->vidioc_g_ext_ctrls(file, fh, p) : - -EINVAL; + return check_ext_ctrls(p, VIDIOC_G_EXT_CTRLS) ? + ops->vidioc_g_ext_ctrls(file, fh, p) : -EINVAL; } static int v4l_s_ext_ctrls(const struct v4l2_ioctl_ops *ops, @@ -2317,8 +2340,8 @@ static int v4l_s_ext_ctrls(const struct v4l2_ioctl_ops *ops, vfd, vfd->v4l2_dev->mdev, p); if (ops->vidioc_s_ext_ctrls == NULL) return -ENOTTY; - return check_ext_ctrls(p, 0) ? ops->vidioc_s_ext_ctrls(file, fh, p) : - -EINVAL; + return check_ext_ctrls(p, VIDIOC_S_EXT_CTRLS) ? + ops->vidioc_s_ext_ctrls(file, fh, p) : -EINVAL; } static int v4l_try_ext_ctrls(const struct v4l2_ioctl_ops *ops, @@ -2338,8 +2361,8 @@ static int v4l_try_ext_ctrls(const struct v4l2_ioctl_ops *ops, vfd, vfd->v4l2_dev->mdev, p); if (ops->vidioc_try_ext_ctrls == NULL) return -ENOTTY; - return check_ext_ctrls(p, 0) ? ops->vidioc_try_ext_ctrls(file, fh, p) : - -EINVAL; + return check_ext_ctrls(p, VIDIOC_TRY_EXT_CTRLS) ? + ops->vidioc_try_ext_ctrls(file, fh, p) : -EINVAL; } /* diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index b221b4e438a1a91f8e8bf31164390189288e1cdb..73190652c267bd61cddafa2ede59a3e98ac1f3ab 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -585,19 +585,14 @@ int v4l2_m2m_reqbufs(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, } EXPORT_SYMBOL_GPL(v4l2_m2m_reqbufs); -int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, - struct v4l2_buffer *buf) +static void v4l2_m2m_adjust_mem_offset(struct vb2_queue *vq, + struct v4l2_buffer *buf) { - struct vb2_queue *vq; - int ret = 0; - unsigned int i; - - vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - ret = vb2_querybuf(vq, buf); - /* Adjust MMAP memory offsets for the CAPTURE queue */ if (buf->memory == V4L2_MEMORY_MMAP && V4L2_TYPE_IS_CAPTURE(vq->type)) { if (V4L2_TYPE_IS_MULTIPLANAR(vq->type)) { + unsigned int i; + for (i = 0; i < buf->length; ++i) buf->m.planes[i].m.mem_offset += DST_QUEUE_OFF_BASE; @@ -605,8 +600,23 @@ int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, buf->m.offset += DST_QUEUE_OFF_BASE; } } +} - return ret; +int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, + struct v4l2_buffer *buf) +{ + struct vb2_queue *vq; + int ret; + + vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); + ret = vb2_querybuf(vq, buf); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_querybuf); @@ -763,6 +773,9 @@ int v4l2_m2m_qbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, if (ret) return ret; + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + /* * If the capture queue is streaming, but streaming hasn't started * on the device, but was asked to stop, mark the previously queued @@ -784,9 +797,17 @@ int v4l2_m2m_dqbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_buffer *buf) { struct vb2_queue *vq; + int ret; vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - return vb2_dqbuf(vq, buf, file->f_flags & O_NONBLOCK); + ret = vb2_dqbuf(vq, buf, file->f_flags & O_NONBLOCK); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_dqbuf); @@ -795,9 +816,17 @@ int v4l2_m2m_prepare_buf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, { struct video_device *vdev = video_devdata(file); struct vb2_queue *vq; + int ret; vq = v4l2_m2m_get_vq(m2m_ctx, buf->type); - return vb2_prepare_buf(vq, vdev->v4l2_dev->mdev, buf); + ret = vb2_prepare_buf(vq, vdev->v4l2_dev->mdev, buf); + if (ret) + return ret; + + /* Adjust MMAP memory offsets for the CAPTURE queue */ + v4l2_m2m_adjust_mem_offset(vq, buf); + + return 0; } EXPORT_SYMBOL_GPL(v4l2_m2m_prepare_buf); diff --git a/drivers/memory/fsl_ifc.c b/drivers/memory/fsl_ifc.c index d062c2f8250f43a956d9e9a5bd8ef382a8de5e3f..75a8c38df9394f3d80e3b2e40c53b565ba5b77b6 100644 --- a/drivers/memory/fsl_ifc.c +++ b/drivers/memory/fsl_ifc.c @@ -263,7 +263,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) ret = fsl_ifc_ctrl_init(fsl_ifc_ctrl_dev); if (ret < 0) - goto err; + goto err_unmap_nandirq; init_waitqueue_head(&fsl_ifc_ctrl_dev->nand_wait); @@ -272,7 +272,7 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) if (ret != 0) { dev_err(&dev->dev, "failed to install irq (%d)\n", fsl_ifc_ctrl_dev->irq); - goto err_irq; + goto err_unmap_nandirq; } if (fsl_ifc_ctrl_dev->nand_irq) { @@ -281,17 +281,16 @@ static int fsl_ifc_ctrl_probe(struct platform_device *dev) if (ret != 0) { dev_err(&dev->dev, "failed to install irq (%d)\n", fsl_ifc_ctrl_dev->nand_irq); - goto err_nandirq; + goto err_free_irq; } } return 0; -err_nandirq: - free_irq(fsl_ifc_ctrl_dev->nand_irq, fsl_ifc_ctrl_dev); - irq_dispose_mapping(fsl_ifc_ctrl_dev->nand_irq); -err_irq: +err_free_irq: free_irq(fsl_ifc_ctrl_dev->irq, fsl_ifc_ctrl_dev); +err_unmap_nandirq: + irq_dispose_mapping(fsl_ifc_ctrl_dev->nand_irq); irq_dispose_mapping(fsl_ifc_ctrl_dev->irq); err: iounmap(fsl_ifc_ctrl_dev->gregs); diff --git a/drivers/memory/renesas-rpc-if.c b/drivers/memory/renesas-rpc-if.c index 1fe6c35b7503ea3eed1c4210ec95be7e12f6f044..9019121a80f53ded8c553ae1fa49723ca09dd1cf 100644 --- a/drivers/memory/renesas-rpc-if.c +++ b/drivers/memory/renesas-rpc-if.c @@ -161,10 +161,62 @@ static const struct regmap_access_table rpcif_volatile_table = { .n_yes_ranges = ARRAY_SIZE(rpcif_volatile_ranges), }; + +/* + * Custom accessor functions to ensure SMRDR0 and SMWDR0 are always accessed + * with proper width. Requires SMENR_SPIDE to be correctly set before! + */ +static int rpcif_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + struct rpcif *rpc = context; + + if (reg == RPCIF_SMRDR0 || reg == RPCIF_SMWDR0) { + u32 spide = readl(rpc->base + RPCIF_SMENR) & RPCIF_SMENR_SPIDE(0xF); + + if (spide == 0x8) { + *val = readb(rpc->base + reg); + return 0; + } else if (spide == 0xC) { + *val = readw(rpc->base + reg); + return 0; + } else if (spide != 0xF) { + return -EILSEQ; + } + } + + *val = readl(rpc->base + reg); + return 0; + +} + +static int rpcif_reg_write(void *context, unsigned int reg, unsigned int val) +{ + struct rpcif *rpc = context; + + if (reg == RPCIF_SMRDR0 || reg == RPCIF_SMWDR0) { + u32 spide = readl(rpc->base + RPCIF_SMENR) & RPCIF_SMENR_SPIDE(0xF); + + if (spide == 0x8) { + writeb(val, rpc->base + reg); + return 0; + } else if (spide == 0xC) { + writew(val, rpc->base + reg); + return 0; + } else if (spide != 0xF) { + return -EILSEQ; + } + } + + writel(val, rpc->base + reg); + return 0; +} + static const struct regmap_config rpcif_regmap_config = { .reg_bits = 32, .val_bits = 32, .reg_stride = 4, + .reg_read = rpcif_reg_read, + .reg_write = rpcif_reg_write, .fast_io = true, .max_register = RPCIF_PHYINT, .volatile_table = &rpcif_volatile_table, @@ -174,17 +226,15 @@ int rpcif_sw_init(struct rpcif *rpc, struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct resource *res; - void __iomem *base; rpc->dev = dev; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); - base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(base)) - return PTR_ERR(base); + rpc->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(rpc->base)) + return PTR_ERR(rpc->base); - rpc->regmap = devm_regmap_init_mmio(&pdev->dev, base, - &rpcif_regmap_config); + rpc->regmap = devm_regmap_init(&pdev->dev, NULL, rpc, &rpcif_regmap_config); if (IS_ERR(rpc->regmap)) { dev_err(&pdev->dev, "failed to init regmap for rpcif, error %ld\n", @@ -195,7 +245,7 @@ int rpcif_sw_init(struct rpcif *rpc, struct device *dev) res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dirmap"); rpc->dirmap = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(rpc->dirmap)) - rpc->dirmap = NULL; + return PTR_ERR(rpc->dirmap); rpc->size = resource_size(res); rpc->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); @@ -367,20 +417,16 @@ void rpcif_prepare(struct rpcif *rpc, const struct rpcif_op *op, u64 *offs, nbytes = op->data.nbytes; rpc->xferlen = nbytes; - rpc->enable |= RPCIF_SMENR_SPIDE(rpcif_bits_set(rpc, nbytes)) | - RPCIF_SMENR_SPIDB(rpcif_bit_size(op->data.buswidth)); + rpc->enable |= RPCIF_SMENR_SPIDB(rpcif_bit_size(op->data.buswidth)); } } EXPORT_SYMBOL(rpcif_prepare); int rpcif_manual_xfer(struct rpcif *rpc) { - u32 smenr, smcr, pos = 0, max = 4; + u32 smenr, smcr, pos = 0, max = rpc->bus_size == 2 ? 8 : 4; int ret = 0; - if (rpc->bus_size == 2) - max = 8; - pm_runtime_get_sync(rpc->dev); regmap_update_bits(rpc->regmap, RPCIF_PHYCNT, @@ -391,37 +437,36 @@ int rpcif_manual_xfer(struct rpcif *rpc) regmap_write(rpc->regmap, RPCIF_SMOPR, rpc->option); regmap_write(rpc->regmap, RPCIF_SMDMCR, rpc->dummy); regmap_write(rpc->regmap, RPCIF_SMDRENR, rpc->ddr); + regmap_write(rpc->regmap, RPCIF_SMADR, rpc->smadr); smenr = rpc->enable; switch (rpc->dir) { case RPCIF_DATA_OUT: while (pos < rpc->xferlen) { - u32 nbytes = rpc->xferlen - pos; - u32 data[2]; + u32 bytes_left = rpc->xferlen - pos; + u32 nbytes, data[2]; smcr = rpc->smcr | RPCIF_SMCR_SPIE; - if (nbytes > max) { - nbytes = max; + + /* nbytes may only be 1, 2, 4, or 8 */ + nbytes = bytes_left >= max ? max : (1 << ilog2(bytes_left)); + if (bytes_left > nbytes) smcr |= RPCIF_SMCR_SSLKP; - } + + smenr |= RPCIF_SMENR_SPIDE(rpcif_bits_set(rpc, nbytes)); + regmap_write(rpc->regmap, RPCIF_SMENR, smenr); memcpy(data, rpc->buffer + pos, nbytes); - if (nbytes > 4) { + if (nbytes == 8) { regmap_write(rpc->regmap, RPCIF_SMWDR1, data[0]); regmap_write(rpc->regmap, RPCIF_SMWDR0, data[1]); - } else if (nbytes > 2) { + } else { regmap_write(rpc->regmap, RPCIF_SMWDR0, data[0]); - } else { - regmap_write(rpc->regmap, RPCIF_SMWDR0, - data[0] << 16); } - regmap_write(rpc->regmap, RPCIF_SMADR, - rpc->smadr + pos); - regmap_write(rpc->regmap, RPCIF_SMENR, smenr); regmap_write(rpc->regmap, RPCIF_SMCR, smcr); ret = wait_msg_xfer_end(rpc); if (ret) @@ -461,14 +506,16 @@ int rpcif_manual_xfer(struct rpcif *rpc) break; } while (pos < rpc->xferlen) { - u32 nbytes = rpc->xferlen - pos; - u32 data[2]; + u32 bytes_left = rpc->xferlen - pos; + u32 nbytes, data[2]; - if (nbytes > max) - nbytes = max; + /* nbytes may only be 1, 2, 4, or 8 */ + nbytes = bytes_left >= max ? max : (1 << ilog2(bytes_left)); regmap_write(rpc->regmap, RPCIF_SMADR, rpc->smadr + pos); + smenr &= ~RPCIF_SMENR_SPIDE(0xF); + smenr |= RPCIF_SMENR_SPIDE(rpcif_bits_set(rpc, nbytes)); regmap_write(rpc->regmap, RPCIF_SMENR, smenr); regmap_write(rpc->regmap, RPCIF_SMCR, rpc->smcr | RPCIF_SMCR_SPIE); @@ -476,18 +523,14 @@ int rpcif_manual_xfer(struct rpcif *rpc) if (ret) goto err_out; - if (nbytes > 4) { + if (nbytes == 8) { regmap_read(rpc->regmap, RPCIF_SMRDR1, &data[0]); regmap_read(rpc->regmap, RPCIF_SMRDR0, &data[1]); - } else if (nbytes > 2) { - regmap_read(rpc->regmap, RPCIF_SMRDR0, - &data[0]); - } else { + } else { regmap_read(rpc->regmap, RPCIF_SMRDR0, &data[0]); - data[0] >>= 16; } memcpy(rpc->buffer + pos, data, nbytes); diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 8004dd64d09a87bc8b203e42077d5bd2fed29e53..bc1f484f50f1df7e2daec751f2f0e4766c9d93e4 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -1727,7 +1727,7 @@ static int msb_init_card(struct memstick_dev *card) msb->pages_in_block = boot_block->attr.block_size * 2; msb->block_size = msb->page_size * msb->pages_in_block; - if (msb->page_size > PAGE_SIZE) { + if ((size_t)msb->page_size > PAGE_SIZE) { /* this isn't supported by linux at all, anyway*/ dbg("device page %d size isn't supported", msb->page_size); return -EINVAL; diff --git a/drivers/memstick/host/jmb38x_ms.c b/drivers/memstick/host/jmb38x_ms.c index e83c3ada9389ea460ac1fe339e6e9a9f28f89e0b..9e8cccbd2817ea1d6eb2f23efc8ef3dbb7e728fb 100644 --- a/drivers/memstick/host/jmb38x_ms.c +++ b/drivers/memstick/host/jmb38x_ms.c @@ -882,7 +882,7 @@ static struct memstick_host *jmb38x_ms_alloc_host(struct jmb38x_ms *jm, int cnt) iounmap(host->addr); err_out_free: - kfree(msh); + memstick_free_host(msh); return NULL; } diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c index d2ef46337191cafc085520778a1c94e8f2fe3bb6..eaa2a94d18be4e46b187de7b5d5f9ad92245caa3 100644 --- a/drivers/memstick/host/r592.c +++ b/drivers/memstick/host/r592.c @@ -837,15 +837,15 @@ static void r592_remove(struct pci_dev *pdev) } memstick_remove_host(dev->host); + if (dev->dummy_dma_page) + dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->dummy_dma_page, + dev->dummy_dma_page_physical_address); + free_irq(dev->irq, dev); iounmap(dev->mmio); pci_release_regions(pdev); pci_disable_device(pdev); memstick_free_host(dev->host); - - if (dev->dummy_dma_page) - dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->dummy_dma_page, - dev->dummy_dma_page_physical_address); } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/mfd/atmel-flexcom.c b/drivers/mfd/atmel-flexcom.c index d2f5c073fdf31553d4d107c7d605f0fe155671d8..559eb4d352b68b21c69fe3ad5002b37ac7cd6c22 100644 --- a/drivers/mfd/atmel-flexcom.c +++ b/drivers/mfd/atmel-flexcom.c @@ -87,8 +87,7 @@ static const struct of_device_id atmel_flexcom_of_match[] = { }; MODULE_DEVICE_TABLE(of, atmel_flexcom_of_match); -#ifdef CONFIG_PM_SLEEP -static int atmel_flexcom_resume(struct device *dev) +static int __maybe_unused atmel_flexcom_resume_noirq(struct device *dev) { struct atmel_flexcom *ddata = dev_get_drvdata(dev); int err; @@ -105,16 +104,16 @@ static int atmel_flexcom_resume(struct device *dev) return 0; } -#endif -static SIMPLE_DEV_PM_OPS(atmel_flexcom_pm_ops, NULL, - atmel_flexcom_resume); +static const struct dev_pm_ops atmel_flexcom_pm_ops = { + .resume_noirq = atmel_flexcom_resume_noirq, +}; static struct platform_driver atmel_flexcom_driver = { .probe = atmel_flexcom_probe, .driver = { .name = "atmel_flexcom", - .pm = &atmel_flexcom_pm_ops, + .pm = pm_ptr(&atmel_flexcom_pm_ops), .of_match_table = atmel_flexcom_of_match, }, }; diff --git a/drivers/mfd/dln2.c b/drivers/mfd/dln2.c index 83e676a096dc11c724348537a184bd22c81c7332..852129ea076660f8dc4f495eece81fe8b5f0927c 100644 --- a/drivers/mfd/dln2.c +++ b/drivers/mfd/dln2.c @@ -50,6 +50,7 @@ enum dln2_handle { DLN2_HANDLE_GPIO, DLN2_HANDLE_I2C, DLN2_HANDLE_SPI, + DLN2_HANDLE_ADC, DLN2_HANDLES }; @@ -653,6 +654,7 @@ enum { DLN2_ACPI_MATCH_GPIO = 0, DLN2_ACPI_MATCH_I2C = 1, DLN2_ACPI_MATCH_SPI = 2, + DLN2_ACPI_MATCH_ADC = 3, }; static struct dln2_platform_data dln2_pdata_gpio = { @@ -683,6 +685,16 @@ static struct mfd_cell_acpi_match dln2_acpi_match_spi = { .adr = DLN2_ACPI_MATCH_SPI, }; +/* Only one ADC port supported */ +static struct dln2_platform_data dln2_pdata_adc = { + .handle = DLN2_HANDLE_ADC, + .port = 0, +}; + +static struct mfd_cell_acpi_match dln2_acpi_match_adc = { + .adr = DLN2_ACPI_MATCH_ADC, +}; + static const struct mfd_cell dln2_devs[] = { { .name = "dln2-gpio", @@ -702,6 +714,12 @@ static const struct mfd_cell dln2_devs[] = { .platform_data = &dln2_pdata_spi, .pdata_size = sizeof(struct dln2_platform_data), }, + { + .name = "dln2-adc", + .acpi_match = &dln2_acpi_match_adc, + .platform_data = &dln2_pdata_adc, + .pdata_size = sizeof(struct dln2_platform_data), + }, }; static void dln2_stop(struct dln2_dev *dln2) diff --git a/drivers/mfd/intel-lpss-acpi.c b/drivers/mfd/intel-lpss-acpi.c index c8fe334b5fe8bb425833bd57baf24c59b71afefe..045cbf0cbe53ae55f334f0a0d83aa25da96bf5cc 100644 --- a/drivers/mfd/intel-lpss-acpi.c +++ b/drivers/mfd/intel-lpss-acpi.c @@ -102,6 +102,7 @@ static int intel_lpss_acpi_probe(struct platform_device *pdev) { struct intel_lpss_platform_info *info; const struct acpi_device_id *id; + int ret; id = acpi_match_device(intel_lpss_acpi_ids, &pdev->dev); if (!id) @@ -115,10 +116,14 @@ static int intel_lpss_acpi_probe(struct platform_device *pdev) info->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); info->irq = platform_get_irq(pdev, 0); + ret = intel_lpss_probe(&pdev->dev, info); + if (ret) + return ret; + pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); - return intel_lpss_probe(&pdev->dev, info); + return 0; } static int intel_lpss_acpi_remove(struct platform_device *pdev) diff --git a/drivers/mfd/mfd-core.c b/drivers/mfd/mfd-core.c index fc00aaccb5f72979868991c6383ea4b78fa4ed47..a3a6faa99de05ba361b33391f92e6e1e2f0f4863 100644 --- a/drivers/mfd/mfd-core.c +++ b/drivers/mfd/mfd-core.c @@ -210,6 +210,7 @@ static int mfd_add_device(struct device *parent, int id, if (of_device_is_compatible(np, cell->of_compatible)) { /* Ignore 'disabled' devices error free */ if (!of_device_is_available(np)) { + of_node_put(np); ret = 0; goto fail_alias; } @@ -217,6 +218,7 @@ static int mfd_add_device(struct device *parent, int id, ret = mfd_match_of_node_to_dev(pdev, np, cell); if (ret == -EAGAIN) continue; + of_node_put(np); if (ret) goto fail_alias; diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 2c8929a4412200ef1891f72febb86057ae252176..2b9572a6d114b90f288099e09c2872f052509ff1 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -481,6 +481,18 @@ config HISI_HIKEY_USB switching between the dual-role USB-C port and the USB-A host ports using only one USB controller. +config OPEN_DICE + tristate "Open Profile for DICE driver" + depends on OF_RESERVED_MEM + help + This driver exposes a DICE reserved memory region to userspace via + a character device. The memory region contains Compound Device + Identifiers (CDIs) generated by firmware as an output of DICE + measured boot flow. Userspace can use CDIs for remote attestation + and sealing. + + If unsure, say N. + source "drivers/misc/c2port/Kconfig" source "drivers/misc/eeprom/Kconfig" source "drivers/misc/cb710/Kconfig" diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index aecaaed87a3575f40bda27bbe395839a9dfbea91..2ec634354cf552f74dad24e083e4932434a33e5f 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -58,3 +58,4 @@ obj-$(CONFIG_UACCE) += uacce/ obj-$(CONFIG_XILINX_SDFEC) += xilinx_sdfec.o obj-$(CONFIG_HISI_HIKEY_USB) += hisi_hikey_usb.o obj-$(CONFIG_UID_SYS_STATS) += uid_sys_stats.o +obj-$(CONFIG_OPEN_DICE) += open-dice.o diff --git a/drivers/misc/cb710/sgbuf2.c b/drivers/misc/cb710/sgbuf2.c index e5a4ed3701eb8e3d858eee4026ea5a299068542a..a798fad5f03c28f707814cc56c8a8042f0f3079b 100644 --- a/drivers/misc/cb710/sgbuf2.c +++ b/drivers/misc/cb710/sgbuf2.c @@ -47,7 +47,7 @@ static inline bool needs_unaligned_copy(const void *ptr) #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS return false; #else - return ((ptr - NULL) & 3) != 0; + return ((uintptr_t)ptr & 3) != 0; #endif } diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c index 252e15ba65e111c0595dc2683dec25620cfb5f3a..d9f90332aaf65ecadf5b28e8c4191980d9c39f7e 100644 --- a/drivers/misc/eeprom/ee1004.c +++ b/drivers/misc/eeprom/ee1004.c @@ -82,6 +82,9 @@ static ssize_t ee1004_eeprom_read(struct i2c_client *client, char *buf, if (unlikely(offset + count > EE1004_PAGE_SIZE)) count = EE1004_PAGE_SIZE - offset; + if (count > I2C_SMBUS_BLOCK_MAX) + count = I2C_SMBUS_BLOCK_MAX; + status = i2c_smbus_read_i2c_block_data_or_emulated(client, offset, count, buf); dev_dbg(&client->dev, "read %zu@%d --> %d\n", count, offset, status); diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 273d9c1591793dee72c54735bc3277626ff4af61..d0471fec37fbb30d6519c7b7749ddea8058ef523 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -717,16 +717,18 @@ static int fastrpc_get_meta_size(struct fastrpc_invoke_ctx *ctx) static u64 fastrpc_get_payload_size(struct fastrpc_invoke_ctx *ctx, int metalen) { u64 size = 0; - int i; + int oix; size = ALIGN(metalen, FASTRPC_ALIGN); - for (i = 0; i < ctx->nscalars; i++) { + for (oix = 0; oix < ctx->nbufs; oix++) { + int i = ctx->olaps[oix].raix; + if (ctx->args[i].fd == 0 || ctx->args[i].fd == -1) { - if (ctx->olaps[i].offset == 0) + if (ctx->olaps[oix].offset == 0) size = ALIGN(size, FASTRPC_ALIGN); - size += (ctx->olaps[i].mend - ctx->olaps[i].mstart); + size += (ctx->olaps[oix].mend - ctx->olaps[oix].mstart); } } @@ -812,10 +814,12 @@ static int fastrpc_get_args(u32 kernel, struct fastrpc_invoke_ctx *ctx) rpra[i].pv = (u64) ctx->args[i].ptr; pages[i].addr = ctx->maps[i]->phys; + mmap_read_lock(current->mm); vma = find_vma(current->mm, ctx->args[i].ptr); if (vma) pages[i].addr += ctx->args[i].ptr - vma->vm_start; + mmap_read_unlock(current->mm); pg_start = (ctx->args[i].ptr & PAGE_MASK) >> PAGE_SHIFT; pg_end = ((ctx->args[i].ptr + len - 1) & PAGE_MASK) >> @@ -1280,7 +1284,14 @@ static int fastrpc_dmabuf_alloc(struct fastrpc_user *fl, char __user *argp) } if (copy_to_user(argp, &bp, sizeof(bp))) { - dma_buf_put(buf->dmabuf); + /* + * The usercopy failed, but we can't do much about it, as + * dma_buf_fd() already called fd_install() and made the + * file descriptor accessible for the current process. It + * might already be closed and dmabuf no longer valid when + * we reach this point. Therefore "leak" the fd and rely on + * the process exit path to do any required cleanup. + */ return -EFAULT; } diff --git a/drivers/misc/habanalabs/gaudi/gaudi_security.c b/drivers/misc/habanalabs/gaudi/gaudi_security.c index 2d7add0e5bcc0a4f406f02578ed9ae5338d6ba37..9343a81d312219251f64340fc88e3bf0e90d2db5 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_security.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_security.c @@ -8,16 +8,21 @@ #include "gaudiP.h" #include "../include/gaudi/asic_reg/gaudi_regs.h" -#define GAUDI_NUMBER_OF_RR_REGS 24 -#define GAUDI_NUMBER_OF_LBW_RANGES 12 +#define GAUDI_NUMBER_OF_LBW_RR_REGS 28 +#define GAUDI_NUMBER_OF_HBW_RR_REGS 24 +#define GAUDI_NUMBER_OF_LBW_RANGES 10 -static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_HIT_WPROT, mmDMA_IF_W_S_DMA0_HIT_WPROT, mmDMA_IF_W_S_DMA1_HIT_WPROT, + mmDMA_IF_E_S_SOB_HIT_WPROT, mmDMA_IF_E_S_DMA0_HIT_WPROT, mmDMA_IF_E_S_DMA1_HIT_WPROT, + mmDMA_IF_W_N_SOB_HIT_WPROT, mmDMA_IF_W_N_DMA0_HIT_WPROT, mmDMA_IF_W_N_DMA1_HIT_WPROT, + mmDMA_IF_E_N_SOB_HIT_WPROT, mmDMA_IF_E_N_DMA0_HIT_WPROT, mmDMA_IF_E_N_DMA1_HIT_WPROT, mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AW, @@ -38,13 +43,17 @@ static u64 gaudi_rr_lbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AW, }; -static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_HIT_RPROT, mmDMA_IF_W_S_DMA0_HIT_RPROT, mmDMA_IF_W_S_DMA1_HIT_RPROT, + mmDMA_IF_E_S_SOB_HIT_RPROT, mmDMA_IF_E_S_DMA0_HIT_RPROT, mmDMA_IF_E_S_DMA1_HIT_RPROT, + mmDMA_IF_W_N_SOB_HIT_RPROT, mmDMA_IF_W_N_DMA0_HIT_RPROT, mmDMA_IF_W_N_DMA1_HIT_RPROT, + mmDMA_IF_E_N_SOB_HIT_RPROT, mmDMA_IF_E_N_DMA0_HIT_RPROT, mmDMA_IF_E_N_DMA1_HIT_RPROT, mmSIF_RTR_0_LBW_RANGE_PROT_HIT_AR, @@ -65,13 +74,17 @@ static u64 gaudi_rr_lbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_HIT_AR, }; -static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_MIN_WPROT_0, mmDMA_IF_W_S_DMA0_MIN_WPROT_0, mmDMA_IF_W_S_DMA1_MIN_WPROT_0, + mmDMA_IF_E_S_SOB_MIN_WPROT_0, mmDMA_IF_E_S_DMA0_MIN_WPROT_0, mmDMA_IF_E_S_DMA1_MIN_WPROT_0, + mmDMA_IF_W_N_SOB_MIN_WPROT_0, mmDMA_IF_W_N_DMA0_MIN_WPROT_0, mmDMA_IF_W_N_DMA1_MIN_WPROT_0, + mmDMA_IF_E_N_SOB_MIN_WPROT_0, mmDMA_IF_E_N_DMA0_MIN_WPROT_0, mmDMA_IF_E_N_DMA1_MIN_WPROT_0, mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AW_0, @@ -92,13 +105,17 @@ static u64 gaudi_rr_lbw_min_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AW_0, }; -static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_MAX_WPROT_0, mmDMA_IF_W_S_DMA0_MAX_WPROT_0, mmDMA_IF_W_S_DMA1_MAX_WPROT_0, + mmDMA_IF_E_S_SOB_MAX_WPROT_0, mmDMA_IF_E_S_DMA0_MAX_WPROT_0, mmDMA_IF_E_S_DMA1_MAX_WPROT_0, + mmDMA_IF_W_N_SOB_MAX_WPROT_0, mmDMA_IF_W_N_DMA0_MAX_WPROT_0, mmDMA_IF_W_N_DMA1_MAX_WPROT_0, + mmDMA_IF_E_N_SOB_MAX_WPROT_0, mmDMA_IF_E_N_DMA0_MAX_WPROT_0, mmDMA_IF_E_N_DMA1_MAX_WPROT_0, mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AW_0, @@ -119,13 +136,17 @@ static u64 gaudi_rr_lbw_max_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AW_0, }; -static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_MIN_RPROT_0, mmDMA_IF_W_S_DMA0_MIN_RPROT_0, mmDMA_IF_W_S_DMA1_MIN_RPROT_0, + mmDMA_IF_E_S_SOB_MIN_RPROT_0, mmDMA_IF_E_S_DMA0_MIN_RPROT_0, mmDMA_IF_E_S_DMA1_MIN_RPROT_0, + mmDMA_IF_W_N_SOB_MIN_RPROT_0, mmDMA_IF_W_N_DMA0_MIN_RPROT_0, mmDMA_IF_W_N_DMA1_MIN_RPROT_0, + mmDMA_IF_E_N_SOB_MIN_RPROT_0, mmDMA_IF_E_N_DMA0_MIN_RPROT_0, mmDMA_IF_E_N_DMA1_MIN_RPROT_0, mmSIF_RTR_0_LBW_RANGE_PROT_MIN_AR_0, @@ -146,13 +167,17 @@ static u64 gaudi_rr_lbw_min_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_MIN_AR_0, }; -static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_LBW_RR_REGS] = { + mmDMA_IF_W_S_SOB_MAX_RPROT_0, mmDMA_IF_W_S_DMA0_MAX_RPROT_0, mmDMA_IF_W_S_DMA1_MAX_RPROT_0, + mmDMA_IF_E_S_SOB_MAX_RPROT_0, mmDMA_IF_E_S_DMA0_MAX_RPROT_0, mmDMA_IF_E_S_DMA1_MAX_RPROT_0, + mmDMA_IF_W_N_SOB_MAX_RPROT_0, mmDMA_IF_W_N_DMA0_MAX_RPROT_0, mmDMA_IF_W_N_DMA1_MAX_RPROT_0, + mmDMA_IF_E_N_SOB_MAX_RPROT_0, mmDMA_IF_E_N_DMA0_MAX_RPROT_0, mmDMA_IF_E_N_DMA1_MAX_RPROT_0, mmSIF_RTR_0_LBW_RANGE_PROT_MAX_AR_0, @@ -173,7 +198,7 @@ static u64 gaudi_rr_lbw_max_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_7_LBW_RANGE_PROT_MAX_AR_0, }; -static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AW, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AW, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AW, @@ -200,7 +225,7 @@ static u64 gaudi_rr_hbw_hit_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AW }; -static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_HIT_AR, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_HIT_AR, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_HIT_AR, @@ -227,7 +252,7 @@ static u64 gaudi_rr_hbw_hit_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_HIT_AR }; -static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AW_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AW_0, @@ -254,7 +279,7 @@ static u64 gaudi_rr_hbw_base_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AW_0 }; -static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AW_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AW_0, @@ -281,7 +306,7 @@ static u64 gaudi_rr_hbw_base_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AW_0 }; -static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AW_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AW_0, @@ -308,7 +333,7 @@ static u64 gaudi_rr_hbw_mask_low_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AW_0 }; -static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AW_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AW_0, @@ -335,7 +360,7 @@ static u64 gaudi_rr_hbw_mask_high_aw_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_HIGH_AW_0 }; -static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_LOW_AR_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_LOW_AR_0, @@ -362,7 +387,7 @@ static u64 gaudi_rr_hbw_base_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_LOW_AR_0 }; -static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_BASE_HIGH_AR_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_BASE_HIGH_AR_0, @@ -389,7 +414,7 @@ static u64 gaudi_rr_hbw_base_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_BASE_HIGH_AR_0 }; -static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_LOW_AR_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_LOW_AR_0, @@ -416,7 +441,7 @@ static u64 gaudi_rr_hbw_mask_low_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { mmNIF_RTR_CTRL_7_RANGE_SEC_MASK_LOW_AR_0 }; -static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_RR_REGS] = { +static u64 gaudi_rr_hbw_mask_high_ar_regs[GAUDI_NUMBER_OF_HBW_RR_REGS] = { mmDMA_IF_W_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0, mmDMA_IF_W_S_DOWN_CH1_RANGE_SEC_MASK_HIGH_AR_0, mmDMA_IF_E_S_DOWN_CH0_RANGE_SEC_MASK_HIGH_AR_0, @@ -8870,50 +8895,44 @@ static void gaudi_init_range_registers_lbw(struct hl_device *hdev) u32 lbw_rng_end[GAUDI_NUMBER_OF_LBW_RANGES]; int i, j; - lbw_rng_start[0] = (0xFBFE0000 & 0x3FFFFFF) - 1; - lbw_rng_end[0] = (0xFBFFF000 & 0x3FFFFFF) + 1; + lbw_rng_start[0] = (0xFC0E8000 & 0x3FFFFFF) - 1; /* 0x000E7FFF */ + lbw_rng_end[0] = (0xFC11FFFF & 0x3FFFFFF) + 1; /* 0x00120000 */ - lbw_rng_start[1] = (0xFC0E8000 & 0x3FFFFFF) - 1; - lbw_rng_end[1] = (0xFC120000 & 0x3FFFFFF) + 1; + lbw_rng_start[1] = (0xFC1E8000 & 0x3FFFFFF) - 1; /* 0x001E7FFF */ + lbw_rng_end[1] = (0xFC48FFFF & 0x3FFFFFF) + 1; /* 0x00490000 */ - lbw_rng_start[2] = (0xFC1E8000 & 0x3FFFFFF) - 1; - lbw_rng_end[2] = (0xFC48FFFF & 0x3FFFFFF) + 1; + lbw_rng_start[2] = (0xFC600000 & 0x3FFFFFF) - 1; /* 0x005FFFFF */ + lbw_rng_end[2] = (0xFCC48FFF & 0x3FFFFFF) + 1; /* 0x00C49000 */ - lbw_rng_start[3] = (0xFC600000 & 0x3FFFFFF) - 1; - lbw_rng_end[3] = (0xFCC48FFF & 0x3FFFFFF) + 1; + lbw_rng_start[3] = (0xFCC4A000 & 0x3FFFFFF) - 1; /* 0x00C49FFF */ + lbw_rng_end[3] = (0xFCCDFFFF & 0x3FFFFFF) + 1; /* 0x00CE0000 */ - lbw_rng_start[4] = (0xFCC4A000 & 0x3FFFFFF) - 1; - lbw_rng_end[4] = (0xFCCDFFFF & 0x3FFFFFF) + 1; + lbw_rng_start[4] = (0xFCCE4000 & 0x3FFFFFF) - 1; /* 0x00CE3FFF */ + lbw_rng_end[4] = (0xFCD1FFFF & 0x3FFFFFF) + 1; /* 0x00D20000 */ - lbw_rng_start[5] = (0xFCCE4000 & 0x3FFFFFF) - 1; - lbw_rng_end[5] = (0xFCD1FFFF & 0x3FFFFFF) + 1; + lbw_rng_start[5] = (0xFCD24000 & 0x3FFFFFF) - 1; /* 0x00D23FFF */ + lbw_rng_end[5] = (0xFCD5FFFF & 0x3FFFFFF) + 1; /* 0x00D60000 */ - lbw_rng_start[6] = (0xFCD24000 & 0x3FFFFFF) - 1; - lbw_rng_end[6] = (0xFCD5FFFF & 0x3FFFFFF) + 1; + lbw_rng_start[6] = (0xFCD64000 & 0x3FFFFFF) - 1; /* 0x00D63FFF */ + lbw_rng_end[6] = (0xFCD9FFFF & 0x3FFFFFF) + 1; /* 0x00DA0000 */ - lbw_rng_start[7] = (0xFCD64000 & 0x3FFFFFF) - 1; - lbw_rng_end[7] = (0xFCD9FFFF & 0x3FFFFFF) + 1; + lbw_rng_start[7] = (0xFCDA4000 & 0x3FFFFFF) - 1; /* 0x00DA3FFF */ + lbw_rng_end[7] = (0xFCDDFFFF & 0x3FFFFFF) + 1; /* 0x00DE0000 */ - lbw_rng_start[8] = (0xFCDA4000 & 0x3FFFFFF) - 1; - lbw_rng_end[8] = (0xFCDDFFFF & 0x3FFFFFF) + 1; + lbw_rng_start[8] = (0xFCDE4000 & 0x3FFFFFF) - 1; /* 0x00DE3FFF */ + lbw_rng_end[8] = (0xFCE05FFF & 0x3FFFFFF) + 1; /* 0x00E06000 */ - lbw_rng_start[9] = (0xFCDE4000 & 0x3FFFFFF) - 1; - lbw_rng_end[9] = (0xFCE05FFF & 0x3FFFFFF) + 1; + lbw_rng_start[9] = (0xFCFC9000 & 0x3FFFFFF) - 1; /* 0x00FC8FFF */ + lbw_rng_end[9] = (0xFFFFFFFE & 0x3FFFFFF) + 1; /* 0x03FFFFFF */ - lbw_rng_start[10] = (0xFEC43000 & 0x3FFFFFF) - 1; - lbw_rng_end[10] = (0xFEC43FFF & 0x3FFFFFF) + 1; - - lbw_rng_start[11] = (0xFE484000 & 0x3FFFFFF) - 1; - lbw_rng_end[11] = (0xFE484FFF & 0x3FFFFFF) + 1; - - for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) { + for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++) { WREG32(gaudi_rr_lbw_hit_aw_regs[i], (1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1); WREG32(gaudi_rr_lbw_hit_ar_regs[i], (1 << GAUDI_NUMBER_OF_LBW_RANGES) - 1); } - for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) + for (i = 0 ; i < GAUDI_NUMBER_OF_LBW_RR_REGS ; i++) for (j = 0 ; j < GAUDI_NUMBER_OF_LBW_RANGES ; j++) { WREG32(gaudi_rr_lbw_min_aw_regs[i] + (j << 2), lbw_rng_start[j]); @@ -8960,12 +8979,12 @@ static void gaudi_init_range_registers_hbw(struct hl_device *hdev) * 6th range is the host */ - for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) { + for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) { WREG32(gaudi_rr_hbw_hit_aw_regs[i], 0x1F); WREG32(gaudi_rr_hbw_hit_ar_regs[i], 0x1D); } - for (i = 0 ; i < GAUDI_NUMBER_OF_RR_REGS ; i++) { + for (i = 0 ; i < GAUDI_NUMBER_OF_HBW_RR_REGS ; i++) { WREG32(gaudi_rr_hbw_base_low_aw_regs[i], dram_addr_lo); WREG32(gaudi_rr_hbw_base_low_ar_regs[i], dram_addr_lo); diff --git a/drivers/misc/lattice-ecp3-config.c b/drivers/misc/lattice-ecp3-config.c index 5eaf74447ca1e48764b446512e42d538579874eb..556bb7d705f532c7465917bc35de01187d263912 100644 --- a/drivers/misc/lattice-ecp3-config.c +++ b/drivers/misc/lattice-ecp3-config.c @@ -76,12 +76,12 @@ static void firmware_load(const struct firmware *fw, void *context) if (fw == NULL) { dev_err(&spi->dev, "Cannot load firmware, aborting\n"); - return; + goto out; } if (fw->size == 0) { dev_err(&spi->dev, "Error: Firmware size is 0!\n"); - return; + goto out; } /* Fill dummy data (24 stuffing bits for commands) */ @@ -103,7 +103,7 @@ static void firmware_load(const struct firmware *fw, void *context) dev_err(&spi->dev, "Error: No supported FPGA detected (JEDEC_ID=%08x)!\n", jedec_id); - return; + goto out; } dev_info(&spi->dev, "FPGA %s detected\n", ecp3_dev[i].name); @@ -116,7 +116,7 @@ static void firmware_load(const struct firmware *fw, void *context) buffer = kzalloc(fw->size + 8, GFP_KERNEL); if (!buffer) { dev_err(&spi->dev, "Error: Can't allocate memory!\n"); - return; + goto out; } /* @@ -155,7 +155,7 @@ static void firmware_load(const struct firmware *fw, void *context) "Error: Timeout waiting for FPGA to clear (status=%08x)!\n", status); kfree(buffer); - return; + goto out; } dev_info(&spi->dev, "Configuring the FPGA...\n"); @@ -181,7 +181,7 @@ static void firmware_load(const struct firmware *fw, void *context) release_firmware(fw); kfree(buffer); - +out: complete(&data->fw_loaded); } diff --git a/drivers/misc/lkdtm/Makefile b/drivers/misc/lkdtm/Makefile index 101cdfc11247a8fcbc3983fad171d88f86d1689a..a191f51752919bda44f6ac7795b759623083b571 100644 --- a/drivers/misc/lkdtm/Makefile +++ b/drivers/misc/lkdtm/Makefile @@ -18,7 +18,7 @@ CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO) OBJCOPYFLAGS := OBJCOPYFLAGS_rodata_objcopy.o := \ - --rename-section .noinstr.text=.rodata,alloc,readonly,load + --rename-section .noinstr.text=.rodata,alloc,readonly,load,contents targets += rodata.o rodata_objcopy.o $(obj)/rodata_objcopy.o: $(obj)/rodata.o FORCE $(call if_changed,objcopy) diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index cb34925e10f15d6cd65b79b48c4870257b9e6189..67bb6a25fd0a020c55c310fcd171b1fc566982f9 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -92,6 +92,7 @@ #define MEI_DEV_ID_CDF 0x18D3 /* Cedar Fork */ #define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */ +#define MEI_DEV_ID_ICP_N 0x38E0 /* Ice Lake Point N */ #define MEI_DEV_ID_JSP_N 0x4DE0 /* Jasper Lake Point N */ diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index c3393b383e5989bf6ed3be487ba219a8b7fcaeb2..3a45aaf002ac8523e3955e23e21603acfffeb280 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -96,6 +96,7 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CMP_H_3, MEI_ME_PCH8_ITOUCH_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ICP_N, MEI_ME_PCH12_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_LP, MEI_ME_PCH15_CFG)}, {MEI_PCI_DEVICE(MEI_DEV_ID_TGP_H, MEI_ME_PCH15_SPS_CFG)}, diff --git a/drivers/misc/open-dice.c b/drivers/misc/open-dice.c new file mode 100644 index 0000000000000000000000000000000000000000..c61be3404c6f283e8e3408edc3d35dd064e5d859 --- /dev/null +++ b/drivers/misc/open-dice.c @@ -0,0 +1,208 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 - Google LLC + * Author: David Brazdil + * + * Driver for Open Profile for DICE. + * + * This driver takes ownership of a reserved memory region containing data + * generated by the Open Profile for DICE measured boot protocol. The memory + * contents are not interpreted by the kernel but can be mapped into a userspace + * process via a misc device. Userspace can also request a wipe of the memory. + * + * Userspace can access the data with (w/o error handling): + * + * fd = open("/dev/open-dice0", O_RDWR); + * read(fd, &size, sizeof(unsigned long)); + * data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + * write(fd, NULL, 0); // wipe + * close(fd); + */ + +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "open-dice" + +struct open_dice_drvdata { + struct mutex lock; + char name[16]; + struct reserved_mem *rmem; + struct miscdevice misc; +}; + +static inline struct open_dice_drvdata *to_open_dice_drvdata(struct file *filp) +{ + return container_of(filp->private_data, struct open_dice_drvdata, misc); +} + +static int open_dice_wipe(struct open_dice_drvdata *drvdata) +{ + void *kaddr; + + mutex_lock(&drvdata->lock); + kaddr = devm_memremap(drvdata->misc.this_device, drvdata->rmem->base, + drvdata->rmem->size, MEMREMAP_WC); + if (IS_ERR(kaddr)) { + mutex_unlock(&drvdata->lock); + return PTR_ERR(kaddr); + } + + memset(kaddr, 0, drvdata->rmem->size); + devm_memunmap(drvdata->misc.this_device, kaddr); + mutex_unlock(&drvdata->lock); + return 0; +} + +/* + * Copies the size of the reserved memory region to the user-provided buffer. + */ +static ssize_t open_dice_read(struct file *filp, char __user *ptr, size_t len, + loff_t *off) +{ + unsigned long val = to_open_dice_drvdata(filp)->rmem->size; + + return simple_read_from_buffer(ptr, len, off, &val, sizeof(val)); +} + +/* + * Triggers a wipe of the reserved memory region. The user-provided pointer + * is never dereferenced. + */ +static ssize_t open_dice_write(struct file *filp, const char __user *ptr, + size_t len, loff_t *off) +{ + if (open_dice_wipe(to_open_dice_drvdata(filp))) + return -EIO; + + /* Consume the input buffer. */ + return len; +} + +/* + * Creates a mapping of the reserved memory region in user address space. + */ +static int open_dice_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct open_dice_drvdata *drvdata = to_open_dice_drvdata(filp); + + /* Do not allow userspace to modify the underlying data. */ + if ((vma->vm_flags & VM_WRITE) && (vma->vm_flags & VM_SHARED)) + return -EPERM; + + /* Ensure userspace cannot acquire VM_WRITE + VM_SHARED later. */ + if (vma->vm_flags & VM_WRITE) + vma->vm_flags &= ~VM_MAYSHARE; + else if (vma->vm_flags & VM_SHARED) + vma->vm_flags &= ~VM_MAYWRITE; + + /* Create write-combine mapping so all clients observe a wipe. */ + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + vma->vm_flags |= VM_DONTCOPY | VM_DONTDUMP; + return vm_iomap_memory(vma, drvdata->rmem->base, drvdata->rmem->size); +} + +static const struct file_operations open_dice_fops = { + .owner = THIS_MODULE, + .read = open_dice_read, + .write = open_dice_write, + .mmap = open_dice_mmap, +}; + +static int __init open_dice_probe(struct platform_device *pdev) +{ + static unsigned int dev_idx; + struct device *dev = &pdev->dev; + struct reserved_mem *rmem; + struct open_dice_drvdata *drvdata; + int ret; + + rmem = of_reserved_mem_lookup(dev->of_node); + if (!rmem) { + dev_err(dev, "failed to lookup reserved memory\n"); + return -EINVAL; + } + + if (!rmem->size || (rmem->size > ULONG_MAX)) { + dev_err(dev, "invalid memory region size\n"); + return -EINVAL; + } + + if (!PAGE_ALIGNED(rmem->base) || !PAGE_ALIGNED(rmem->size)) { + dev_err(dev, "memory region must be page-aligned\n"); + return -EINVAL; + } + + drvdata = devm_kmalloc(dev, sizeof(*drvdata), GFP_KERNEL); + if (!drvdata) + return -ENOMEM; + + *drvdata = (struct open_dice_drvdata){ + .lock = __MUTEX_INITIALIZER(drvdata->lock), + .rmem = rmem, + .misc = (struct miscdevice){ + .parent = dev, + .name = drvdata->name, + .minor = MISC_DYNAMIC_MINOR, + .fops = &open_dice_fops, + .mode = 0600, + }, + }; + + /* Index overflow check not needed, misc_register() will fail. */ + snprintf(drvdata->name, sizeof(drvdata->name), DRIVER_NAME"%u", dev_idx++); + + ret = misc_register(&drvdata->misc); + if (ret) { + dev_err(dev, "failed to register misc device '%s': %d\n", + drvdata->name, ret); + return ret; + } + + platform_set_drvdata(pdev, drvdata); + return 0; +} + +static int open_dice_remove(struct platform_device *pdev) +{ + struct open_dice_drvdata *drvdata = platform_get_drvdata(pdev); + + misc_deregister(&drvdata->misc); + return 0; +} + +static const struct of_device_id open_dice_of_match[] = { + { .compatible = "google,open-dice" }, + {}, +}; + +static struct platform_driver open_dice_driver = { + .remove = open_dice_remove, + .driver = { + .name = DRIVER_NAME, + .of_match_table = open_dice_of_match, + }, +}; + +static int __init open_dice_init(void) +{ + int ret = platform_driver_probe(&open_dice_driver, open_dice_probe); + + /* DICE regions are optional. Succeed even with zero instances. */ + return (ret == -ENODEV) ? 0 : ret; +} + +static void __exit open_dice_exit(void) +{ + platform_driver_unregister(&open_dice_driver); +} + +module_init(open_dice_init); +module_exit(open_dice_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("David Brazdil "); diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 74e34cd14160c4ac25baa04cda431b693b4d79cb..14dddf05f0ac303cb8b39c83bc37ce06a89471ac 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1652,31 +1652,31 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) struct mmc_card *card = mq->card; struct mmc_host *host = card->host; blk_status_t error = BLK_STS_OK; - int retries = 0; do { u32 status; int err; + int retries = 0; - mmc_blk_rw_rq_prep(mqrq, card, 1, mq); + while (retries++ <= MMC_READ_SINGLE_RETRIES) { + mmc_blk_rw_rq_prep(mqrq, card, 1, mq); - mmc_wait_for_req(host, mrq); + mmc_wait_for_req(host, mrq); - err = mmc_send_status(card, &status); - if (err) - goto error_exit; - - if (!mmc_host_is_spi(host) && - !mmc_ready_for_data(status)) { - err = mmc_blk_fix_state(card, req); + err = mmc_send_status(card, &status); if (err) goto error_exit; - } - if (mrq->cmd->error && retries++ < MMC_READ_SINGLE_RETRIES) - continue; + if (!mmc_host_is_spi(host) && + !mmc_ready_for_data(status)) { + err = mmc_blk_fix_state(card, req); + if (err) + goto error_exit; + } - retries = 0; + if (!mrq->cmd->error) + break; + } if (mrq->cmd->error || mrq->data->error || diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c index f086ce1ac9bf730d8073f182d4e2741281f41e6c..9c09d0254470fc719c4deeaed9fd05db9704da9e 100644 --- a/drivers/mmc/core/core.c +++ b/drivers/mmc/core/core.c @@ -2330,7 +2330,7 @@ void mmc_start_host(struct mmc_host *host) _mmc_detect_change(host, 0, false); } -void mmc_stop_host(struct mmc_host *host) +void __mmc_stop_host(struct mmc_host *host) { if (host->slot.cd_irq >= 0) { mmc_gpio_set_cd_wake(host, false); @@ -2339,6 +2339,11 @@ void mmc_stop_host(struct mmc_host *host) host->rescan_disable = 1; cancel_delayed_work_sync(&host->detect); +} + +void mmc_stop_host(struct mmc_host *host) +{ + __mmc_stop_host(host); /* clear pm flags now and let card drivers set them as needed */ host->pm_flags = 0; diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h index dbe7ee457bed49afb79c81da57d966bfd723ec4d..e7720d5dc8b65b8bcea7c3ae8e490b3048d8f68f 100644 --- a/drivers/mmc/core/core.h +++ b/drivers/mmc/core/core.h @@ -71,6 +71,7 @@ static inline void mmc_delay(unsigned int ms) void mmc_rescan(struct work_struct *work); void mmc_start_host(struct mmc_host *host); +void __mmc_stop_host(struct mmc_host *host); void mmc_stop_host(struct mmc_host *host); void _mmc_detect_change(struct mmc_host *host, unsigned long delay, diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 6a6e050133a56ca11cb3e123171bccbe73af85fd..d0b71aecaee86d396a430488bd83929daf17401c 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -80,9 +80,18 @@ static void mmc_host_classdev_release(struct device *dev) kfree(host); } +static int mmc_host_classdev_shutdown(struct device *dev) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + + __mmc_stop_host(host); + return 0; +} + static struct class mmc_host_class = { .name = "mmc_host", .dev_release = mmc_host_classdev_release, + .shutdown_pre = mmc_host_classdev_shutdown, .pm = MMC_HOST_CLASS_DEV_PM_OPS, }; diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 1b0853a82189aceefcd70a2e491fc0ac1ddcf183..99a4ce68d82f1e2c753745a86027526c475dd317 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -708,6 +708,8 @@ try_again: if (host->ops->init_card) host->ops->init_card(host, card); + card->ocr = ocr_card; + /* * If the host and card support UHS-I mode request the card * to switch to 1.8V signaling level. No 1.8v signalling if @@ -820,7 +822,7 @@ try_again: goto mismatch; } } - card->ocr = ocr_card; + mmc_fixup_device(card, sdio_fixup_methods); if (card->type == MMC_TYPE_SD_COMBO) { diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 4f8ff5a690fba43ddaa2ac846816fa59ec35eb99..9daef0237382adb4357ccc2c3689b2287047c2b7 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -503,7 +503,7 @@ config MMC_OMAP_HS config MMC_WBSD tristate "Winbond W83L51xD SD/MMC Card Interface support" - depends on ISA_DMA_API + depends on ISA_DMA_API && !M68K help This selects the Winbond(R) W83L51xD Secure digital and Multimedia card Interface. diff --git a/drivers/mmc/host/cqhci-core.c b/drivers/mmc/host/cqhci-core.c index 93b0432bb60111337cc754fc8c36c8c07e98959e..e3d6de1d86efcf99eb00960c13a6921b66c88031 100644 --- a/drivers/mmc/host/cqhci-core.c +++ b/drivers/mmc/host/cqhci-core.c @@ -277,6 +277,9 @@ static void __cqhci_enable(struct cqhci_host *cq_host) cqhci_writel(cq_host, cqcfg, CQHCI_CFG); + if (cqhci_readl(cq_host, CQHCI_CTL) & CQHCI_HALT) + cqhci_writel(cq_host, 0, CQHCI_CTL); + mmc->cqe_on = true; if (cq_host->ops->enable) diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c index 0c75810812a0a88efb77ea2c81c42a85529c9926..1f8a3c0ddfe11896a112e246eba7fde54dab46ce 100644 --- a/drivers/mmc/host/dw_mmc-exynos.c +++ b/drivers/mmc/host/dw_mmc-exynos.c @@ -464,6 +464,18 @@ static s8 dw_mci_exynos_get_best_clksmpl(u8 candiates) } } + /* + * If there is no cadiates value, then it needs to return -EIO. + * If there are candiates values and don't find bset clk sample value, + * then use a first candiates clock sample value. + */ + for (i = 0; i < iter; i++) { + __c = ror8(candiates, i); + if ((__c & 0x1) == 0x1) { + loc = i; + goto out; + } + } out: return loc; } @@ -494,6 +506,8 @@ static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot, u32 opcode) priv->tuned_sample = found; } else { ret = -EIO; + dev_warn(&mmc->class_dev, + "There is no candiates value about clksmpl!\n"); } return ret; diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 7f90326b1be505474f6d2660d3dc335cec11ad4e..a6170f80ba64d25880b687c87e83daaa824feebd 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -2014,7 +2014,8 @@ static void dw_mci_tasklet_func(unsigned long priv) * delayed. Allowing the transfer to take place * avoids races and keeps things simple. */ - if (err != -ETIMEDOUT) { + if (err != -ETIMEDOUT && + host->dir_status == DW_MCI_RECV_STATUS) { state = STATE_SENDING_DATA; continue; } diff --git a/drivers/mmc/host/meson-gx-mmc.c b/drivers/mmc/host/meson-gx-mmc.c index d3f40c9a8c6c8c1d2ec219f20331017b5b964c27..091e0e051d109609f363bb102f806e4f6c28996a 100644 --- a/drivers/mmc/host/meson-gx-mmc.c +++ b/drivers/mmc/host/meson-gx-mmc.c @@ -173,6 +173,8 @@ struct meson_host { int irq; bool vqmmc_enabled; + bool needs_pre_post_req; + }; #define CMD_CFG_LENGTH_MASK GENMASK(8, 0) @@ -652,6 +654,8 @@ static void meson_mmc_request_done(struct mmc_host *mmc, struct meson_host *host = mmc_priv(mmc); host->cmd = NULL; + if (host->needs_pre_post_req) + meson_mmc_post_req(mmc, mrq, 0); mmc_request_done(host->mmc, mrq); } @@ -735,7 +739,7 @@ static void meson_mmc_desc_chain_transfer(struct mmc_host *mmc, u32 cmd_cfg) writel(start, host->regs + SD_EMMC_START); } -/* local sg copy to buffer version with _to/fromio usage for dram_access_quirk */ +/* local sg copy for dram_access_quirk */ static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data, size_t buflen, bool to_buffer) { @@ -753,21 +757,27 @@ static void meson_mmc_copy_buffer(struct meson_host *host, struct mmc_data *data sg_miter_start(&miter, sgl, nents, sg_flags); while ((offset < buflen) && sg_miter_next(&miter)) { - unsigned int len; + unsigned int buf_offset = 0; + unsigned int len, left; + u32 *buf = miter.addr; len = min(miter.length, buflen - offset); + left = len; - /* When dram_access_quirk, the bounce buffer is a iomem mapping */ - if (host->dram_access_quirk) { - if (to_buffer) - memcpy_toio(host->bounce_iomem_buf + offset, miter.addr, len); - else - memcpy_fromio(miter.addr, host->bounce_iomem_buf + offset, len); + if (to_buffer) { + do { + writel(*buf++, host->bounce_iomem_buf + offset + buf_offset); + + buf_offset += 4; + left -= 4; + } while (left); } else { - if (to_buffer) - memcpy(host->bounce_buf + offset, miter.addr, len); - else - memcpy(miter.addr, host->bounce_buf + offset, len); + do { + *buf++ = readl(host->bounce_iomem_buf + offset + buf_offset); + + buf_offset += 4; + left -= 4; + } while (left); } offset += len; @@ -819,7 +829,11 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) if (data->flags & MMC_DATA_WRITE) { cmd_cfg |= CMD_CFG_DATA_WR; WARN_ON(xfer_bytes > host->bounce_buf_size); - meson_mmc_copy_buffer(host, data, xfer_bytes, true); + if (host->dram_access_quirk) + meson_mmc_copy_buffer(host, data, xfer_bytes, true); + else + sg_copy_to_buffer(data->sg, data->sg_len, + host->bounce_buf, xfer_bytes); dma_wmb(); } @@ -838,28 +852,56 @@ static void meson_mmc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) writel(cmd->arg, host->regs + SD_EMMC_CMD_ARG); } +static int meson_mmc_validate_dram_access(struct mmc_host *mmc, struct mmc_data *data) +{ + struct scatterlist *sg; + int i; + + /* Reject request if any element offset or size is not 32bit aligned */ + for_each_sg(data->sg, sg, data->sg_len, i) { + if (!IS_ALIGNED(sg->offset, sizeof(u32)) || + !IS_ALIGNED(sg->length, sizeof(u32))) { + dev_err(mmc_dev(mmc), "unaligned sg offset %u len %u\n", + data->sg->offset, data->sg->length); + return -EINVAL; + } + } + + return 0; +} + static void meson_mmc_request(struct mmc_host *mmc, struct mmc_request *mrq) { struct meson_host *host = mmc_priv(mmc); - bool needs_pre_post_req = mrq->data && + host->needs_pre_post_req = mrq->data && !(mrq->data->host_cookie & SD_EMMC_PRE_REQ_DONE); - if (needs_pre_post_req) { + /* + * The memory at the end of the controller used as bounce buffer for + * the dram_access_quirk only accepts 32bit read/write access, + * check the aligment and length of the data before starting the request. + */ + if (host->dram_access_quirk && mrq->data) { + mrq->cmd->error = meson_mmc_validate_dram_access(mmc, mrq->data); + if (mrq->cmd->error) { + mmc_request_done(mmc, mrq); + return; + } + } + + if (host->needs_pre_post_req) { meson_mmc_get_transfer_mode(mmc, mrq); if (!meson_mmc_desc_chain_mode(mrq->data)) - needs_pre_post_req = false; + host->needs_pre_post_req = false; } - if (needs_pre_post_req) + if (host->needs_pre_post_req) meson_mmc_pre_req(mmc, mrq); /* Stop execution */ writel(0, host->regs + SD_EMMC_START); meson_mmc_start_cmd(mmc, mrq->sbc ?: mrq->cmd); - - if (needs_pre_post_req) - meson_mmc_post_req(mmc, mrq, 0); } static void meson_mmc_read_resp(struct mmc_host *mmc, struct mmc_command *cmd) @@ -988,7 +1030,11 @@ static irqreturn_t meson_mmc_irq_thread(int irq, void *dev_id) if (meson_mmc_bounce_buf_read(data)) { xfer_bytes = data->blksz * data->blocks; WARN_ON(xfer_bytes > host->bounce_buf_size); - meson_mmc_copy_buffer(host, data, xfer_bytes, false); + if (host->dram_access_quirk) + meson_mmc_copy_buffer(host, data, xfer_bytes, false); + else + sg_copy_from_buffer(data->sg, data->sg_len, + host->bounce_buf, xfer_bytes); } next_cmd = meson_mmc_get_next_command(cmd); diff --git a/drivers/mmc/host/meson-mx-sdhc-mmc.c b/drivers/mmc/host/meson-mx-sdhc-mmc.c index 7cd9c0ec2fcfe97cd42c0458d9d6f32a67953545..28aa78aa08f3f2752d3da3b2db2a51e98ab80e6a 100644 --- a/drivers/mmc/host/meson-mx-sdhc-mmc.c +++ b/drivers/mmc/host/meson-mx-sdhc-mmc.c @@ -135,6 +135,7 @@ static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc, struct mmc_command *cmd) { struct meson_mx_sdhc_host *host = mmc_priv(mmc); + bool manual_stop = false; u32 ictl, send; int pack_len; @@ -172,12 +173,27 @@ static void meson_mx_sdhc_start_cmd(struct mmc_host *mmc, else /* software flush: */ ictl |= MESON_SDHC_ICTL_DATA_XFER_OK; + + /* + * Mimic the logic from the vendor driver where (only) + * SD_IO_RW_EXTENDED commands with more than one block set the + * MESON_SDHC_MISC_MANUAL_STOP bit. This fixes the firmware + * download in the brcmfmac driver for a BCM43362/1 card. + * Without this sdio_memcpy_toio() (with a size of 219557 + * bytes) times out if MESON_SDHC_MISC_MANUAL_STOP is not set. + */ + manual_stop = cmd->data->blocks > 1 && + cmd->opcode == SD_IO_RW_EXTENDED; } else { pack_len = 0; ictl |= MESON_SDHC_ICTL_RESP_OK; } + regmap_update_bits(host->regmap, MESON_SDHC_MISC, + MESON_SDHC_MISC_MANUAL_STOP, + manual_stop ? MESON_SDHC_MISC_MANUAL_STOP : 0); + if (cmd->opcode == MMC_STOP_TRANSMISSION) send |= MESON_SDHC_SEND_DATA_STOP; @@ -838,6 +854,11 @@ static int meson_mx_sdhc_probe(struct platform_device *pdev) goto err_disable_pclk; irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto err_disable_pclk; + } + ret = devm_request_threaded_irq(dev, irq, meson_mx_sdhc_irq, meson_mx_sdhc_irq_thread, IRQF_ONESHOT, NULL, host); diff --git a/drivers/mmc/host/meson-mx-sdio.c b/drivers/mmc/host/meson-mx-sdio.c index 1c5299cd0cbe1333dc394e0c432a102a884616fd..264aae2a2b0cfa62bf68b6984399cc15a83fc305 100644 --- a/drivers/mmc/host/meson-mx-sdio.c +++ b/drivers/mmc/host/meson-mx-sdio.c @@ -663,6 +663,11 @@ static int meson_mx_mmc_probe(struct platform_device *pdev) } irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto error_free_mmc; + } + ret = devm_request_threaded_irq(host->controller_dev, irq, meson_mx_mmc_irq, meson_mx_mmc_irq_thread, IRQF_ONESHOT, diff --git a/drivers/mmc/host/mmci_stm32_sdmmc.c b/drivers/mmc/host/mmci_stm32_sdmmc.c index fdaa11f92fe6f5c31901f85128d5b60c2191f3a5..a75d3dd34d18cb41f24f4f201613916a3110fb09 100644 --- a/drivers/mmc/host/mmci_stm32_sdmmc.c +++ b/drivers/mmc/host/mmci_stm32_sdmmc.c @@ -441,6 +441,8 @@ static int sdmmc_dlyb_phase_tuning(struct mmci_host *host, u32 opcode) return -EINVAL; } + writel_relaxed(0, dlyb->base + DLYB_CR); + phase = end_of_len - max_len / 2; sdmmc_dlyb_set_cfgr(dlyb, dlyb->unit, phase, false); diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c index 2e4a7c6971dc99a82d0c1ff9e01115d1876180d3..ea67a7ef2390c834d2118f58d610be5a557ef859 100644 --- a/drivers/mmc/host/moxart-mmc.c +++ b/drivers/mmc/host/moxart-mmc.c @@ -569,37 +569,37 @@ static int moxart_probe(struct platform_device *pdev) if (!mmc) { dev_err(dev, "mmc_alloc_host failed\n"); ret = -ENOMEM; - goto out; + goto out_mmc; } ret = of_address_to_resource(node, 0, &res_mmc); if (ret) { dev_err(dev, "of_address_to_resource failed\n"); - goto out; + goto out_mmc; } irq = irq_of_parse_and_map(node, 0); if (irq <= 0) { dev_err(dev, "irq_of_parse_and_map failed\n"); ret = -EINVAL; - goto out; + goto out_mmc; } clk = devm_clk_get(dev, NULL); if (IS_ERR(clk)) { ret = PTR_ERR(clk); - goto out; + goto out_mmc; } reg_mmc = devm_ioremap_resource(dev, &res_mmc); if (IS_ERR(reg_mmc)) { ret = PTR_ERR(reg_mmc); - goto out; + goto out_mmc; } ret = mmc_of_parse(mmc); if (ret) - goto out; + goto out_mmc; host = mmc_priv(mmc); host->mmc = mmc; @@ -624,6 +624,14 @@ static int moxart_probe(struct platform_device *pdev) ret = -EPROBE_DEFER; goto out; } + if (!IS_ERR(host->dma_chan_tx)) { + dma_release_channel(host->dma_chan_tx); + host->dma_chan_tx = NULL; + } + if (!IS_ERR(host->dma_chan_rx)) { + dma_release_channel(host->dma_chan_rx); + host->dma_chan_rx = NULL; + } dev_dbg(dev, "PIO mode transfer enabled\n"); host->have_dma = false; } else { @@ -678,6 +686,11 @@ static int moxart_probe(struct platform_device *pdev) return 0; out: + if (!IS_ERR_OR_NULL(host->dma_chan_tx)) + dma_release_channel(host->dma_chan_tx); + if (!IS_ERR_OR_NULL(host->dma_chan_rx)) + dma_release_channel(host->dma_chan_rx); +out_mmc: if (mmc) mmc_free_host(mmc); return ret; @@ -690,17 +703,17 @@ static int moxart_remove(struct platform_device *pdev) dev_set_drvdata(&pdev->dev, NULL); - if (!IS_ERR(host->dma_chan_tx)) + if (!IS_ERR_OR_NULL(host->dma_chan_tx)) dma_release_channel(host->dma_chan_tx); - if (!IS_ERR(host->dma_chan_rx)) + if (!IS_ERR_OR_NULL(host->dma_chan_rx)) dma_release_channel(host->dma_chan_rx); mmc_remove_host(mmc); - mmc_free_host(mmc); writel(0, host->base + REG_INTERRUPT_MASK); writel(0, host->base + REG_POWER_CONTROL); writel(readl(host->base + REG_CLOCK_CONTROL) | CLK_OFF, host->base + REG_CLOCK_CONTROL); + mmc_free_host(mmc); return 0; } diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index dc84e2dff408573cc6f13fc2b044fbca62e0662c..f5c965da9501386e4d3c8daa3edbf891459ccdba 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -2285,6 +2286,7 @@ static void msdc_cqe_enable(struct mmc_host *mmc) static void msdc_cqe_disable(struct mmc_host *mmc, bool recovery) { struct msdc_host *host = mmc_priv(mmc); + unsigned int val = 0; /* disable cmdq irq */ sdr_clr_bits(host->base + MSDC_INTEN, MSDC_INT_CMDQ); @@ -2294,6 +2296,9 @@ static void msdc_cqe_disable(struct mmc_host *mmc, bool recovery) if (recovery) { sdr_set_field(host->base + MSDC_DMA_CTRL, MSDC_DMA_CTRL_STOP, 1); + if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CFG, val, + !(val & MSDC_DMA_CFG_STS), 1, 3000))) + return; msdc_reset_hw(host); } } @@ -2503,6 +2508,25 @@ static int msdc_drv_probe(struct platform_device *pdev) host->dma_mask = DMA_BIT_MASK(32); mmc_dev(mmc)->dma_mask = &host->dma_mask; + host->timeout_clks = 3 * 1048576; + host->dma.gpd = dma_alloc_coherent(&pdev->dev, + 2 * sizeof(struct mt_gpdma_desc), + &host->dma.gpd_addr, GFP_KERNEL); + host->dma.bd = dma_alloc_coherent(&pdev->dev, + MAX_BD_NUM * sizeof(struct mt_bdma_desc), + &host->dma.bd_addr, GFP_KERNEL); + if (!host->dma.gpd || !host->dma.bd) { + ret = -ENOMEM; + goto release_mem; + } + msdc_init_gpd_bd(host, &host->dma); + INIT_DELAYED_WORK(&host->req_timeout, msdc_request_timeout); + spin_lock_init(&host->lock); + + platform_set_drvdata(pdev, mmc); + msdc_ungate_clock(host); + msdc_init_hw(host); + if (mmc->caps2 & MMC_CAP2_CQE) { host->cq_host = devm_kzalloc(mmc->parent, sizeof(*host->cq_host), @@ -2523,25 +2547,6 @@ static int msdc_drv_probe(struct platform_device *pdev) mmc->max_seg_size = 64 * 1024; } - host->timeout_clks = 3 * 1048576; - host->dma.gpd = dma_alloc_coherent(&pdev->dev, - 2 * sizeof(struct mt_gpdma_desc), - &host->dma.gpd_addr, GFP_KERNEL); - host->dma.bd = dma_alloc_coherent(&pdev->dev, - MAX_BD_NUM * sizeof(struct mt_bdma_desc), - &host->dma.bd_addr, GFP_KERNEL); - if (!host->dma.gpd || !host->dma.bd) { - ret = -ENOMEM; - goto release_mem; - } - msdc_init_gpd_bd(host, &host->dma); - INIT_DELAYED_WORK(&host->req_timeout, msdc_request_timeout); - spin_lock_init(&host->lock); - - platform_set_drvdata(pdev, mmc); - msdc_ungate_clock(host); - msdc_init_hw(host); - ret = devm_request_irq(&pdev->dev, host->irq, msdc_irq, IRQF_TRIGGER_NONE, pdev->name, host); if (ret) diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index 4fbbff03137c3e19cc981384ae78c793be007805..2ec3eb651d6b513c27f45a317ace5758a7dbbf97 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -565,6 +565,11 @@ static const struct of_device_id mxs_mmc_dt_ids[] = { }; MODULE_DEVICE_TABLE(of, mxs_mmc_dt_ids); +static void mxs_mmc_regulator_disable(void *regulator) +{ + regulator_disable(regulator); +} + static int mxs_mmc_probe(struct platform_device *pdev) { const struct of_device_id *of_id = @@ -606,6 +611,11 @@ static int mxs_mmc_probe(struct platform_device *pdev) "Failed to enable vmmc regulator: %d\n", ret); goto out_mmc_free; } + + ret = devm_add_action_or_reset(&pdev->dev, mxs_mmc_regulator_disable, + reg_vmmc); + if (ret) + goto out_mmc_free; } ssp->clk = devm_clk_get(&pdev->dev, NULL); diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index addaaf2810e26f35f768437c050f11e2c6b7ab44..782879d46ff4849d8efb72afb2eac5d38085c4a2 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -660,7 +660,7 @@ static int renesas_sdhi_execute_tuning(struct mmc_host *mmc, u32 opcode) /* Issue CMD19 twice for each tap */ for (i = 0; i < 2 * priv->tap_num; i++) { - int cmd_error; + int cmd_error = 0; /* Set sampling clock position */ sd_scc_write32(host, priv, SH_MOBILE_SDHI_SCC_TAPSET, i % priv->tap_num); diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index d28809e479628eaefa3cc0841229dcf85765ab40..a4bd85b200a3edf710da0138dd145d4ae5da5223 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -263,7 +263,6 @@ static struct esdhc_soc_data usdhc_imx8qxp_data = { .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200 | ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES - | ESDHC_FLAG_CQHCI | ESDHC_FLAG_STATE_LOST_IN_LPMODE | ESDHC_FLAG_CLK_RATE_LOST_IN_PM_RUNTIME, }; @@ -272,7 +271,6 @@ static struct esdhc_soc_data usdhc_imx8mm_data = { .flags = ESDHC_FLAG_USDHC | ESDHC_FLAG_STD_TUNING | ESDHC_FLAG_HAVE_CAP1 | ESDHC_FLAG_HS200 | ESDHC_FLAG_HS400 | ESDHC_FLAG_HS400_ES - | ESDHC_FLAG_CQHCI | ESDHC_FLAG_STATE_LOST_IN_LPMODE, }; @@ -1157,6 +1155,7 @@ static void esdhc_reset_tuning(struct sdhci_host *host) struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host); u32 ctrl; + int ret; /* Reset the tuning circuit */ if (esdhc_is_usdhc(imx_data)) { @@ -1169,7 +1168,22 @@ static void esdhc_reset_tuning(struct sdhci_host *host) } else if (imx_data->socdata->flags & ESDHC_FLAG_STD_TUNING) { ctrl = readl(host->ioaddr + SDHCI_AUTO_CMD_STATUS); ctrl &= ~ESDHC_MIX_CTRL_SMPCLK_SEL; + ctrl &= ~ESDHC_MIX_CTRL_EXE_TUNE; writel(ctrl, host->ioaddr + SDHCI_AUTO_CMD_STATUS); + /* Make sure ESDHC_MIX_CTRL_EXE_TUNE cleared */ + ret = readl_poll_timeout(host->ioaddr + SDHCI_AUTO_CMD_STATUS, + ctrl, !(ctrl & ESDHC_MIX_CTRL_EXE_TUNE), 1, 50); + if (ret == -ETIMEDOUT) + dev_warn(mmc_dev(host->mmc), + "Warning! clear execute tuning bit failed\n"); + /* + * SDHCI_INT_DATA_AVAIL is W1C bit, set this bit will clear the + * usdhc IP internal logic flag execute_tuning_with_clr_buf, which + * will finally make sure the normal data transfer logic correct. + */ + ctrl = readl(host->ioaddr + SDHCI_INT_STATUS); + ctrl |= SDHCI_INT_DATA_AVAIL; + writel(ctrl, host->ioaddr + SDHCI_INT_STATUS); } } } diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 5564d7b23e7cd9861fc90ab0c720ddb06f6a65ef..d1a1c548c515f07a4cebce87dd0637177f39197f 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -61,7 +62,6 @@ static void sdhci_at91_set_force_card_detect(struct sdhci_host *host) static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock) { u16 clk; - unsigned long timeout; host->mmc->actual_clock = 0; @@ -86,16 +86,11 @@ static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock) sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL); /* Wait max 20 ms */ - timeout = 20; - while (!((clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL)) - & SDHCI_CLOCK_INT_STABLE)) { - if (timeout == 0) { - pr_err("%s: Internal clock never stabilised.\n", - mmc_hostname(host->mmc)); - return; - } - timeout--; - mdelay(1); + if (read_poll_timeout(sdhci_readw, clk, (clk & SDHCI_CLOCK_INT_STABLE), + 1000, 20000, false, host, SDHCI_CLOCK_CONTROL)) { + pr_err("%s: Internal clock never stabilised.\n", + mmc_hostname(host->mmc)); + return; } clk |= SDHCI_CLOCK_CARD_EN; @@ -114,6 +109,7 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_at91_priv *priv = sdhci_pltfm_priv(pltfm_host); + unsigned int tmp; sdhci_reset(host, mask); @@ -126,6 +122,10 @@ static void sdhci_at91_reset(struct sdhci_host *host, u8 mask) sdhci_writel(host, calcr | SDMMC_CALCR_ALWYSON | SDMMC_CALCR_EN, SDMMC_CALCR); + + if (read_poll_timeout(sdhci_readl, tmp, !(tmp & SDMMC_CALCR_EN), + 10, 20000, false, host, SDMMC_CALCR)) + dev_err(mmc_dev(host->mmc), "Failed to calibrate\n"); } } diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index ab5ab969f711deecdbca38e27c758b2f2b416bd4..343648fcbc31f1b228b3f8ca4a3cea6fbd8207c6 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -524,12 +524,16 @@ static void esdhc_of_adma_workaround(struct sdhci_host *host, u32 intmask) static int esdhc_of_enable_dma(struct sdhci_host *host) { + int ret; u32 value; struct device *dev = mmc_dev(host->mmc); if (of_device_is_compatible(dev->of_node, "fsl,ls1043a-esdhc") || - of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc")) - dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + of_device_is_compatible(dev->of_node, "fsl,ls1046a-esdhc")) { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40)); + if (ret) + return ret; + } value = sdhci_readl(host, ESDHC_DMA_SYSCTL); diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 7893fd3599b6120c5aaeaded46864de2dbd6db7e..53c362bb2866158568ceff4bf6d9f14d0bacf17d 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -62,6 +62,8 @@ #define SDHCI_OMAP_IE 0x234 #define INT_CC_EN BIT(0) +#define SDHCI_OMAP_ISE 0x238 + #define SDHCI_OMAP_AC12 0x23c #define AC12_V1V8_SIGEN BIT(19) #define AC12_SCLK_SEL BIT(23) @@ -113,6 +115,8 @@ struct sdhci_omap_host { u32 hctl; u32 sysctl; u32 capa; + u32 ie; + u32 ise; }; static void sdhci_omap_start_clock(struct sdhci_omap_host *omap_host); @@ -682,7 +686,8 @@ static void sdhci_omap_set_power(struct sdhci_host *host, unsigned char mode, { struct mmc_host *mmc = host->mmc; - mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); + if (!IS_ERR(mmc->supply.vmmc)) + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd); } static int sdhci_omap_enable_dma(struct sdhci_host *host) @@ -1245,14 +1250,23 @@ static void sdhci_omap_context_save(struct sdhci_omap_host *omap_host) { omap_host->con = sdhci_omap_readl(omap_host, SDHCI_OMAP_CON); omap_host->hctl = sdhci_omap_readl(omap_host, SDHCI_OMAP_HCTL); + omap_host->sysctl = sdhci_omap_readl(omap_host, SDHCI_OMAP_SYSCTL); omap_host->capa = sdhci_omap_readl(omap_host, SDHCI_OMAP_CAPA); + omap_host->ie = sdhci_omap_readl(omap_host, SDHCI_OMAP_IE); + omap_host->ise = sdhci_omap_readl(omap_host, SDHCI_OMAP_ISE); } +/* Order matters here, HCTL must be restored in two phases */ static void sdhci_omap_context_restore(struct sdhci_omap_host *omap_host) { - sdhci_omap_writel(omap_host, SDHCI_OMAP_CON, omap_host->con); sdhci_omap_writel(omap_host, SDHCI_OMAP_HCTL, omap_host->hctl); sdhci_omap_writel(omap_host, SDHCI_OMAP_CAPA, omap_host->capa); + sdhci_omap_writel(omap_host, SDHCI_OMAP_HCTL, omap_host->hctl); + + sdhci_omap_writel(omap_host, SDHCI_OMAP_SYSCTL, omap_host->sysctl); + sdhci_omap_writel(omap_host, SDHCI_OMAP_CON, omap_host->con); + sdhci_omap_writel(omap_host, SDHCI_OMAP_IE, omap_host->ie); + sdhci_omap_writel(omap_host, SDHCI_OMAP_ISE, omap_host->ise); } static int __maybe_unused sdhci_omap_suspend(struct device *dev) diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c index bf04a08eeba13424827477c48d2a8124770d4013..a78b060ce847158505ae2cc2c4c3151feee69478 100644 --- a/drivers/mmc/host/sdhci-pci-core.c +++ b/drivers/mmc/host/sdhci-pci-core.c @@ -1932,6 +1932,7 @@ static const struct pci_device_id pci_ids[] = { SDHCI_PCI_DEVICE(INTEL, JSL_SD, intel_byt_sd), SDHCI_PCI_DEVICE(INTEL, LKF_EMMC, intel_glk_emmc), SDHCI_PCI_DEVICE(INTEL, LKF_SD, intel_byt_sd), + SDHCI_PCI_DEVICE(INTEL, ADL_EMMC, intel_glk_emmc), SDHCI_PCI_DEVICE(O2, 8120, o2), SDHCI_PCI_DEVICE(O2, 8220, o2), SDHCI_PCI_DEVICE(O2, 8221, o2), diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h index 8f90c4163bb5c7a1faf5705af925cc09da193181..dcd99d5057ee1ba66047bff826f7c3f6626aba2d 100644 --- a/drivers/mmc/host/sdhci-pci.h +++ b/drivers/mmc/host/sdhci-pci.h @@ -59,6 +59,7 @@ #define PCI_DEVICE_ID_INTEL_JSL_SD 0x4df8 #define PCI_DEVICE_ID_INTEL_LKF_EMMC 0x98c4 #define PCI_DEVICE_ID_INTEL_LKF_SD 0x98f8 +#define PCI_DEVICE_ID_INTEL_ADL_EMMC 0x54c4 #define PCI_DEVICE_ID_SYSKONNECT_8000 0x8000 #define PCI_DEVICE_ID_VIA_95D0 0x95d0 diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c index 8ea9132ebca4ef9676553f47855b0db1679bd2d7..d50b691f6c44e5f5053c031056be527572b14ff3 100644 --- a/drivers/mmc/host/sdhci-tegra.c +++ b/drivers/mmc/host/sdhci-tegra.c @@ -354,23 +354,6 @@ static void tegra_sdhci_set_tap(struct sdhci_host *host, unsigned int tap) } } -static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, - struct mmc_ios *ios) -{ - struct sdhci_host *host = mmc_priv(mmc); - u32 val; - - val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); - - if (ios->enhanced_strobe) - val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; - else - val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; - - sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); - -} - static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); @@ -791,6 +774,32 @@ static void tegra_sdhci_set_clock(struct sdhci_host *host, unsigned int clock) } } +static void tegra_sdhci_hs400_enhanced_strobe(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + struct sdhci_host *host = mmc_priv(mmc); + u32 val; + + val = sdhci_readl(host, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); + + if (ios->enhanced_strobe) { + val |= SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; + /* + * When CMD13 is sent from mmc_select_hs400es() after + * switching to HS400ES mode, the bus is operating at + * either MMC_HIGH_26_MAX_DTR or MMC_HIGH_52_MAX_DTR. + * To meet Tegra SDHCI requirement at HS400ES mode, force SDHCI + * interface clock to MMC_HS200_MAX_DTR (200 MHz) so that host + * controller CAR clock and the interface clock are rate matched. + */ + tegra_sdhci_set_clock(host, MMC_HS200_MAX_DTR); + } else { + val &= ~SDHCI_TEGRA_SYS_SW_CTRL_ENHANCED_STROBE; + } + + sdhci_writel(host, val, SDHCI_TEGRA_VENDOR_SYS_SW_CTRL); +} + static unsigned int tegra_sdhci_get_max_clock(struct sdhci_host *host) { struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index f6bb5420ff7f6818078c2733762f3df03b16e3f0..8c2db142be9274dbddb5fe7eb6c19a4da02e755c 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -773,7 +773,19 @@ static void sdhci_adma_table_pre(struct sdhci_host *host, len -= offset; } - BUG_ON(len > 65536); + /* + * The block layer forces a minimum segment size of PAGE_SIZE, + * so 'len' can be too big here if PAGE_SIZE >= 64KiB. Write + * multiple descriptors, noting that the ADMA table is sized + * for 4KiB chunks anyway, so it will be big enough. + */ + while (len > host->max_adma) { + int n = 32 * 1024; /* 32KiB*/ + + __sdhci_adma_write_desc(host, &desc, addr, n, ADMA2_TRAN_VALID); + addr += n; + len -= n; + } /* tran, valid */ if (len) @@ -2044,6 +2056,12 @@ void sdhci_set_power_noreg(struct sdhci_host *host, unsigned char mode, break; case MMC_VDD_32_33: case MMC_VDD_33_34: + /* + * 3.4 ~ 3.6V are valid only for those platforms where it's + * known that the voltage range is supported by hardware. + */ + case MMC_VDD_34_35: + case MMC_VDD_35_36: pwr = SDHCI_POWER_330; break; default: @@ -3948,6 +3966,7 @@ struct sdhci_host *sdhci_alloc_host(struct device *dev, * descriptor for each segment, plus 1 for a nop end descriptor. */ host->adma_table_cnt = SDHCI_MAX_SEGS * 2 + 1; + host->max_adma = 65536; return host; } @@ -4607,10 +4626,12 @@ int sdhci_setup_host(struct sdhci_host *host) * be larger than 64 KiB though. */ if (host->flags & SDHCI_USE_ADMA) { - if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) + if (host->quirks & SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC) { + host->max_adma = 65532; /* 32-bit alignment */ mmc->max_seg_size = 65535; - else + } else { mmc->max_seg_size = 65536; + } } else { mmc->max_seg_size = mmc->max_req_size; } diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 15ecf856a7c7abefbdd0a3674b0a255eef78ab9a..f89cebd30d5ac77ccb09678d61046ac9e231fd06 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -339,7 +339,8 @@ struct sdhci_adma2_64_desc { /* * Maximum segments assuming a 512KiB maximum requisition size and a minimum - * 4KiB page size. + * 4KiB page size. Note this also allows enough for multiple descriptors in + * case of PAGE_SIZE >= 64KiB. */ #define SDHCI_MAX_SEGS 128 @@ -541,6 +542,7 @@ struct sdhci_host { unsigned int blocks; /* remaining PIO blocks */ int sg_count; /* Mapped sg entries */ + int max_adma; /* Max. length in ADMA descriptor */ void *adma_table; /* ADMA descriptor table */ void *align_buffer; /* Bounce buffer */ diff --git a/drivers/mmc/host/vub300.c b/drivers/mmc/host/vub300.c index 4950d10d3a19111b121d1fca07a2045f0c31f1a4..97beece62fec4396f28edddf278f126c2de35f76 100644 --- a/drivers/mmc/host/vub300.c +++ b/drivers/mmc/host/vub300.c @@ -576,7 +576,7 @@ static void check_vub300_port_status(struct vub300_mmc_host *vub300) GET_SYSTEM_PORT_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->system_port_status, - sizeof(vub300->system_port_status), HZ); + sizeof(vub300->system_port_status), 1000); if (sizeof(vub300->system_port_status) == retval) new_system_port_status(vub300); } @@ -1241,7 +1241,7 @@ static void __download_offload_pseudocode(struct vub300_mmc_host *vub300, SET_INTERRUPT_PSEUDOCODE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, - xfer_buffer, xfer_length, HZ); + xfer_buffer, xfer_length, 1000); kfree(xfer_buffer); if (retval < 0) goto copy_error_message; @@ -1284,7 +1284,7 @@ static void __download_offload_pseudocode(struct vub300_mmc_host *vub300, SET_TRANSFER_PSEUDOCODE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, - xfer_buffer, xfer_length, HZ); + xfer_buffer, xfer_length, 1000); kfree(xfer_buffer); if (retval < 0) goto copy_error_message; @@ -1991,7 +1991,7 @@ static void __set_clock_speed(struct vub300_mmc_host *vub300, u8 buf[8], usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_CLOCK_SPEED, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x00, 0x00, buf, buf_array_size, HZ); + 0x00, 0x00, buf, buf_array_size, 1000); if (retval != 8) { dev_err(&vub300->udev->dev, "SET_CLOCK_SPEED" " %dkHz failed with retval=%d\n", kHzClock, retval); @@ -2013,14 +2013,14 @@ static void vub300_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_SD_POWER, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0000, 0x0000, NULL, 0, HZ); + 0x0000, 0x0000, NULL, 0, 1000); /* must wait for the VUB300 u-proc to boot up */ msleep(600); } else if ((ios->power_mode == MMC_POWER_UP) && !vub300->card_powered) { usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_SD_POWER, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - 0x0001, 0x0000, NULL, 0, HZ); + 0x0001, 0x0000, NULL, 0, 1000); msleep(600); vub300->card_powered = 1; } else if (ios->power_mode == MMC_POWER_ON) { @@ -2275,14 +2275,14 @@ static int vub300_probe(struct usb_interface *interface, GET_HC_INF0, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->hc_info, - sizeof(vub300->hc_info), HZ); + sizeof(vub300->hc_info), 1000); if (retval < 0) goto error5; retval = usb_control_msg(vub300->udev, usb_sndctrlpipe(vub300->udev, 0), SET_ROM_WAIT_STATES, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - firmware_rom_wait_states, 0x0000, NULL, 0, HZ); + firmware_rom_wait_states, 0x0000, NULL, 0, 1000); if (retval < 0) goto error5; dev_info(&vub300->udev->dev, @@ -2297,7 +2297,7 @@ static int vub300_probe(struct usb_interface *interface, GET_SYSTEM_PORT_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0x0000, 0x0000, &vub300->system_port_status, - sizeof(vub300->system_port_status), HZ); + sizeof(vub300->system_port_status), 1000); if (retval < 0) { goto error4; } else if (sizeof(vub300->system_port_status) == retval) { diff --git a/drivers/most/most_usb.c b/drivers/most/most_usb.c index 2640c5b326a493fff8f9b9fe0ddaf36b9e7a6e74..acabb7715b42327e5cd7a2aa70e01c161ca173b5 100644 --- a/drivers/most/most_usb.c +++ b/drivers/most/most_usb.c @@ -149,7 +149,8 @@ static inline int drci_rd_reg(struct usb_device *dev, u16 reg, u16 *buf) retval = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), DRCI_READ_REQ, req_type, 0x0000, - reg, dma_buf, sizeof(*dma_buf), 5 * HZ); + reg, dma_buf, sizeof(*dma_buf), + USB_CTRL_GET_TIMEOUT); *buf = le16_to_cpu(*dma_buf); kfree(dma_buf); @@ -176,7 +177,7 @@ static inline int drci_wr_reg(struct usb_device *dev, u16 reg, u16 data) reg, NULL, 0, - 5 * HZ); + USB_CTRL_SET_TIMEOUT); } static inline int start_sync_ep(struct usb_device *usb_dev, u16 ep) diff --git a/drivers/mtd/chips/Kconfig b/drivers/mtd/chips/Kconfig index aef14990e5f7cfc6c0e115f66a6047e03e4edf9a..19726ebd973d0bbde3d118688401baedc1169f76 100644 --- a/drivers/mtd/chips/Kconfig +++ b/drivers/mtd/chips/Kconfig @@ -55,12 +55,14 @@ choice LITTLE_ENDIAN_BYTE, if the bytes are reversed. config MTD_CFI_NOSWAP + depends on !ARCH_IXP4XX || CPU_BIG_ENDIAN bool "NO" config MTD_CFI_BE_BYTE_SWAP bool "BIG_ENDIAN_BYTE" config MTD_CFI_LE_BYTE_SWAP + depends on !ARCH_IXP4XX bool "LITTLE_ENDIAN_BYTE" endchoice diff --git a/drivers/mtd/hyperbus/rpc-if.c b/drivers/mtd/hyperbus/rpc-if.c index ecb050ba95cdff52e22469672b60e366ca265abf..dc164c18f8429fe5d632b264084af0bc9b7e574c 100644 --- a/drivers/mtd/hyperbus/rpc-if.c +++ b/drivers/mtd/hyperbus/rpc-if.c @@ -124,7 +124,9 @@ static int rpcif_hb_probe(struct platform_device *pdev) if (!hyperbus) return -ENOMEM; - rpcif_sw_init(&hyperbus->rpc, pdev->dev.parent); + error = rpcif_sw_init(&hyperbus->rpc, pdev->dev.parent); + if (error) + return error; platform_set_drvdata(pdev, hyperbus); @@ -150,9 +152,9 @@ static int rpcif_hb_remove(struct platform_device *pdev) { struct rpcif_hyperbus *hyperbus = platform_get_drvdata(pdev); int error = hyperbus_unregister_device(&hyperbus->hbdev); - struct rpcif *rpc = dev_get_drvdata(pdev->dev.parent); - rpcif_disable_rpm(rpc); + rpcif_disable_rpm(&hyperbus->rpc); + return error; } diff --git a/drivers/mtd/maps/Kconfig b/drivers/mtd/maps/Kconfig index 6650acbc961e972089aa97fdf56f90b987075a71..fc0aaa03c524272ed065837c438b277c921b4fb0 100644 --- a/drivers/mtd/maps/Kconfig +++ b/drivers/mtd/maps/Kconfig @@ -325,7 +325,7 @@ config MTD_DC21285 config MTD_IXP4XX tristate "CFI Flash device mapped on Intel IXP4xx based systems" - depends on MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX + depends on MTD_CFI && MTD_COMPLEX_MAPPINGS && ARCH_IXP4XX && MTD_CFI_ADV_OPTIONS help This enables MTD access to flash devices on platforms based on Intel's IXP4xx family of network processors such as the diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 1c8c40728678343146a45715eceb2ccb50a6db4c..a5197a48190256e9598cc620794694ae9587fc0e 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -721,8 +721,6 @@ int del_mtd_device(struct mtd_info *mtd) mutex_lock(&mtd_table_mutex); - debugfs_remove_recursive(mtd->dbg.dfs_dir); - if (idr_find(&mtd_idr, mtd->index) != mtd) { ret = -ENODEV; goto out_error; @@ -738,6 +736,8 @@ int del_mtd_device(struct mtd_info *mtd) mtd->index, mtd->name, mtd->usecount); ret = -EBUSY; } else { + debugfs_remove_recursive(mtd->dbg.dfs_dir); + /* Try to remove the NVMEM provider */ if (mtd->nvmem) nvmem_unregister(mtd->nvmem); diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 95d47422bbf2039f9e92bc7d09e08c548d7e3b0a..5725818fa199f72c056e9f1bb5be10eb2126e93a 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -313,7 +313,7 @@ static int __mtd_del_partition(struct mtd_info *mtd) if (err) return err; - list_del(&child->part.node); + list_del(&mtd->part.node); free_partition(mtd); return 0; diff --git a/drivers/mtd/nand/bbt.c b/drivers/mtd/nand/bbt.c index 044adf91385465ca73e54f53238fb68b03c8a604..64af6898131d656401a42135855c22e183df4109 100644 --- a/drivers/mtd/nand/bbt.c +++ b/drivers/mtd/nand/bbt.c @@ -123,7 +123,7 @@ int nanddev_bbt_set_block_status(struct nand_device *nand, unsigned int entry, unsigned int rbits = bits_per_block + offs - BITS_PER_LONG; pos[1] &= ~GENMASK(rbits - 1, 0); - pos[1] |= val >> rbits; + pos[1] |= val >> (bits_per_block - rbits); } return 0; diff --git a/drivers/mtd/nand/raw/ams-delta.c b/drivers/mtd/nand/raw/ams-delta.c index ff1697f899ba68df0801607dd7bf15f0017f164d..13de39aa3288f273695f6ae26c3687508a12bbf9 100644 --- a/drivers/mtd/nand/raw/ams-delta.c +++ b/drivers/mtd/nand/raw/ams-delta.c @@ -217,9 +217,8 @@ static int gpio_nand_setup_interface(struct nand_chip *this, int csline, static int gpio_nand_attach_chip(struct nand_chip *chip) { - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - - if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; @@ -370,6 +369,13 @@ static int gpio_nand_probe(struct platform_device *pdev) /* Release write protection */ gpiod_set_value(priv->gpiod_nwp, 0); + /* + * This driver assumes that the default ECC engine should be TYPE_SOFT. + * Set ->engine_type before registering the NAND devices in order to + * provide a driver specific default value. + */ + this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + /* Scan to find existence of the device */ err = nand_scan(this, 1); if (err) diff --git a/drivers/mtd/nand/raw/au1550nd.c b/drivers/mtd/nand/raw/au1550nd.c index 7b6b354f2d39842161acca041595faecb76cd152..48901a1b8bf52d444c64807d4121841663d5d6ae 100644 --- a/drivers/mtd/nand/raw/au1550nd.c +++ b/drivers/mtd/nand/raw/au1550nd.c @@ -238,9 +238,8 @@ static int au1550nd_exec_op(struct nand_chip *this, static int au1550nd_attach_chip(struct nand_chip *chip) { - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - - if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; @@ -309,6 +308,13 @@ static int au1550nd_probe(struct platform_device *pdev) if (pd->devwidth) this->options |= NAND_BUSWIDTH_16; + /* + * This driver assumes that the default ECC engine should be TYPE_SOFT. + * Set ->engine_type before registering the NAND devices in order to + * provide a driver specific default value. + */ + this->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + ret = nand_scan(this, 1); if (ret) { dev_err(&pdev->dev, "NAND scan failed with %d\n", ret); diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index 909b14cc8e55cf880ebd0e040774e85a7102161c..580b91cbd18defc7a085cc9dad965c4b9b7159f4 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -2062,7 +2062,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip, mtd->oobsize / trans, host->hwcfg.sector_size_1k); - if (!ret) { + if (ret != -EBADMSG) { *err_addr = brcmnand_get_uncorrecc_addr(ctrl); if (*err_addr) diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c index f8c36d19ab47f36aba5d2c1696bd795058bb084b..bfd3f440aca574931e30e22e96e4a6e891ea0f4b 100644 --- a/drivers/mtd/nand/raw/davinci_nand.c +++ b/drivers/mtd/nand/raw/davinci_nand.c @@ -372,17 +372,15 @@ correct: } /** - * nand_read_page_hwecc_oob_first - hw ecc, read oob first + * nand_davinci_read_page_hwecc_oob_first - Hardware ECC page read with ECC + * data read from OOB area * @chip: nand chip info structure * @buf: buffer to store read data * @oob_required: caller requires OOB data read to chip->oob_poi * @page: page number to read * - * Hardware ECC for large page chips, require OOB to be read first. For this - * ECC mode, the write_page method is re-used from ECC_HW. These methods - * read/write ECC from the OOB area, unlike the ECC_HW_SYNDROME support with - * multiple ECC steps, follows the "infix ECC" scheme and reads/writes ECC from - * the data area, by overwriting the NAND manufacturer bad block markings. + * Hardware ECC for large page chips, which requires the ECC data to be + * extracted from the OOB before the actual data is read. */ static int nand_davinci_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf, @@ -394,7 +392,6 @@ static int nand_davinci_read_page_hwecc_oob_first(struct nand_chip *chip, int eccsteps = chip->ecc.steps; uint8_t *p = buf; uint8_t *ecc_code = chip->ecc.code_buf; - uint8_t *ecc_calc = chip->ecc.calc_buf; unsigned int max_bitflips = 0; /* Read the OOB area first */ @@ -402,7 +399,8 @@ static int nand_davinci_read_page_hwecc_oob_first(struct nand_chip *chip, if (ret) return ret; - ret = nand_read_page_op(chip, page, 0, NULL, 0); + /* Move read cursor to start of page */ + ret = nand_change_read_column_op(chip, 0, NULL, 0, false); if (ret) return ret; @@ -420,8 +418,6 @@ static int nand_davinci_read_page_hwecc_oob_first(struct nand_chip *chip, if (ret) return ret; - chip->ecc.calculate(chip, p, &ecc_calc[i]); - stat = chip->ecc.correct(chip, p, &ecc_code[i], NULL); if (stat == -EBADMSG && (chip->ecc.options & NAND_ECC_GENERIC_ERASED_CHECK)) { diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index ce05dd4088e9d1dff56f5d6f49f3bfbd57d507a0..663ff5300ad996049f724db1cce4167665001188 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -93,6 +94,14 @@ #define FSMC_BUSY_WAIT_TIMEOUT (1 * HZ) +/* + * According to SPEAr300 Reference Manual (RM0082) + * TOUDEL = 7ns (Output delay from the flip-flops to the board) + * TINDEL = 5ns (Input delay from the board to the flipflop) + */ +#define TOUTDEL 7000 +#define TINDEL 5000 + struct fsmc_nand_timings { u8 tclr; u8 tar; @@ -277,7 +286,7 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, { unsigned long hclk = clk_get_rate(host->clk); unsigned long hclkn = NSEC_PER_SEC / hclk; - u32 thiz, thold, twait, tset; + u32 thiz, thold, twait, tset, twait_min; if (sdrt->tRC_min < 30000) return -EOPNOTSUPP; @@ -309,13 +318,6 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, else if (tims->thold > FSMC_THOLD_MASK) tims->thold = FSMC_THOLD_MASK; - twait = max(sdrt->tRP_min, sdrt->tWP_min); - tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1; - if (tims->twait == 0) - tims->twait = 1; - else if (tims->twait > FSMC_TWAIT_MASK) - tims->twait = FSMC_TWAIT_MASK; - tset = max(sdrt->tCS_min - sdrt->tWP_min, sdrt->tCEA_max - sdrt->tREA_max); tims->tset = DIV_ROUND_UP(tset / 1000, hclkn) - 1; @@ -324,6 +326,21 @@ static int fsmc_calc_timings(struct fsmc_nand_data *host, else if (tims->tset > FSMC_TSET_MASK) tims->tset = FSMC_TSET_MASK; + /* + * According to SPEAr300 Reference Manual (RM0082) which gives more + * information related to FSMSC timings than the SPEAr600 one (RM0305), + * twait >= tCEA - (tset * TCLK) + TOUTDEL + TINDEL + */ + twait_min = sdrt->tCEA_max - ((tims->tset + 1) * hclkn * 1000) + + TOUTDEL + TINDEL; + twait = max3(sdrt->tRP_min, sdrt->tWP_min, twait_min); + + tims->twait = DIV_ROUND_UP(twait / 1000, hclkn) - 1; + if (tims->twait == 0) + tims->twait = 1; + else if (tims->twait > FSMC_TWAIT_MASK) + tims->twait = FSMC_TWAIT_MASK; + return 0; } @@ -653,6 +670,9 @@ static int fsmc_exec_op(struct nand_chip *chip, const struct nand_operation *op, instr->ctx.waitrdy.timeout_ms); break; } + + if (instr->delay_ns) + ndelay(instr->delay_ns); } return ret; diff --git a/drivers/mtd/nand/raw/gpio.c b/drivers/mtd/nand/raw/gpio.c index fb7a086de35e5a31b55dbce3d072823f6d2d0069..fdf073d2e1b6c13acc16409c2cca940b8abccda0 100644 --- a/drivers/mtd/nand/raw/gpio.c +++ b/drivers/mtd/nand/raw/gpio.c @@ -163,9 +163,8 @@ static int gpio_nand_exec_op(struct nand_chip *chip, static int gpio_nand_attach_chip(struct nand_chip *chip) { - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - - if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; @@ -365,6 +364,13 @@ static int gpio_nand_probe(struct platform_device *pdev) if (gpiomtd->nwp && !IS_ERR(gpiomtd->nwp)) gpiod_direction_output(gpiomtd->nwp, 1); + /* + * This driver assumes that the default ECC engine should be TYPE_SOFT. + * Set ->engine_type before registering the NAND devices in order to + * provide a driver specific default value. + */ + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + ret = nand_scan(chip, 1); if (ret) goto err_wp; diff --git a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c index a6658567d55c0a637c8b484abc5f938d49728d2c..cb7631145700a56f700fa861d4ed42f88817fc50 100644 --- a/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/raw/gpmi-nand/gpmi-nand.c @@ -711,14 +711,32 @@ static void gpmi_nfc_compute_timings(struct gpmi_nand_data *this, (use_half_period ? BM_GPMI_CTRL1_HALF_PERIOD : 0); } -static void gpmi_nfc_apply_timings(struct gpmi_nand_data *this) +static int gpmi_nfc_apply_timings(struct gpmi_nand_data *this) { struct gpmi_nfc_hardware_timing *hw = &this->hw; struct resources *r = &this->resources; void __iomem *gpmi_regs = r->gpmi_regs; unsigned int dll_wait_time_us; + int ret; - clk_set_rate(r->clock[0], hw->clk_rate); + /* Clock dividers do NOT guarantee a clean clock signal on its output + * during the change of the divide factor on i.MX6Q/UL/SX. On i.MX7/8, + * all clock dividers provide these guarantee. + */ + if (GPMI_IS_MX6Q(this) || GPMI_IS_MX6SX(this)) + clk_disable_unprepare(r->clock[0]); + + ret = clk_set_rate(r->clock[0], hw->clk_rate); + if (ret) { + dev_err(this->dev, "cannot set clock rate to %lu Hz: %d\n", hw->clk_rate, ret); + return ret; + } + + if (GPMI_IS_MX6Q(this) || GPMI_IS_MX6SX(this)) { + ret = clk_prepare_enable(r->clock[0]); + if (ret) + return ret; + } writel(hw->timing0, gpmi_regs + HW_GPMI_TIMING0); writel(hw->timing1, gpmi_regs + HW_GPMI_TIMING1); @@ -737,6 +755,8 @@ static void gpmi_nfc_apply_timings(struct gpmi_nand_data *this) /* Wait for the DLL to settle. */ udelay(dll_wait_time_us); + + return 0; } static int gpmi_setup_interface(struct nand_chip *chip, int chipnr, @@ -1032,15 +1052,6 @@ static int gpmi_get_clks(struct gpmi_nand_data *this) r->clock[i] = clk; } - if (GPMI_IS_MX6(this)) - /* - * Set the default value for the gpmi clock. - * - * If you want to use the ONFI nand which is in the - * Synchronous Mode, you should change the clock as you need. - */ - clk_set_rate(r->clock[0], 22000000); - return 0; err_clock: @@ -2278,7 +2289,9 @@ static int gpmi_nfc_exec_op(struct nand_chip *chip, */ if (this->hw.must_apply_timings) { this->hw.must_apply_timings = false; - gpmi_nfc_apply_timings(this); + ret = gpmi_nfc_apply_timings(this); + if (ret) + goto out_pm; } dev_dbg(this->dev, "%s: %d instructions\n", __func__, op->ninstrs); @@ -2407,6 +2420,7 @@ unmap: this->bch = false; +out_pm: pm_runtime_mark_last_busy(this->dev); pm_runtime_put_autosuspend(this->dev); diff --git a/drivers/mtd/nand/raw/mpc5121_nfc.c b/drivers/mtd/nand/raw/mpc5121_nfc.c index bcd4a556c959c94592b897c0159f8d55691e462a..5b9271b9c32655b4d4d8f2d972c3d478d7b3b6d9 100644 --- a/drivers/mtd/nand/raw/mpc5121_nfc.c +++ b/drivers/mtd/nand/raw/mpc5121_nfc.c @@ -291,7 +291,6 @@ static int ads5121_chipselect_init(struct mtd_info *mtd) /* Control chips select signal on ADS5121 board */ static void ads5121_select_chip(struct nand_chip *nand, int chip) { - struct mtd_info *mtd = nand_to_mtd(nand); struct mpc5121_nfc_prv *prv = nand_get_controller_data(nand); u8 v; @@ -605,9 +604,8 @@ static void mpc5121_nfc_free(struct device *dev, struct mtd_info *mtd) static int mpc5121_nfc_attach_chip(struct nand_chip *chip) { - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - - if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; @@ -772,6 +770,13 @@ static int mpc5121_nfc_probe(struct platform_device *op) goto error; } + /* + * This driver assumes that the default ECC engine should be TYPE_SOFT. + * Set ->engine_type before registering the NAND devices in order to + * provide a driver specific default value. + */ + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + /* Detect NAND chips */ retval = nand_scan(chip, be32_to_cpup(chips_no)); if (retval) { diff --git a/drivers/mtd/nand/raw/nandsim.c b/drivers/mtd/nand/raw/nandsim.c index 9a9f1c24d83219c34b1da8f1f2b45aceceeb47f7..9e7cf9f36057981f351db48d063dbe44ac883391 100644 --- a/drivers/mtd/nand/raw/nandsim.c +++ b/drivers/mtd/nand/raw/nandsim.c @@ -2453,5 +2453,6 @@ static void __exit ns_cleanup_module(void) module_exit(ns_cleanup_module); MODULE_LICENSE ("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); MODULE_AUTHOR ("Artem B. Bityuckiy"); MODULE_DESCRIPTION ("The NAND flash simulator"); diff --git a/drivers/mtd/nand/raw/orion_nand.c b/drivers/mtd/nand/raw/orion_nand.c index 66211c9311d2f3bf5e52c2b525b7494d0ffa261d..2c87c7d892058046e0aa55a50433b43840ecd7f4 100644 --- a/drivers/mtd/nand/raw/orion_nand.c +++ b/drivers/mtd/nand/raw/orion_nand.c @@ -85,9 +85,8 @@ static void orion_nand_read_buf(struct nand_chip *chip, uint8_t *buf, int len) static int orion_nand_attach_chip(struct nand_chip *chip) { - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - - if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; @@ -190,6 +189,13 @@ static int __init orion_nand_probe(struct platform_device *pdev) return ret; } + /* + * This driver assumes that the default ECC engine should be TYPE_SOFT. + * Set ->engine_type before registering the NAND devices in order to + * provide a driver specific default value. + */ + nc->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + ret = nand_scan(nc, 1); if (ret) goto no_dev; diff --git a/drivers/mtd/nand/raw/pasemi_nand.c b/drivers/mtd/nand/raw/pasemi_nand.c index 68c08772d7c26f5e6561828763f8cca002b28897..b0ba1fdbf5f2a338c9fc5a2478ca10df9d3d48cd 100644 --- a/drivers/mtd/nand/raw/pasemi_nand.c +++ b/drivers/mtd/nand/raw/pasemi_nand.c @@ -76,9 +76,8 @@ static int pasemi_device_ready(struct nand_chip *chip) static int pasemi_attach_chip(struct nand_chip *chip) { - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - - if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; @@ -155,6 +154,13 @@ static int pasemi_nand_probe(struct platform_device *ofdev) /* Enable the following for a flash based bad block table */ chip->bbt_options = NAND_BBT_USE_FLASH; + /* + * This driver assumes that the default ECC engine should be TYPE_SOFT. + * Set ->engine_type before registering the NAND devices in order to + * provide a driver specific default value. + */ + chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + /* Scan to find existence of the device */ err = nand_scan(chip, 1); if (err) diff --git a/drivers/mtd/nand/raw/plat_nand.c b/drivers/mtd/nand/raw/plat_nand.c index 7711e1020c21c537c8a8eb5ccd8a6ea201da270d..0ee08c42cc35bca0b705e717458ccbfd3ffec8e9 100644 --- a/drivers/mtd/nand/raw/plat_nand.c +++ b/drivers/mtd/nand/raw/plat_nand.c @@ -21,9 +21,8 @@ struct plat_nand_data { static int plat_nand_attach_chip(struct nand_chip *chip) { - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - - if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; @@ -94,6 +93,13 @@ static int plat_nand_probe(struct platform_device *pdev) goto out; } + /* + * This driver assumes that the default ECC engine should be TYPE_SOFT. + * Set ->engine_type before registering the NAND devices in order to + * provide a driver specific default value. + */ + data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + /* Scan to find existence of the device */ err = nand_scan(&data->chip, pdata->chip.nr_chips); if (err) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index b99d2e9d1e2c49a86157c8e06d08df067928a87e..bb181e18c7c52b80ca2fe7fff6c03f0f70324a3e 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2,7 +2,6 @@ /* * Copyright (c) 2016, The Linux Foundation. All rights reserved. */ - #include #include #include @@ -2968,10 +2967,6 @@ static int qcom_nandc_probe(struct platform_device *pdev) if (!nandc->base_dma) return -ENXIO; - ret = qcom_nandc_alloc(nandc); - if (ret) - goto err_nandc_alloc; - ret = clk_prepare_enable(nandc->core_clk); if (ret) goto err_core_clk; @@ -2980,6 +2975,10 @@ static int qcom_nandc_probe(struct platform_device *pdev) if (ret) goto err_aon_clk; + ret = qcom_nandc_alloc(nandc); + if (ret) + goto err_nandc_alloc; + ret = qcom_nandc_setup(nandc); if (ret) goto err_setup; @@ -2991,15 +2990,14 @@ static int qcom_nandc_probe(struct platform_device *pdev) return 0; err_setup: + qcom_nandc_unalloc(nandc); +err_nandc_alloc: clk_disable_unprepare(nandc->aon_clk); err_aon_clk: clk_disable_unprepare(nandc->core_clk); err_core_clk: - qcom_nandc_unalloc(nandc); -err_nandc_alloc: dma_unmap_resource(dev, res->start, resource_size(res), DMA_BIDIRECTIONAL, 0); - return ret; } diff --git a/drivers/mtd/nand/raw/socrates_nand.c b/drivers/mtd/nand/raw/socrates_nand.c index 70f8305c9b6e11a56c887112e61615a9452fa6c8..fb39cc7ebce03686af4d330df362719ade27a900 100644 --- a/drivers/mtd/nand/raw/socrates_nand.c +++ b/drivers/mtd/nand/raw/socrates_nand.c @@ -119,9 +119,8 @@ static int socrates_nand_device_ready(struct nand_chip *nand_chip) static int socrates_attach_chip(struct nand_chip *chip) { - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - - if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; @@ -175,6 +174,13 @@ static int socrates_nand_probe(struct platform_device *ofdev) /* TODO: I have no idea what real delay is. */ nand_chip->legacy.chip_delay = 20; /* 20us command delay time */ + /* + * This driver assumes that the default ECC engine should be TYPE_SOFT. + * Set ->engine_type before registering the NAND devices in order to + * provide a driver specific default value. + */ + nand_chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + dev_set_drvdata(&ofdev->dev, host); res = nand_scan(nand_chip, 1); diff --git a/drivers/mtd/nand/raw/xway_nand.c b/drivers/mtd/nand/raw/xway_nand.c index 26751976e50264aae741942a96b4d28bb524bdd4..236fd8c5a958f76c2fd0c2518c7128d29a857617 100644 --- a/drivers/mtd/nand/raw/xway_nand.c +++ b/drivers/mtd/nand/raw/xway_nand.c @@ -148,9 +148,8 @@ static void xway_write_buf(struct nand_chip *chip, const u_char *buf, int len) static int xway_attach_chip(struct nand_chip *chip) { - chip->ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; - - if (chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) + if (chip->ecc.engine_type == NAND_ECC_ENGINE_TYPE_SOFT && + chip->ecc.algo == NAND_ECC_ALGO_UNKNOWN) chip->ecc.algo = NAND_ECC_ALGO_HAMMING; return 0; @@ -219,6 +218,13 @@ static int xway_nand_probe(struct platform_device *pdev) | NAND_CON_SE_P | NAND_CON_WP_P | NAND_CON_PRE_P | cs_flag, EBU_NAND_CON); + /* + * This driver assumes that the default ECC engine should be TYPE_SOFT. + * Set ->engine_type before registering the NAND devices in order to + * provide a driver specific default value. + */ + data->chip.ecc.engine_type = NAND_ECC_ENGINE_TYPE_SOFT; + /* Scan to find existence of the device */ err = nand_scan(&data->chip, 1); if (err) diff --git a/drivers/mtd/spi-nor/controllers/hisi-sfc.c b/drivers/mtd/spi-nor/controllers/hisi-sfc.c index 440fc5ae7d34cbf0727068730d25e9c48c6ec301..fd2c19a047485e3a176bb76c449529c7fe95b660 100644 --- a/drivers/mtd/spi-nor/controllers/hisi-sfc.c +++ b/drivers/mtd/spi-nor/controllers/hisi-sfc.c @@ -477,7 +477,6 @@ static int hisi_spi_nor_remove(struct platform_device *pdev) hisi_spi_nor_unregister_all(host); mutex_destroy(&host->lock); - clk_disable_unprepare(host->clk); return 0; } diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index e85b04e9716b21523155864eed7b331651da9178..0597478129ffe177c21ff489dac358af08ae6870 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1476,3 +1476,4 @@ MODULE_VERSION(__stringify(UBI_VERSION)); MODULE_DESCRIPTION("UBI - Unsorted Block Images"); MODULE_AUTHOR("Artem Bityutskiy"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index e5961582345758e208c445d4f0bd2713e3c6fc72..5be850effd29d0b590fe18812000723ff9422b77 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,7 +149,7 @@ config NET_FC config IFB tristate "Intermediate Functional Block support" - depends on NET_CLS_ACT + depends on NET_ACT_MIRRED || NFT_FWD_NETDEV select NET_REDIRECT help This is an intermediate driver that allows sharing of diff --git a/drivers/net/arcnet/com20020-pci.c b/drivers/net/arcnet/com20020-pci.c index eb7f76753c9c0dbdd2ca07985d9c407b433ab506..9f44e2e458df17e08b2ab9238d5dca39c52110bb 100644 --- a/drivers/net/arcnet/com20020-pci.c +++ b/drivers/net/arcnet/com20020-pci.c @@ -136,6 +136,9 @@ static int com20020pci_probe(struct pci_dev *pdev, return -ENOMEM; ci = (struct com20020_pci_card_info *)id->driver_data; + if (!ci) + return -EINVAL; + priv->ci = ci; mm = &ci->misc_map; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index aa001b16765ae152e666fb1fa0d2ebd4ef8784e6..c2cef7ba2671969c74737d26ec3b6ea992d3b694 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -223,7 +223,7 @@ static inline int __check_agg_selection_timer(struct port *port) if (bond == NULL) return 0; - return BOND_AD_INFO(bond).agg_select_timer ? 1 : 0; + return atomic_read(&BOND_AD_INFO(bond).agg_select_timer) ? 1 : 0; } /** @@ -1003,8 +1003,8 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) if (port->aggregator && port->aggregator->is_active && !__port_is_enabled(port)) { - __enable_port(port); + *update_slave_arr = true; } } break; @@ -1760,6 +1760,7 @@ static void ad_agg_selection_logic(struct aggregator *agg, port = port->next_port_in_aggregator) { __enable_port(port); } + *update_slave_arr = true; } } @@ -1975,7 +1976,7 @@ static void ad_marker_response_received(struct bond_marker *marker, */ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) { - BOND_AD_INFO(bond).agg_select_timer = timeout; + atomic_set(&BOND_AD_INFO(bond).agg_select_timer, timeout); } /** @@ -2258,6 +2259,28 @@ void bond_3ad_update_ad_actor_settings(struct bonding *bond) spin_unlock_bh(&bond->mode_lock); } +/** + * bond_agg_timer_advance - advance agg_select_timer + * @bond: bonding structure + * + * Return true when agg_select_timer reaches 0. + */ +static bool bond_agg_timer_advance(struct bonding *bond) +{ + int val, nval; + + while (1) { + val = atomic_read(&BOND_AD_INFO(bond).agg_select_timer); + if (!val) + return false; + nval = val - 1; + if (atomic_cmpxchg(&BOND_AD_INFO(bond).agg_select_timer, + val, nval) == val) + break; + } + return nval == 0; +} + /** * bond_3ad_state_machine_handler - handle state machines timeout * @work: work context to fetch bonding struct to work on from @@ -2293,9 +2316,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) if (!bond_has_slaves(bond)) goto re_arm; - /* check if agg_select_timer timer after initialize is timed out */ - if (BOND_AD_INFO(bond).agg_select_timer && - !(--BOND_AD_INFO(bond).agg_select_timer)) { + if (bond_agg_timer_advance(bond)) { slave = bond_first_slave_rcu(bond); port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL; diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index c3091e00dd5fb295315c550509050ac16849387d..0436aef9c9ef5c22e241d3f6bab9c2e194a2ec1b 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -1531,14 +1531,14 @@ void bond_alb_monitor(struct work_struct *work) struct slave *slave; if (!bond_has_slaves(bond)) { - bond_info->tx_rebalance_counter = 0; + atomic_set(&bond_info->tx_rebalance_counter, 0); bond_info->lp_counter = 0; goto re_arm; } rcu_read_lock(); - bond_info->tx_rebalance_counter++; + atomic_inc(&bond_info->tx_rebalance_counter); bond_info->lp_counter++; /* send learning packets */ @@ -1560,7 +1560,7 @@ void bond_alb_monitor(struct work_struct *work) } /* rebalance tx traffic */ - if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) { + if (atomic_read(&bond_info->tx_rebalance_counter) >= BOND_TLB_REBALANCE_TICKS) { bond_for_each_slave_rcu(bond, slave, iter) { tlb_clear_slave(bond, slave, 1); if (slave == rcu_access_pointer(bond->curr_active_slave)) { @@ -1570,7 +1570,7 @@ void bond_alb_monitor(struct work_struct *work) bond_info->unbalanced_load = 0; } } - bond_info->tx_rebalance_counter = 0; + atomic_set(&bond_info->tx_rebalance_counter, 0); } if (bond_info->rlb_enabled) { @@ -1640,7 +1640,8 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave) tlb_init_slave(slave); /* order a rebalance ASAP */ - bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + atomic_set(&bond->alb_info.tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) bond->alb_info.rlb_rebalance = 1; @@ -1677,7 +1678,8 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char rlb_clear_slave(bond, slave); } else if (link == BOND_LINK_UP) { /* order a rebalance ASAP */ - bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS; + atomic_set(&bond_info->tx_rebalance_counter, + BOND_TLB_REBALANCE_TICKS); if (bond->alb_info.rlb_enabled) { bond->alb_info.rlb_rebalance = 1; /* If the updelay module parameter is smaller than the diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 645c7cabcbe4ddabaf9faafab26fce4455d1e340..cbeb69bca0bba2e1dc879aa9d2fd175596959411 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1061,9 +1061,6 @@ static bool bond_should_notify_peers(struct bonding *bond) slave = rcu_dereference(bond->curr_active_slave); rcu_read_unlock(); - netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n", - slave ? slave->dev->name : "NULL"); - if (!slave || !bond->send_peer_notif || bond->send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 || @@ -1071,6 +1068,9 @@ static bool bond_should_notify_peers(struct bonding *bond) test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) return false; + netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n", + slave ? slave->dev->name : "NULL"); + return true; } @@ -2272,10 +2272,9 @@ static int __bond_release_one(struct net_device *bond_dev, bond_select_active_slave(bond); } - if (!bond_has_slaves(bond)) { - bond_set_carrier(bond); + bond_set_carrier(bond); + if (!bond_has_slaves(bond)) eth_hw_addr_random(bond_dev); - } unblock_netpoll_tx(); synchronize_rcu(); @@ -4562,25 +4561,39 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave = NULL; struct list_head *iter; + bool xmit_suc = false; + bool skb_used = false; bond_for_each_slave_rcu(bond, slave, iter) { - if (bond_is_last_slave(bond, slave)) - break; - if (bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { - struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); + struct sk_buff *skb2; + + if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) + continue; + if (bond_is_last_slave(bond, slave)) { + skb2 = skb; + skb_used = true; + } else { + skb2 = skb_clone(skb, GFP_ATOMIC); if (!skb2) { net_err_ratelimited("%s: Error: %s: skb_clone() failed\n", bond_dev->name, __func__); continue; } - bond_dev_queue_xmit(bond, skb2, slave->dev); } + + if (bond_dev_queue_xmit(bond, skb2, slave->dev) == NETDEV_TX_OK) + xmit_suc = true; } - if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) - return bond_dev_queue_xmit(bond, skb, slave->dev); - return bond_tx_drop(bond_dev, skb); + if (!skb_used) + dev_kfree_skb_any(skb); + + if (xmit_suc) + return NETDEV_TX_OK; + + atomic_long_inc(&bond_dev->tx_dropped); + return NET_XMIT_DROP; } /*------------------------- Device initialization ---------------------------*/ diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index a4e4e15f574df253751355bb9f74e12c325e6f49..fe55c81608daa4b6d0b3fbb4f456cf442cb08a95 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1466,7 +1466,7 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, mac = (u8 *)&newval->value; } - if (!is_valid_ether_addr(mac)) + if (is_multicast_ether_addr(mac)) goto err; netdev_dbg(bond->dev, "Setting ad_actor_system to %pM\n", mac); diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index fd07561da0348abdaf7f7d9d06e061c4d5361db2..6a6cdd0bb2585bd3f77d2cb590da9910f89f0337 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -108,15 +108,15 @@ static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf) } static SLAVE_ATTR_RO(ad_partner_oper_port_state); -static const struct slave_attribute *slave_attrs[] = { - &slave_attr_state, - &slave_attr_mii_status, - &slave_attr_link_failure_count, - &slave_attr_perm_hwaddr, - &slave_attr_queue_id, - &slave_attr_ad_aggregator_id, - &slave_attr_ad_actor_oper_port_state, - &slave_attr_ad_partner_oper_port_state, +static const struct attribute *slave_attrs[] = { + &slave_attr_state.attr, + &slave_attr_mii_status.attr, + &slave_attr_link_failure_count.attr, + &slave_attr_perm_hwaddr.attr, + &slave_attr_queue_id.attr, + &slave_attr_ad_aggregator_id.attr, + &slave_attr_ad_actor_oper_port_state.attr, + &slave_attr_ad_partner_oper_port_state.attr, NULL }; @@ -137,24 +137,10 @@ const struct sysfs_ops slave_sysfs_ops = { int bond_sysfs_slave_add(struct slave *slave) { - const struct slave_attribute **a; - int err; - - for (a = slave_attrs; *a; ++a) { - err = sysfs_create_file(&slave->kobj, &((*a)->attr)); - if (err) { - kobject_put(&slave->kobj); - return err; - } - } - - return 0; + return sysfs_create_files(&slave->kobj, slave_attrs); } void bond_sysfs_slave_del(struct slave *slave) { - const struct slave_attribute **a; - - for (a = slave_attrs; *a; ++a) - sysfs_remove_file(&slave->kobj, &((*a)->attr)); + sysfs_remove_files(&slave->kobj, slave_attrs); } diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 99323c273aa56094b084b21be139864278af6bde..9d7445f6ef143ed751ca68ca2069f443cd7dcf28 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -248,6 +248,9 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices"); #define KVASER_PCIEFD_SPACK_EWLR BIT(23) #define KVASER_PCIEFD_SPACK_EPLR BIT(24) +/* Kvaser KCAN_EPACK second word */ +#define KVASER_PCIEFD_EPACK_DIR_TX BIT(0) + struct kvaser_pciefd; struct kvaser_pciefd_can { @@ -1285,7 +1288,10 @@ static int kvaser_pciefd_rx_error_frame(struct kvaser_pciefd_can *can, can->err_rep_cnt++; can->can.can_stats.bus_error++; - stats->rx_errors++; + if (p->header[1] & KVASER_PCIEFD_EPACK_DIR_TX) + stats->tx_errors++; + else + stats->rx_errors++; can->bec.txerr = bec.txerr; can->bec.rxerr = bec.rxerr; diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index 62bcef4bb95fdefa4f7ffb1e46dd598b6831d24e..19a7e4adb9338d945490fb68117221a23ba3529b 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -207,15 +207,15 @@ enum m_can_reg { /* Interrupts for version 3.0.x */ #define IR_ERR_LEC_30X (IR_STE | IR_FOE | IR_ACKE | IR_BE | IR_CRCE) -#define IR_ERR_BUS_30X (IR_ERR_LEC_30X | IR_WDI | IR_ELO | IR_BEU | \ - IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \ - IR_RF1L | IR_RF0L) +#define IR_ERR_BUS_30X (IR_ERR_LEC_30X | IR_WDI | IR_BEU | IR_BEC | \ + IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | IR_RF1L | \ + IR_RF0L) #define IR_ERR_ALL_30X (IR_ERR_STATE | IR_ERR_BUS_30X) /* Interrupts for version >= 3.1.x */ #define IR_ERR_LEC_31X (IR_PED | IR_PEA) -#define IR_ERR_BUS_31X (IR_ERR_LEC_31X | IR_WDI | IR_ELO | IR_BEU | \ - IR_BEC | IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | \ - IR_RF1L | IR_RF0L) +#define IR_ERR_BUS_31X (IR_ERR_LEC_31X | IR_WDI | IR_BEU | IR_BEC | \ + IR_TOO | IR_MRAF | IR_TSW | IR_TEFL | IR_RF1L | \ + IR_RF0L) #define IR_ERR_ALL_31X (IR_ERR_STATE | IR_ERR_BUS_31X) /* Interrupt Line Select (ILS) */ @@ -752,8 +752,6 @@ static void m_can_handle_other_err(struct net_device *dev, u32 irqstatus) { if (irqstatus & IR_WDI) netdev_err(dev, "Message RAM Watchdog event due to missing READY\n"); - if (irqstatus & IR_ELO) - netdev_err(dev, "Error Logging Overflow\n"); if (irqstatus & IR_BEU) netdev_err(dev, "Bit Error Uncorrected\n"); if (irqstatus & IR_BEC) diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index 5c180d2f3c3c12b8858ade18dec34fe1c60cbe82..79d9abdcc65aa86eb1a9a18781ecfd47f75bdff2 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -692,11 +692,11 @@ static int pch_can_rx_normal(struct net_device *ndev, u32 obj_num, int quota) cf->data[i + 1] = data_reg >> 8; } - netif_receive_skb(skb); rcv_pkts++; stats->rx_packets++; quota--; stats->rx_bytes += cf->can_dlc; + netif_receive_skb(skb); pch_fifo_thresh(priv, obj_num); obj_num++; diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index 48575900adb75f68e3a427212c655e8194c07ba2..3570a4de0085aec1d863948ce3a59b8f23be1a95 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -846,10 +846,12 @@ static int __maybe_unused rcar_can_suspend(struct device *dev) struct rcar_can_priv *priv = netdev_priv(ndev); u16 ctlr; - if (netif_running(ndev)) { - netif_stop_queue(ndev); - netif_device_detach(ndev); - } + if (!netif_running(ndev)) + return 0; + + netif_stop_queue(ndev); + netif_device_detach(ndev); + ctlr = readw(&priv->regs->ctlr); ctlr |= RCAR_CAN_CTLR_CANM_HALT; writew(ctlr, &priv->regs->ctlr); @@ -868,6 +870,9 @@ static int __maybe_unused rcar_can_resume(struct device *dev) u16 ctlr; int err; + if (!netif_running(ndev)) + return 0; + err = clk_enable(priv->clk); if (err) { netdev_err(ndev, "clk_enable() failed, error %d\n", err); @@ -881,10 +886,9 @@ static int __maybe_unused rcar_can_resume(struct device *dev) writew(ctlr, &priv->regs->ctlr); priv->can.state = CAN_STATE_ERROR_ACTIVE; - if (netif_running(ndev)) { - netif_device_attach(ndev); - netif_start_queue(ndev); - } + netif_device_attach(ndev); + netif_start_queue(ndev); + return 0; } diff --git a/drivers/net/can/rcar/rcar_canfd.c b/drivers/net/can/rcar/rcar_canfd.c index de59dd6aad29918633ccfc4a237fccc26643b1d5..67f0f14e2bf4e1eadcd9409aa80d0a4ac6ece0dd 100644 --- a/drivers/net/can/rcar/rcar_canfd.c +++ b/drivers/net/can/rcar/rcar_canfd.c @@ -1598,15 +1598,15 @@ static int rcar_canfd_channel_probe(struct rcar_canfd_global *gpriv, u32 ch, netif_napi_add(ndev, &priv->napi, rcar_canfd_rx_poll, RCANFD_NAPI_WEIGHT); + spin_lock_init(&priv->tx_lock); + devm_can_led_init(ndev); + gpriv->ch[priv->channel] = priv; err = register_candev(ndev); if (err) { dev_err(&pdev->dev, "register_candev() failed, error %d\n", err); goto fail_candev; } - spin_lock_init(&priv->tx_lock); - devm_can_led_init(ndev); - gpriv->ch[priv->channel] = priv; dev_info(&pdev->dev, "device registered (channel %u)\n", priv->channel); return 0; diff --git a/drivers/net/can/sja1000/ems_pcmcia.c b/drivers/net/can/sja1000/ems_pcmcia.c index 770304eaef950386f0754c7f35854bf94005001f..80b30768d9c6c589c0bc64786dae3d204aa718eb 100644 --- a/drivers/net/can/sja1000/ems_pcmcia.c +++ b/drivers/net/can/sja1000/ems_pcmcia.c @@ -235,7 +235,12 @@ static int ems_pcmcia_add_card(struct pcmcia_device *pdev, unsigned long base) free_sja1000dev(dev); } - err = request_irq(dev->irq, &ems_pcmcia_interrupt, IRQF_SHARED, + if (!card->channels) { + err = -ENODEV; + goto failure_cleanup; + } + + err = request_irq(pdev->irq, &ems_pcmcia_interrupt, IRQF_SHARED, DRV_NAME, card); if (!err) return 0; diff --git a/drivers/net/can/sja1000/peak_pci.c b/drivers/net/can/sja1000/peak_pci.c index 4713921bd511e6b92ff707842a1d5b9e9acfce3f..c3fd7cd802a97c8048c019f497fa963e9cead9da 100644 --- a/drivers/net/can/sja1000/peak_pci.c +++ b/drivers/net/can/sja1000/peak_pci.c @@ -731,16 +731,15 @@ static void peak_pci_remove(struct pci_dev *pdev) struct net_device *prev_dev = chan->prev_dev; dev_info(&pdev->dev, "removing device %s\n", dev->name); + /* do that only for first channel */ + if (!prev_dev && chan->pciec_card) + peak_pciec_remove(chan->pciec_card); unregister_sja1000dev(dev); free_sja1000dev(dev); dev = prev_dev; - if (!dev) { - /* do that only for first channel */ - if (chan->pciec_card) - peak_pciec_remove(chan->pciec_card); + if (!dev) break; - } priv = netdev_priv(dev); chan = priv->priv; } diff --git a/drivers/net/can/softing/softing_cs.c b/drivers/net/can/softing/softing_cs.c index 2e93ee79237395e310b5f16e0ef15aa77e427111..e5c939b63fa65889e34e274aaee1426a349b056e 100644 --- a/drivers/net/can/softing/softing_cs.c +++ b/drivers/net/can/softing/softing_cs.c @@ -293,7 +293,7 @@ static int softingcs_probe(struct pcmcia_device *pcmcia) return 0; platform_failed: - kfree(dev); + platform_device_put(pdev); mem_failed: pcmcia_bad: pcmcia_failed: diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c index ccd649a8e37bdc23de9449c42db7cdda2e714fd4..bad69a4abec1043e5d62e87725ccca07627eb8dd 100644 --- a/drivers/net/can/softing/softing_fw.c +++ b/drivers/net/can/softing/softing_fw.c @@ -565,18 +565,19 @@ int softing_startstop(struct net_device *dev, int up) if (ret < 0) goto failed; } - /* enable_error_frame */ - /* + + /* enable_error_frame + * * Error reporting is switched off at the moment since * the receiving of them is not yet 100% verified * This should be enabled sooner or later - * - if (error_reporting) { + */ + if (0 && error_reporting) { ret = softing_fct_cmd(card, 51, "enable_error_frame"); if (ret < 0) goto failed; } - */ + /* initialize interface */ iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 2]); iowrite16(1, &card->dpram[DPRAM_FCT_PARAM + 4]); diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 68ff931993c2557a14627c5240e70682fc5a66ec..abe00a085f6fcd872481441120aa020471c6fe80 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -1041,7 +1041,7 @@ static int mcp251xfd_chip_start(struct mcp251xfd_priv *priv) err = mcp251xfd_chip_rx_int_enable(priv); if (err) - return err; + goto out_chip_stop; err = mcp251xfd_chip_ecc_init(priv); if (err) @@ -1288,7 +1288,7 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv, len > tx_ring->obj_num || offset + len > tx_ring->obj_num)) { netdev_err(priv->ndev, - "Trying to read to many TEF objects (max=%d, offset=%d, len=%d).\n", + "Trying to read too many TEF objects (max=%d, offset=%d, len=%d).\n", tx_ring->obj_num, offset, len); return -ERANGE; } @@ -2497,7 +2497,7 @@ static int mcp251xfd_register_chip_detect(struct mcp251xfd_priv *priv) if (!mcp251xfd_is_251X(priv) && priv->devtype_data.model != devtype_data->model) { netdev_info(ndev, - "Detected %s, but firmware specifies a %s. Fixing up.", + "Detected %s, but firmware specifies a %s. Fixing up.\n", __mcp251xfd_get_model_str(devtype_data->model), mcp251xfd_get_model_str(priv)); } @@ -2534,7 +2534,7 @@ static int mcp251xfd_register_check_rx_int(struct mcp251xfd_priv *priv) return 0; netdev_info(priv->ndev, - "RX_INT active after softreset, disabling RX_INT support."); + "RX_INT active after softreset, disabling RX_INT support.\n"); devm_gpiod_put(&priv->spi->dev, priv->rx_int); priv->rx_int = NULL; diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 018ca3b057a3baeb59d460385b464c5faa7b7747..e023c401f4f77387826eff3f3f71a6fb862a89f5 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -190,8 +190,8 @@ struct gs_can { struct gs_usb { struct gs_can *canch[GS_MAX_INTF]; struct usb_anchor rx_submitted; - atomic_t active_channels; struct usb_device *udev; + u8 active_channels; }; /* 'allocate' a tx context. @@ -320,7 +320,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) /* device reports out of range channel id */ if (hf->channel >= GS_MAX_INTF) - goto resubmit_urb; + goto device_detach; dev = usbcan->canch[hf->channel]; @@ -405,6 +405,7 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) /* USB failure take down all interfaces */ if (rc == -ENODEV) { + device_detach: for (rc = 0; rc < GS_MAX_INTF; rc++) { if (usbcan->canch[rc]) netif_device_detach(usbcan->canch[rc]->netdev); @@ -506,6 +507,8 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, hf->echo_id = idx; hf->channel = dev->channel; + hf->flags = 0; + hf->reserved = 0; cf = (struct can_frame *)skb->data; @@ -585,7 +588,7 @@ static int gs_can_open(struct net_device *netdev) if (rc) return rc; - if (atomic_add_return(1, &parent->active_channels) == 1) { + if (!parent->active_channels) { for (i = 0; i < GS_MAX_RX_URBS; i++) { struct urb *urb; u8 *buf; @@ -686,6 +689,7 @@ static int gs_can_open(struct net_device *netdev) dev->can.state = CAN_STATE_ERROR_ACTIVE; + parent->active_channels++; if (!(dev->can.ctrlmode & CAN_CTRLMODE_LISTENONLY)) netif_start_queue(netdev); @@ -701,7 +705,8 @@ static int gs_can_close(struct net_device *netdev) netif_stop_queue(netdev); /* Stop polling */ - if (atomic_dec_and_test(&parent->active_channels)) + parent->active_channels--; + if (!parent->active_channels) usb_kill_anchored_urbs(&parent->rx_submitted); /* Stop sending URBs */ @@ -980,8 +985,6 @@ static int gs_usb_probe(struct usb_interface *intf, init_usb_anchor(&dev->rx_submitted); - atomic_set(&dev->active_channels, 0); - usb_set_intfdata(intf, dev); dev->udev = interface_to_usbdev(intf); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index 1b9957f12459a9780ac9e94a73fa773cca6483de..8b5d1add899a60143d7d7d0563ed4b81376b90e1 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -28,10 +28,6 @@ #include "kvaser_usb.h" -/* Forward declaration */ -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; - -#define CAN_USB_CLOCK 8000000 #define MAX_USBCAN_NET_DEVICES 2 /* Command header size */ @@ -80,6 +76,12 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg; #define CMD_LEAF_LOG_MESSAGE 106 +/* Leaf frequency options */ +#define KVASER_USB_LEAF_SWOPTION_FREQ_MASK 0x60 +#define KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK 0 +#define KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK BIT(5) +#define KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK BIT(6) + /* error factors */ #define M16C_EF_ACKE BIT(0) #define M16C_EF_CRCE BIT(1) @@ -340,6 +342,50 @@ struct kvaser_usb_err_summary { }; }; +static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = { + .name = "kvaser_usb", + .tseg1_min = KVASER_USB_TSEG1_MIN, + .tseg1_max = KVASER_USB_TSEG1_MAX, + .tseg2_min = KVASER_USB_TSEG2_MIN, + .tseg2_max = KVASER_USB_TSEG2_MAX, + .sjw_max = KVASER_USB_SJW_MAX, + .brp_min = KVASER_USB_BRP_MIN, + .brp_max = KVASER_USB_BRP_MAX, + .brp_inc = KVASER_USB_BRP_INC, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_8mhz = { + .clock = { + .freq = 8000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_16mhz = { + .clock = { + .freq = 16000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_24mhz = { + .clock = { + .freq = 24000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + +static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_32mhz = { + .clock = { + .freq = 32000000, + }, + .timestamp_freq = 1, + .bittiming_const = &kvaser_usb_leaf_bittiming_const, +}; + static void * kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, const struct sk_buff *skb, int *frame_len, @@ -471,6 +517,27 @@ static int kvaser_usb_leaf_send_simple_cmd(const struct kvaser_usb *dev, return rc; } +static void kvaser_usb_leaf_get_software_info_leaf(struct kvaser_usb *dev, + const struct leaf_cmd_softinfo *softinfo) +{ + u32 sw_options = le32_to_cpu(softinfo->sw_options); + + dev->fw_version = le32_to_cpu(softinfo->fw_version); + dev->max_tx_urbs = le16_to_cpu(softinfo->max_outstanding_tx); + + switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) { + case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_16mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_24mhz; + break; + case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK: + dev->cfg = &kvaser_usb_leaf_dev_cfg_32mhz; + break; + } +} + static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) { struct kvaser_cmd cmd; @@ -486,14 +553,13 @@ static int kvaser_usb_leaf_get_software_info_inner(struct kvaser_usb *dev) switch (dev->card_data.leaf.family) { case KVASER_LEAF: - dev->fw_version = le32_to_cpu(cmd.u.leaf.softinfo.fw_version); - dev->max_tx_urbs = - le16_to_cpu(cmd.u.leaf.softinfo.max_outstanding_tx); + kvaser_usb_leaf_get_software_info_leaf(dev, &cmd.u.leaf.softinfo); break; case KVASER_USBCAN: dev->fw_version = le32_to_cpu(cmd.u.usbcan.softinfo.fw_version); dev->max_tx_urbs = le16_to_cpu(cmd.u.usbcan.softinfo.max_outstanding_tx); + dev->cfg = &kvaser_usb_leaf_dev_cfg_8mhz; break; } @@ -1225,24 +1291,11 @@ static int kvaser_usb_leaf_init_card(struct kvaser_usb *dev) { struct kvaser_usb_dev_card_data *card_data = &dev->card_data; - dev->cfg = &kvaser_usb_leaf_dev_cfg; card_data->ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; return 0; } -static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = { - .name = "kvaser_usb", - .tseg1_min = KVASER_USB_TSEG1_MIN, - .tseg1_max = KVASER_USB_TSEG1_MAX, - .tseg2_min = KVASER_USB_TSEG2_MIN, - .tseg2_max = KVASER_USB_TSEG2_MAX, - .sjw_max = KVASER_USB_SJW_MAX, - .brp_min = KVASER_USB_BRP_MIN, - .brp_max = KVASER_USB_BRP_MAX, - .brp_inc = KVASER_USB_BRP_INC, -}; - static int kvaser_usb_leaf_set_bittiming(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); @@ -1348,11 +1401,3 @@ const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops = { .dev_read_bulk_callback = kvaser_usb_leaf_read_bulk_callback, .dev_frame_to_cmd = kvaser_usb_leaf_frame_to_cmd, }; - -static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg = { - .clock = { - .freq = CAN_USB_CLOCK, - }, - .timestamp_freq = 1, - .bittiming_const = &kvaser_usb_leaf_bittiming_const, -}; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c index d565922838186f599ed714191d83e4cadf904a48..301a0f54fc994f5a9611194a617820796b5d573d 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c @@ -551,11 +551,10 @@ static int pcan_usb_fd_decode_status(struct pcan_usb_fd_if *usb_if, } else if (sm->channel_p_w_b & PUCAN_BUS_WARNING) { new_state = CAN_STATE_ERROR_WARNING; } else { - /* no error bit (so, no error skb, back to active state) */ - dev->can.state = CAN_STATE_ERROR_ACTIVE; + /* back to (or still in) ERROR_ACTIVE state */ + new_state = CAN_STATE_ERROR_ACTIVE; pdev->bec.txerr = 0; pdev->bec.rxerr = 0; - return 0; } /* state hasn't changed */ diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c index 48d746e18f30219e49ed04c24165a789d0bc71fc..375998263af7a2abb70f5abca7d86c8619541346 100644 --- a/drivers/net/can/xilinx_can.c +++ b/drivers/net/can/xilinx_can.c @@ -1762,7 +1762,12 @@ static int xcan_probe(struct platform_device *pdev) spin_lock_init(&priv->tx_lock); /* Get IRQ for the device */ - ndev->irq = platform_get_irq(pdev, 0); + ret = platform_get_irq(pdev, 0); + if (ret < 0) + goto err_free; + + ndev->irq = ret; + ndev->flags |= IFF_ECHO; /* We support local echo */ platform_set_drvdata(pdev, ndev); diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 2451f61a38e4ae3582c41fe6a313e012d87031ef..9e32ea9c116473d532784e58606f64b6839974f2 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -36,6 +36,7 @@ config NET_DSA_MT7530 tristate "MediaTek MT753x and MT7621 Ethernet switch support" depends on NET_DSA select NET_DSA_TAG_MTK + select MEDIATEK_GE_PHY help This enables support for the MediaTek MT7530, MT7531, and MT7621 Ethernet switch chips. diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 690e9d9495e7593aa2eda07ff1dae9c2049fef3a..08a675a5328d718091ee6189e186d6dd06fec394 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -504,7 +504,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) get_device(&priv->master_mii_bus->dev); priv->master_mii_dn = dn; - priv->slave_mii_bus = devm_mdiobus_alloc(ds->dev); + priv->slave_mii_bus = mdiobus_alloc(); if (!priv->slave_mii_bus) { of_node_put(dn); return -ENOMEM; @@ -564,8 +564,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) } err = mdiobus_register(priv->slave_mii_bus); - if (err && dn) + if (err && dn) { + mdiobus_free(priv->slave_mii_bus); of_node_put(dn); + } return err; } @@ -573,6 +575,7 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv) { mdiobus_unregister(priv->slave_mii_bus); + mdiobus_free(priv->slave_mii_bus); of_node_put(priv->master_mii_dn); } diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c index dcf1fc89451f28f98f501536b1ce334f717b4d49..2044d440d7de4cc15c841c4f331a62d6d2cebc70 100644 --- a/drivers/net/dsa/lan9303-core.c +++ b/drivers/net/dsa/lan9303-core.c @@ -1305,7 +1305,7 @@ static int lan9303_probe_reset_gpio(struct lan9303 *chip, struct device_node *np) { chip->reset_gpio = devm_gpiod_get_optional(chip->dev, "reset", - GPIOD_OUT_LOW); + GPIOD_OUT_HIGH); if (IS_ERR(chip->reset_gpio)) return PTR_ERR(chip->reset_gpio); diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c index 95e634cbc4b63a07691bcc7960501c685394e5f1..80ef7ea779545c3db1d0bfb64256a30aaac82a96 100644 --- a/drivers/net/dsa/lantiq_gswip.c +++ b/drivers/net/dsa/lantiq_gswip.c @@ -229,7 +229,7 @@ #define GSWIP_SDMA_PCTRLp(p) (0xBC0 + ((p) * 0x6)) #define GSWIP_SDMA_PCTRL_EN BIT(0) /* SDMA Port Enable */ #define GSWIP_SDMA_PCTRL_FCEN BIT(1) /* Flow Control Enable */ -#define GSWIP_SDMA_PCTRL_PAUFWD BIT(1) /* Pause Frame Forwarding */ +#define GSWIP_SDMA_PCTRL_PAUFWD BIT(3) /* Pause Frame Forwarding */ #define GSWIP_TABLE_ACTIVE_VLAN 0x01 #define GSWIP_TABLE_VLAN_MAPPING 0x02 @@ -495,8 +495,9 @@ static int gswip_mdio_rd(struct mii_bus *bus, int addr, int reg) static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np) { struct dsa_switch *ds = priv->ds; + int err; - ds->slave_mii_bus = devm_mdiobus_alloc(priv->dev); + ds->slave_mii_bus = mdiobus_alloc(); if (!ds->slave_mii_bus) return -ENOMEM; @@ -509,7 +510,11 @@ static int gswip_mdio(struct gswip_priv *priv, struct device_node *mdio_np) ds->slave_mii_bus->parent = priv->dev; ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; - return of_mdiobus_register(ds->slave_mii_bus, mdio_np); + err = of_mdiobus_register(ds->slave_mii_bus, mdio_np); + if (err) + mdiobus_free(ds->slave_mii_bus); + + return err; } static int gswip_pce_table_entry_read(struct gswip_priv *priv, @@ -2086,8 +2091,10 @@ disable_switch: gswip_mdio_mask(priv, GSWIP_MDIO_GLOB_ENABLE, 0, GSWIP_MDIO_GLOB); dsa_unregister_switch(priv->ds); mdio_bus: - if (mdio_np) + if (mdio_np) { mdiobus_unregister(priv->ds->slave_mii_bus); + mdiobus_free(priv->ds->slave_mii_bus); + } put_mdio_node: of_node_put(mdio_np); for (i = 0; i < priv->num_gphy_fw; i++) @@ -2108,6 +2115,7 @@ static int gswip_remove(struct platform_device *pdev) if (priv->ds->slave_mii_bus) { mdiobus_unregister(priv->ds->slave_mii_bus); of_node_put(priv->ds->slave_mii_bus->dev.of_node); + mdiobus_free(priv->ds->slave_mii_bus); } for (i = 0; i < priv->num_gphy_fw; i++) diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index 88fa0779e0bc9278502ca8b0d84c55fded3020d4..e3c338624b95c7f842cec80f152ec4145bc86f00 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -460,8 +460,10 @@ EXPORT_SYMBOL(ksz_switch_register); void ksz_switch_remove(struct ksz_device *dev) { /* timer started */ - if (dev->mib_read_interval) + if (dev->mib_read_interval) { + dev->mib_read_interval = 0; cancel_delayed_work_sync(&dev->mib_read); + } dev->dev_ops->exit(dev); dsa_unregister_switch(dev->ds); diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c index 73de09093c350b9a7e96fa4e7a22495dc2c030db..5ee8809bc27112cfd9da1430531abfc6da73f592 100644 --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@ -981,9 +981,6 @@ mt7530_port_enable(struct dsa_switch *ds, int port, { struct mt7530_priv *priv = ds->priv; - if (!dsa_is_user_port(ds, port)) - return 0; - mutex_lock(&priv->reg_mutex); /* Allow the user port gets connected to the cpu port and also @@ -1006,9 +1003,6 @@ mt7530_port_disable(struct dsa_switch *ds, int port) { struct mt7530_priv *priv = ds->priv; - if (!dsa_is_user_port(ds, port)) - return; - mutex_lock(&priv->reg_mutex); /* Clear up all port matrix which could be restored in the next @@ -2348,7 +2342,7 @@ mt753x_phylink_validate(struct dsa_switch *ds, int port, phylink_set_port_modes(mask); - if (state->interface != PHY_INTERFACE_MODE_TRGMII || + if (state->interface != PHY_INTERFACE_MODE_TRGMII && !phy_interface_mode_is_8023z(state->interface)) { phylink_set(mask, 10baseT_Half); phylink_set(mask, 10baseT_Full); @@ -2593,7 +2587,7 @@ mt7530_probe(struct mdio_device *mdiodev) return -ENOMEM; priv->ds->dev = &mdiodev->dev; - priv->ds->num_ports = DSA_MAX_PORTS; + priv->ds->num_ports = MT7530_NUM_PORTS; /* Use medatek,mcm property to distinguish hardware type that would * casues a little bit differences on power-on sequence. diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 184cbc93328c2ce5f95aa210eb1cc8ee99558dfa..1992be77522ac0db4046e74b5e29badbba1718d9 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -726,7 +726,11 @@ static void mv88e6xxx_mac_link_down(struct dsa_switch *ds, int port, ops = chip->info->ops; mv88e6xxx_reg_lock(chip); - if ((!mv88e6xxx_port_ppu_updates(chip, port) || + /* Internal PHYs propagate their configuration directly to the MAC. + * External PHYs depend on whether the PPU is enabled for this port. + */ + if (((!mv88e6xxx_phy_is_internal(ds, port) && + !mv88e6xxx_port_ppu_updates(chip, port)) || mode == MLO_AN_FIXED) && ops->port_set_link) err = ops->port_set_link(chip, port, LINK_FORCED_DOWN); mv88e6xxx_reg_unlock(chip); @@ -749,7 +753,12 @@ static void mv88e6xxx_mac_link_up(struct dsa_switch *ds, int port, ops = chip->info->ops; mv88e6xxx_reg_lock(chip); - if (!mv88e6xxx_port_ppu_updates(chip, port) || mode == MLO_AN_FIXED) { + /* Internal PHYs propagate their configuration directly to the MAC. + * External PHYs depend on whether the PPU is enabled for this port. + */ + if ((!mv88e6xxx_phy_is_internal(ds, port) && + !mv88e6xxx_port_ppu_updates(chip, port)) || + mode == MLO_AN_FIXED) { /* FIXME: for an automedia port, should we force the link * down here - what if the link comes up due to "other" media * while we're bringing the port up, how is the exclusivity @@ -2613,8 +2622,8 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (err) return err; - /* Port Control 2: don't force a good FCS, set the maximum frame size to - * 10240 bytes, disable 802.1q tags checking, don't discard tagged or + /* Port Control 2: don't force a good FCS, set the MTU size to + * 10222 bytes, disable 802.1q tags checking, don't discard tagged or * untagged frames on this port, do a destination address lookup on all * received packets as usual, disable ARP mirroring and don't send a * copy of all transmitted/received frames on this port to the CPU. @@ -2633,7 +2642,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) return err; if (chip->info->ops->port_set_jumbo_size) { - err = chip->info->ops->port_set_jumbo_size(chip, port, 10240); + err = chip->info->ops->port_set_jumbo_size(chip, port, 10218); if (err) return err; } @@ -2718,10 +2727,10 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port) struct mv88e6xxx_chip *chip = ds->priv; if (chip->info->ops->port_set_jumbo_size) - return 10240; + return 10240 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; else if (chip->info->ops->set_max_frame_size) - return 1632; - return 1522; + return 1632 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; + return 1522 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN; } static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) @@ -2729,6 +2738,9 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu) struct mv88e6xxx_chip *chip = ds->priv; int ret = 0; + if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port)) + new_mtu += EDSA_HLEN; + mv88e6xxx_reg_lock(chip); if (chip->info->ops->port_set_jumbo_size) ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu); @@ -3060,7 +3072,7 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, return err; } - bus = devm_mdiobus_alloc_size(chip->dev, sizeof(*mdio_bus)); + bus = mdiobus_alloc_size(sizeof(*mdio_bus)); if (!bus) return -ENOMEM; @@ -3085,14 +3097,14 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, if (!external) { err = mv88e6xxx_g2_irq_mdio_setup(chip, bus); if (err) - return err; + goto out; } err = of_mdiobus_register(bus, np); if (err) { dev_err(chip->dev, "Cannot register MDIO bus (%d)\n", err); mv88e6xxx_g2_irq_mdio_free(chip, bus); - return err; + goto out; } if (external) @@ -3101,21 +3113,26 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, list_add(&mdio_bus->list, &chip->mdios); return 0; + +out: + mdiobus_free(bus); + return err; } static void mv88e6xxx_mdios_unregister(struct mv88e6xxx_chip *chip) { - struct mv88e6xxx_mdio_bus *mdio_bus; + struct mv88e6xxx_mdio_bus *mdio_bus, *p; struct mii_bus *bus; - list_for_each_entry(mdio_bus, &chip->mdios, list) { + list_for_each_entry_safe(mdio_bus, p, &chip->mdios, list) { bus = mdio_bus->bus; if (!mdio_bus->external) mv88e6xxx_g2_irq_mdio_free(chip, bus); mdiobus_unregister(bus); + mdiobus_free(bus); } } @@ -3455,7 +3472,6 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .port_set_frame_mode = mv88e6351_port_set_frame_mode, .port_set_egress_floods = mv88e6352_port_set_egress_floods, .port_set_ether_type = mv88e6351_port_set_ether_type, - .port_set_jumbo_size = mv88e6165_port_set_jumbo_size, .port_egress_rate_limiting = mv88e6097_port_egress_rate_limiting, .port_pause_limit = mv88e6097_port_pause_limit, .port_disable_learn_limit = mv88e6xxx_port_disable_learn_limit, @@ -3480,6 +3496,7 @@ static const struct mv88e6xxx_ops mv88e6161_ops = { .avb_ops = &mv88e6165_avb_ops, .ptp_ops = &mv88e6165_ptp_ops, .phylink_validate = mv88e6185_phylink_validate, + .set_max_frame_size = mv88e6185_g1_set_max_frame_size, }; static const struct mv88e6xxx_ops mv88e6165_ops = { diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h index 81c244fc04195ad949399eecbee88a25e932ebf1..51a7ff44478ec0b18d7f4bd783bb7a15663c31e9 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.h +++ b/drivers/net/dsa/mv88e6xxx/chip.h @@ -18,6 +18,7 @@ #include #include +#define EDSA_HLEN 8 #define MV88E6XXX_N_FID 4096 /* PVT limits for 4-bit port and 5-bit switch */ diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c index 33d443a37efc4628246ec693290b6b68e8a904ea..9936ae69e5ee498449caafd0c560cdb7de18c2a3 100644 --- a/drivers/net/dsa/mv88e6xxx/global1.c +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -232,6 +232,8 @@ int mv88e6185_g1_set_max_frame_size(struct mv88e6xxx_chip *chip, int mtu) u16 val; int err; + mtu += ETH_HLEN + ETH_FCS_LEN; + err = mv88e6xxx_g1_read(chip, MV88E6XXX_G1_CTL1, &val); if (err) return err; diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c index 8128dc607cf46f15103218924ca0cdc73be79292..dfd9e8292e9a0607fb8239fb8a4cb3604fd7e9ee 100644 --- a/drivers/net/dsa/mv88e6xxx/port.c +++ b/drivers/net/dsa/mv88e6xxx/port.c @@ -1082,6 +1082,8 @@ int mv88e6165_port_set_jumbo_size(struct mv88e6xxx_chip *chip, int port, u16 reg; int err; + size += VLAN_ETH_HLEN + ETH_FCS_LEN; + err = mv88e6xxx_port_read(chip, port, MV88E6XXX_PORT_CTL2, ®); if (err) return err; diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 2e5bbdca5ea4756df5089804bce7970735aba5ff..cd8d9b0e0edb322f519698fb193564372d9a46e1 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1050,7 +1050,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot) return PTR_ERR(hw); } - bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv)); + bus = mdiobus_alloc_size(sizeof(*mdio_priv)); if (!bus) return -ENOMEM; @@ -1070,6 +1070,7 @@ static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot) rc = mdiobus_register(bus); if (rc < 0) { dev_err(dev, "failed to register MDIO bus\n"); + mdiobus_free(bus); return rc; } @@ -1119,6 +1120,7 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot) lynx_pcs_destroy(pcs); } mdiobus_unregister(felix->imdio); + mdiobus_free(felix->imdio); } static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c index 661745932a5392004231fcd06929ac947223a483..c33bdcf7efc587e4d5d20c45801e9b0c21a511c8 100644 --- a/drivers/net/dsa/qca/ar9331.c +++ b/drivers/net/dsa/qca/ar9331.c @@ -289,7 +289,7 @@ static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv) if (!mnp) return -ENODEV; - ret = of_mdiobus_register(mbus, mnp); + ret = devm_of_mdiobus_register(dev, mbus, mnp); of_node_put(mnp); if (ret) return ret; @@ -856,7 +856,6 @@ static void ar9331_sw_remove(struct mdio_device *mdiodev) struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev); irq_domain_remove(priv->irqdomain); - mdiobus_unregister(priv->mbus); dsa_unregister_switch(&priv->ds); reset_control_assert(priv->sw_reset); diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c index cfe56960f44b918aaf28b7e32e4b273119f76a5d..12d7e5cd319741f4690576fa4e2c173a09ba02c1 100644 --- a/drivers/net/dsa/rtl8366rb.c +++ b/drivers/net/dsa/rtl8366rb.c @@ -1343,7 +1343,7 @@ static int rtl8366rb_set_mc_index(struct realtek_smi *smi, int port, int index) static bool rtl8366rb_is_vlan_valid(struct realtek_smi *smi, unsigned int vlan) { - unsigned int max = RTL8366RB_NUM_VLANS; + unsigned int max = RTL8366RB_NUM_VLANS - 1; if (smi->vlan4k_enabled) max = RTL8366RB_NUM_VIDS - 1; diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index de50e8b9e65628105605d3fc39ba12c9dc848b94..fad9a2c77fa7ccc226975b5d1b1d53bc4d6b38e9 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -99,6 +99,7 @@ config JME config KORINA tristate "Korina (IDT RC32434) Ethernet support" depends on MIKROTIK_RB532 + select CRC32 help If you have a Mikrotik RouterBoard 500 or IDT RC32434 based system say Y. Otherwise say N. diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c index 907125abef2c10a0f1b6e5095a0dbbcb46429acc..a7d8d45e0e94190af5bb71ba2b7b54e38e1a6f23 100644 --- a/drivers/net/ethernet/altera/altera_tse_main.c +++ b/drivers/net/ethernet/altera/altera_tse_main.c @@ -1431,16 +1431,19 @@ static int altera_tse_probe(struct platform_device *pdev) priv->rxdescmem_busaddr = dma_res->start; } else { + ret = -ENODEV; goto err_free_netdev; } - if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask))) + if (!dma_set_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask))) { dma_set_coherent_mask(priv->device, DMA_BIT_MASK(priv->dmaops->dmamask)); - else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32))) + } else if (!dma_set_mask(priv->device, DMA_BIT_MASK(32))) { dma_set_coherent_mask(priv->device, DMA_BIT_MASK(32)); - else + } else { + ret = -EIO; goto err_free_netdev; + } /* MAC address space */ ret = request_and_map(pdev, "control_port", &control_port, diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index df1884d57d1a0ca92451911f286008e29f4d729d..52414ac2c901abc5482c311261500674efdbbbb2 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -1199,26 +1199,22 @@ static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) { - struct ena_tx_buffer *tx_info = NULL; + struct ena_tx_buffer *tx_info; - if (likely(req_id < tx_ring->ring_size)) { - tx_info = &tx_ring->tx_buffer_info[req_id]; - if (likely(tx_info->skb)) - return 0; - } + tx_info = &tx_ring->tx_buffer_info[req_id]; + if (likely(tx_info->skb)) + return 0; return handle_invalid_req_id(tx_ring, req_id, tx_info, false); } static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) { - struct ena_tx_buffer *tx_info = NULL; + struct ena_tx_buffer *tx_info; - if (likely(req_id < xdp_ring->ring_size)) { - tx_info = &xdp_ring->tx_buffer_info[req_id]; - if (likely(tx_info->xdpf)) - return 0; - } + tx_info = &xdp_ring->tx_buffer_info[req_id]; + if (likely(tx_info->xdpf)) + return 0; return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); } @@ -1243,9 +1239,14 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, &req_id); - if (rc) + if (rc) { + if (unlikely(rc == -EINVAL)) + handle_invalid_req_id(tx_ring, req_id, NULL, + false); break; + } + /* validate that the request id points to a valid skb */ rc = validate_tx_req_id(tx_ring, req_id); if (rc) break; @@ -1801,9 +1802,14 @@ static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, &req_id); - if (rc) + if (rc) { + if (unlikely(rc == -EINVAL)) + handle_invalid_req_id(xdp_ring, req_id, NULL, + true); break; + } + /* validate that the request id points to a valid xdp_frame */ rc = validate_xdp_req_id(xdp_ring, req_id); if (rc) break; @@ -3921,10 +3927,6 @@ static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev, max_num_io_queues = min_t(u32, max_num_io_queues, io_tx_cq_num); /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ max_num_io_queues = min_t(u32, max_num_io_queues, pci_msix_vec_count(pdev) - 1); - if (unlikely(!max_num_io_queues)) { - dev_err(&pdev->dev, "The device doesn't have io queues\n"); - return -EFAULT; - } return max_num_io_queues; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index b2cd3bdba9f89ba53b24f62e79f58baca05eded5..533b8519ec352842f7293c5b4a6e606505ff0907 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1331,6 +1331,10 @@ #define MDIO_VEND2_PMA_CDR_CONTROL 0x8056 #endif +#ifndef MDIO_VEND2_PMA_MISC_CTRL0 +#define MDIO_VEND2_PMA_MISC_CTRL0 0x8090 +#endif + #ifndef MDIO_CTRL1_SPEED1G #define MDIO_CTRL1_SPEED1G (MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100) #endif @@ -1389,6 +1393,10 @@ #define XGBE_PMA_RX_RST_0_RESET_ON 0x10 #define XGBE_PMA_RX_RST_0_RESET_OFF 0x00 +#define XGBE_PMA_PLL_CTRL_MASK BIT(15) +#define XGBE_PMA_PLL_CTRL_ENABLE BIT(15) +#define XGBE_PMA_PLL_CTRL_DISABLE 0x0000 + /* Bit setting and getting macros * The get macro will extract the current bit field value from within * the variable diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 395eb0b52680211eac42a3baea57e3e21091b539..a816b30bca04c7641bc3142df5a814b02ea27b3b 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -721,7 +721,9 @@ static void xgbe_stop_timers(struct xgbe_prv_data *pdata) if (!channel->tx_ring) break; + /* Deactivate the Tx timer */ del_timer_sync(&channel->tx_timer); + channel->tx_timer_active = 0; } } @@ -2557,6 +2559,14 @@ read_again: buf2_len = xgbe_rx_buf2_len(rdata, packet, len); len += buf2_len; + if (buf2_len > rdata->rx.buf.dma_len) { + /* Hardware inconsistency within the descriptors + * that has resulted in a length underflow. + */ + error = 1; + goto skip_data; + } + if (!skb) { skb = xgbe_create_skb(pdata, napi, rdata, buf1_len); @@ -2586,8 +2596,10 @@ skip_data: if (!last || context_next) goto read_again; - if (!skb) + if (!skb || error) { + dev_kfree_skb(skb); goto next_packet; + } /* Be sure we don't exceed the configured MTU */ max_len = netdev->mtu + ETH_HLEN; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index 90cb55eb546658456f7ace869a079171d169d041..014513ce00a14b06149f5f82257359b11c2d34f5 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -418,6 +418,9 @@ static void xgbe_pci_remove(struct pci_dev *pdev) pci_free_irq_vectors(pdata->pcidev); + /* Disable all interrupts in the hardware */ + XP_IOWRITE(pdata, XP_INT_EN, 0x0); + xgbe_free_pdata(pdata); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 18e48b3bc402b50a02601c475e6d797d6e448ea2..213769054391c1f7236aad579ade7defcedb9a82 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -1977,12 +1977,26 @@ static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata) } } +static void xgbe_phy_pll_ctrl(struct xgbe_prv_data *pdata, bool enable) +{ + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_VEND2_PMA_MISC_CTRL0, + XGBE_PMA_PLL_CTRL_MASK, + enable ? XGBE_PMA_PLL_CTRL_ENABLE + : XGBE_PMA_PLL_CTRL_DISABLE); + + /* Wait for command to complete */ + usleep_range(100, 200); +} + static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, unsigned int cmd, unsigned int sub_cmd) { unsigned int s0 = 0; unsigned int wait; + /* Disable PLL re-initialization during FW command processing */ + xgbe_phy_pll_ctrl(pdata, false); + /* Log if a previous command did not complete */ if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) { netif_dbg(pdata, link, pdata->netdev, @@ -2003,7 +2017,7 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, wait = XGBE_RATECHANGE_COUNT; while (wait--) { if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) - return; + goto reenable_pll; usleep_range(1000, 2000); } @@ -2013,6 +2027,10 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, /* Reset on error */ xgbe_phy_rx_reset(pdata); + +reenable_pll: + /* Enable PLL re-initialization */ + xgbe_phy_pll_ctrl(pdata, true); } static void xgbe_phy_rrc(struct xgbe_prv_data *pdata) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h index 23b2d390fcdda8af72fe7e474fc5fef7194a9274..ace691d7cd759f52d38d116ce8cfe979089d7f25 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h @@ -40,10 +40,12 @@ #define AQ_DEVICE_ID_AQC113DEV 0x00C0 #define AQ_DEVICE_ID_AQC113CS 0x94C0 +#define AQ_DEVICE_ID_AQC113CA 0x34C0 #define AQ_DEVICE_ID_AQC114CS 0x93C0 #define AQ_DEVICE_ID_AQC113 0x04C0 #define AQ_DEVICE_ID_AQC113C 0x14C0 #define AQ_DEVICE_ID_AQC115C 0x12C0 +#define AQ_DEVICE_ID_AQC116C 0x11C0 #define HW_ATL_NIC_NAME "Marvell (aQuantia) AQtion 10Gbit Network Adapter" @@ -53,20 +55,19 @@ #define AQ_NIC_RATE_10G BIT(0) #define AQ_NIC_RATE_5G BIT(1) -#define AQ_NIC_RATE_5GSR BIT(2) -#define AQ_NIC_RATE_2G5 BIT(3) -#define AQ_NIC_RATE_1G BIT(4) -#define AQ_NIC_RATE_100M BIT(5) -#define AQ_NIC_RATE_10M BIT(6) -#define AQ_NIC_RATE_1G_HALF BIT(7) -#define AQ_NIC_RATE_100M_HALF BIT(8) -#define AQ_NIC_RATE_10M_HALF BIT(9) +#define AQ_NIC_RATE_2G5 BIT(2) +#define AQ_NIC_RATE_1G BIT(3) +#define AQ_NIC_RATE_100M BIT(4) +#define AQ_NIC_RATE_10M BIT(5) +#define AQ_NIC_RATE_1G_HALF BIT(6) +#define AQ_NIC_RATE_100M_HALF BIT(7) +#define AQ_NIC_RATE_10M_HALF BIT(8) -#define AQ_NIC_RATE_EEE_10G BIT(10) -#define AQ_NIC_RATE_EEE_5G BIT(11) -#define AQ_NIC_RATE_EEE_2G5 BIT(12) -#define AQ_NIC_RATE_EEE_1G BIT(13) -#define AQ_NIC_RATE_EEE_100M BIT(14) +#define AQ_NIC_RATE_EEE_10G BIT(9) +#define AQ_NIC_RATE_EEE_5G BIT(10) +#define AQ_NIC_RATE_EEE_2G5 BIT(11) +#define AQ_NIC_RATE_EEE_1G BIT(12) +#define AQ_NIC_RATE_EEE_100M BIT(13) #define AQ_NIC_RATE_EEE_MSK (AQ_NIC_RATE_EEE_10G |\ AQ_NIC_RATE_EEE_5G |\ AQ_NIC_RATE_EEE_2G5 |\ diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h index bed481816ea3136b14c792f5124a93f16e7ee5b3..7442850ca95f09ca48aaf741c410f3dfff29fea5 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h +++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h @@ -80,6 +80,8 @@ struct aq_hw_link_status_s { }; struct aq_stats_s { + u64 brc; + u64 btc; u64 uprc; u64 mprc; u64 bprc; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index bf5e0e9bd0e2b0c1bbb267537ea035632053862a..0cf8ae8aeac8391f9c13011507dd35c526b20713 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -903,8 +903,14 @@ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data) data[++i] = stats->mbtc; data[++i] = stats->bbrc; data[++i] = stats->bbtc; - data[++i] = stats->ubrc + stats->mbrc + stats->bbrc; - data[++i] = stats->ubtc + stats->mbtc + stats->bbtc; + if (stats->brc) + data[++i] = stats->brc; + else + data[++i] = stats->ubrc + stats->mbrc + stats->bbrc; + if (stats->btc) + data[++i] = stats->btc; + else + data[++i] = stats->ubtc + stats->mbtc + stats->bbtc; data[++i] = stats->dma_pkt_rc; data[++i] = stats->dma_pkt_tc; data[++i] = stats->dma_oct_rc; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 5b996330f228b2f8cde2107f21b71c100ceda7b1..1826253f97dc40b8b9727730c397112317207bcf 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -49,6 +49,8 @@ static const struct pci_device_id aq_pci_tbl[] = { { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113), }, { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113C), }, { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC115C), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113CA), }, + { PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC116C), }, {} }; @@ -85,7 +87,10 @@ static const struct aq_board_revision_s hw_atl_boards[] = { { AQ_DEVICE_ID_AQC113CS, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, { AQ_DEVICE_ID_AQC114CS, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, { AQ_DEVICE_ID_AQC113C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, - { AQ_DEVICE_ID_AQC115C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, + { AQ_DEVICE_ID_AQC115C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc115c, }, + { AQ_DEVICE_ID_AQC113CA, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc113, }, + { AQ_DEVICE_ID_AQC116C, AQ_HWREV_ANY, &hw_atl2_ops, &hw_atl2_caps_aqc116c, }, + }; MODULE_DEVICE_TABLE(pci, aq_pci_tbl); diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 24122ccda614cc46fab36f59b208977fbc8b62b7..72f8751784c31eb153777f9ecb28ccd52ebd1197 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -365,6 +365,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, if (!buff->is_eop) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; is_rsc_completed = @@ -388,6 +392,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self, (buff->is_lro && buff->is_cso_err)) { buff_ = buff; do { + if (buff_->next >= self->size) { + err = -EIO; + goto err_exit; + } next_ = buff_->next, buff_ = &self->buff_ring[next_]; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c index d281322d7dd29074185e2645aa0a17bb344ef404..f4774cf051c9780cfc434450673bb82d175e2736 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c @@ -362,9 +362,6 @@ unsigned int aq_vec_get_sw_stats(struct aq_vec_s *self, const unsigned int tc, u { unsigned int count; - WARN_ONCE(!aq_vec_is_valid_tc(self, tc), - "Invalid tc %u (#rx=%u, #tx=%u)\n", - tc, self->rx_rings, self->tx_rings); if (!aq_vec_is_valid_tc(self, tc)) return 0; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c index 404cbf60d3f2ffecfc59ffb3308fcabfa2e2949c..65b9e5846be45699507f6649b753499e8c26314f 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c @@ -559,6 +559,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, goto err_exit; if (fw.len == 0xFFFFU) { + if (sw.len > sizeof(self->rpc)) { + printk(KERN_INFO "Invalid sw len: %x\n", sw.len); + err = -EINVAL; + goto err_exit; + } err = hw_atl_utils_fw_rpc_call(self, sw.len); if (err < 0) goto err_exit; @@ -567,6 +572,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self, if (rpc) { if (fw.len) { + if (fw.len > sizeof(self->rpc)) { + printk(KERN_INFO "Invalid fw len: %x\n", fw.len); + err = -EINVAL; + goto err_exit; + } err = hw_atl_utils_fw_downld_dwords(self, self->rpc_addr, @@ -857,12 +867,20 @@ static int hw_atl_fw1x_deinit(struct aq_hw_s *self) int hw_atl_utils_update_stats(struct aq_hw_s *self) { struct aq_stats_s *cs = &self->curr_stats; + struct aq_stats_s curr_stats = *cs; struct hw_atl_utils_mbox mbox; + bool corrupted_stats = false; hw_atl_utils_mpi_read_stats(self, &mbox); -#define AQ_SDELTA(_N_) (self->curr_stats._N_ += \ - mbox.stats._N_ - self->last_stats._N_) +#define AQ_SDELTA(_N_) \ +do { \ + if (!corrupted_stats && \ + ((s64)(mbox.stats._N_ - self->last_stats._N_)) >= 0) \ + curr_stats._N_ += mbox.stats._N_ - self->last_stats._N_; \ + else \ + corrupted_stats = true; \ +} while (0) if (self->aq_link_status.mbps) { AQ_SDELTA(uprc); @@ -882,6 +900,9 @@ int hw_atl_utils_update_stats(struct aq_hw_s *self) AQ_SDELTA(bbrc); AQ_SDELTA(bbtc); AQ_SDELTA(dpc); + + if (!corrupted_stats) + *cs = curr_stats; } #undef AQ_SDELTA diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c index ee0c22d0493540d4fa1015908c21659e872dd436..05086f0040fd9afcf4286ff377a596dfc21ac7db 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c @@ -132,9 +132,6 @@ static enum hw_atl_fw2x_rate link_speed_mask_2fw2x_ratemask(u32 speed) if (speed & AQ_NIC_RATE_5G) rate |= FW2X_RATE_5G; - if (speed & AQ_NIC_RATE_5GSR) - rate |= FW2X_RATE_5G; - if (speed & AQ_NIC_RATE_2G5) rate |= FW2X_RATE_2G5; diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c index 92f64048bf691e7014d009f531eaafb339efe5f9..c76ccdc77ba602dc6943c810a4ee1986d88896f1 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c @@ -65,11 +65,25 @@ const struct aq_hw_caps_s hw_atl2_caps_aqc113 = { AQ_NIC_RATE_5G | AQ_NIC_RATE_2G5 | AQ_NIC_RATE_1G | - AQ_NIC_RATE_1G_HALF | AQ_NIC_RATE_100M | - AQ_NIC_RATE_100M_HALF | - AQ_NIC_RATE_10M | - AQ_NIC_RATE_10M_HALF, + AQ_NIC_RATE_10M, +}; + +const struct aq_hw_caps_s hw_atl2_caps_aqc115c = { + DEFAULT_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = AQ_NIC_RATE_2G5 | + AQ_NIC_RATE_1G | + AQ_NIC_RATE_100M | + AQ_NIC_RATE_10M, +}; + +const struct aq_hw_caps_s hw_atl2_caps_aqc116c = { + DEFAULT_BOARD_BASIC_CAPABILITIES, + .media_type = AQ_HW_MEDIA_TYPE_TP, + .link_speed_msk = AQ_NIC_RATE_1G | + AQ_NIC_RATE_100M | + AQ_NIC_RATE_10M, }; static u32 hw_atl2_sem_act_rslvr_get(struct aq_hw_s *self) diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h index de8723f1c28a13f0e98214f9011ac771028f2eff..346f0dc9912e500014d253135ab431f67be8eb3c 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h @@ -9,6 +9,8 @@ #include "aq_common.h" extern const struct aq_hw_caps_s hw_atl2_caps_aqc113; +extern const struct aq_hw_caps_s hw_atl2_caps_aqc115c; +extern const struct aq_hw_caps_s hw_atl2_caps_aqc116c; extern const struct aq_hw_ops hw_atl2_ops; #endif /* HW_ATL2_H */ diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h index b66fa346581ce30b94627f810cf013d02f63323c..6bad64c77b87c94a258b4bee5391ffa66a52c031 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h @@ -239,7 +239,8 @@ struct version_s { u8 minor; u16 build; } phy; - u32 rsvd; + u32 drv_iface_ver:4; + u32 rsvd:28; }; struct link_status_s { @@ -424,7 +425,7 @@ struct cable_diag_status_s { u16 rsvd2; }; -struct statistics_s { +struct statistics_a0_s { struct { u32 link_up; u32 link_down; @@ -457,6 +458,33 @@ struct statistics_s { u32 reserve_fw_gap; }; +struct __packed statistics_b0_s { + u64 rx_good_octets; + u64 rx_pause_frames; + u64 rx_good_frames; + u64 rx_errors; + u64 rx_unicast_frames; + u64 rx_multicast_frames; + u64 rx_broadcast_frames; + + u64 tx_good_octets; + u64 tx_pause_frames; + u64 tx_good_frames; + u64 tx_errors; + u64 tx_unicast_frames; + u64 tx_multicast_frames; + u64 tx_broadcast_frames; + + u32 main_loop_cycles; +}; + +struct __packed statistics_s { + union __packed { + struct statistics_a0_s a0; + struct statistics_b0_s b0; + }; +}; + struct filter_caps_s { u8 l2_filters_base_index:6; u8 flexible_filter_mask:2; @@ -545,7 +573,7 @@ struct management_status_s { u32 rsvd5; }; -struct fw_interface_out { +struct __packed fw_interface_out { struct transaction_counter_s transaction_id; struct version_s version; struct link_status_s link_status; @@ -569,7 +597,6 @@ struct fw_interface_out { struct core_dump_s core_dump; u32 rsvd11; struct statistics_s stats; - u32 rsvd12; struct filter_caps_s filter_caps; struct device_caps_s device_caps; u32 rsvd13; @@ -592,6 +619,9 @@ struct fw_interface_out { #define AQ_HOST_MODE_LOW_POWER 3U #define AQ_HOST_MODE_SHUTDOWN 4U +#define AQ_A2_FW_INTERFACE_A0 0 +#define AQ_A2_FW_INTERFACE_B0 1 + int hw_atl2_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops); int hw_atl2_utils_soft_reset(struct aq_hw_s *self); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c index dd259c8f2f4f397adbed0b5ea284bd8881a48735..58d426dda3edbf13b4e852b3fab14e720d55d3b2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c @@ -84,7 +84,7 @@ static int hw_atl2_shared_buffer_read_block(struct aq_hw_s *self, if (cnt > AQ_A2_FW_READ_TRY_MAX) return -ETIME; if (tid1.transaction_cnt_a != tid1.transaction_cnt_b) - udelay(1); + mdelay(1); } while (tid1.transaction_cnt_a != tid1.transaction_cnt_b); hw_atl2_mif_shared_buf_read(self, offset, (u32 *)data, dwords); @@ -154,7 +154,7 @@ static void a2_link_speed_mask2fw(u32 speed, { link_options->rate_10G = !!(speed & AQ_NIC_RATE_10G); link_options->rate_5G = !!(speed & AQ_NIC_RATE_5G); - link_options->rate_N5G = !!(speed & AQ_NIC_RATE_5GSR); + link_options->rate_N5G = link_options->rate_5G; link_options->rate_2P5G = !!(speed & AQ_NIC_RATE_2G5); link_options->rate_N2P5G = link_options->rate_2P5G; link_options->rate_1G = !!(speed & AQ_NIC_RATE_1G); @@ -192,8 +192,6 @@ static u32 a2_fw_lkp_to_mask(struct lkp_link_caps_s *lkp_link_caps) rate |= AQ_NIC_RATE_10G; if (lkp_link_caps->rate_5G) rate |= AQ_NIC_RATE_5G; - if (lkp_link_caps->rate_N5G) - rate |= AQ_NIC_RATE_5GSR; if (lkp_link_caps->rate_2P5G) rate |= AQ_NIC_RATE_2G5; if (lkp_link_caps->rate_1G) @@ -335,15 +333,22 @@ static int aq_a2_fw_get_mac_permanent(struct aq_hw_s *self, u8 *mac) return 0; } -static int aq_a2_fw_update_stats(struct aq_hw_s *self) +static void aq_a2_fill_a0_stats(struct aq_hw_s *self, + struct statistics_s *stats) { struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; - struct statistics_s stats; - - hw_atl2_shared_buffer_read_safe(self, stats, &stats); - -#define AQ_SDELTA(_N_, _F_) (self->curr_stats._N_ += \ - stats.msm._F_ - priv->last_stats.msm._F_) + struct aq_stats_s *cs = &self->curr_stats; + struct aq_stats_s curr_stats = *cs; + bool corrupted_stats = false; + +#define AQ_SDELTA(_N, _F) \ +do { \ + if (!corrupted_stats && \ + ((s64)(stats->a0.msm._F - priv->last_stats.a0.msm._F)) >= 0) \ + curr_stats._N += stats->a0.msm._F - priv->last_stats.a0.msm._F;\ + else \ + corrupted_stats = true; \ +} while (0) if (self->aq_link_status.mbps) { AQ_SDELTA(uprc, rx_unicast_frames); @@ -362,17 +367,76 @@ static int aq_a2_fw_update_stats(struct aq_hw_s *self) AQ_SDELTA(mbtc, tx_multicast_octets); AQ_SDELTA(bbrc, rx_broadcast_octets); AQ_SDELTA(bbtc, tx_broadcast_octets); + + if (!corrupted_stats) + *cs = curr_stats; } #undef AQ_SDELTA - self->curr_stats.dma_pkt_rc = - hw_atl_stats_rx_dma_good_pkt_counter_get(self); - self->curr_stats.dma_pkt_tc = - hw_atl_stats_tx_dma_good_pkt_counter_get(self); - self->curr_stats.dma_oct_rc = - hw_atl_stats_rx_dma_good_octet_counter_get(self); - self->curr_stats.dma_oct_tc = - hw_atl_stats_tx_dma_good_octet_counter_get(self); - self->curr_stats.dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self); + +} + +static void aq_a2_fill_b0_stats(struct aq_hw_s *self, + struct statistics_s *stats) +{ + struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; + struct aq_stats_s *cs = &self->curr_stats; + struct aq_stats_s curr_stats = *cs; + bool corrupted_stats = false; + +#define AQ_SDELTA(_N, _F) \ +do { \ + if (!corrupted_stats && \ + ((s64)(stats->b0._F - priv->last_stats.b0._F)) >= 0) \ + curr_stats._N += stats->b0._F - priv->last_stats.b0._F; \ + else \ + corrupted_stats = true; \ +} while (0) + + if (self->aq_link_status.mbps) { + AQ_SDELTA(uprc, rx_unicast_frames); + AQ_SDELTA(mprc, rx_multicast_frames); + AQ_SDELTA(bprc, rx_broadcast_frames); + AQ_SDELTA(erpr, rx_errors); + AQ_SDELTA(brc, rx_good_octets); + + AQ_SDELTA(uptc, tx_unicast_frames); + AQ_SDELTA(mptc, tx_multicast_frames); + AQ_SDELTA(bptc, tx_broadcast_frames); + AQ_SDELTA(erpt, tx_errors); + AQ_SDELTA(btc, tx_good_octets); + + if (!corrupted_stats) + *cs = curr_stats; + } +#undef AQ_SDELTA +} + +static int aq_a2_fw_update_stats(struct aq_hw_s *self) +{ + struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv; + struct aq_stats_s *cs = &self->curr_stats; + struct statistics_s stats; + struct version_s version; + int err; + + err = hw_atl2_shared_buffer_read_safe(self, version, &version); + if (err) + return err; + + err = hw_atl2_shared_buffer_read_safe(self, stats, &stats); + if (err) + return err; + + if (version.drv_iface_ver == AQ_A2_FW_INTERFACE_A0) + aq_a2_fill_a0_stats(self, &stats); + else + aq_a2_fill_b0_stats(self, &stats); + + cs->dma_pkt_rc = hw_atl_stats_rx_dma_good_pkt_counter_get(self); + cs->dma_pkt_tc = hw_atl_stats_tx_dma_good_pkt_counter_get(self); + cs->dma_oct_rc = hw_atl_stats_rx_dma_good_octet_counter_get(self); + cs->dma_oct_tc = hw_atl_stats_tx_dma_good_octet_counter_get(self); + cs->dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self); memcpy(&priv->last_stats, &stats, sizeof(stats)); @@ -499,9 +563,9 @@ u32 hw_atl2_utils_get_fw_version(struct aq_hw_s *self) hw_atl2_shared_buffer_read_safe(self, version, &version); /* A2 FW version is stored in reverse order */ - return version.mac.major << 24 | - version.mac.minor << 16 | - version.mac.build; + return version.bundle.major << 24 | + version.bundle.minor << 16 | + version.bundle.build; } int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self, diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig index 37a41773dd4350d96fded737601db7afef3ee521..92a79c4ffa2c7bc3313cf90cc2c8b8c513be47a2 100644 --- a/drivers/net/ethernet/arc/Kconfig +++ b/drivers/net/ethernet/arc/Kconfig @@ -21,6 +21,7 @@ config ARC_EMAC_CORE depends on ARC || ARCH_ROCKCHIP || COMPILE_TEST select MII select PHYLIB + select CRC32 config ARC_EMAC tristate "ARC EMAC support" diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index a60ce903058190e41dbb59b3ff4e1901c51b203e..c26c9b0c00d8fb7babd827ceb036bc10e8acb3c0 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -1904,15 +1904,12 @@ static int ag71xx_probe(struct platform_device *pdev) ag->mac_reset = devm_reset_control_get(&pdev->dev, "mac"); if (IS_ERR(ag->mac_reset)) { netif_err(ag, probe, ndev, "missing mac reset\n"); - err = PTR_ERR(ag->mac_reset); - goto err_free; + return PTR_ERR(ag->mac_reset); } ag->mac_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - if (!ag->mac_base) { - err = -ENOMEM; - goto err_free; - } + if (!ag->mac_base) + return -ENOMEM; ndev->irq = platform_get_irq(pdev, 0); err = devm_request_irq(&pdev->dev, ndev->irq, ag71xx_interrupt, @@ -1920,7 +1917,7 @@ static int ag71xx_probe(struct platform_device *pdev) if (err) { netif_err(ag, probe, ndev, "unable to request IRQ %d\n", ndev->irq); - goto err_free; + return err; } ndev->netdev_ops = &ag71xx_netdev_ops; @@ -1948,10 +1945,8 @@ static int ag71xx_probe(struct platform_device *pdev) ag->stop_desc = dmam_alloc_coherent(&pdev->dev, sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL); - if (!ag->stop_desc) { - err = -ENOMEM; - goto err_free; - } + if (!ag->stop_desc) + return -ENOMEM; ag->stop_desc->data = 0; ag->stop_desc->ctrl = 0; @@ -1968,7 +1963,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = of_get_phy_mode(np, &ag->phy_if_mode); if (err) { netif_err(ag, probe, ndev, "missing phy-mode property in DT\n"); - goto err_free; + return err; } netif_napi_add(ndev, &ag->napi, ag71xx_poll, AG71XX_NAPI_WEIGHT); @@ -1976,7 +1971,7 @@ static int ag71xx_probe(struct platform_device *pdev) err = clk_prepare_enable(ag->clk_eth); if (err) { netif_err(ag, probe, ndev, "Failed to enable eth clk.\n"); - goto err_free; + return err; } ag71xx_wr(ag, AG71XX_REG_MAC_CFG1, 0); @@ -2012,8 +2007,6 @@ err_mdio_remove: ag71xx_mdio_remove(ag); err_put_clk: clk_disable_unprepare(ag->clk_eth); -err_free: - free_netdev(ndev); return err; } diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 0404aafd5ce56ddc1c1ecac402d24dd2f9c51455..1a703b95208b0c505a7cf59d5ae6a2e609783af7 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -1304,11 +1304,11 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, struct bcm_sysport_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; struct bcm_sysport_tx_ring *ring; + unsigned long flags, desc_flags; struct bcm_sysport_cb *cb; struct netdev_queue *txq; u32 len_status, addr_lo; unsigned int skb_len; - unsigned long flags; dma_addr_t mapping; u16 queue; int ret; @@ -1368,8 +1368,10 @@ static netdev_tx_t bcm_sysport_xmit(struct sk_buff *skb, ring->desc_count--; /* Ports are latched, so write upper address first */ + spin_lock_irqsave(&priv->desc_lock, desc_flags); tdma_writel(priv, len_status, TDMA_WRITE_PORT_HI(ring->index)); tdma_writel(priv, addr_lo, TDMA_WRITE_PORT_LO(ring->index)); + spin_unlock_irqrestore(&priv->desc_lock, desc_flags); /* Check ring space and update SW control flow */ if (ring->desc_count == 0) @@ -2008,6 +2010,7 @@ static int bcm_sysport_open(struct net_device *dev) } /* Initialize both hardware and software ring */ + spin_lock_init(&priv->desc_lock); for (i = 0; i < dev->num_tx_queues; i++) { ret = bcm_sysport_init_tx_ring(priv, i); if (ret) { diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 3a5cb6f128f5727e8579fb9262b66e97fb611729..1276e330e9d03f03b9cf27c971158a49e3308352 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -742,6 +742,7 @@ struct bcm_sysport_priv { int wol_irq; /* Transmit rings */ + spinlock_t desc_lock; struct bcm_sysport_tx_ring *tx_rings; /* Receive queue */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index d04994840b87d5cf8f406bdc80556f77c9ebe501..bb3ba614fb17497d62566d41026997db23473a26 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -1850,6 +1850,14 @@ struct bnx2x { /* Vxlan/Geneve related information */ u16 udp_tunnel_ports[BNX2X_UDP_PORT_MAX]; + +#define FW_CAP_INVALIDATE_VF_FP_HSI BIT(0) + u32 fw_cap; + + u32 fw_major; + u32 fw_minor; + u32 fw_rev; + u32 fw_eng; }; /* Tx queues may be less or equal to Rx queues */ @@ -2526,5 +2534,6 @@ void bnx2x_register_phc(struct bnx2x *bp); * Meant for implicit re-load flows. */ int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp); - +int bnx2x_init_firmware(struct bnx2x *bp); +void bnx2x_release_firmware(struct bnx2x *bp); #endif /* bnx2x.h */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index b5d954cb409ae4de9e6e6d70dede933a2154bbfa..41ebbb2c7d3ac3c8d1aa172d9a2a874e6f51b8d7 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2364,10 +2364,8 @@ int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err) if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP && load_code != FW_MSG_CODE_DRV_LOAD_COMMON) { /* build my FW version dword */ - u32 my_fw = (BCM_5710_FW_MAJOR_VERSION) + - (BCM_5710_FW_MINOR_VERSION << 8) + - (BCM_5710_FW_REVISION_VERSION << 16) + - (BCM_5710_FW_ENGINEERING_VERSION << 24); + u32 my_fw = (bp->fw_major) + (bp->fw_minor << 8) + + (bp->fw_rev << 16) + (bp->fw_eng << 24); /* read loaded FW from chip */ u32 loaded_fw = REG_RD(bp, XSEM_REG_PRAM); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h index 3f8435208bf498171a48eb0b4336295b40aec549..a84d015da5dfa6005ce846349915038d5dcd8826 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_fw_defs.h @@ -241,6 +241,8 @@ IRO[221].m2)) #define XSTORM_VF_TO_PF_OFFSET(funcId) \ (IRO[48].base + ((funcId) * IRO[48].m1)) +#define XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(fid) \ + (IRO[386].base + ((fid) * IRO[386].m1)) #define COMMON_ASM_INVALID_ASSERT_OPCODE 0x0 /* eth hsi version */ diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h index 622fadc50316ee9c37af97a45e266ab7fb187b63..611efee758340bd9dc5338ce68409bab179bf650 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_hsi.h @@ -3024,7 +3024,8 @@ struct afex_stats { #define BCM_5710_FW_MAJOR_VERSION 7 #define BCM_5710_FW_MINOR_VERSION 13 -#define BCM_5710_FW_REVISION_VERSION 15 +#define BCM_5710_FW_REVISION_VERSION 21 +#define BCM_5710_FW_REVISION_VERSION_V15 15 #define BCM_5710_FW_ENGINEERING_VERSION 0 #define BCM_5710_FW_COMPILE_FLAGS 1 diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h index 1835d2e451c0139e272e400774b0d6d19487df98..fc7fce642666ceab237461a33bf1ee051eef50b2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_init_ops.h @@ -635,11 +635,13 @@ static int bnx2x_ilt_client_mem_op(struct bnx2x *bp, int cli_num, { int i, rc; struct bnx2x_ilt *ilt = BP_ILT(bp); - struct ilt_client_info *ilt_cli = &ilt->clients[cli_num]; + struct ilt_client_info *ilt_cli; if (!ilt || !ilt->lines) return -1; + ilt_cli = &ilt->clients[cli_num]; + if (ilt_cli->flags & (ILT_CLIENT_SKIP_INIT | ILT_CLIENT_SKIP_MEM)) return 0; diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index 28069b29086257e922ef0ebf78cb9f5084ec3759..7fa271db41b0763c719b7ea223c79e166e1d72eb 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -74,9 +74,19 @@ __stringify(BCM_5710_FW_MINOR_VERSION) "." \ __stringify(BCM_5710_FW_REVISION_VERSION) "." \ __stringify(BCM_5710_FW_ENGINEERING_VERSION) + +#define FW_FILE_VERSION_V15 \ + __stringify(BCM_5710_FW_MAJOR_VERSION) "." \ + __stringify(BCM_5710_FW_MINOR_VERSION) "." \ + __stringify(BCM_5710_FW_REVISION_VERSION_V15) "." \ + __stringify(BCM_5710_FW_ENGINEERING_VERSION) + #define FW_FILE_NAME_E1 "bnx2x/bnx2x-e1-" FW_FILE_VERSION ".fw" #define FW_FILE_NAME_E1H "bnx2x/bnx2x-e1h-" FW_FILE_VERSION ".fw" #define FW_FILE_NAME_E2 "bnx2x/bnx2x-e2-" FW_FILE_VERSION ".fw" +#define FW_FILE_NAME_E1_V15 "bnx2x/bnx2x-e1-" FW_FILE_VERSION_V15 ".fw" +#define FW_FILE_NAME_E1H_V15 "bnx2x/bnx2x-e1h-" FW_FILE_VERSION_V15 ".fw" +#define FW_FILE_NAME_E2_V15 "bnx2x/bnx2x-e2-" FW_FILE_VERSION_V15 ".fw" /* Time in jiffies before concluding the transmitter is hung */ #define TX_TIMEOUT (5*HZ) @@ -90,6 +100,9 @@ MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FW_FILE_NAME_E1); MODULE_FIRMWARE(FW_FILE_NAME_E1H); MODULE_FIRMWARE(FW_FILE_NAME_E2); +MODULE_FIRMWARE(FW_FILE_NAME_E1_V15); +MODULE_FIRMWARE(FW_FILE_NAME_E1H_V15); +MODULE_FIRMWARE(FW_FILE_NAME_E2_V15); int bnx2x_num_queues; module_param_named(num_queues, bnx2x_num_queues, int, 0444); @@ -747,9 +760,7 @@ static int bnx2x_mc_assert(struct bnx2x *bp) CHIP_IS_E1(bp) ? "everest1" : CHIP_IS_E1H(bp) ? "everest1h" : CHIP_IS_E2(bp) ? "everest2" : "everest3", - BCM_5710_FW_MAJOR_VERSION, - BCM_5710_FW_MINOR_VERSION, - BCM_5710_FW_REVISION_VERSION); + bp->fw_major, bp->fw_minor, bp->fw_rev); return rc; } @@ -12355,6 +12366,15 @@ static int bnx2x_init_bp(struct bnx2x *bp) bnx2x_read_fwinfo(bp); + if (IS_PF(bp)) { + rc = bnx2x_init_firmware(bp); + + if (rc) { + bnx2x_free_mem_bp(bp); + return rc; + } + } + func = BP_FUNC(bp); /* need to reset chip if undi was active */ @@ -12367,6 +12387,7 @@ static int bnx2x_init_bp(struct bnx2x *bp) rc = bnx2x_prev_unload(bp); if (rc) { + bnx2x_release_firmware(bp); bnx2x_free_mem_bp(bp); return rc; } @@ -13366,16 +13387,11 @@ static int bnx2x_check_firmware(struct bnx2x *bp) /* Check FW version */ offset = be32_to_cpu(fw_hdr->fw_version.offset); fw_ver = firmware->data + offset; - if ((fw_ver[0] != BCM_5710_FW_MAJOR_VERSION) || - (fw_ver[1] != BCM_5710_FW_MINOR_VERSION) || - (fw_ver[2] != BCM_5710_FW_REVISION_VERSION) || - (fw_ver[3] != BCM_5710_FW_ENGINEERING_VERSION)) { + if (fw_ver[0] != bp->fw_major || fw_ver[1] != bp->fw_minor || + fw_ver[2] != bp->fw_rev || fw_ver[3] != bp->fw_eng) { BNX2X_ERR("Bad FW version:%d.%d.%d.%d. Should be %d.%d.%d.%d\n", - fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3], - BCM_5710_FW_MAJOR_VERSION, - BCM_5710_FW_MINOR_VERSION, - BCM_5710_FW_REVISION_VERSION, - BCM_5710_FW_ENGINEERING_VERSION); + fw_ver[0], fw_ver[1], fw_ver[2], fw_ver[3], + bp->fw_major, bp->fw_minor, bp->fw_rev, bp->fw_eng); return -EINVAL; } @@ -13453,34 +13469,51 @@ do { \ (u8 *)bp->arr, len); \ } while (0) -static int bnx2x_init_firmware(struct bnx2x *bp) +int bnx2x_init_firmware(struct bnx2x *bp) { - const char *fw_file_name; + const char *fw_file_name, *fw_file_name_v15; struct bnx2x_fw_file_hdr *fw_hdr; int rc; if (bp->firmware) return 0; - if (CHIP_IS_E1(bp)) + if (CHIP_IS_E1(bp)) { fw_file_name = FW_FILE_NAME_E1; - else if (CHIP_IS_E1H(bp)) + fw_file_name_v15 = FW_FILE_NAME_E1_V15; + } else if (CHIP_IS_E1H(bp)) { fw_file_name = FW_FILE_NAME_E1H; - else if (!CHIP_IS_E1x(bp)) + fw_file_name_v15 = FW_FILE_NAME_E1H_V15; + } else if (!CHIP_IS_E1x(bp)) { fw_file_name = FW_FILE_NAME_E2; - else { + fw_file_name_v15 = FW_FILE_NAME_E2_V15; + } else { BNX2X_ERR("Unsupported chip revision\n"); return -EINVAL; } + BNX2X_DEV_INFO("Loading %s\n", fw_file_name); rc = request_firmware(&bp->firmware, fw_file_name, &bp->pdev->dev); if (rc) { - BNX2X_ERR("Can't load firmware file %s\n", - fw_file_name); - goto request_firmware_exit; + BNX2X_DEV_INFO("Trying to load older fw %s\n", fw_file_name_v15); + + /* try to load prev version */ + rc = request_firmware(&bp->firmware, fw_file_name_v15, &bp->pdev->dev); + + if (rc) + goto request_firmware_exit; + + bp->fw_rev = BCM_5710_FW_REVISION_VERSION_V15; + } else { + bp->fw_cap |= FW_CAP_INVALIDATE_VF_FP_HSI; + bp->fw_rev = BCM_5710_FW_REVISION_VERSION; } + bp->fw_major = BCM_5710_FW_MAJOR_VERSION; + bp->fw_minor = BCM_5710_FW_MINOR_VERSION; + bp->fw_eng = BCM_5710_FW_ENGINEERING_VERSION; + rc = bnx2x_check_firmware(bp); if (rc) { BNX2X_ERR("Corrupt firmware file %s\n", fw_file_name); @@ -13536,7 +13569,7 @@ request_firmware_exit: return rc; } -static void bnx2x_release_firmware(struct bnx2x *bp) +void bnx2x_release_firmware(struct bnx2x *bp) { kfree(bp->init_ops_offsets); kfree(bp->init_ops); @@ -14053,6 +14086,7 @@ static int bnx2x_init_one(struct pci_dev *pdev, return 0; init_one_freemem: + bnx2x_release_firmware(bp); bnx2x_free_mem_bp(bp); init_one_exit: diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 03eb0179ec0085eb07a2b82b1293664e5419279a..08437eaacbb966cf3909fd21d14ae2fe15e11a5e 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -758,9 +758,18 @@ static void bnx2x_vf_igu_reset(struct bnx2x *bp, struct bnx2x_virtf *vf) void bnx2x_vf_enable_access(struct bnx2x *bp, u8 abs_vfid) { + u16 abs_fid; + + abs_fid = FW_VF_HANDLE(abs_vfid); + /* set the VF-PF association in the FW */ - storm_memset_vf_to_pf(bp, FW_VF_HANDLE(abs_vfid), BP_FUNC(bp)); - storm_memset_func_en(bp, FW_VF_HANDLE(abs_vfid), 1); + storm_memset_vf_to_pf(bp, abs_fid, BP_FUNC(bp)); + storm_memset_func_en(bp, abs_fid, 1); + + /* Invalidate fp_hsi version for vfs */ + if (bp->fw_cap & FW_CAP_INVALIDATE_VF_FP_HSI) + REG_WR8(bp, BAR_XSTRORM_INTMEM + + XSTORM_ETH_FUNCTION_INFO_FP_HSI_VALID_E2_OFFSET(abs_fid), 0); /* clear vf errors*/ bnx2x_vf_semi_clear_err(bp, abs_vfid); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 6f9196ff2ac4f56b0c73303ca618b460160f4447..98087b278d1f4965227f37ad19157c8191fa2e9f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1926,6 +1926,9 @@ static int bnxt_get_fecparam(struct net_device *dev, case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_RS272_IEEE_ACTIVE: fec->active_fec |= ETHTOOL_FEC_LLRS; break; + case PORT_PHY_QCFG_RESP_ACTIVE_FEC_FEC_NONE_ACTIVE: + fec->active_fec |= ETHTOOL_FEC_OFF; + break; } return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index 2186706cf9130788cc2d6b8430c06df64bd8b6b1..3e9b1f59e381da735888ae7616bbee7208a3a95f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -1854,7 +1854,7 @@ static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type, struct flow_cls_offload *flower = type_data; struct bnxt *bp = priv->bp; - if (flower->common.chain_index) + if (!tc_cls_can_offload_and_chain0(bp->dev, type_data)) return -EOPNOTSUPP; switch (type) { diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index db74241935ab4154bcd2c264f37ea638f60ef978..e19cf020e5ae1af76baa3132b2cd3397a10cddb8 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -3962,10 +3962,12 @@ static int bcmgenet_probe(struct platform_device *pdev) /* Request the WOL interrupt and advertise suspend if available */ priv->wol_irq_disabled = true; - err = devm_request_irq(&pdev->dev, priv->wol_irq, bcmgenet_wol_isr, 0, - dev->name, priv); - if (!err) - device_set_wakeup_capable(&pdev->dev, 1); + if (priv->wol_irq > 0) { + err = devm_request_irq(&pdev->dev, priv->wol_irq, + bcmgenet_wol_isr, 0, dev->name, priv); + if (!err) + device_set_wakeup_capable(&pdev->dev, 1); + } /* Set the needed headroom to account for any possible * features enabling/disabling at runtime diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index e84ad587fb2141c72408f413724c432d84717786..2c2a56d5a0a1a87e41eec62dd5b1bbd1abbdec37 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -41,6 +41,13 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcmgenet_priv *priv = netdev_priv(dev); + struct device *kdev = &priv->pdev->dev; + + if (!device_can_wakeup(kdev)) { + wol->supported = 0; + wol->wolopts = 0; + return; + } wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; wol->wolopts = priv->wolopts; diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 1e8bf6b9834bba6cb0454faa38ff7c5d6db540de..f29ec765d684a653465bab4bbe9b1af6aca6842c 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -1448,7 +1448,14 @@ static int macb_poll(struct napi_struct *napi, int budget) if (work_done < budget) { napi_complete_done(napi, work_done); - /* Packets received while interrupts were disabled */ + /* RSR bits only seem to propagate to raise interrupts when + * interrupts are enabled at the time, so if bits are already + * set due to packets received while interrupts were disabled, + * they will not cause another interrupt to be generated when + * interrupts are re-enabled. + * Check for this case here. This has been seen to happen + * around 30% of the time under heavy network load. + */ status = macb_readl(bp, RSR); if (status) { if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) @@ -1456,6 +1463,22 @@ static int macb_poll(struct napi_struct *napi, int budget) napi_reschedule(napi); } else { queue_writel(queue, IER, bp->rx_intr_mask); + + /* In rare cases, packets could have been received in + * the window between the check above and re-enabling + * interrupts. Therefore, a double-check is required + * to avoid losing a wakeup. This can potentially race + * with the interrupt handler doing the same actions + * if an interrupt is raised just after enabling them, + * but this should be harmless. + */ + status = macb_readl(bp, RSR); + if (unlikely(status)) { + queue_writel(queue, IDR, bp->rx_intr_mask); + if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) + queue_writel(queue, ISR, MACB_BIT(RCOMP)); + napi_schedule(napi); + } } } @@ -4534,7 +4557,7 @@ static int macb_probe(struct platform_device *pdev) #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT if (GEM_BFEXT(DAW64, gem_readl(bp, DCFG6))) { - dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(44)); bp->hw_dma_cap |= HW_DMA_CAP_64B; } #endif diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 9361f964bb9b2cbce17fca17d2c757dc5d887056..816453a4f8d6c26d803e28067d788768371179b5 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -1193,7 +1193,7 @@ static int nic_register_interrupts(struct nicpf *nic) dev_err(&nic->pdev->dev, "Request for #%d msix vectors failed, returned %d\n", nic->num_vec, ret); - return 1; + return ret; } /* Register mailbox interrupt handler */ diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index f3b7b443f9648ef3279b6a39010b0337962203d2..c00f1a7ffc15f63ade18e1eec159f805a9e08e07 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1226,7 +1226,7 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic) if (ret < 0) { netdev_err(nic->netdev, "Req for #%d msix vectors failed\n", nic->num_vec); - return 1; + return ret; } sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); @@ -1245,7 +1245,7 @@ static int nicvf_register_misc_interrupt(struct nicvf *nic) if (!nicvf_check_pf_ready(nic)) { nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); nicvf_unregister_interrupts(nic); - return 1; + return -EIO; } return 0; diff --git a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c index 7ff31d1026fb27a4edc8af073b36927c675c8fe7..e0d34e64fc6cb72d396a6505c5c4028b2cf81a19 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb3/t3_hw.c @@ -3678,6 +3678,8 @@ int t3_prep_adapter(struct adapter *adapter, const struct adapter_info *ai, MAC_STATS_ACCUM_SECS : (MAC_STATS_ACCUM_SECS * 10); adapter->params.pci.vpd_cap_addr = pci_find_capability(adapter->pdev, PCI_CAP_ID_VPD); + if (!adapter->params.pci.vpd_cap_addr) + return -ENODEV; ret = get_vpd_params(adapter, &adapter->params.vpd); if (ret < 0) return ret; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 83ed10ac86606c65a47584d1267fc237fc6a58c2..7080cb6c83e4accdec583edc209b615683362029 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -2011,12 +2011,15 @@ static int cxgb4_get_module_info(struct net_device *dev, if (ret) return ret; - if (!sff8472_comp || (sff_diag_type & 4)) { + if (!sff8472_comp || (sff_diag_type & SFP_DIAG_ADDRMODE)) { modinfo->type = ETH_MODULE_SFF_8079; modinfo->eeprom_len = ETH_MODULE_SFF_8079_LEN; } else { modinfo->type = ETH_MODULE_SFF_8472; - modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + if (sff_diag_type & SFP_DIAG_IMPLEMENTED) + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN; + else + modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN / 2; } break; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h index 002fc62ea7262cc102942c1dff933934b889b9b1..63bc956d20376baf860ce0d476af1edae80f16ab 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.h @@ -293,6 +293,8 @@ enum { #define I2C_PAGE_SIZE 0x100 #define SFP_DIAG_TYPE_ADDR 0x5c #define SFP_DIAG_TYPE_LEN 0x1 +#define SFP_DIAG_ADDRMODE BIT(2) +#define SFP_DIAG_IMPLEMENTED BIT(6) #define SFF_8472_COMP_ADDR 0x5e #define SFF_8472_COMP_LEN 0x1 #define SFF_REV_ADDR 0x1 diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index a262c949ed76bed477965d54d900665adf590a6f..d6b6ebb3f1ec744e2edaa28e7965a4b516abd278 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -870,7 +870,7 @@ static void do_abort_syn_rcv(struct sock *child, struct sock *parent) * created only after 3 way handshake is done. */ sock_orphan(child); - percpu_counter_inc((child)->sk_prot->orphan_count); + INC_ORPHAN_COUNT(child); chtls_release_resources(child); chtls_conn_done(child); } else { diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h index b1161bdeda4dc7ef349f353be316a964a54330bd..f61ca657601caeb3c0e5533e4fcf5819988cc3f7 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h @@ -95,7 +95,7 @@ struct deferred_skb_cb { #define WSCALE_OK(tp) ((tp)->rx_opt.wscale_ok) #define TSTAMP_OK(tp) ((tp)->rx_opt.tstamp_ok) #define SACK_OK(tp) ((tp)->rx_opt.sack_ok) -#define INC_ORPHAN_COUNT(sk) percpu_counter_inc((sk)->sk_prot->orphan_count) +#define INC_ORPHAN_COUNT(sk) this_cpu_inc(*(sk)->sk_prot->orphan_count) /* TLS SKB */ #define skb_ulp_tls_inline(skb) (ULP_SKB_CB(skb)->ulp.tls.ofld) diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c index d04a6c1634452034217e9dc4ab8771bf02899b86..da8d10475a08e6e23c466b6f62637f7c840e86bd 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -99,7 +100,7 @@ cxgb_find_route(struct cxgb4_lld_info *lldi, rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, peer_port, local_port, IPPROTO_TCP, - tos, 0); + tos & ~INET_ECN_MASK, 0); if (IS_ERR(rt)) return NULL; n = dst_neigh_lookup(&rt->dst, &peer_ip); diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c index 8df6f081f2447fbb3cdb8ddd88c14db01bb3c9bb..d11fcfd927c0b5633928e5c5f1babe352253a244 100644 --- a/drivers/net/ethernet/cortina/gemini.c +++ b/drivers/net/ethernet/cortina/gemini.c @@ -305,21 +305,21 @@ static void gmac_speed_set(struct net_device *netdev) switch (phydev->speed) { case 1000: status.bits.speed = GMAC_SPEED_1000; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII) + if (phy_interface_mode_is_rgmii(phydev->interface)) status.bits.mii_rmii = GMAC_PHY_RGMII_1000; netdev_dbg(netdev, "connect %s to RGMII @ 1Gbit\n", phydev_name(phydev)); break; case 100: status.bits.speed = GMAC_SPEED_100; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII) + if (phy_interface_mode_is_rgmii(phydev->interface)) status.bits.mii_rmii = GMAC_PHY_RGMII_100_10; netdev_dbg(netdev, "connect %s to RGMII @ 100 Mbit\n", phydev_name(phydev)); break; case 10: status.bits.speed = GMAC_SPEED_10; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII) + if (phy_interface_mode_is_rgmii(phydev->interface)) status.bits.mii_rmii = GMAC_PHY_RGMII_100_10; netdev_dbg(netdev, "connect %s to RGMII @ 10 Mbit\n", phydev_name(phydev)); @@ -389,6 +389,9 @@ static int gmac_setup_phy(struct net_device *netdev) status.bits.mii_rmii = GMAC_PHY_GMII; break; case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_TXID: + case PHY_INTERFACE_MODE_RGMII_RXID: netdev_dbg(netdev, "RGMII: set GMAC0 and GMAC1 to MII/RGMII mode\n"); status.bits.mii_rmii = GMAC_PHY_RGMII_100_10; diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index 683e328b5461d839584c4d6e5d84dd507b0415ab..8edd394bc3358396f6786bfb20a7ef87a143aad3 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -4706,6 +4706,10 @@ type3_infoblock(struct net_device *dev, u_char count, u_char *p) lp->ibn = 3; lp->active = *p++; if (MOTO_SROM_BUG) lp->active = 0; + /* if (MOTO_SROM_BUG) statement indicates lp->active could + * be 8 (i.e. the size of array lp->phy) */ + if (WARN_ON(lp->active >= ARRAY_SIZE(lp->phy))) + return -EINVAL; lp->phy[lp->active].gep = (*p ? p : NULL); p += (2 * (*p) + 1); lp->phy[lp->active].rst = (*p ? p : NULL); p += (2 * (*p) + 1); lp->phy[lp->active].mc = get_unaligned_le16(p); p += 2; @@ -4997,19 +5001,23 @@ mii_get_phy(struct net_device *dev) } if ((j == limit) && (i < DE4X5_MAX_MII)) { for (k=0; k < DE4X5_MAX_PHY && lp->phy[k].id; k++); - lp->phy[k].addr = i; - lp->phy[k].id = id; - lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ - lp->phy[k].spd.mask = GENERIC_MASK; /* 100Mb/s technologies */ - lp->phy[k].spd.value = GENERIC_VALUE; /* TX & T4, H/F Duplex */ - lp->mii_cnt++; - lp->active++; - printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name); - j = de4x5_debug; - de4x5_debug |= DEBUG_MII; - de4x5_dbg_mii(dev, k); - de4x5_debug = j; - printk("\n"); + if (k < DE4X5_MAX_PHY) { + lp->phy[k].addr = i; + lp->phy[k].id = id; + lp->phy[k].spd.reg = GENERIC_REG; /* ANLPA register */ + lp->phy[k].spd.mask = GENERIC_MASK; /* 100Mb/s technologies */ + lp->phy[k].spd.value = GENERIC_VALUE; /* TX & T4, H/F Duplex */ + lp->mii_cnt++; + lp->active++; + printk("%s: Using generic MII device control. If the board doesn't operate,\nplease mail the following dump to the author:\n", dev->name); + j = de4x5_debug; + de4x5_debug |= DEBUG_MII; + de4x5_dbg_mii(dev, k); + de4x5_debug = j; + printk("\n"); + } else { + goto purgatory; + } } } purgatory: diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index f91c67489e62978ee026639c25bb313780d155d5..d89ddc165ec24f757aca4be47e8bfcfdd2803b57 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4225,7 +4225,7 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) } INIT_WORK(&priv->tx_onestep_tstamp, dpaa2_eth_tx_onestep_tstamp); - + mutex_init(&priv->onestep_tstamp_lock); skb_queue_head_init(&priv->tx_skbs); /* Obtain a MC portal */ @@ -4405,12 +4405,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) #ifdef CONFIG_DEBUG_FS dpaa2_dbg_remove(priv); #endif + + unregister_netdev(net_dev); rtnl_lock(); dpaa2_eth_disconnect_mac(priv); rtnl_unlock(); - unregister_netdev(net_dev); - dpaa2_eth_dl_port_del(priv); dpaa2_eth_dl_traps_unregister(priv); dpaa2_eth_dl_unregister(priv); @@ -4432,10 +4432,12 @@ static int dpaa2_eth_remove(struct fsl_mc_device *ls_dev) fsl_mc_portal_free(priv->mc_io); - free_netdev(net_dev); + destroy_workqueue(priv->dpaa2_ptp_wq); dev_dbg(net_dev->dev.parent, "Removed interface %s\n", net_dev->name); + free_netdev(net_dev); + return 0; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 89e558135432b39a39efaddf532ef20314b745e8..9c1690f64a027b8de3c2e3344de0c53d3b758624 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -157,7 +157,7 @@ static const struct { { ENETC_PM0_TFRM, "MAC tx frames" }, { ENETC_PM0_TFCS, "MAC tx fcs errors" }, { ENETC_PM0_TVLAN, "MAC tx VLAN frames" }, - { ENETC_PM0_TERR, "MAC tx frames" }, + { ENETC_PM0_TERR, "MAC tx frame errors" }, { ENETC_PM0_TUCA, "MAC tx unicast frames" }, { ENETC_PM0_TMCA, "MAC tx multicast frames" }, { ENETC_PM0_TBCA, "MAC tx broadcast frames" }, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 68133563a40c129c7e07ba7c3d4ff418698bfd4f..716b396bf09470be9bf5f8a59cc2942284a75a57 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -504,8 +504,7 @@ static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode) if (phy_interface_mode_is_rgmii(phy_mode)) { val = enetc_port_rd(hw, ENETC_PM0_IF_MODE); - val &= ~ENETC_PM0_IFM_EN_AUTO; - val &= ENETC_PM0_IFM_IFMODE_MASK; + val &= ~(ENETC_PM0_IFM_EN_AUTO | ENETC_PM0_IFM_IFMODE_MASK); val |= ENETC_PM0_IFM_IFMODE_GMII | ENETC_PM0_IFM_RG; enetc_port_wr(hw, ENETC_PM0_IF_MODE, val); } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index dbceb99c4441adbccafba6a6756b63c6b4c4aeb9..9e6988fd3787a691ab20395f632cc896d9e5bbd3 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -486,14 +486,16 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, data_size = sizeof(struct streamid_data); si_data = kzalloc(data_size, __GFP_DMA | GFP_KERNEL); + if (!si_data) + return -ENOMEM; cbd.length = cpu_to_le16(data_size); dma = dma_map_single(&priv->si->pdev->dev, si_data, data_size, DMA_FROM_DEVICE); if (dma_mapping_error(&priv->si->pdev->dev, dma)) { netdev_err(priv->si->ndev, "DMA mapping failed!\n"); - kfree(si_data); - return -ENOMEM; + err = -ENOMEM; + goto out; } cbd.addr[0] = lower_32_bits(dma); @@ -513,12 +515,10 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, err = enetc_send_cmd(priv->si, &cbd); if (err) - return -EINVAL; + goto out; - if (!enable) { - kfree(si_data); - return 0; - } + if (!enable) + goto out; /* Enable the entry overwrite again incase space flushed by hardware */ memset(&cbd, 0, sizeof(cbd)); @@ -563,6 +563,10 @@ static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv, } err = enetc_send_cmd(priv->si, &cbd); +out: + if (!dma_mapping_error(&priv->si->pdev->dev, dma)) + dma_unmap_single(&priv->si->pdev->dev, dma, data_size, DMA_FROM_DEVICE); + kfree(si_data); return err; diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index c527f4ee1d3aed313124d2befbb6a536801e5594..6ea98af63b34124816355f3f525621ab39cb5a29 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -373,6 +373,9 @@ struct bufdesc_ex { #define FEC_ENET_WAKEUP ((uint)0x00020000) /* Wakeup request */ #define FEC_ENET_TXF (FEC_ENET_TXF_0 | FEC_ENET_TXF_1 | FEC_ENET_TXF_2) #define FEC_ENET_RXF (FEC_ENET_RXF_0 | FEC_ENET_RXF_1 | FEC_ENET_RXF_2) +#define FEC_ENET_RXF_GET(X) (((X) == 0) ? FEC_ENET_RXF_0 : \ + (((X) == 1) ? FEC_ENET_RXF_1 : \ + FEC_ENET_RXF_2)) #define FEC_ENET_TS_AVAIL ((uint)0x00010000) #define FEC_ENET_TS_TIMER ((uint)0x00008000) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 94eb838a017605876d0ade0f73ff94c2b7067855..166bc3f3b34cce3226698777272a1e9880e17e87 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -1439,7 +1439,7 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id) break; pkt_received++; - writel(FEC_ENET_RXF, fep->hwp + FEC_IEVENT); + writel(FEC_ENET_RXF_GET(queue_id), fep->hwp + FEC_IEVENT); /* Check for errors. */ status ^= BD_ENET_RX_LAST; diff --git a/drivers/net/ethernet/freescale/fman/fman_port.c b/drivers/net/ethernet/freescale/fman/fman_port.c index d9baac0dbc7d05ff0a2ea171012093693855f153..4c9d05c45c033b70712b373f3888a288d71d35fc 100644 --- a/drivers/net/ethernet/freescale/fman/fman_port.c +++ b/drivers/net/ethernet/freescale/fman/fman_port.c @@ -1805,7 +1805,7 @@ static int fman_port_probe(struct platform_device *of_dev) fman = dev_get_drvdata(&fm_pdev->dev); if (!fman) { err = -EINVAL; - goto return_err; + goto put_device; } err = of_property_read_u32(port_node, "cell-index", &val); @@ -1813,7 +1813,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: reading cell-index for %pOF failed\n", __func__, port_node); err = -EINVAL; - goto return_err; + goto put_device; } port_id = (u8)val; port->dts_params.id = port_id; @@ -1847,7 +1847,7 @@ static int fman_port_probe(struct platform_device *of_dev) } else { dev_err(port->dev, "%s: Illegal port type\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.type = port_type; @@ -1861,7 +1861,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: incorrect qman-channel-id\n", __func__); err = -EINVAL; - goto return_err; + goto put_device; } port->dts_params.qman_channel_id = qman_channel_id; } @@ -1871,7 +1871,7 @@ static int fman_port_probe(struct platform_device *of_dev) dev_err(port->dev, "%s: of_address_to_resource() failed\n", __func__); err = -ENOMEM; - goto return_err; + goto put_device; } port->dts_params.fman = fman; @@ -1896,6 +1896,8 @@ static int fman_port_probe(struct platform_device *of_dev) return 0; +put_device: + put_device(&fm_pdev->dev); return_err: of_node_put(port_node); free_port: diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index 901749a7a318b7e001c7b31c9c6e41707588bb69..6eeccc11b76efc4381aa38b83461e5410ff957c9 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -94,14 +94,17 @@ static void mac_exception(void *handle, enum fman_mac_exceptions ex) __func__, ex); } -static void set_fman_mac_params(struct mac_device *mac_dev, - struct fman_mac_params *params) +static int set_fman_mac_params(struct mac_device *mac_dev, + struct fman_mac_params *params) { struct mac_priv_s *priv = mac_dev->priv; params->base_addr = (typeof(params->base_addr)) devm_ioremap(priv->dev, mac_dev->res->start, resource_size(mac_dev->res)); + if (!params->base_addr) + return -ENOMEM; + memcpy(¶ms->addr, mac_dev->addr, sizeof(mac_dev->addr)); params->max_speed = priv->max_speed; params->phy_if = mac_dev->phy_if; @@ -112,6 +115,8 @@ static void set_fman_mac_params(struct mac_device *mac_dev, params->event_cb = mac_exception; params->dev_id = mac_dev; params->internal_phy_node = priv->internal_phy_node; + + return 0; } static int tgec_initialization(struct mac_device *mac_dev) @@ -123,7 +128,9 @@ static int tgec_initialization(struct mac_device *mac_dev) priv = mac_dev->priv; - set_fman_mac_params(mac_dev, ¶ms); + err = set_fman_mac_params(mac_dev, ¶ms); + if (err) + goto _return; mac_dev->fman_mac = tgec_config(¶ms); if (!mac_dev->fman_mac) { @@ -169,7 +176,9 @@ static int dtsec_initialization(struct mac_device *mac_dev) priv = mac_dev->priv; - set_fman_mac_params(mac_dev, ¶ms); + err = set_fman_mac_params(mac_dev, ¶ms); + if (err) + goto _return; mac_dev->fman_mac = dtsec_config(¶ms); if (!mac_dev->fman_mac) { @@ -218,7 +227,9 @@ static int memac_initialization(struct mac_device *mac_dev) priv = mac_dev->priv; - set_fman_mac_params(mac_dev, ¶ms); + err = set_fman_mac_params(mac_dev, ¶ms); + if (err) + goto _return; if (priv->max_speed == SPEED_10000) params.phy_if = PHY_INTERFACE_MODE_XGMII; diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c index cc7d4f93da5408376fb60758a38832b9e6ac531b..799a1486f586dbcbf078b1776ae9eadb59409fe6 100644 --- a/drivers/net/ethernet/freescale/gianfar_ethtool.c +++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c @@ -1456,6 +1456,7 @@ static int gfar_get_ts_info(struct net_device *dev, ptp_node = of_find_compatible_node(NULL, NULL, "fsl,etsec-ptp"); if (ptp_node) { ptp_dev = of_find_device_by_node(ptp_node); + of_node_put(ptp_node); if (ptp_dev) ptp = platform_get_drvdata(ptp_dev); } diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index bfa2826c55454db2586f8dfde2799989db243a46..b7984a772e12d61916d0a8d4a8513631e82a2c5d 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -49,6 +49,7 @@ struct tgec_mdio_controller { struct mdio_fsl_priv { struct tgec_mdio_controller __iomem *mdio_base; bool is_little_endian; + bool has_a009885; bool has_a011043; }; @@ -184,10 +185,10 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) { struct mdio_fsl_priv *priv = (struct mdio_fsl_priv *)bus->priv; struct tgec_mdio_controller __iomem *regs = priv->mdio_base; + unsigned long flags; uint16_t dev_addr; uint32_t mdio_stat; uint32_t mdio_ctl; - uint16_t value; int ret; bool endian = priv->is_little_endian; @@ -219,12 +220,18 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) return ret; } + if (priv->has_a009885) + /* Once the operation completes, i.e. MDIO_STAT_BSY clears, we + * must read back the data register within 16 MDC cycles. + */ + local_irq_save(flags); + /* Initiate the read */ xgmac_write32(mdio_ctl | MDIO_CTL_READ, ®s->mdio_ctl, endian); ret = xgmac_wait_until_done(&bus->dev, regs, endian); if (ret) - return ret; + goto irq_restore; /* Return all Fs if nothing was there */ if ((xgmac_read32(®s->mdio_stat, endian) & MDIO_STAT_RD_ER) && @@ -232,13 +239,17 @@ static int xgmac_mdio_read(struct mii_bus *bus, int phy_id, int regnum) dev_dbg(&bus->dev, "Error while reading PHY%d reg at %d.%hhu\n", phy_id, dev_addr, regnum); - return 0xffff; + ret = 0xffff; + } else { + ret = xgmac_read32(®s->mdio_data, endian) & 0xffff; + dev_dbg(&bus->dev, "read %04x\n", ret); } - value = xgmac_read32(®s->mdio_data, endian) & 0xffff; - dev_dbg(&bus->dev, "read %04x\n", value); +irq_restore: + if (priv->has_a009885) + local_irq_restore(flags); - return value; + return ret; } static int xgmac_mdio_probe(struct platform_device *pdev) @@ -282,6 +293,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev) priv->is_little_endian = device_property_read_bool(&pdev->dev, "little-endian"); + priv->has_a009885 = device_property_read_bool(&pdev->dev, + "fsl,erratum-a009885"); priv->has_a011043 = device_property_read_bool(&pdev->dev, "fsl,erratum-a011043"); @@ -307,9 +320,10 @@ err_ioremap: static int xgmac_mdio_remove(struct platform_device *pdev) { struct mii_bus *bus = platform_get_drvdata(pdev); + struct mdio_fsl_priv *priv = bus->priv; mdiobus_unregister(bus); - iounmap(bus->priv); + iounmap(priv->mdio_base); mdiobus_free(bus); return 0; diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index f5c80229ea9668100dd29a156421140bb4f31a4f..5c9a4d4362c7bde660d758924b22898b86b6dc74 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -28,7 +28,7 @@ #define GVE_MIN_MSIX 3 /* Numbers of gve tx/rx stats in stats report. */ -#define GVE_TX_STATS_REPORT_NUM 5 +#define GVE_TX_STATS_REPORT_NUM 6 #define GVE_RX_STATS_REPORT_NUM 2 /* Interval to schedule a stats report update, 20000ms. */ @@ -147,7 +147,9 @@ struct gve_tx_ring { u32 q_num ____cacheline_aligned; /* queue idx */ u32 stop_queue; /* count of queue stops */ u32 wake_queue; /* count of queue wakes */ + u32 queue_timeout; /* count of queue timeouts */ u32 ntfy_id; /* notification block index */ + u32 last_kick_msec; /* Last time the queue was kicked */ dma_addr_t bus; /* dma address of the descr ring */ dma_addr_t q_resources_bus; /* dma address of the queue resources */ struct u64_stats_sync statss; /* sync stats for 32bit archs */ @@ -472,7 +474,7 @@ struct gve_queue_page_list *gve_assign_rx_qpl(struct gve_priv *priv) gve_num_tx_qpls(priv)); /* we are out of rx qpls */ - if (id == priv->qpl_cfg.qpl_map_size) + if (id == gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv)) return NULL; set_bit(id, priv->qpl_cfg.qpl_id_map); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 6009d76e41fc4f26e6b3481aa3d533d76441de0e..67f2b9a61463ad38ef806f406b7bbd9fed6bb596 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -141,7 +141,7 @@ static int gve_adminq_parse_err(struct gve_priv *priv, u32 status) */ static int gve_adminq_kick_and_wait(struct gve_priv *priv) { - u32 tail, head; + int tail, head; int i; tail = ioread32be(&priv->reg_bar0->adminq_event_counter); diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index 015796a20118bcdf2c1b095d5f95c59d2300233e..8dbc2c03fbbddd49caab73db4e041490262dea69 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -212,6 +212,7 @@ enum gve_stat_names { TX_LAST_COMPLETION_PROCESSED = 5, RX_NEXT_EXPECTED_SEQUENCE = 6, RX_BUFFERS_POSTED = 7, + TX_TIMEOUT_CNT = 8, // stats from NIC RX_QUEUE_DROP_CNT = 65, RX_NO_BUFFERS_POSTED = 66, diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 0b714b606ba191582d778d2bd3031c9af5331b97..6cb75bb1ed052fb73bdff058e9909bc2c81eee06 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -23,6 +23,9 @@ #define GVE_VERSION "1.0.0" #define GVE_VERSION_PREFIX "GVE-" +// Minimum amount of time between queue kicks in msec (10 seconds) +#define MIN_TX_TIMEOUT_GAP (1000 * 10) + const char gve_version_str[] = GVE_VERSION; static const char gve_version_prefix[] = GVE_VERSION_PREFIX; @@ -30,6 +33,7 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) { struct gve_priv *priv = netdev_priv(dev); unsigned int start; + u64 packets, bytes; int ring; if (priv->rx) { @@ -37,10 +41,12 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) do { start = u64_stats_fetch_begin(&priv->rx[ring].statss); - s->rx_packets += priv->rx[ring].rpackets; - s->rx_bytes += priv->rx[ring].rbytes; + packets = priv->rx[ring].rpackets; + bytes = priv->rx[ring].rbytes; } while (u64_stats_fetch_retry(&priv->rx[ring].statss, start)); + s->rx_packets += packets; + s->rx_bytes += bytes; } } if (priv->tx) { @@ -48,10 +54,12 @@ static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) do { start = u64_stats_fetch_begin(&priv->tx[ring].statss); - s->tx_packets += priv->tx[ring].pkt_done; - s->tx_bytes += priv->tx[ring].bytes_done; + packets = priv->tx[ring].pkt_done; + bytes = priv->tx[ring].bytes_done; } while (u64_stats_fetch_retry(&priv->tx[ring].statss, start)); + s->tx_packets += packets; + s->tx_bytes += bytes; } } } @@ -71,6 +79,9 @@ static int gve_alloc_counter_array(struct gve_priv *priv) static void gve_free_counter_array(struct gve_priv *priv) { + if (!priv->counter_array) + return; + dma_free_coherent(&priv->pdev->dev, priv->num_event_counters * sizeof(*priv->counter_array), @@ -131,6 +142,9 @@ static int gve_alloc_stats_report(struct gve_priv *priv) static void gve_free_stats_report(struct gve_priv *priv) { + if (!priv->stats_report) + return; + del_timer_sync(&priv->stats_report_timer); dma_free_coherent(&priv->pdev->dev, priv->stats_report_len, priv->stats_report, priv->stats_report_bus); @@ -301,18 +315,19 @@ static void gve_free_notify_blocks(struct gve_priv *priv) { int i; - if (priv->msix_vectors) { - /* Free the irqs */ - for (i = 0; i < priv->num_ntfy_blks; i++) { - struct gve_notify_block *block = &priv->ntfy_blocks[i]; - int msix_idx = i; + if (!priv->msix_vectors) + return; - irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, - NULL); - free_irq(priv->msix_vectors[msix_idx].vector, block); - } - free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); + /* Free the irqs */ + for (i = 0; i < priv->num_ntfy_blks; i++) { + struct gve_notify_block *block = &priv->ntfy_blocks[i]; + int msix_idx = i; + + irq_set_affinity_hint(priv->msix_vectors[msix_idx].vector, + NULL); + free_irq(priv->msix_vectors[msix_idx].vector, block); } + free_irq(priv->msix_vectors[priv->mgmt_msix_idx].vector, priv); dma_free_coherent(&priv->pdev->dev, priv->num_ntfy_blks * sizeof(*priv->ntfy_blocks), priv->ntfy_blocks, priv->ntfy_block_bus); @@ -931,9 +946,47 @@ static void gve_turnup(struct gve_priv *priv) static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue) { - struct gve_priv *priv = netdev_priv(dev); + struct gve_notify_block *block; + struct gve_tx_ring *tx = NULL; + struct gve_priv *priv; + u32 last_nic_done; + u32 current_time; + u32 ntfy_idx; + + netdev_info(dev, "Timeout on tx queue, %d", txqueue); + priv = netdev_priv(dev); + if (txqueue > priv->tx_cfg.num_queues) + goto reset; + + ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue); + if (ntfy_idx >= priv->num_ntfy_blks) + goto reset; + + block = &priv->ntfy_blocks[ntfy_idx]; + tx = block->tx; + + current_time = jiffies_to_msecs(jiffies); + if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time) + goto reset; + /* Check to see if there are missed completions, which will allow us to + * kick the queue. + */ + last_nic_done = gve_tx_load_event_counter(priv, tx); + if (last_nic_done - tx->done) { + netdev_info(dev, "Kicking queue %d", txqueue); + iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); + napi_schedule(&block->napi); + tx->last_kick_msec = current_time; + goto out; + } // Else reset. + +reset: gve_schedule_reset(priv); + +out: + if (tx) + tx->queue_timeout++; priv->tx_timeo_cnt++; } @@ -975,9 +1028,10 @@ static void gve_handle_reset(struct gve_priv *priv) void gve_handle_report_stats(struct gve_priv *priv) { - int idx, stats_idx = 0, tx_bytes; - unsigned int start = 0; struct stats *stats = priv->stats_report->stats; + int idx, stats_idx = 0; + unsigned int start = 0; + u64 tx_bytes; if (!gve_get_report_stats(priv)) return; @@ -1015,6 +1069,11 @@ void gve_handle_report_stats(struct gve_priv *priv) .value = cpu_to_be64(priv->tx[idx].done), .queue_id = cpu_to_be32(idx), }; + stats[stats_idx++] = (struct stats) { + .stat_name = cpu_to_be32(TX_TIMEOUT_CNT), + .value = cpu_to_be64(priv->tx[idx].queue_timeout), + .queue_id = cpu_to_be32(idx), + }; } } /* rx stats */ diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index a9aca8c24e90d62cfe3b97b38fefd1cec25fb803..aa87e4d1215325d24ffabf32fdcf26574754f9b9 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -400,6 +400,10 @@ static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port, return; if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { + /* DSAF_MAX_PORT_NUM is 6, but DSAF_GE_NUM is 8. + We need check to prevent array overflow */ + if (port >= DSAF_MAX_PORT_NUM) + return; reg_val_1 = 0x1 << port; port_rst_off = dsaf_dev->mac_cb[port]->port_rst_off; /* there is difference between V1 and V2 in register.*/ diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.c b/drivers/net/ethernet/hisilicon/hns3/hnae3.c index eef1b2764d34ae60499cb3ce110c4390aba138eb..67b0bf310daaafd1f4a6f4b3bf6c64d06d99e10f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.c +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.c @@ -10,6 +10,27 @@ static LIST_HEAD(hnae3_ae_algo_list); static LIST_HEAD(hnae3_client_list); static LIST_HEAD(hnae3_ae_dev_list); +void hnae3_unregister_ae_algo_prepare(struct hnae3_ae_algo *ae_algo) +{ + const struct pci_device_id *pci_id; + struct hnae3_ae_dev *ae_dev; + + if (!ae_algo) + return; + + list_for_each_entry(ae_dev, &hnae3_ae_dev_list, node) { + if (!hnae3_get_bit(ae_dev->flag, HNAE3_DEV_INITED_B)) + continue; + + pci_id = pci_match_id(ae_algo->pdev_id_table, ae_dev->pdev); + if (!pci_id) + continue; + if (IS_ENABLED(CONFIG_PCI_IOV)) + pci_disable_sriov(ae_dev->pdev); + } +} +EXPORT_SYMBOL(hnae3_unregister_ae_algo_prepare); + /* we are keeping things simple and using single lock for all the * list. This is a non-critical code so other updations, if happen * in parallel, can wait. diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index 912c51e327d64fbcbd04ec78bc152c36a40fe091..4a9576a449e10256be547317c98ff48e6385e057 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -754,6 +754,7 @@ struct hnae3_handle { int hnae3_register_ae_dev(struct hnae3_ae_dev *ae_dev); void hnae3_unregister_ae_dev(struct hnae3_ae_dev *ae_dev); +void hnae3_unregister_ae_algo_prepare(struct hnae3_ae_algo *ae_algo); void hnae3_unregister_ae_algo(struct hnae3_ae_algo *ae_algo); void hnae3_register_ae_algo(struct hnae3_ae_algo *ae_algo); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 936b9cfe1a62ff9f131b1bb1b91fc4259eb3f365..ae7cd73c823b72c5af12a0f6f90b2bbce77de95c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -444,6 +444,11 @@ static int hns3_nic_net_open(struct net_device *netdev) if (hns3_nic_resetting(netdev)) return -EBUSY; + if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) { + netdev_warn(netdev, "net open repeatedly!\n"); + return 0; + } + netif_carrier_off(netdev); ret = hns3_nic_set_real_num_queue(netdev); @@ -1278,7 +1283,6 @@ void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size) static int hns3_skb_linearize(struct hns3_enet_ring *ring, struct sk_buff *skb, - u8 max_non_tso_bd_num, unsigned int bd_num) { /* 'bd_num == UINT_MAX' means the skb' fraglist has a @@ -1295,8 +1299,7 @@ static int hns3_skb_linearize(struct hns3_enet_ring *ring, * will not help. */ if (skb->len > HNS3_MAX_TSO_SIZE || - (!skb_is_gso(skb) && skb->len > - HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num))) { + (!skb_is_gso(skb) && skb->len > HNS3_MAX_NON_TSO_SIZE)) { u64_stats_update_begin(&ring->syncp); ring->stats.hw_limitation++; u64_stats_update_end(&ring->syncp); @@ -1331,8 +1334,7 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring, goto out; } - if (hns3_skb_linearize(ring, skb, max_non_tso_bd_num, - bd_num)) + if (hns3_skb_linearize(ring, skb, bd_num)) return -ENOMEM; bd_num = hns3_tx_bd_count(skb->len); @@ -2419,6 +2421,7 @@ static void hns3_buffer_detach(struct hns3_enet_ring *ring, int i) { hns3_unmap_buffer(ring, &ring->desc_cb[i]); ring->desc[i].addr = 0; + ring->desc_cb[i].refill = 0; } static void hns3_free_buffer_detach(struct hns3_enet_ring *ring, int i, @@ -2496,6 +2499,7 @@ static int hns3_alloc_and_attach_buffer(struct hns3_enet_ring *ring, int i) return ret; ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); + ring->desc_cb[i].refill = 1; return 0; } @@ -2526,12 +2530,14 @@ static void hns3_replace_buffer(struct hns3_enet_ring *ring, int i, hns3_unmap_buffer(ring, &ring->desc_cb[i]); ring->desc_cb[i] = *res_cb; ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma); + ring->desc_cb[i].refill = 1; ring->desc[i].rx.bd_base_info = 0; } static void hns3_reuse_buffer(struct hns3_enet_ring *ring, int i) { ring->desc_cb[i].reuse_flag = 0; + ring->desc_cb[i].refill = 1; ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma + ring->desc_cb[i].page_offset); ring->desc[i].rx.bd_base_info = 0; @@ -2629,10 +2635,14 @@ static int hns3_desc_unused(struct hns3_enet_ring *ring) int ntc = ring->next_to_clean; int ntu = ring->next_to_use; + if (unlikely(ntc == ntu && !ring->desc_cb[ntc].refill)) + return ring->desc_num; + return ((ntc >= ntu) ? 0 : ring->desc_num) + ntc - ntu; } -static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, +/* Return true if there is any allocation failure */ +static bool hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, int cleand_count) { struct hns3_desc_cb *desc_cb; @@ -2657,7 +2667,10 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, hns3_rl_err(ring_to_netdev(ring), "alloc rx buffer failed: %d\n", ret); - break; + + writel(i, ring->tqp->io_base + + HNS3_RING_RX_RING_HEAD_REG); + return true; } hns3_replace_buffer(ring, ring->next_to_use, &res_cbs); @@ -2670,6 +2683,7 @@ static void hns3_nic_alloc_rx_buffers(struct hns3_enet_ring *ring, } writel(i, ring->tqp->io_base + HNS3_RING_RX_RING_HEAD_REG); + return false; } static bool hns3_page_is_reusable(struct page *page) @@ -2900,6 +2914,7 @@ static void hns3_rx_ring_move_fw(struct hns3_enet_ring *ring) { ring->desc[ring->next_to_clean].rx.bd_base_info &= cpu_to_le32(~BIT(HNS3_RXD_VLD_B)); + ring->desc_cb[ring->next_to_clean].refill = 0; ring->next_to_clean += 1; if (unlikely(ring->next_to_clean == ring->desc_num)) @@ -3213,6 +3228,7 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, { #define RCB_NOF_ALLOC_RX_BUFF_ONCE 16 int unused_count = hns3_desc_unused(ring); + bool failure = false; int recv_pkts = 0; int err; @@ -3221,9 +3237,9 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, while (recv_pkts < budget) { /* Reuse or realloc buffers */ if (unused_count >= RCB_NOF_ALLOC_RX_BUFF_ONCE) { - hns3_nic_alloc_rx_buffers(ring, unused_count); - unused_count = hns3_desc_unused(ring) - - ring->pending_buf; + failure = failure || + hns3_nic_alloc_rx_buffers(ring, unused_count); + unused_count = 0; } /* Poll one pkt */ @@ -3242,11 +3258,7 @@ int hns3_clean_rx_ring(struct hns3_enet_ring *ring, int budget, } out: - /* Make all data has been write before submit */ - if (unused_count > 0) - hns3_nic_alloc_rx_buffers(ring, unused_count); - - return recv_pkts; + return failure ? budget : recv_pkts; } static bool hns3_get_new_flow_lvl(struct hns3_enet_ring_group *ring_group) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 398686b15a826e8cefd446d9a1ead1691a50efd9..54d02ea4aaa7c977781ce8ffbebabd190586ed42 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -170,11 +170,9 @@ enum hns3_nic_state { #define HNS3_MAX_BD_SIZE 65535 #define HNS3_MAX_TSO_BD_NUM 63U -#define HNS3_MAX_TSO_SIZE \ - (HNS3_MAX_BD_SIZE * HNS3_MAX_TSO_BD_NUM) +#define HNS3_MAX_TSO_SIZE 1048576U +#define HNS3_MAX_NON_TSO_SIZE 9728U -#define HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num) \ - (HNS3_MAX_BD_SIZE * (max_non_tso_bd_num)) #define HNS3_VECTOR_GL0_OFFSET 0x100 #define HNS3_VECTOR_GL1_OFFSET 0x200 @@ -285,6 +283,7 @@ struct hns3_desc_cb { u32 length; /* length of the buffer */ u16 reuse_flag; + u16 refill; /* desc type, used by the ring user to mark the type of the priv data */ u16 type; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index c0aa3be0cdfbba468c184c65c68807c42b7aa352..cd0d7a546957a105874d36cdc27f8e766b0be767 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -300,33 +300,8 @@ out: return ret_val; } -/** - * hns3_nic_self_test - self test - * @ndev: net device - * @eth_test: test cmd - * @data: test result - */ -static void hns3_self_test(struct net_device *ndev, - struct ethtool_test *eth_test, u64 *data) +static void hns3_set_selftest_param(struct hnae3_handle *h, int (*st_param)[2]) { - struct hns3_nic_priv *priv = netdev_priv(ndev); - struct hnae3_handle *h = priv->ae_handle; - int st_param[HNS3_SELF_TEST_TYPE_NUM][2]; - bool if_running = netif_running(ndev); - int test_index = 0; - u32 i; - - if (hns3_nic_resetting(ndev)) { - netdev_err(ndev, "dev resetting!"); - return; - } - - /* Only do offline selftest, or pass by default */ - if (eth_test->flags != ETH_TEST_FL_OFFLINE) - return; - - netif_dbg(h, drv, ndev, "self test start"); - st_param[HNAE3_LOOP_APP][0] = HNAE3_LOOP_APP; st_param[HNAE3_LOOP_APP][1] = h->flags & HNAE3_SUPPORT_APP_LOOPBACK; @@ -343,13 +318,26 @@ static void hns3_self_test(struct net_device *ndev, st_param[HNAE3_LOOP_PHY][0] = HNAE3_LOOP_PHY; st_param[HNAE3_LOOP_PHY][1] = h->flags & HNAE3_SUPPORT_PHY_LOOPBACK; +} + +static void hns3_selftest_prepare(struct net_device *ndev, + bool if_running, int (*st_param)[2]) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + if (netif_msg_ifdown(h)) + netdev_info(ndev, "self test start\n"); + + hns3_set_selftest_param(h, st_param); if (if_running) ndev->netdev_ops->ndo_stop(ndev); #if IS_ENABLED(CONFIG_VLAN_8021Q) /* Disable the vlan filter for selftest does not support it */ - if (h->ae_algo->ops->enable_vlan_filter) + if (h->ae_algo->ops->enable_vlan_filter && + ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) h->ae_algo->ops->enable_vlan_filter(h, false); #endif @@ -361,6 +349,36 @@ static void hns3_self_test(struct net_device *ndev, h->ae_algo->ops->halt_autoneg(h, true); set_bit(HNS3_NIC_STATE_TESTING, &priv->state); +} + +static void hns3_selftest_restore(struct net_device *ndev, bool if_running) +{ + struct hns3_nic_priv *priv = netdev_priv(ndev); + struct hnae3_handle *h = priv->ae_handle; + + clear_bit(HNS3_NIC_STATE_TESTING, &priv->state); + + if (h->ae_algo->ops->halt_autoneg) + h->ae_algo->ops->halt_autoneg(h, false); + +#if IS_ENABLED(CONFIG_VLAN_8021Q) + if (h->ae_algo->ops->enable_vlan_filter && + ndev->features & NETIF_F_HW_VLAN_CTAG_FILTER) + h->ae_algo->ops->enable_vlan_filter(h, true); +#endif + + if (if_running) + ndev->netdev_ops->ndo_open(ndev); + + if (netif_msg_ifdown(h)) + netdev_info(ndev, "self test end\n"); +} + +static void hns3_do_selftest(struct net_device *ndev, int (*st_param)[2], + struct ethtool_test *eth_test, u64 *data) +{ + int test_index = 0; + u32 i; for (i = 0; i < HNS3_SELF_TEST_TYPE_NUM; i++) { enum hnae3_loop loop_type = (enum hnae3_loop)st_param[i][0]; @@ -379,21 +397,32 @@ static void hns3_self_test(struct net_device *ndev, test_index++; } +} - clear_bit(HNS3_NIC_STATE_TESTING, &priv->state); - - if (h->ae_algo->ops->halt_autoneg) - h->ae_algo->ops->halt_autoneg(h, false); +/** + * hns3_nic_self_test - self test + * @ndev: net device + * @eth_test: test cmd + * @data: test result + */ +static void hns3_self_test(struct net_device *ndev, + struct ethtool_test *eth_test, u64 *data) +{ + int st_param[HNS3_SELF_TEST_TYPE_NUM][2]; + bool if_running = netif_running(ndev); -#if IS_ENABLED(CONFIG_VLAN_8021Q) - if (h->ae_algo->ops->enable_vlan_filter) - h->ae_algo->ops->enable_vlan_filter(h, true); -#endif + if (hns3_nic_resetting(ndev)) { + netdev_err(ndev, "dev resetting!"); + return; + } - if (if_running) - ndev->netdev_ops->ndo_open(ndev); + /* Only do offline selftest, or pass by default */ + if (eth_test->flags != ETH_TEST_FL_OFFLINE) + return; - netif_dbg(h, drv, ndev, "self test end\n"); + hns3_selftest_prepare(ndev, if_running, st_param); + hns3_do_selftest(ndev, st_param, eth_test, data); + hns3_selftest_restore(ndev, if_running); } static int hns3_get_sset_count(struct net_device *netdev, int stringset) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c index a93c7eb4e7cbb8bb03b26f89e07ef01284734f96..5bab885744fc8ed33ec1e2a3a894469c679c4dc1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c @@ -126,7 +126,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, if (ret) return ret; - for (i = 0; i < hdev->tc_max; i++) { + for (i = 0; i < HNAE3_MAX_TC; i++) { switch (ets->tc_tsa[i]) { case IEEE_8021QAZ_TSA_STRICT: if (hdev->tm_info.tc_info[i].tc_sch_mode != @@ -134,6 +134,15 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets, *changed = true; break; case IEEE_8021QAZ_TSA_ETS: + /* The hardware will switch to sp mode if bandwidth is + * 0, so limit ets bandwidth must be greater than 0. + */ + if (!ets->tc_tx_bw[i]) { + dev_err(&hdev->pdev->dev, + "tc%u ets bw cannot be 0\n", i); + return -EINVAL; + } + if (hdev->tm_info.tc_info[i].tc_sch_mode != HCLGE_SCH_MODE_DWRR) *changed = true; @@ -248,6 +257,10 @@ static int hclge_ieee_setets(struct hnae3_handle *h, struct ieee_ets *ets) } hclge_tm_schd_info_update(hdev, num_tc); + if (num_tc > 1) + hdev->flag |= HCLGE_FLAG_DCB_ENABLE; + else + hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; ret = hclge_ieee_ets_to_tm_info(hdev, ets); if (ret) @@ -313,8 +326,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc) u8 i, j, pfc_map, *prio_tc; int ret; - if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) || - hdev->flag & HCLGE_FLAG_MQPRIO_ENABLE) + if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) return -EINVAL; if (pfc->pfc_en == hdev->tm_info.pfc_en) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 24357e9071553b2ec739f6126c405353541dab38..7b94764b4f5d9ccf6036be5cfb1e44f5bfe9d4ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -7581,15 +7581,8 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport, } /* check if we just hit the duplicate */ - if (!ret) { - dev_warn(&hdev->pdev->dev, "VF %u mac(%pM) exists\n", - vport->vport_id, addr); - return 0; - } - - dev_err(&hdev->pdev->dev, - "PF failed to add unicast entry(%pM) in the MAC table\n", - addr); + if (!ret) + return -EEXIST; return ret; } @@ -7743,7 +7736,13 @@ static void hclge_sync_vport_mac_list(struct hclge_vport *vport, } else { set_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE, &vport->state); - break; + + /* If one unicast mac address is existing in hardware, + * we need to try whether other unicast mac addresses + * are new addresses that can be added. + */ + if (ret != -EEXIST) + break; } } } @@ -11519,6 +11518,7 @@ static int hclge_init(void) static void hclge_exit(void) { + hnae3_unregister_ae_algo_prepare(&ae_algo); hnae3_unregister_ae_algo(&ae_algo); destroy_workqueue(hclge_wq); } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c index e8495f58a1a8e81fa4dbdc86b998984fea437434..9168e39b63641945eb3f71e69ab7cec5b33cc49c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c @@ -646,14 +646,6 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev) for (i = 0; i < HNAE3_MAX_USER_PRIO; i++) hdev->tm_info.prio_tc[i] = (i >= hdev->tm_info.num_tc) ? 0 : i; - - /* DCB is enabled if we have more than 1 TC or pfc_en is - * non-zero. - */ - if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en) - hdev->flag |= HCLGE_FLAG_DCB_ENABLE; - else - hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; } static void hclge_tm_pg_info_init(struct hclge_dev *hdev) @@ -679,15 +671,17 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev) hdev->tm_info.pg_info[i].tc_bit_map = hdev->hw_tc_map; for (k = 0; k < hdev->tm_info.num_tc; k++) hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT; + for (; k < HNAE3_MAX_TC; k++) + hdev->tm_info.pg_info[i].tc_dwrr[k] = 0; } } -static void hclge_pfc_info_init(struct hclge_dev *hdev) +static void hclge_update_fc_mode_by_dcb_flag(struct hclge_dev *hdev) { - if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) { + if (hdev->tm_info.num_tc == 1 && !hdev->tm_info.pfc_en) { if (hdev->fc_mode_last_time == HCLGE_FC_PFC) dev_warn(&hdev->pdev->dev, - "DCB is disable, but last mode is FC_PFC\n"); + "Only 1 tc used, but last mode is FC_PFC\n"); hdev->tm_info.fc_mode = hdev->fc_mode_last_time; } else if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) { @@ -700,6 +694,27 @@ static void hclge_pfc_info_init(struct hclge_dev *hdev) } } +static void hclge_update_fc_mode(struct hclge_dev *hdev) +{ + if (!hdev->tm_info.pfc_en) { + hdev->tm_info.fc_mode = hdev->fc_mode_last_time; + return; + } + + if (hdev->tm_info.fc_mode != HCLGE_FC_PFC) { + hdev->fc_mode_last_time = hdev->tm_info.fc_mode; + hdev->tm_info.fc_mode = HCLGE_FC_PFC; + } +} + +void hclge_tm_pfc_info_update(struct hclge_dev *hdev) +{ + if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3) + hclge_update_fc_mode(hdev); + else + hclge_update_fc_mode_by_dcb_flag(hdev); +} + static void hclge_tm_schd_info_init(struct hclge_dev *hdev) { hclge_tm_pg_info_init(hdev); @@ -708,7 +723,7 @@ static void hclge_tm_schd_info_init(struct hclge_dev *hdev) hclge_tm_vport_info_update(hdev); - hclge_pfc_info_init(hdev); + hclge_tm_pfc_info_update(hdev); } static int hclge_tm_pg_to_pri_map(struct hclge_dev *hdev) @@ -1024,7 +1039,6 @@ static int hclge_tm_pri_tc_base_dwrr_cfg(struct hclge_dev *hdev) static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev) { -#define DEFAULT_TC_WEIGHT 1 #define DEFAULT_TC_OFFSET 14 struct hclge_ets_tc_weight_cmd *ets_weight; @@ -1037,13 +1051,7 @@ static int hclge_tm_ets_tc_dwrr_cfg(struct hclge_dev *hdev) for (i = 0; i < HNAE3_MAX_TC; i++) { struct hclge_pg_info *pg_info; - ets_weight->tc_weight[i] = DEFAULT_TC_WEIGHT; - - if (!(hdev->hw_tc_map & BIT(i))) - continue; - - pg_info = - &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid]; + pg_info = &hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid]; ets_weight->tc_weight[i] = pg_info->tc_dwrr[i]; } @@ -1444,19 +1452,6 @@ void hclge_tm_schd_info_update(struct hclge_dev *hdev, u8 num_tc) hclge_tm_schd_info_init(hdev); } -void hclge_tm_pfc_info_update(struct hclge_dev *hdev) -{ - /* DCB is enabled if we have more than 1 TC or pfc_en is - * non-zero. - */ - if (hdev->tm_info.num_tc > 1 || hdev->tm_info.pfc_en) - hdev->flag |= HCLGE_FLAG_DCB_ENABLE; - else - hdev->flag &= ~HCLGE_FLAG_DCB_ENABLE; - - hclge_pfc_info_init(hdev); -} - int hclge_tm_init_hw(struct hclge_dev *hdev, bool init) { int ret; @@ -1502,7 +1497,7 @@ int hclge_tm_vport_map_update(struct hclge_dev *hdev) if (ret) return ret; - if (!(hdev->flag & HCLGE_FLAG_DCB_ENABLE)) + if (hdev->tm_info.num_tc == 1 && !hdev->tm_info.pfc_en) return 0; return hclge_tm_bp_setup(hdev); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 3641d7c31451c2c25c24efbed37af6fd7da4533f..d6580e942724d9214bec07a7359dab6d99d64d79 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -679,9 +679,9 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev, u16 rss_size) roundup_size = ilog2(roundup_size); for (i = 0; i < HCLGEVF_MAX_TC_NUM; i++) { - tc_valid[i] = !!(hdev->hw_tc_map & BIT(i)); + tc_valid[i] = 1; tc_size[i] = roundup_size; - tc_offset[i] = rss_size * i; + tc_offset[i] = (hdev->hw_tc_map & BIT(i)) ? rss_size * i : 0; } hclgevf_cmd_setup_basic_desc(&desc, HCLGEVF_OPC_RSS_TC_MODE, false); @@ -2160,9 +2160,9 @@ static void hclgevf_reset_service_task(struct hclgevf_dev *hdev) hdev->reset_attempts = 0; hdev->last_reset_time = jiffies; - while ((hdev->reset_type = - hclgevf_get_reset_level(hdev, &hdev->reset_pending)) - != HNAE3_NONE_RESET) + hdev->reset_type = + hclgevf_get_reset_level(hdev, &hdev->reset_pending); + if (hdev->reset_type != HNAE3_NONE_RESET) hclgevf_reset(hdev); } else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED, &hdev->reset_state)) { @@ -2382,8 +2382,7 @@ static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data) break; } - if (event_cause != HCLGEVF_VECTOR0_EVENT_OTHER) - hclgevf_enable_vector(&hdev->misc_vector, true); + hclgevf_enable_vector(&hdev->misc_vector, true); return IRQ_HANDLED; } @@ -2887,7 +2886,10 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, /* un-init roce, if it exists */ if (hdev->roce_client) { + while (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGEVF_WAIT_RESET_DONE); clear_bit(HCLGEVF_STATE_ROCE_REGISTERED, &hdev->state); + hdev->roce_client->ops->uninit_instance(&hdev->roce, 0); hdev->roce_client = NULL; hdev->roce.client = NULL; @@ -2896,6 +2898,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, /* un-init nic/unic, if this was not called by roce client */ if (client->ops->uninit_instance && hdev->nic_client && client->type != HNAE3_CLIENT_ROCE) { + while (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + msleep(HCLGEVF_WAIT_RESET_DONE); clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state); client->ops->uninit_instance(&hdev->nic, 0); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 526a62f97046690b4c29dc2bcc4176a5a565a4d4..c9b0fa5e8589dec2918645f7329dae3d0c1739ed 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -106,6 +106,8 @@ #define HCLGEVF_VF_RST_ING 0x07008 #define HCLGEVF_VF_RST_ING_BIT BIT(16) +#define HCLGEVF_WAIT_RESET_DONE 100 + #define HCLGEVF_RSS_IND_TBL_SIZE 512 #define HCLGEVF_RSS_SET_BITMAP_MSK 0xffff #define HCLGEVF_RSS_KEY_SIZE 40 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index 5b2dcd97c1078c83ab8d1bfcc7c9f2f82da27f7b..b8e5ca6700ed57fd9a6194ad2597c8f10909253d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -109,7 +109,8 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, memcpy(&req->msg, send_msg, sizeof(struct hclge_vf_to_pf_msg)); - trace_hclge_vf_mbx_send(hdev, req); + if (test_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state)) + trace_hclge_vf_mbx_send(hdev, req); /* synchronous send */ if (need_resp) { diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index 27937c5d795673a53d79aee82a91898732102d52..daec9ce04531be2531a24d5b44bc83ad5dae3a2c 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -117,9 +117,10 @@ static int sni_82596_probe(struct platform_device *dev) netdevice->dev_addr[5] = readb(eth_addr + 0x06); iounmap(eth_addr); - if (!netdevice->irq) { + if (netdevice->irq < 0) { printk(KERN_ERR "%s: IRQ not found for i82596 at 0x%lx\n", __FILE__, netdevice->base_addr); + retval = netdevice->irq; goto probe_failed; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index bb8d0a0f48ee0006a4307f7678c6b08e8fcd7813..95bee3d915934242fe06bb30d650811dc08dae03 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1548,8 +1548,6 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) netdev_tx_t ret = NETDEV_TX_OK; if (test_bit(0, &adapter->resetting)) { - if (!netif_subqueue_stopped(netdev, skb)) - netif_stop_subqueue(netdev, queue_num); dev_kfree_skb_any(skb); tx_send_failed++; @@ -2356,8 +2354,10 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, * flush reset queue and process this reset */ if (adapter->force_reset_recovery && !list_empty(&adapter->rwi_list)) { - list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) + list_for_each_safe(entry, tmp_entry, &adapter->rwi_list) { list_del(entry); + kfree(list_entry(entry, struct ibmvnic_rwi, list)); + } } rwi->reset_reason = reason; list_add_tail(&rwi->list, &adapter->rwi_list); @@ -3403,11 +3403,25 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) struct device *dev = &adapter->vdev->dev; union ibmvnic_crq crq; int max_entries; + int cap_reqs; + + /* We send out 6 or 7 REQUEST_CAPABILITY CRQs below (depending on + * the PROMISC flag). Initialize this count upfront. When the tasklet + * receives a response to all of these, it will send the next protocol + * message (QUERY_IP_OFFLOAD). + */ + if (!(adapter->netdev->flags & IFF_PROMISC) || + adapter->promisc_supported) + cap_reqs = 7; + else + cap_reqs = 6; if (!retry) { /* Sub-CRQ entries are 32 byte long */ int entries_page = 4 * PAGE_SIZE / (sizeof(u64) * 4); + atomic_set(&adapter->running_cap_crqs, cap_reqs); + if (adapter->min_tx_entries_per_subcrq > entries_page || adapter->min_rx_add_entries_per_subcrq > entries_page) { dev_err(dev, "Fatal, invalid entries per sub-crq\n"); @@ -3468,44 +3482,45 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) adapter->opt_rx_comp_queues; adapter->req_rx_add_queues = adapter->max_rx_add_queues; + } else { + atomic_add(cap_reqs, &adapter->running_cap_crqs); } - memset(&crq, 0, sizeof(crq)); crq.request_capability.first = IBMVNIC_CRQ_CMD; crq.request_capability.cmd = REQUEST_CAPABILITY; crq.request_capability.capability = cpu_to_be16(REQ_TX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_tx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_QUEUES); crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_queues); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_TX_ENTRIES_PER_SUBCRQ); crq.request_capability.number = cpu_to_be64(adapter->req_tx_entries_per_subcrq); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_RX_ADD_ENTRIES_PER_SUBCRQ); crq.request_capability.number = cpu_to_be64(adapter->req_rx_add_entries_per_subcrq); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); crq.request_capability.capability = cpu_to_be16(REQ_MTU); crq.request_capability.number = cpu_to_be64(adapter->req_mtu); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); if (adapter->netdev->flags & IFF_PROMISC) { @@ -3513,16 +3528,21 @@ static void send_request_cap(struct ibmvnic_adapter *adapter, int retry) crq.request_capability.capability = cpu_to_be16(PROMISC_REQUESTED); crq.request_capability.number = cpu_to_be64(1); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); } } else { crq.request_capability.capability = cpu_to_be16(PROMISC_REQUESTED); crq.request_capability.number = cpu_to_be64(0); - atomic_inc(&adapter->running_cap_crqs); + cap_reqs--; ibmvnic_send_crq(adapter, &crq); } + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); } static int pending_scrq(struct ibmvnic_adapter *adapter, @@ -3955,118 +3975,132 @@ static void send_query_map(struct ibmvnic_adapter *adapter) static void send_query_cap(struct ibmvnic_adapter *adapter) { union ibmvnic_crq crq; + int cap_reqs; + + /* We send out 25 QUERY_CAPABILITY CRQs below. Initialize this count + * upfront. When the tasklet receives a response to all of these, it + * can send out the next protocol messaage (REQUEST_CAPABILITY). + */ + cap_reqs = 25; + + atomic_set(&adapter->running_cap_crqs, cap_reqs); - atomic_set(&adapter->running_cap_crqs, 0); memset(&crq, 0, sizeof(crq)); crq.query_capability.first = IBMVNIC_CRQ_CMD; crq.query_capability.cmd = QUERY_CAPABILITY; crq.query_capability.capability = cpu_to_be16(MIN_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_RX_ADD_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TCP_IP_OFFLOAD); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(PROMISC_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MIN_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MTU); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_MULTICAST_FILTERS); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_VLAN_HEADER_INSERTION); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(MAX_TX_SG_ENTRIES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(RX_SG_SUPPORTED); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_COMP_SUB_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_COMP_QUEUES); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RX_BUFADD_Q_PER_RX_COMP_Q); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_TX_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(OPT_RXBA_ENTRIES_PER_SUBCRQ); - atomic_inc(&adapter->running_cap_crqs); ibmvnic_send_crq(adapter, &crq); + cap_reqs--; crq.query_capability.capability = cpu_to_be16(TX_RX_DESC_REQ); - atomic_inc(&adapter->running_cap_crqs); + ibmvnic_send_crq(adapter, &crq); + cap_reqs--; + + /* Keep at end to catch any discrepancy between expected and actual + * CRQs sent. + */ + WARN_ON(cap_reqs != 0); } static void send_query_ip_offload(struct ibmvnic_adapter *adapter) @@ -4371,6 +4405,8 @@ static void handle_request_cap_rsp(union ibmvnic_crq *crq, char *name; atomic_dec(&adapter->running_cap_crqs); + netdev_dbg(adapter->netdev, "Outstanding request-caps: %d\n", + atomic_read(&adapter->running_cap_crqs)); switch (be16_to_cpu(crq->request_capability_rsp.capability)) { case REQ_TX_QUEUES: req_value = &adapter->req_tx_queues; @@ -4887,6 +4923,13 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, adapter->fw_done_rc = -EIO; complete(&adapter->fw_done); } + + /* if we got here during crq-init, retry crq-init */ + if (!completion_done(&adapter->init_done)) { + adapter->init_done_rc = -EAGAIN; + complete(&adapter->init_done); + } + if (!completion_done(&adapter->stats_done)) complete(&adapter->stats_done); if (test_bit(0, &adapter->resetting)) @@ -5041,12 +5084,6 @@ static void ibmvnic_tasklet(struct tasklet_struct *t) ibmvnic_handle_crq(crq, adapter); crq->generic.first = 0; } - - /* remain in tasklet until all - * capabilities responses are received - */ - if (!adapter->wait_capability) - done = true; } /* if capabilities CRQ's were sent in this tasklet, the following * tasklet must wait until all responses are received @@ -5187,6 +5224,9 @@ static int init_crq_queue(struct ibmvnic_adapter *adapter) crq->cur = 0; spin_lock_init(&crq->lock); + /* process any CRQs that were queued before we enabled interrupts */ + tasklet_schedule(&adapter->tasklet); + return retrc; req_irq_failed: @@ -5352,6 +5392,12 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) goto ibmvnic_dev_file_err; netif_carrier_off(netdev); + + adapter->state = VNIC_PROBED; + + adapter->wait_for_reset = false; + adapter->last_reset_time = jiffies; + rc = register_netdev(netdev); if (rc) { dev_err(&dev->dev, "failed to register netdev rc=%d\n", rc); @@ -5359,10 +5405,6 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) } dev_info(&dev->dev, "ibmvnic registered\n"); - adapter->state = VNIC_PROBED; - - adapter->wait_for_reset = false; - adapter->last_reset_time = jiffies; return 0; ibmvnic_register_fail: diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 609e47b8287d1fa8a88cb14d40a0213ad38fc3f4..9295a9a1efc73d4c4d7c26b4ab365cf9f0583fab 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2431,11 +2431,15 @@ static void e100_get_drvinfo(struct net_device *netdev, sizeof(info->bus_info)); } -#define E100_PHY_REGS 0x1C +#define E100_PHY_REGS 0x1D static int e100_get_regs_len(struct net_device *netdev) { struct nic *nic = netdev_priv(netdev); - return 1 + E100_PHY_REGS + sizeof(nic->mem->dump_buf); + + /* We know the number of registers, and the size of the dump buffer. + * Calculate the total size in bytes. + */ + return (1 + E100_PHY_REGS) * sizeof(u32) + sizeof(nic->mem->dump_buf); } static void e100_get_regs(struct net_device *netdev, @@ -2449,14 +2453,18 @@ static void e100_get_regs(struct net_device *netdev, buff[0] = ioread8(&nic->csr->scb.cmd_hi) << 24 | ioread8(&nic->csr->scb.cmd_lo) << 16 | ioread16(&nic->csr->scb.status); - for (i = E100_PHY_REGS; i >= 0; i--) - buff[1 + E100_PHY_REGS - i] = - mdio_read(netdev, nic->mii.phy_id, i); + for (i = 0; i < E100_PHY_REGS; i++) + /* Note that we read the registers in reverse order. This + * ordering is the ABI apparently used by ethtool and other + * applications. + */ + buff[1 + i] = mdio_read(netdev, nic->mii.phy_id, + E100_PHY_REGS - 1 - i); memset(nic->mem->dump_buf, 0, sizeof(nic->mem->dump_buf)); e100_exec_cb(nic, NULL, e100_dump); msleep(10); - memcpy(&buff[2 + E100_PHY_REGS], nic->mem->dump_buf, - sizeof(nic->mem->dump_buf)); + memcpy(&buff[1 + E100_PHY_REGS], nic->mem->dump_buf, + sizeof(nic->mem->dump_buf)); } static void e100_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) @@ -2989,9 +2997,10 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake) struct net_device *netdev = pci_get_drvdata(pdev); struct nic *nic = netdev_priv(netdev); + netif_device_detach(netdev); + if (netif_running(netdev)) e100_down(nic); - netif_device_detach(netdev); if ((nic->flags & wol_magic) | e100_asf(nic)) { /* enable reverse auto-negotiation */ @@ -3008,7 +3017,7 @@ static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake) *enable_wake = false; } - pci_clear_master(pdev); + pci_disable_device(pdev); } static int __e100_power_off(struct pci_dev *pdev, bool wake) @@ -3028,8 +3037,6 @@ static int __maybe_unused e100_suspend(struct device *dev_d) __e100_shutdown(to_pci_dev(dev_d), &wake); - device_wakeup_disable(dev_d); - return 0; } @@ -3037,6 +3044,14 @@ static int __maybe_unused e100_resume(struct device *dev_d) { struct net_device *netdev = dev_get_drvdata(dev_d); struct nic *nic = netdev_priv(netdev); + int err; + + err = pci_enable_device(to_pci_dev(dev_d)); + if (err) { + netdev_err(netdev, "Resume cannot enable PCI device, aborting\n"); + return err; + } + pci_set_master(to_pci_dev(dev_d)); /* disable reverse auto-negotiation */ if (nic->phy == phy_82552_v) { @@ -3048,10 +3063,11 @@ static int __maybe_unused e100_resume(struct device *dev_d) smartspeed & ~(E100_82552_REV_ANEG)); } - netif_device_attach(netdev); if (netif_running(netdev)) e100_up(nic); + netif_device_attach(netdev); + return 0; } diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 5b2143f4b1f85fada39e3509508d7ed31be46abb..3178efd980066c92397807328148c7e37d39c134 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -113,7 +113,8 @@ enum e1000_boards { board_pch2lan, board_pch_lpt, board_pch_spt, - board_pch_cnp + board_pch_cnp, + board_pch_tgp }; struct e1000_ps_page { @@ -499,6 +500,7 @@ extern const struct e1000_info e1000_pch2_info; extern const struct e1000_info e1000_pch_lpt_info; extern const struct e1000_info e1000_pch_spt_info; extern const struct e1000_info e1000_pch_cnp_info; +extern const struct e1000_info e1000_pch_tgp_info; extern const struct e1000_info e1000_es2_info; void e1000e_ptp_init(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 854c585de2e13b8f28b528afc35f374bada0559d..15b1503d5b6ca9cd4aa7f8a7f28a2922aa90cca1 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -4134,9 +4134,9 @@ static s32 e1000_validate_nvm_checksum_ich8lan(struct e1000_hw *hw) return ret_val; if (!(data & valid_csum_mask)) { - e_dbg("NVM Checksum Invalid\n"); + e_dbg("NVM Checksum valid bit not set\n"); - if (hw->mac.type < e1000_pch_cnp) { + if (hw->mac.type < e1000_pch_tgp) { data |= valid_csum_mask; ret_val = e1000_write_nvm(hw, word, 1, &data); if (ret_val) @@ -4811,7 +4811,7 @@ static s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; - u32 ctrl_ext, txdctl, snoop; + u32 ctrl_ext, txdctl, snoop, fflt_dbg; s32 ret_val; u16 i; @@ -4870,6 +4870,15 @@ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) snoop = (u32)~(PCIE_NO_SNOOP_ALL); e1000e_set_pcie_no_snoop(hw, snoop); + /* Enable workaround for packet loss issue on TGP PCH + * Do not gate DMA clock from the modPHY block + */ + if (mac->type >= e1000_pch_tgp) { + fflt_dbg = er32(FFLT_DBG); + fflt_dbg |= E1000_FFLT_DBG_DONT_GATE_WAKE_DMA_CLK; + ew32(FFLT_DBG, fflt_dbg); + } + ctrl_ext = er32(CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ew32(CTRL_EXT, ctrl_ext); @@ -5990,3 +5999,23 @@ const struct e1000_info e1000_pch_cnp_info = { .phy_ops = &ich8_phy_ops, .nvm_ops = &spt_nvm_ops, }; + +const struct e1000_info e1000_pch_tgp_info = { + .mac = e1000_pch_tgp, + .flags = FLAG_IS_ICH + | FLAG_HAS_WOL + | FLAG_HAS_HW_TIMESTAMP + | FLAG_HAS_CTRLEXT_ON_LOAD + | FLAG_HAS_AMT + | FLAG_HAS_FLASH + | FLAG_HAS_JUMBO_FRAMES + | FLAG_APME_IN_WUC, + .flags2 = FLAG2_HAS_PHY_STATS + | FLAG2_HAS_EEE, + .pba = 26, + .max_hw_frame_size = 9022, + .get_variants = e1000_get_variants_ich8lan, + .mac_ops = &ich8_mac_ops, + .phy_ops = &ich8_phy_ops, + .nvm_ops = &spt_nvm_ops, +}; diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index e757896287eba1256d3838a7db8af4048c4b2347..8f2a8f4ce0ee4ec81a145def3b8c16b512fcc3c7 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -286,6 +286,9 @@ /* Proprietary Latency Tolerance Reporting PCI Capability */ #define E1000_PCI_LTR_CAP_LPT 0xA8 +/* Don't gate wake DMA clock */ +#define E1000_FFLT_DBG_DONT_GATE_WAKE_DMA_CLK 0x1000 + void e1000e_write_protect_nvm_ich8lan(struct e1000_hw *hw); void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, bool state); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 361b8d0bd78d7fcd9a5544fadd4cdbc6f0245398..d0c4de0231120ff03caa5a043eb797a32764ce2d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -50,6 +50,7 @@ static const struct e1000_info *e1000_info_tbl[] = { [board_pch_lpt] = &e1000_pch_lpt_info, [board_pch_spt] = &e1000_pch_spt_info, [board_pch_cnp] = &e1000_pch_cnp_info, + [board_pch_tgp] = &e1000_pch_tgp_info, }; struct e1000_reg_info { @@ -7837,20 +7838,20 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V11), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM12), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V12), board_pch_spt }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM13), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V13), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM14), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM13), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V13), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM14), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_tgp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index fe1258778cbc4b39fa95167393267f188ba00d8d..effdc3361266f11932d50925d35bd0ecd93dd7b4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -159,6 +159,7 @@ enum i40e_vsi_state_t { __I40E_VSI_OVERFLOW_PROMISC, __I40E_VSI_REINIT_REQUESTED, __I40E_VSI_DOWN_REQUESTED, + __I40E_VSI_RELEASING, /* This must be last as it determines the size of the BITMAP */ __I40E_VSI_STATE_SIZE__, }; @@ -171,7 +172,6 @@ enum i40e_interrupt_policy { struct i40e_lump_tracking { u16 num_entries; - u16 search_hint; u16 list[0]; #define I40E_PILE_VALID_BIT 0x8000 #define I40E_IWARP_IRQ_PILE_ID (I40E_PILE_VALID_BIT - 2) @@ -754,12 +754,12 @@ struct i40e_vsi { struct rtnl_link_stats64 net_stats_offsets; struct i40e_eth_stats eth_stats; struct i40e_eth_stats eth_stats_offsets; - u32 tx_restart; - u32 tx_busy; + u64 tx_restart; + u64 tx_busy; u64 tx_linearize; u64 tx_force_wb; - u32 rx_buf_failed; - u32 rx_page_failed; + u64 rx_buf_failed; + u64 rx_page_failed; /* These are containers of ring pointers, allocated at run-time */ struct i40e_ring **rx_rings; @@ -1144,6 +1144,7 @@ void i40e_ptp_save_hw_time(struct i40e_pf *pf); void i40e_ptp_restore_hw_time(struct i40e_pf *pf); void i40e_ptp_init(struct i40e_pf *pf); void i40e_ptp_stop(struct i40e_pf *pf); +int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset); int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi); i40e_status i40e_get_partition_bw_setting(struct i40e_pf *pf); i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index d627b59ad446523983f3646662ab52921221880d..989d5c7263d7cc1e32a16d5a3aad2499a54bbf19 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -240,7 +240,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid) (unsigned long int)vsi->net_stats_offsets.rx_compressed, (unsigned long int)vsi->net_stats_offsets.tx_compressed); dev_info(&pf->pdev->dev, - " tx_restart = %d, tx_busy = %d, rx_buf_failed = %d, rx_page_failed = %d\n", + " tx_restart = %llu, tx_busy = %llu, rx_buf_failed = %llu, rx_page_failed = %llu\n", vsi->tx_restart, vsi->tx_busy, vsi->rx_buf_failed, vsi->rx_page_failed); rcu_read_lock(); @@ -553,6 +553,14 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n, dev_info(&pf->pdev->dev, "vsi %d not found\n", vsi_seid); return; } + if (vsi->type != I40E_VSI_MAIN && + vsi->type != I40E_VSI_FDIR && + vsi->type != I40E_VSI_VMDQ2) { + dev_info(&pf->pdev->dev, + "vsi %d type %d descriptor rings not available\n", + vsi_seid, vsi->type); + return; + } if (type == RING_TYPE_XDP && !i40e_enabled_xdp_vsi(vsi)) { dev_info(&pf->pdev->dev, "XDP not enabled on VSI %d\n", vsi_seid); return; @@ -734,10 +742,8 @@ static void i40e_dbg_dump_vf(struct i40e_pf *pf, int vf_id) vsi = pf->vsi[vf->lan_vsi_idx]; dev_info(&pf->pdev->dev, "vf %2d: VSI id=%d, seid=%d, qps=%d\n", vf_id, vf->lan_vsi_id, vsi->seid, vf->num_queue_pairs); - dev_info(&pf->pdev->dev, " num MDD=%lld, invalid msg=%lld, valid msg=%lld\n", - vf->num_mdd_events, - vf->num_invalid_msgs, - vf->num_valid_msgs); + dev_info(&pf->pdev->dev, " num MDD=%lld\n", + vf->num_mdd_events); } else { dev_info(&pf->pdev->dev, "invalid VF id %d\n", vf_id); } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index bc648ce0743c7c7dce3b87cbcbb51582b54fed2b..bd18a780a0008de16a0f70428654389f2634734c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -98,6 +98,24 @@ MODULE_LICENSE("GPL v2"); static struct workqueue_struct *i40e_wq; +static void netdev_hw_addr_refcnt(struct i40e_mac_filter *f, + struct net_device *netdev, int delta) +{ + struct netdev_hw_addr *ha; + + if (!f || !netdev) + return; + + netdev_for_each_mc_addr(ha, netdev) { + if (ether_addr_equal(ha->addr, f->macaddr)) { + ha->refcount += delta; + if (ha->refcount <= 0) + ha->refcount = 1; + break; + } + } +} + /** * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure @@ -177,10 +195,6 @@ int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem) * @id: an owner id to stick on the items assigned * * Returns the base item index of the lump, or negative for error - * - * The search_hint trick and lack of advanced fit-finding only work - * because we're highly likely to have all the same size lump requests. - * Linear search time and any fragmentation should be minimal. **/ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, u16 needed, u16 id) @@ -195,8 +209,21 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, return -EINVAL; } - /* start the linear search with an imperfect hint */ - i = pile->search_hint; + /* Allocate last queue in the pile for FDIR VSI queue + * so it doesn't fragment the qp_pile + */ + if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) { + if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) { + dev_err(&pf->pdev->dev, + "Cannot allocate queue %d for I40E_VSI_FDIR\n", + pile->num_entries - 1); + return -ENOMEM; + } + pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT; + return pile->num_entries - 1; + } + + i = 0; while (i < pile->num_entries) { /* skip already allocated entries */ if (pile->list[i] & I40E_PILE_VALID_BIT) { @@ -215,7 +242,6 @@ static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile, for (j = 0; j < needed; j++) pile->list[i+j] = id | I40E_PILE_VALID_BIT; ret = i; - pile->search_hint = i + j; break; } @@ -238,7 +264,7 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) { int valid_id = (id | I40E_PILE_VALID_BIT); int count = 0; - int i; + u16 i; if (!pile || index >= pile->num_entries) return -EINVAL; @@ -250,8 +276,6 @@ static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id) count++; } - if (count && index < pile->search_hint) - pile->search_hint = index; return count; } @@ -753,9 +777,9 @@ static void i40e_update_vsi_stats(struct i40e_vsi *vsi) struct rtnl_link_stats64 *ns; /* netdev stats */ struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */ - u32 tx_restart, tx_busy; + u64 tx_restart, tx_busy; struct i40e_ring *p; - u32 rx_page, rx_buf; + u64 rx_page, rx_buf; u64 bytes, packets; unsigned int start; u64 tx_linearize; @@ -1789,6 +1813,7 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, bool is_add) { struct i40e_pf *pf = vsi->back; + u16 num_tc_qps = 0; u16 sections = 0; u8 netdev_tc = 0; u16 numtc = 1; @@ -1796,13 +1821,33 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, u8 offset; u16 qmap; int i; - u16 num_tc_qps = 0; sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID; offset = 0; + /* zero out queue mapping, it will get updated on the end of the function */ + memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping)); + + if (vsi->type == I40E_VSI_MAIN) { + /* This code helps add more queue to the VSI if we have + * more cores than RSS can support, the higher cores will + * be served by ATR or other filters. Furthermore, the + * non-zero req_queue_pairs says that user requested a new + * queue count via ethtool's set_channels, so use this + * value for queues distribution across traffic classes + */ + if (vsi->req_queue_pairs > 0) + vsi->num_queue_pairs = vsi->req_queue_pairs; + else if (pf->flags & I40E_FLAG_MSIX_ENABLED) + vsi->num_queue_pairs = pf->num_lan_msix; + } /* Number of queues per enabled TC */ - num_tc_qps = vsi->alloc_queue_pairs; + if (vsi->type == I40E_VSI_MAIN || + (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0)) + num_tc_qps = vsi->num_queue_pairs; + else + num_tc_qps = vsi->alloc_queue_pairs; + if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) { /* Find numtc from enabled TC bitmap */ for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { @@ -1880,15 +1925,11 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi, } ctxt->info.tc_mapping[i] = cpu_to_le16(qmap); } - - /* Set actual Tx/Rx queue pairs */ - vsi->num_queue_pairs = offset; - if ((vsi->type == I40E_VSI_MAIN) && (numtc == 1)) { - if (vsi->req_queue_pairs > 0) - vsi->num_queue_pairs = vsi->req_queue_pairs; - else if (pf->flags & I40E_FLAG_MSIX_ENABLED) - vsi->num_queue_pairs = pf->num_lan_msix; - } + /* Do not change previously set num_queue_pairs for PFs and VFs*/ + if ((vsi->type == I40E_VSI_MAIN && numtc != 1) || + (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) || + (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV)) + vsi->num_queue_pairs = offset; /* Scheduler section valid can only be set for ADD VSI */ if (is_add) { @@ -2018,6 +2059,7 @@ static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi, hlist_for_each_entry_safe(new, h, from, hlist) { /* We can simply free the wrapper structure */ hlist_del(&new->hlist); + netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); kfree(new); } } @@ -2365,6 +2407,10 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) &tmp_add_list, &tmp_del_list, vlan_filters); + + hlist_for_each_entry(new, &tmp_add_list, hlist) + netdev_hw_addr_refcnt(new->f, vsi->netdev, 1); + if (retval) goto err_no_memory_locked; @@ -2497,6 +2543,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) if (new->f->state == I40E_FILTER_NEW) new->f->state = new->state; hlist_del(&new->hlist); + netdev_hw_addr_refcnt(new->f, vsi->netdev, -1); kfree(new); } spin_unlock_bh(&vsi->mac_filter_hash_lock); @@ -2622,7 +2669,8 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) for (v = 0; v < pf->num_alloc_vsi; v++) { if (pf->vsi[v] && - (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED)) { + (pf->vsi[v]->flags & I40E_VSI_FLAG_FILTER_CHANGED) && + !test_bit(__I40E_VSI_RELEASING, pf->vsi[v]->state)) { int ret = i40e_sync_vsi_filters(pf->vsi[v]); if (ret) { @@ -4839,7 +4887,8 @@ static void i40e_clear_interrupt_scheme(struct i40e_pf *pf) { int i; - i40e_free_misc_vector(pf); + if (test_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) + i40e_free_misc_vector(pf); i40e_put_lump(pf->irq_pile, pf->iwarp_base_vector, I40E_IWARP_IRQ_PILE_ID); @@ -5392,6 +5441,58 @@ static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi, sizeof(vsi->info.tc_mapping)); } +/** + * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI + * @vsi: the VSI being reconfigured + * @vsi_offset: offset from main VF VSI + */ +int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset) +{ + struct i40e_vsi_context ctxt = {}; + struct i40e_pf *pf; + struct i40e_hw *hw; + int ret; + + if (!vsi) + return I40E_ERR_PARAM; + pf = vsi->back; + hw = &pf->hw; + + ctxt.seid = vsi->seid; + ctxt.pf_num = hw->pf_id; + ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id + vsi_offset; + ctxt.uplink_seid = vsi->uplink_seid; + ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL; + ctxt.flags = I40E_AQ_VSI_TYPE_VF; + ctxt.info = vsi->info; + + i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc, + false); + if (vsi->reconfig_rss) { + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + ret = i40e_vsi_config_rss(vsi); + if (ret) { + dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n"); + return ret; + } + vsi->reconfig_rss = false; + } + + ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); + if (ret) { + dev_info(&pf->pdev->dev, "Update vsi config failed, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + /* update the local VSI info with updated queue map */ + i40e_vsi_update_queue_map(vsi, &ctxt); + vsi->info.valid_sections = 0; + + return ret; +} + /** * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map * @vsi: VSI to be configured @@ -5682,24 +5783,6 @@ static void i40e_remove_queue_channels(struct i40e_vsi *vsi) INIT_LIST_HEAD(&vsi->ch_list); } -/** - * i40e_is_any_channel - channel exist or not - * @vsi: ptr to VSI to which channels are associated with - * - * Returns true or false if channel(s) exist for associated VSI or not - **/ -static bool i40e_is_any_channel(struct i40e_vsi *vsi) -{ - struct i40e_channel *ch, *ch_tmp; - - list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { - if (ch->initialized) - return true; - } - - return false; -} - /** * i40e_get_max_queues_for_channel * @vsi: ptr to VSI to which channels are associated with @@ -6205,26 +6288,15 @@ int i40e_create_queue_channel(struct i40e_vsi *vsi, /* By default we are in VEPA mode, if this is the first VF/VMDq * VSI to be added switch to VEB mode. */ - if ((!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) || - (!i40e_is_any_channel(vsi))) { - if (!is_power_of_2(vsi->tc_config.tc_info[0].qcount)) { - dev_dbg(&pf->pdev->dev, - "Failed to create channel. Override queues (%u) not power of 2\n", - vsi->tc_config.tc_info[0].qcount); - return -EINVAL; - } - if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { - pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; + if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) { + pf->flags |= I40E_FLAG_VEB_MODE_ENABLED; - if (vsi->type == I40E_VSI_MAIN) { - if (pf->flags & I40E_FLAG_TC_MQPRIO) - i40e_do_reset(pf, I40E_PF_RESET_FLAG, - true); - else - i40e_do_reset_safe(pf, - I40E_PF_RESET_FLAG); - } + if (vsi->type == I40E_VSI_MAIN) { + if (pf->flags & I40E_FLAG_TC_MQPRIO) + i40e_do_reset(pf, I40E_PF_RESET_FLAG, true); + else + i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG); } /* now onwards for main VSI, number of queues will be value * of TC0's queue count @@ -7551,12 +7623,20 @@ config_tc: vsi->seid); need_reset = true; goto exit; - } else { - dev_info(&vsi->back->pdev->dev, - "Setup channel (id:%u) utilizing num_queues %d\n", - vsi->seid, vsi->tc_config.tc_info[0].qcount); + } else if (enabled_tc && + (!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) { + netdev_info(netdev, + "Failed to create channel. Override queues (%u) not power of 2\n", + vsi->tc_config.tc_info[0].qcount); + ret = -EINVAL; + need_reset = true; + goto exit; } + dev_info(&vsi->back->pdev->dev, + "Setup channel (id:%u) utilizing num_queues %d\n", + vsi->seid, vsi->tc_config.tc_info[0].qcount); + if (pf->flags & I40E_FLAG_TC_MQPRIO) { if (vsi->mqprio_qopt.max_rate[0]) { u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; @@ -8121,9 +8201,8 @@ static int i40e_configure_clsflower(struct i40e_vsi *vsi, err = i40e_add_del_cloud_filter(vsi, filter, true); if (err) { - dev_err(&pf->pdev->dev, - "Failed to add cloud filter, err %s\n", - i40e_stat_str(&pf->hw, err)); + dev_err(&pf->pdev->dev, "Failed to add cloud filter, err %d\n", + err); goto err; } @@ -8308,6 +8387,27 @@ int i40e_open(struct net_device *netdev) return 0; } +/** + * i40e_netif_set_realnum_tx_rx_queues - Update number of tx/rx queues + * @vsi: vsi structure + * + * This updates netdev's number of tx/rx queues + * + * Returns status of setting tx/rx queues + **/ +static int i40e_netif_set_realnum_tx_rx_queues(struct i40e_vsi *vsi) +{ + int ret; + + ret = netif_set_real_num_rx_queues(vsi->netdev, + vsi->num_queue_pairs); + if (ret) + return ret; + + return netif_set_real_num_tx_queues(vsi->netdev, + vsi->num_queue_pairs); +} + /** * i40e_vsi_open - * @vsi: the VSI to open @@ -8344,13 +8444,7 @@ int i40e_vsi_open(struct i40e_vsi *vsi) goto err_setup_rx; /* Notify the stack of the actual queue counts. */ - err = netif_set_real_num_tx_queues(vsi->netdev, - vsi->num_queue_pairs); - if (err) - goto err_set_queues; - - err = netif_set_real_num_rx_queues(vsi->netdev, - vsi->num_queue_pairs); + err = i40e_netif_set_realnum_tx_rx_queues(vsi); if (err) goto err_set_queues; @@ -9662,7 +9756,7 @@ static int i40e_get_capabilities(struct i40e_pf *pf, if (pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) { /* retry with a larger buffer */ buf_len = data_size; - } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK) { + } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK || err) { dev_info(&pf->pdev->dev, "capability discovery failed, err %s aq_err %s\n", i40e_stat_str(&pf->hw, err), @@ -10042,15 +10136,9 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) } i40e_get_oem_version(&pf->hw); - if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) && - ((hw->aq.fw_maj_ver == 4 && hw->aq.fw_min_ver <= 33) || - hw->aq.fw_maj_ver < 4) && hw->mac.type == I40E_MAC_XL710) { - /* The following delay is necessary for 4.33 firmware and older - * to recover after EMP reset. 200 ms should suffice but we - * put here 300 ms to be sure that FW is ready to operate - * after reset. - */ - mdelay(300); + if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) { + /* The following delay is necessary for firmware update. */ + mdelay(1000); } /* re-verify the eeprom if we just had an EMP reset */ @@ -11239,7 +11327,6 @@ static int i40e_init_interrupt_scheme(struct i40e_pf *pf) return -ENOMEM; pf->irq_pile->num_entries = vectors; - pf->irq_pile->search_hint = 0; /* track first vector for misc interrupts, ignore return */ (void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1); @@ -12042,7 +12129,6 @@ static int i40e_sw_init(struct i40e_pf *pf) goto sw_init_done; } pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp; - pf->qp_pile->search_hint = 0; pf->tx_timeout_recovery_level = 1; @@ -13307,7 +13393,7 @@ int i40e_vsi_release(struct i40e_vsi *vsi) dev_info(&pf->pdev->dev, "Can't remove PF VSI\n"); return -ENODEV; } - + set_bit(__I40E_VSI_RELEASING, vsi->state); uplink_seid = vsi->uplink_seid; if (vsi->type != I40E_VSI_SRIOV) { if (vsi->netdev_registered) { @@ -13637,6 +13723,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type, case I40E_VSI_MAIN: case I40E_VSI_VMDQ2: ret = i40e_config_netdev(vsi); + if (ret) + goto err_netdev; + ret = i40e_netif_set_realnum_tx_rx_queues(vsi); if (ret) goto err_netdev; ret = register_netdev(vsi->netdev); @@ -14914,8 +15003,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (hw->aq.api_maj_ver == I40E_FW_API_VERSION_MAJOR && hw->aq.api_min_ver > I40E_FW_MINOR_VERSION(hw)) - dev_info(&pdev->dev, - "The driver for the device detected a newer version of the NVM image v%u.%u than expected v%u.%u. Please install the most recent version of the network driver.\n", + dev_dbg(&pdev->dev, + "The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n", hw->aq.api_maj_ver, hw->aq.api_min_ver, I40E_FW_API_VERSION_MAJOR, diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h index 564df22f3f4639263b94bf7cd0374dbc7961cb85..8335f151ceefc9a0c9ee94d46fcdcfaabf7713a3 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_register.h +++ b/drivers/net/ethernet/intel/i40e/i40e_register.h @@ -279,6 +279,9 @@ #define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */ /* Reset: VFR */ #define I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT 1 #define I40E_VFINT_DYN_CTLN_CLEARPBA_MASK I40E_MASK(0x1, I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT) +#define I40E_VFINT_ICR0_ADMINQ_SHIFT 30 +#define I40E_VFINT_ICR0_ADMINQ_MASK I40E_MASK(0x1, I40E_VFINT_ICR0_ADMINQ_SHIFT) +#define I40E_VFINT_ICR0_ENA(_VF) (0x0002C000 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */ #define I40E_VPINT_AEQCTL(_VF) (0x0002B800 + ((_VF) * 4)) /* _i=0...127 */ /* Reset: CORER */ #define I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT 0 #define I40E_VPINT_AEQCTL_ITR_INDX_SHIFT 11 diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index a02167cce81e1ad46268ae48c2b9591147bec939..9181e007e0392e7150d6d01b39fc808cf74a86f7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -130,17 +130,18 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf) /***********************misc routines*****************************/ /** - * i40e_vc_disable_vf + * i40e_vc_reset_vf * @vf: pointer to the VF info - * - * Disable the VF through a SW reset. + * @notify_vf: notify vf about reset or not + * Reset VF handler. **/ -static inline void i40e_vc_disable_vf(struct i40e_vf *vf) +static void i40e_vc_reset_vf(struct i40e_vf *vf, bool notify_vf) { struct i40e_pf *pf = vf->pf; int i; - i40e_vc_notify_vf_reset(vf); + if (notify_vf) + i40e_vc_notify_vf_reset(vf); /* We want to ensure that an actual reset occurs initiated after this * function was called. However, we do not want to wait forever, so @@ -158,9 +159,14 @@ static inline void i40e_vc_disable_vf(struct i40e_vf *vf) usleep_range(10000, 20000); } - dev_warn(&vf->pf->pdev->dev, - "Failed to initiate reset for VF %d after 200 milliseconds\n", - vf->vf_id); + if (notify_vf) + dev_warn(&vf->pf->pdev->dev, + "Failed to initiate reset for VF %d after 200 milliseconds\n", + vf->vf_id); + else + dev_dbg(&vf->pf->pdev->dev, + "Failed to initiate reset for VF %d after 200 milliseconds\n", + vf->vf_id); } /** @@ -621,14 +627,13 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, u16 vsi_queue_id, struct virtchnl_rxq_info *info) { + u16 pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx]; struct i40e_hw *hw = &pf->hw; struct i40e_hmc_obj_rxq rx_ctx; - u16 pf_queue_id; int ret = 0; - pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); - /* clear the context structure first */ memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq)); @@ -666,6 +671,10 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, } rx_ctx.rxmax = info->max_pkt_size; + /* if port VLAN is configured increase the max packet size */ + if (vsi->info.pvid) + rx_ctx.rxmax += VLAN_HLEN; + /* enable 32bytes desc always */ rx_ctx.dsize = 1; @@ -1314,6 +1323,32 @@ static i40e_status i40e_config_vf_promiscuous_mode(struct i40e_vf *vf, return aq_ret; } +/** + * i40e_sync_vfr_reset + * @hw: pointer to hw struct + * @vf_id: VF identifier + * + * Before trigger hardware reset, we need to know if no other process has + * reserved the hardware for any reset operations. This check is done by + * examining the status of the RSTAT1 register used to signal the reset. + **/ +static int i40e_sync_vfr_reset(struct i40e_hw *hw, int vf_id) +{ + u32 reg; + int i; + + for (i = 0; i < I40E_VFR_WAIT_COUNT; i++) { + reg = rd32(hw, I40E_VFINT_ICR0_ENA(vf_id)) & + I40E_VFINT_ICR0_ADMINQ_MASK; + if (reg) + return 0; + + usleep_range(100, 200); + } + + return -EAGAIN; +} + /** * i40e_trigger_vf_reset * @vf: pointer to the VF structure @@ -1328,9 +1363,11 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr) struct i40e_pf *pf = vf->pf; struct i40e_hw *hw = &pf->hw; u32 reg, reg_idx, bit_idx; + bool vf_active; + u32 radq; /* warn the VF */ - clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); + vf_active = test_and_clear_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); /* Disable VF's configuration API during reset. The flag is re-enabled * in i40e_alloc_vf_res(), when it's safe again to access VF's VSI. @@ -1344,7 +1381,19 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr) * just need to clean up, so don't hit the VFRTRIG register. */ if (!flr) { - /* reset VF using VPGEN_VFRTRIG reg */ + /* Sync VFR reset before trigger next one */ + radq = rd32(hw, I40E_VFINT_ICR0_ENA(vf->vf_id)) & + I40E_VFINT_ICR0_ADMINQ_MASK; + if (vf_active && !radq) + /* waiting for finish reset by virtual driver */ + if (i40e_sync_vfr_reset(hw, vf->vf_id)) + dev_info(&pf->pdev->dev, + "Reset VF %d never finished\n", + vf->vf_id); + + /* Reset VF using VPGEN_VFRTRIG reg. It is also setting + * in progress state in rstat1 register. + */ reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id)); reg |= I40E_VPGEN_VFRTRIG_VFSWR_MASK; wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg); @@ -1840,25 +1889,6 @@ static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode, hw = &pf->hw; abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id; - /* single place to detect unsuccessful return values */ - if (v_retval) { - vf->num_invalid_msgs++; - dev_info(&pf->pdev->dev, "VF %d failed opcode %d, retval: %d\n", - vf->vf_id, v_opcode, v_retval); - if (vf->num_invalid_msgs > - I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) { - dev_err(&pf->pdev->dev, - "Number of invalid messages exceeded for VF %d\n", - vf->vf_id); - dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n"); - set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states); - } - } else { - vf->num_valid_msgs++; - /* reset the invalid counter, if a valid message is received. */ - vf->num_invalid_msgs = 0; - } - aq_ret = i40e_aq_send_msg_to_vf(hw, abs_vf_id, v_opcode, v_retval, msg, msglen, NULL); if (aq_ret) { @@ -1886,6 +1916,32 @@ static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf, return i40e_vc_send_msg_to_vf(vf, opcode, retval, NULL, 0); } +/** + * i40e_sync_vf_state + * @vf: pointer to the VF info + * @state: VF state + * + * Called from a VF message to synchronize the service with a potential + * VF reset state + **/ +static bool i40e_sync_vf_state(struct i40e_vf *vf, enum i40e_vf_states state) +{ + int i; + + /* When handling some messages, it needs VF state to be set. + * It is possible that this flag is cleared during VF reset, + * so there is a need to wait until the end of the reset to + * handle the request message correctly. + */ + for (i = 0; i < I40E_VF_STATE_WAIT_COUNT; i++) { + if (test_bit(state, &vf->vf_states)) + return true; + usleep_range(10000, 20000); + } + + return test_bit(state, &vf->vf_states); +} + /** * i40e_vc_get_version_msg * @vf: pointer to the VF info @@ -1946,7 +2002,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) size_t len = 0; int ret; - if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_INIT)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -2051,20 +2107,6 @@ err: return ret; } -/** - * i40e_vc_reset_vf_msg - * @vf: pointer to the VF info - * - * called from the VF to reset itself, - * unlike other virtchnl messages, PF driver - * doesn't send the response back to the VF - **/ -static void i40e_vc_reset_vf_msg(struct i40e_vf *vf) -{ - if (test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) - i40e_reset_vf(vf, false); -} - /** * i40e_vc_config_promiscuous_mode_msg * @vf: pointer to the VF info @@ -2082,7 +2124,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, u8 *msg) bool allmulti = false; bool alluni = false; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err_out; } @@ -2163,13 +2205,14 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) struct virtchnl_vsi_queue_config_info *qci = (struct virtchnl_vsi_queue_config_info *)msg; struct virtchnl_queue_pair_info *qpi; - struct i40e_pf *pf = vf->pf; u16 vsi_id, vsi_queue_id = 0; - u16 num_qps_all = 0; + struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; int i, j = 0, idx = 0; + struct i40e_vsi *vsi; + u16 num_qps_all = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2256,9 +2299,15 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) pf->vsi[vf->lan_vsi_idx]->num_queue_pairs = qci->num_queue_pairs; } else { - for (i = 0; i < vf->num_tc; i++) - pf->vsi[vf->ch[i].vsi_idx]->num_queue_pairs = - vf->ch[i].num_qps; + for (i = 0; i < vf->num_tc; i++) { + vsi = pf->vsi[vf->ch[i].vsi_idx]; + vsi->num_queue_pairs = vf->ch[i].num_qps; + + if (i40e_update_adq_vsi_queues(vsi, i)) { + aq_ret = I40E_ERR_CONFIG; + goto error_param; + } + } } error_param: @@ -2312,7 +2361,7 @@ static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2484,7 +2533,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2516,6 +2565,59 @@ error_param: aq_ret); } +/** + * i40e_check_enough_queue - find big enough queue number + * @vf: pointer to the VF info + * @needed: the number of items needed + * + * Returns the base item index of the queue, or negative for error + **/ +static int i40e_check_enough_queue(struct i40e_vf *vf, u16 needed) +{ + unsigned int i, cur_queues, more, pool_size; + struct i40e_lump_tracking *pile; + struct i40e_pf *pf = vf->pf; + struct i40e_vsi *vsi; + + vsi = pf->vsi[vf->lan_vsi_idx]; + cur_queues = vsi->alloc_queue_pairs; + + /* if current allocated queues are enough for need */ + if (cur_queues >= needed) + return vsi->base_queue; + + pile = pf->qp_pile; + if (cur_queues > 0) { + /* if the allocated queues are not zero + * just check if there are enough queues for more + * behind the allocated queues. + */ + more = needed - cur_queues; + for (i = vsi->base_queue + cur_queues; + i < pile->num_entries; i++) { + if (pile->list[i] & I40E_PILE_VALID_BIT) + break; + + if (more-- == 1) + /* there is enough */ + return vsi->base_queue; + } + } + + pool_size = 0; + for (i = 0; i < pile->num_entries; i++) { + if (pile->list[i] & I40E_PILE_VALID_BIT) { + pool_size = 0; + continue; + } + if (needed <= ++pool_size) + /* there is enough */ + return i; + } + + return -ENOMEM; +} + /** * i40e_vc_request_queues_msg * @vf: pointer to the VF info @@ -2534,7 +2636,7 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) u8 cur_pairs = vf->num_queue_pairs; struct i40e_pf *pf = vf->pf; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) return -EINVAL; if (req_pairs > I40E_MAX_VF_QUEUES) { @@ -2550,11 +2652,16 @@ static int i40e_vc_request_queues_msg(struct i40e_vf *vf, u8 *msg) req_pairs - cur_pairs, pf->queues_left); vfres->num_queue_pairs = pf->queues_left + cur_pairs; + } else if (i40e_check_enough_queue(vf, req_pairs) < 0) { + dev_warn(&pf->pdev->dev, + "VF %d requested %d more queues, but there is not enough for it.\n", + vf->vf_id, + req_pairs - cur_pairs); + vfres->num_queue_pairs = cur_pairs; } else { /* successful request */ vf->num_req_queues = req_pairs; - i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + i40e_vc_reset_vf(vf, true); return 0; } @@ -2580,7 +2687,7 @@ static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg) memset(&stats, 0, sizeof(struct i40e_eth_stats)); - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2697,7 +2804,7 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) i40e_status ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -2748,8 +2855,8 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg) error_param: /* send the response to the VF */ - return i40e_vc_send_resp_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR, - ret); + return i40e_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR, + ret, NULL, 0); } /** @@ -2769,7 +2876,7 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg) i40e_status ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, al->vsi_id)) { ret = I40E_ERR_PARAM; goto error_param; @@ -2913,7 +3020,7 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) { aq_ret = I40E_ERR_PARAM; goto error_param; @@ -3033,9 +3140,9 @@ static int i40e_vc_config_rss_key(struct i40e_vf *vf, u8 *msg) struct i40e_vsi *vsi = NULL; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vrk->vsi_id) || - (vrk->key_len != I40E_HKEY_ARRAY_SIZE)) { + vrk->key_len != I40E_HKEY_ARRAY_SIZE) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3064,9 +3171,9 @@ static int i40e_vc_config_rss_lut(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; u16 i; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states) || + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE) || !i40e_vc_isvalid_vsi_id(vf, vrl->vsi_id) || - (vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE)) { + vrl->lut_entries != I40E_VF_HLUT_ARRAY_SIZE) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3099,7 +3206,7 @@ static int i40e_vc_get_rss_hena(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int len = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3135,7 +3242,7 @@ static int i40e_vc_set_rss_hena(struct i40e_vf *vf, u8 *msg) struct i40e_hw *hw = &pf->hw; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3160,7 +3267,7 @@ static int i40e_vc_enable_vlan_stripping(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; struct i40e_vsi *vsi; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3186,7 +3293,7 @@ static int i40e_vc_disable_vlan_stripping(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; struct i40e_vsi *vsi; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3413,7 +3520,7 @@ static int i40e_vc_del_cloud_filter(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i, ret; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3544,7 +3651,7 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; int i, ret; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err_out; } @@ -3653,7 +3760,7 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) i40e_status aq_ret = 0; u64 speed = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3760,15 +3867,9 @@ static int i40e_vc_add_qch_msg(struct i40e_vf *vf, u8 *msg) /* set this flag only after making sure all inputs are sane */ vf->adq_enabled = true; - /* num_req_queues is set when user changes number of queues via ethtool - * and this causes issue for default VSI(which depends on this variable) - * when ADq is enabled, hence reset it. - */ - vf->num_req_queues = 0; /* reset the VF in order to allocate resources */ - i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + i40e_vc_reset_vf(vf, true); return I40E_SUCCESS; @@ -3788,7 +3889,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) struct i40e_pf *pf = vf->pf; i40e_status aq_ret = 0; - if (!test_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states)) { + if (!i40e_sync_vf_state(vf, I40E_VF_STATE_ACTIVE)) { aq_ret = I40E_ERR_PARAM; goto err; } @@ -3808,8 +3909,7 @@ static int i40e_vc_del_qch_msg(struct i40e_vf *vf, u8 *msg) } /* reset the VF in order to allocate resources */ - i40e_vc_notify_vf_reset(vf); - i40e_reset_vf(vf, false); + i40e_vc_reset_vf(vf, true); return I40E_SUCCESS; @@ -3871,7 +3971,7 @@ int i40e_vc_process_vf_msg(struct i40e_pf *pf, s16 vf_id, u32 v_opcode, i40e_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_RESET_VF: - i40e_vc_reset_vf_msg(vf); + i40e_vc_reset_vf(vf, false); ret = 0; break; case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: @@ -4125,7 +4225,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) /* Force the VF interface down so it has to bring up with new MAC * address */ - i40e_vc_disable_vf(vf); + i40e_vc_reset_vf(vf, true); dev_info(&pf->pdev->dev, "Bring down and up the VF interface to make this change effective.\n"); error_param: @@ -4133,34 +4233,6 @@ error_param: return ret; } -/** - * i40e_vsi_has_vlans - True if VSI has configured VLANs - * @vsi: pointer to the vsi - * - * Check if a VSI has configured any VLANs. False if we have a port VLAN or if - * we have no configured VLANs. Do not call while holding the - * mac_filter_hash_lock. - */ -static bool i40e_vsi_has_vlans(struct i40e_vsi *vsi) -{ - bool have_vlans; - - /* If we have a port VLAN, then the VSI cannot have any VLANs - * configured, as all MAC/VLAN filters will be assigned to the PVID. - */ - if (vsi->info.pvid) - return false; - - /* Since we don't have a PVID, we know that if the device is in VLAN - * mode it must be because of a VLAN filter configured on this VSI. - */ - spin_lock_bh(&vsi->mac_filter_hash_lock); - have_vlans = i40e_is_vsi_in_vlan(vsi); - spin_unlock_bh(&vsi->mac_filter_hash_lock); - - return have_vlans; -} - /** * i40e_ndo_set_vf_port_vlan * @netdev: network interface device structure @@ -4217,19 +4289,9 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, /* duplicate request, so just return success */ goto error_pvid; - if (i40e_vsi_has_vlans(vsi)) { - dev_err(&pf->pdev->dev, - "VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n", - vf_id); - /* Administrator Error - knock the VF offline until he does - * the right thing by reconfiguring his network correctly - * and then reloading the VF driver. - */ - i40e_vc_disable_vf(vf); - /* During reset the VF got a new VSI, so refresh the pointer. */ - vsi = pf->vsi[vf->lan_vsi_idx]; - } - + i40e_vc_reset_vf(vf, true); + /* During reset the VF got a new VSI, so refresh a pointer. */ + vsi = pf->vsi[vf->lan_vsi_idx]; /* Locked once because multiple functions below iterate list */ spin_lock_bh(&vsi->mac_filter_hash_lock); @@ -4610,7 +4672,7 @@ int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting) goto out; vf->trusted = setting; - i40e_vc_disable_vf(vf); + i40e_vc_reset_vf(vf, true); dev_info(&pf->pdev->dev, "VF %u is now %strusted\n", vf_id, setting ? "" : "un"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 091e32c1bb46fa12dc4a91afa10293ccac535cfe..a554d0a0b09bd56fb9904defd0a390df877cfa89 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -10,14 +10,15 @@ #define I40E_VIRTCHNL_SUPPORTED_QTYPES 2 -#define I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED 10 - #define I40E_VLAN_PRIORITY_SHIFT 13 #define I40E_VLAN_MASK 0xFFF #define I40E_PRIORITY_MASK 0xE000 #define I40E_MAX_VF_PROMISC_FLAGS 3 +#define I40E_VF_STATE_WAIT_COUNT 20 +#define I40E_VFR_WAIT_COUNT 100 + /* Various queue ctrls */ enum i40e_queue_ctrl { I40E_QUEUE_CTRL_UNKNOWN = 0, @@ -89,9 +90,6 @@ struct i40e_vf { u8 num_queue_pairs; /* num of qps assigned to VF vsis */ u8 num_req_queues; /* num of requested qps */ u64 num_mdd_events; /* num of mdd events detected */ - /* num of continuous malformed or invalid msgs detected */ - u64 num_invalid_msgs; - u64 num_valid_msgs; /* num of valid msgs detected */ unsigned long vf_caps; /* vf's adv. capabilities */ unsigned long vf_states; /* vf's runtime states */ diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 6766446a33f49b13b6385026585c90827b630ab8..ce1e2fb22e09283302ad55e740b145a4f4ce6fa5 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -309,6 +309,7 @@ struct iavf_adapter { struct iavf_hw hw; /* defined in iavf_type.h */ enum iavf_state_t state; + enum iavf_state_t last_state; unsigned long crit_section; struct delayed_work watchdog_task; @@ -378,6 +379,15 @@ struct iavf_device { extern char iavf_driver_name[]; extern struct workqueue_struct *iavf_wq; +static inline void iavf_change_state(struct iavf_adapter *adapter, + enum iavf_state_t state) +{ + if (adapter->state != state) { + adapter->last_state = adapter->state; + adapter->state = state; + } +} + int iavf_up(struct iavf_adapter *adapter); void iavf_down(struct iavf_adapter *adapter); int iavf_process_config(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index c93567f4d0f79901a1cc45302d3304f0e0b395b3..4680a2fe6d3cc1fabc0dc99160eb788685ff6611 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -612,23 +612,44 @@ static int iavf_set_ringparam(struct net_device *netdev, if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) return -EINVAL; - new_tx_count = clamp_t(u32, ring->tx_pending, - IAVF_MIN_TXD, - IAVF_MAX_TXD); - new_tx_count = ALIGN(new_tx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (ring->tx_pending > IAVF_MAX_TXD || + ring->tx_pending < IAVF_MIN_TXD || + ring->rx_pending > IAVF_MAX_RXD || + ring->rx_pending < IAVF_MIN_RXD) { + netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range [%d-%d] (increment %d)\n", + ring->tx_pending, ring->rx_pending, IAVF_MIN_TXD, + IAVF_MAX_RXD, IAVF_REQ_DESCRIPTOR_MULTIPLE); + return -EINVAL; + } - new_rx_count = clamp_t(u32, ring->rx_pending, - IAVF_MIN_RXD, - IAVF_MAX_RXD); - new_rx_count = ALIGN(new_rx_count, IAVF_REQ_DESCRIPTOR_MULTIPLE); + new_tx_count = ALIGN(ring->tx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (new_tx_count != ring->tx_pending) + netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n", + new_tx_count); + + new_rx_count = ALIGN(ring->rx_pending, IAVF_REQ_DESCRIPTOR_MULTIPLE); + if (new_rx_count != ring->rx_pending) + netdev_info(netdev, "Requested Rx descriptor count rounded up to %d\n", + new_rx_count); /* if nothing to do return success */ if ((new_tx_count == adapter->tx_desc_count) && - (new_rx_count == adapter->rx_desc_count)) + (new_rx_count == adapter->rx_desc_count)) { + netdev_dbg(netdev, "Nothing to change, descriptor count is same as requested\n"); return 0; + } - adapter->tx_desc_count = new_tx_count; - adapter->rx_desc_count = new_rx_count; + if (new_tx_count != adapter->tx_desc_count) { + netdev_dbg(netdev, "Changing Tx descriptor count from %d to %d\n", + adapter->tx_desc_count, new_tx_count); + adapter->tx_desc_count = new_tx_count; + } + + if (new_rx_count != adapter->rx_desc_count) { + netdev_dbg(netdev, "Changing Rx descriptor count from %d to %d\n", + adapter->rx_desc_count, new_rx_count); + adapter->rx_desc_count = new_rx_count; + } if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; @@ -719,12 +740,31 @@ static int iavf_get_per_queue_coalesce(struct net_device *netdev, u32 queue, * * Change the ITR settings for a specific queue. **/ -static void iavf_set_itr_per_queue(struct iavf_adapter *adapter, - struct ethtool_coalesce *ec, int queue) +static int iavf_set_itr_per_queue(struct iavf_adapter *adapter, + struct ethtool_coalesce *ec, int queue) { struct iavf_ring *rx_ring = &adapter->rx_rings[queue]; struct iavf_ring *tx_ring = &adapter->tx_rings[queue]; struct iavf_q_vector *q_vector; + u16 itr_setting; + + itr_setting = rx_ring->itr_setting & ~IAVF_ITR_DYNAMIC; + + if (ec->rx_coalesce_usecs != itr_setting && + ec->use_adaptive_rx_coalesce) { + netif_info(adapter, drv, adapter->netdev, + "Rx interrupt throttling cannot be changed if adaptive-rx is enabled\n"); + return -EINVAL; + } + + itr_setting = tx_ring->itr_setting & ~IAVF_ITR_DYNAMIC; + + if (ec->tx_coalesce_usecs != itr_setting && + ec->use_adaptive_tx_coalesce) { + netif_info(adapter, drv, adapter->netdev, + "Tx interrupt throttling cannot be changed if adaptive-tx is enabled\n"); + return -EINVAL; + } rx_ring->itr_setting = ITR_REG_ALIGN(ec->rx_coalesce_usecs); tx_ring->itr_setting = ITR_REG_ALIGN(ec->tx_coalesce_usecs); @@ -747,6 +787,7 @@ static void iavf_set_itr_per_queue(struct iavf_adapter *adapter, * the Tx and Rx ITR values based on the values we have entered * into the q_vector, no need to write the values now. */ + return 0; } /** @@ -788,9 +829,11 @@ static int __iavf_set_coalesce(struct net_device *netdev, */ if (queue < 0) { for (i = 0; i < adapter->num_active_queues; i++) - iavf_set_itr_per_queue(adapter, ec, i); + if (iavf_set_itr_per_queue(adapter, ec, i)) + return -EINVAL; } else if (queue < adapter->num_active_queues) { - iavf_set_itr_per_queue(adapter, ec, queue); + if (iavf_set_itr_per_queue(adapter, ec, queue)) + return -EINVAL; } else { netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", adapter->num_active_queues - 1); @@ -892,6 +935,7 @@ static int iavf_set_channels(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); u32 num_req = ch->combined_count; + int i; if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) && adapter->num_tc) { @@ -902,7 +946,7 @@ static int iavf_set_channels(struct net_device *netdev, /* All of these should have already been checked by ethtool before this * even gets to us, but just to be sure. */ - if (num_req > adapter->vsi_res->num_queue_pairs) + if (num_req == 0 || num_req > adapter->vsi_res->num_queue_pairs) return -EINVAL; if (num_req == adapter->num_active_queues) @@ -914,6 +958,20 @@ static int iavf_set_channels(struct net_device *netdev, adapter->num_req_queues = num_req; adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED; iavf_schedule_reset(adapter); + + /* wait for the reset is done */ + for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) { + msleep(IAVF_RESET_WAIT_MS); + if (adapter->flags & IAVF_FLAG_RESET_PENDING) + continue; + break; + } + if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { + adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; + adapter->num_active_queues = num_req; + return -EOPNOTSUPP; + } + return 0; } @@ -960,14 +1018,13 @@ static int iavf_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, if (hfunc) *hfunc = ETH_RSS_HASH_TOP; - if (!indir) - return 0; - - memcpy(key, adapter->rss_key, adapter->rss_key_size); + if (key) + memcpy(key, adapter->rss_key, adapter->rss_key_size); - /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < adapter->rss_lut_size; i++) - indir[i] = (u32)adapter->rss_lut[i]; + if (indir) + /* Each 32 bits pointed by 'indir' is stored with a lut entry */ + for (i = 0; i < adapter->rss_lut_size; i++) + indir[i] = (u32)adapter->rss_lut[i]; return 0; } diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index f06c079e812ecf5858dadf3d093f935f600f9337..bd1fb3774769b0c301d31f2e87a572cffd7c143f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -963,7 +963,7 @@ static void iavf_configure(struct iavf_adapter *adapter) **/ static void iavf_up_complete(struct iavf_adapter *adapter) { - adapter->state = __IAVF_RUNNING; + iavf_change_state(adapter, __IAVF_RUNNING); clear_bit(__IAVF_VSI_DOWN, adapter->vsi.state); iavf_napi_enable_all(adapter); @@ -1616,8 +1616,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) iavf_set_promiscuous(adapter, FLAG_VF_MULTICAST_PROMISC); return 0; } - - if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) && + if ((adapter->aq_required & IAVF_FLAG_AQ_RELEASE_PROMISC) || (adapter->aq_required & IAVF_FLAG_AQ_RELEASE_ALLMULTI)) { iavf_set_promiscuous(adapter, 0); return 0; @@ -1699,7 +1698,7 @@ static int iavf_startup(struct iavf_adapter *adapter) iavf_shutdown_adminq(hw); goto err; } - adapter->state = __IAVF_INIT_VERSION_CHECK; + iavf_change_state(adapter, __IAVF_INIT_VERSION_CHECK); err: return err; } @@ -1723,7 +1722,7 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) if (!iavf_asq_done(hw)) { dev_err(&pdev->dev, "Admin queue command never completed\n"); iavf_shutdown_adminq(hw); - adapter->state = __IAVF_STARTUP; + iavf_change_state(adapter, __IAVF_STARTUP); goto err; } @@ -1746,8 +1745,7 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) err); goto err; } - adapter->state = __IAVF_INIT_GET_RESOURCES; - + iavf_change_state(adapter, __IAVF_INIT_GET_RESOURCES); err: return err; } @@ -1863,7 +1861,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) if (netdev->features & NETIF_F_GRO) dev_info(&pdev->dev, "GRO is enabled\n"); - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); rtnl_unlock(); @@ -1911,7 +1909,7 @@ static void iavf_watchdog_task(struct work_struct *work) goto restart_watchdog; if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) - adapter->state = __IAVF_COMM_FAILED; + iavf_change_state(adapter, __IAVF_COMM_FAILED); switch (adapter->state) { case __IAVF_COMM_FAILED: @@ -1922,7 +1920,7 @@ static void iavf_watchdog_task(struct work_struct *work) /* A chance for redemption! */ dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n"); - adapter->state = __IAVF_STARTUP; + iavf_change_state(adapter, __IAVF_STARTUP); adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; queue_delayed_work(iavf_wq, &adapter->init_task, 10); clear_bit(__IAVF_IN_CRITICAL_TASK, @@ -1972,9 +1970,10 @@ static void iavf_watchdog_task(struct work_struct *work) goto restart_watchdog; } - /* check for hw reset */ + /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { + iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; @@ -2047,14 +2046,14 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) iavf_free_misc_irq(adapter); iavf_reset_interrupt_capability(adapter); - iavf_free_queues(adapter); iavf_free_q_vectors(adapter); + iavf_free_queues(adapter); memset(adapter->vf_res, 0, IAVF_VIRTCHNL_VF_RESOURCE_SIZE); iavf_shutdown_adminq(&adapter->hw); adapter->netdev->flags &= ~IFF_UP; clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); } @@ -2140,6 +2139,7 @@ static void iavf_reset_task(struct work_struct *work) } pci_set_master(adapter->pdev); + pci_restore_msi_state(adapter->pdev); if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) { dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n", @@ -2165,7 +2165,7 @@ continue_reset: } iavf_irq_disable(adapter); - adapter->state = __IAVF_RESETTING; + iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; /* free the Tx/Rx rings and descriptors, might be better to just @@ -2265,11 +2265,14 @@ continue_reset: iavf_configure(adapter); + /* iavf_up_complete() will switch device back + * to __IAVF_RUNNING + */ iavf_up_complete(adapter); iavf_irq_enable(adapter, true); } else { - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); } clear_bit(__IAVF_IN_CLIENT_TASK, &adapter->crit_section); @@ -2330,7 +2333,7 @@ static void iavf_adminq_task(struct work_struct *work) /* check for error indications */ val = rd32(hw, hw->aq.arq.len); - if (val == 0xdeadbeef) /* indicates device in reset */ + if (val == 0xdeadbeef || val == 0xffffffff) /* device in reset */ goto freedom; oldval = val; if (val & IAVF_VF_ARQLEN1_ARQVFE_MASK) { @@ -2598,8 +2601,11 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, total_max_rate += tx_rate; num_qps += mqprio_qopt->qopt.count[i]; } - if (num_qps > IAVF_MAX_REQ_QUEUES) + if (num_qps > adapter->num_active_queues) { + dev_err(&adapter->pdev->dev, + "Cannot support requested number of queues\n"); return -EINVAL; + } ret = iavf_validate_tx_bandwidth(adapter, total_max_rate); return ret; @@ -3028,11 +3034,11 @@ static int iavf_configure_clsflower(struct iavf_adapter *adapter, /* start out with flow type and eth type IPv4 to begin with */ filter->f.flow_type = VIRTCHNL_TCP_V4_FLOW; err = iavf_parse_cls_flower(adapter, cls_flower, filter); - if (err < 0) + if (err) goto err; err = iavf_handle_tclass(adapter, tc, filter); - if (err < 0) + if (err) goto err; /* add filter to the list */ @@ -3274,7 +3280,7 @@ static int iavf_close(struct net_device *netdev) adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_CLOSE; iavf_down(adapter); - adapter->state = __IAVF_DOWN_PENDING; + iavf_change_state(adapter, __IAVF_DOWN_PENDING); iavf_free_traffic_irqs(adapter); clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); @@ -3314,8 +3320,11 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) iavf_notify_client_l2_params(&adapter->vsi); adapter->flags |= IAVF_FLAG_SERVICE_CLIENT_REQUESTED; } - adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(iavf_wq, &adapter->reset_task); + + if (netif_running(netdev)) { + adapter->flags |= IAVF_FLAG_RESET_NEEDED; + queue_work(iavf_wq, &adapter->reset_task); + } return 0; } @@ -3419,7 +3428,8 @@ static netdev_features_t iavf_fix_features(struct net_device *netdev, { struct iavf_adapter *adapter = netdev_priv(netdev); - if (!(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) + if (adapter->vf_res && + !(adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_VLAN)) features &= ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER); @@ -3654,7 +3664,7 @@ init_failed: "Failed to communicate with PF; waiting before retry\n"); adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; iavf_shutdown_adminq(hw); - adapter->state = __IAVF_STARTUP; + iavf_change_state(adapter, __IAVF_STARTUP); queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5); goto out; } @@ -3680,7 +3690,7 @@ static void iavf_shutdown(struct pci_dev *pdev) if (iavf_lock_timeout(adapter, __IAVF_IN_CRITICAL_TASK, 5000)) dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); /* Prevent the watchdog from running. */ - adapter->state = __IAVF_REMOVE; + iavf_change_state(adapter, __IAVF_REMOVE); adapter->aq_required = 0; clear_bit(__IAVF_IN_CRITICAL_TASK, &adapter->crit_section); @@ -3753,7 +3763,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->back = adapter; adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1; - adapter->state = __IAVF_STARTUP; + iavf_change_state(adapter, __IAVF_STARTUP); /* Call save state here because it relies on the adapter struct. */ pci_save_state(pdev); @@ -3921,7 +3931,7 @@ static void iavf_remove(struct pci_dev *pdev) dev_warn(&adapter->pdev->dev, "failed to set __IAVF_IN_CRITICAL_TASK in %s\n", __FUNCTION__); /* Shut down all the garbage mashers on the detention level */ - adapter->state = __IAVF_REMOVE; + iavf_change_state(adapter, __IAVF_REMOVE); adapter->aq_required = 0; adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; iavf_free_all_tx_resources(adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 8be3151f2c62b5c4d4ffd03dd1d935a4ee6fb15a..ff479bf7214433755a193951c51cad33afceefa2 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1460,7 +1460,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_free_all_tx_resources(adapter); iavf_free_all_rx_resources(adapter); if (adapter->state == __IAVF_DOWN_PENDING) { - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); } break; diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index b06fbe99d8e9316e2df848dafd1db1f1487f2af2..b6dd8f81d69979edc78c323985bb99a2a35234e5 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -870,11 +870,11 @@ struct ice_aqc_get_phy_caps { * 01b - Report topology capabilities * 10b - Report SW configured */ -#define ICE_AQC_REPORT_MODE_S 1 -#define ICE_AQC_REPORT_MODE_M (3 << ICE_AQC_REPORT_MODE_S) -#define ICE_AQC_REPORT_NVM_CAP 0 -#define ICE_AQC_REPORT_TOPO_CAP BIT(1) -#define ICE_AQC_REPORT_SW_CFG BIT(2) +#define ICE_AQC_REPORT_MODE_S 1 +#define ICE_AQC_REPORT_MODE_M (3 << ICE_AQC_REPORT_MODE_S) +#define ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA 0 +#define ICE_AQC_REPORT_TOPO_CAP_MEDIA BIT(1) +#define ICE_AQC_REPORT_ACTIVE_CFG BIT(2) __le32 reserved1; __le32 addr_high; __le32 addr_low; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index fe4320e2d1f2f73c272fcdc0046f8225b0b4406f..1929847b8c404d19ec57207b16d5cb2665056327 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -839,7 +839,7 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, } else if (status == ICE_ERR_DOES_NOT_EXIST) { dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n"); } else if (status) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %s\n", + dev_dbg(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %s\n", ice_stat_str(status)); return -ENODEV; } diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 2239a5f45e5a7bc0a540922a3007b3badfec00e8..ecdc467c4f6f5c048bb855efb2d6f4e159979ebd 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -24,6 +24,8 @@ static enum ice_status ice_set_mac_type(struct ice_hw *hw) case ICE_DEV_ID_E810C_BACKPLANE: case ICE_DEV_ID_E810C_QSFP: case ICE_DEV_ID_E810C_SFP: + case ICE_DEV_ID_E810_XXV_BACKPLANE: + case ICE_DEV_ID_E810_XXV_QSFP: case ICE_DEV_ID_E810_XXV_SFP: hw->mac_type = ICE_MAC_E810; break; @@ -191,7 +193,7 @@ ice_aq_get_phy_caps(struct ice_port_info *pi, bool qual_mods, u8 report_mode, ice_debug(hw, ICE_DBG_LINK, " module_type[2] = 0x%x\n", pcaps->module_type[2]); - if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) { + if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP_MEDIA) { pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low); pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high); memcpy(pi->phy.link_info.module_type, &pcaps->module_type, @@ -922,7 +924,8 @@ enum ice_status ice_init_hw(struct ice_hw *hw) /* Initialize port_info struct with PHY capabilities */ status = ice_aq_get_phy_caps(hw->port_info, false, - ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL); + ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, + NULL); devm_kfree(ice_hw_to_dev(hw), pcaps); if (status) goto err_unroll_sched; @@ -2680,7 +2683,7 @@ enum ice_status ice_update_link_info(struct ice_port_info *pi) if (!pcaps) return ICE_ERR_NO_MEMORY; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL); devm_kfree(ice_hw_to_dev(hw), pcaps); @@ -2840,8 +2843,8 @@ ice_set_fc(struct ice_port_info *pi, u8 *aq_failures, bool ena_auto_link_update) return ICE_ERR_NO_MEMORY; /* Get the current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, - NULL); + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, + pcaps, NULL); if (status) { *aq_failures = ICE_SET_FC_AQ_FAIL_GET; goto out; @@ -2987,7 +2990,7 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, if (!pcaps) return ICE_ERR_NO_MEMORY; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL); if (status) goto out; @@ -3030,7 +3033,8 @@ ice_cfg_phy_fec(struct ice_port_info *pi, struct ice_aqc_set_phy_cfg_data *cfg, if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(pi->hw)) { struct ice_link_default_override_tlv tlv; - if (ice_get_link_default_override(&tlv, pi)) + status = ice_get_link_default_override(&tlv, pi); + if (status) goto out; if (!(tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE) && diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index 9d8194671f6a6abee4d8682dd314ab6d3346208d..ef4392e6e2444871c611affdd389964146ddbdb9 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -21,6 +21,10 @@ #define ICE_DEV_ID_E810C_QSFP 0x1592 /* Intel(R) Ethernet Controller E810-C for SFP */ #define ICE_DEV_ID_E810C_SFP 0x1593 +/* Intel(R) Ethernet Controller E810-XXV for backplane */ +#define ICE_DEV_ID_E810_XXV_BACKPLANE 0x1599 +/* Intel(R) Ethernet Controller E810-XXV for QSFP */ +#define ICE_DEV_ID_E810_XXV_QSFP 0x159A /* Intel(R) Ethernet Controller E810-XXV for SFP */ #define ICE_DEV_ID_E810_XXV_SFP 0x159B /* Intel(R) Ethernet Connection E823-C for backplane */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 14eba9bc174d8fb64e345122c1d574adf3976486..421fc707f80af438ec967d28f5fe900daf9573cc 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -1081,7 +1081,7 @@ ice_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam) if (!caps) return -ENOMEM; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL); if (status) { err = -EAGAIN; @@ -1976,7 +1976,7 @@ ice_get_link_ksettings(struct net_device *netdev, return -ENOMEM; status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_SW_CFG, caps, NULL); + ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL); if (status) { err = -EIO; goto done; @@ -2013,7 +2013,7 @@ ice_get_link_ksettings(struct net_device *netdev, ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS); status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_TOPO_CAP, caps, NULL); + ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL); if (status) { err = -EIO; goto done; @@ -2187,12 +2187,12 @@ ice_set_link_ksettings(struct net_device *netdev, { struct ice_netdev_priv *np = netdev_priv(netdev); struct ethtool_link_ksettings safe_ks, copy_ks; - struct ice_aqc_get_phy_caps_data *abilities; u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT; - u16 adv_link_speed, curr_link_speed, idx; + struct ice_aqc_get_phy_caps_data *phy_caps; struct ice_aqc_set_phy_cfg_data config; + u16 adv_link_speed, curr_link_speed; struct ice_pf *pf = np->vsi->back; - struct ice_port_info *p; + struct ice_port_info *pi; u8 autoneg_changed = 0; enum ice_status status; u64 phy_type_high = 0; @@ -2200,33 +2200,25 @@ ice_set_link_ksettings(struct net_device *netdev, int err = 0; bool linkup; - p = np->vsi->port_info; + pi = np->vsi->port_info; - if (!p) + if (!pi) return -EOPNOTSUPP; - /* Check if this is LAN VSI */ - ice_for_each_vsi(pf, idx) - if (pf->vsi[idx]->type == ICE_VSI_PF) { - if (np->vsi != pf->vsi[idx]) - return -EOPNOTSUPP; - break; - } - - if (p->phy.media_type != ICE_MEDIA_BASET && - p->phy.media_type != ICE_MEDIA_FIBER && - p->phy.media_type != ICE_MEDIA_BACKPLANE && - p->phy.media_type != ICE_MEDIA_DA && - p->phy.link_info.link_info & ICE_AQ_LINK_UP) + if (pi->phy.media_type != ICE_MEDIA_BASET && + pi->phy.media_type != ICE_MEDIA_FIBER && + pi->phy.media_type != ICE_MEDIA_BACKPLANE && + pi->phy.media_type != ICE_MEDIA_DA && + pi->phy.link_info.link_info & ICE_AQ_LINK_UP) return -EOPNOTSUPP; - abilities = kzalloc(sizeof(*abilities), GFP_KERNEL); - if (!abilities) + phy_caps = kzalloc(sizeof(*phy_caps), GFP_KERNEL); + if (!phy_caps) return -ENOMEM; /* Get the PHY capabilities based on media */ - status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_TOPO_CAP, - abilities, NULL); + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, + phy_caps, NULL); if (status) { err = -EAGAIN; goto done; @@ -2288,26 +2280,26 @@ ice_set_link_ksettings(struct net_device *netdev, * configuration is initialized during probe from PHY capabilities * software mode, and updated on set PHY configuration. */ - memcpy(&config, &p->phy.curr_user_phy_cfg, sizeof(config)); + memcpy(&config, &pi->phy.curr_user_phy_cfg, sizeof(config)); config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT; /* Check autoneg */ - err = ice_setup_autoneg(p, &safe_ks, &config, autoneg, &autoneg_changed, + err = ice_setup_autoneg(pi, &safe_ks, &config, autoneg, &autoneg_changed, netdev); if (err) goto done; /* Call to get the current link speed */ - p->phy.get_link_info = true; - status = ice_get_link_status(p, &linkup); + pi->phy.get_link_info = true; + status = ice_get_link_status(pi, &linkup); if (status) { err = -EAGAIN; goto done; } - curr_link_speed = p->phy.link_info.link_speed; + curr_link_speed = pi->phy.curr_user_speed_req; adv_link_speed = ice_ksettings_find_adv_link_speed(ks); /* If speed didn't get set, set it to what it currently is. @@ -2326,7 +2318,7 @@ ice_set_link_ksettings(struct net_device *netdev, } /* save the requested speeds */ - p->phy.link_info.req_speeds = adv_link_speed; + pi->phy.link_info.req_speeds = adv_link_speed; /* set link and auto negotiation so changes take effect */ config.caps |= ICE_AQ_PHY_ENA_LINK; @@ -2342,9 +2334,9 @@ ice_set_link_ksettings(struct net_device *netdev, * for set PHY configuration */ config.phy_type_high = cpu_to_le64(phy_type_high) & - abilities->phy_type_high; + phy_caps->phy_type_high; config.phy_type_low = cpu_to_le64(phy_type_low) & - abilities->phy_type_low; + phy_caps->phy_type_low; if (!(config.phy_type_high || config.phy_type_low)) { /* If there is no intersection and lenient mode is enabled, then @@ -2364,7 +2356,7 @@ ice_set_link_ksettings(struct net_device *netdev, } /* If link is up put link down */ - if (p->phy.link_info.link_info & ICE_AQ_LINK_UP) { + if (pi->phy.link_info.link_info & ICE_AQ_LINK_UP) { /* Tell the OS link is going down, the link will go * back up when fw says it is ready asynchronously */ @@ -2374,7 +2366,7 @@ ice_set_link_ksettings(struct net_device *netdev, } /* make the aq call */ - status = ice_aq_set_phy_cfg(&pf->hw, p, &config, NULL); + status = ice_aq_set_phy_cfg(&pf->hw, pi, &config, NULL); if (status) { netdev_info(netdev, "Set phy config failed,\n"); err = -EAGAIN; @@ -2382,9 +2374,9 @@ ice_set_link_ksettings(struct net_device *netdev, } /* Save speed request */ - p->phy.curr_user_speed_req = adv_link_speed; + pi->phy.curr_user_speed_req = adv_link_speed; done: - kfree(abilities); + kfree(phy_caps); clear_bit(__ICE_CFG_BUSY, pf->state); return err; @@ -2954,7 +2946,7 @@ ice_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) return; /* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) goto out; @@ -3021,7 +3013,7 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) return -ENOMEM; /* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) { kfree(pcaps); diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 9095b4d274ad2b0efb0d17358de55a7d829d10d8..a81be917f6538fc48b2e76e3b6d595415e71170c 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -1669,7 +1669,7 @@ static u16 ice_tunnel_idx_to_entry(struct ice_hw *hw, enum ice_tunnel_type type, for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++) if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].type == type && - idx--) + idx-- == 0) return i; WARN_ON_ONCE(1); @@ -1829,7 +1829,7 @@ int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, u16 index; tnl_type = ti->type == UDP_TUNNEL_TYPE_VXLAN ? TNL_VXLAN : TNL_GENEVE; - index = ice_tunnel_idx_to_entry(&pf->hw, idx, tnl_type); + index = ice_tunnel_idx_to_entry(&pf->hw, tnl_type, idx); status = ice_create_tunnel(&pf->hw, index, tnl_type, ntohs(ti->port)); if (status) { diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index c0ee0541e53fc14c92ef083b6accea03bc156d49..847e1ef8e10641a13c835eca849a4b7a4f1af679 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -507,6 +507,7 @@ struct ice_tx_ctx_desc { (0x3FFFFULL << ICE_TXD_CTX_QW1_TSO_LEN_S) #define ICE_TXD_CTX_QW1_MSS_S 50 +#define ICE_TXD_CTX_MIN_MSS 64 enum ice_tx_ctx_desc_cmd_bits { ICE_TX_CTX_DESC_TSO = 0x01, diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index dc944d605a741bf16585de9a3385e9ebdc39e29d..52ac6cc08e83e73acb646e5828e3689428ae509f 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -83,8 +83,13 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) if (!vsi->rx_rings) goto err_rings; - /* XDP will have vsi->alloc_txq Tx queues as well, so double the size */ - vsi->txq_map = devm_kcalloc(dev, (2 * vsi->alloc_txq), + /* txq_map needs to have enough space to track both Tx (stack) rings + * and XDP rings; at this point vsi->num_xdp_txq might not be set, + * so use num_possible_cpus() as we want to always provide XDP ring + * per CPU, regardless of queue count settings from user that might + * have come from ethtool's set_channels() callback; + */ + vsi->txq_map = devm_kcalloc(dev, (vsi->alloc_txq + num_possible_cpus()), sizeof(*vsi->txq_map), GFP_KERNEL); if (!vsi->txq_map) diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 5d0dc1f811e0f11e6b63d2e1286f6e420e66efcf..20c9d55f3adcee9e9b126877f7cd79a9c339e39a 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -726,7 +726,7 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup) } status = ice_aq_get_phy_caps(vsi->port_info, false, - ICE_AQC_REPORT_SW_CFG, caps, NULL); + ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL); if (status) netdev_info(vsi->netdev, "Get phy capability failed.\n"); @@ -1602,7 +1602,9 @@ static void ice_handle_mdd_event(struct ice_pf *pf) * reset, so print the event prior to reset. */ ice_print_vf_rx_mdd_event(vf); + mutex_lock(&pf->vf[i].cfg_lock); ice_reset_vf(&pf->vf[i], false); + mutex_unlock(&pf->vf[i].cfg_lock); } } } @@ -1643,7 +1645,7 @@ static int ice_force_phys_link_state(struct ice_vsi *vsi, bool link_up) if (!pcaps) return -ENOMEM; - retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (retcode) { dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n", @@ -1703,7 +1705,7 @@ static int ice_init_nvm_phy_type(struct ice_port_info *pi) if (!pcaps) return -ENOMEM; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_NVM_CAP, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA, pcaps, NULL); if (status) { @@ -1819,7 +1821,7 @@ static int ice_init_phy_user_cfg(struct ice_port_info *pi) if (!pcaps) return -ENOMEM; - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL); if (status) { dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n"); @@ -1898,7 +1900,7 @@ static int ice_configure_phy(struct ice_vsi *vsi) return -ENOMEM; /* Get current PHY config */ - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps, NULL); if (status) { dev_err(dev, "Failed to get PHY configuration, VSI %d error %s\n", @@ -1916,7 +1918,7 @@ static int ice_configure_phy(struct ice_vsi *vsi) /* Use PHY topology as baseline for configuration */ memset(pcaps, 0, sizeof(*pcaps)); - status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps, + status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps, NULL); if (status) { dev_err(dev, "Failed to get PHY topology, VSI %d error %s\n", @@ -2397,7 +2399,18 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) ice_stat_str(status)); goto clear_xdp_rings; } - ice_vsi_assign_bpf_prog(vsi, prog); + + /* assign the prog only when it's not already present on VSI; + * this flow is a subject of both ethtool -L and ndo_bpf flows; + * VSI rebuild that happens under ethtool -L can expose us to + * the bpf_prog refcount issues as we would be swapping same + * bpf_prog pointers from vsi->xdp_prog and calling bpf_prog_put + * on it as it would be treated as an 'old_prog'; for ndo_bpf + * this is not harmful as dev_xdp_install bumps the refcount + * before calling the op exposed by the driver; + */ + if (!ice_is_xdp_ena_vsi(vsi)) + ice_vsi_assign_bpf_prog(vsi, prog); return 0; clear_xdp_rings: @@ -2527,6 +2540,11 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); } else { + /* safe to call even when prog == vsi->xdp_prog as + * dev_xdp_install in net/core/dev.c incremented prog's + * refcount so corresponding bpf_prog_put won't cause + * underflow + */ ice_vsi_assign_bpf_prog(vsi, prog); } @@ -4361,9 +4379,6 @@ static void ice_remove(struct pci_dev *pdev) struct ice_pf *pf = pci_get_drvdata(pdev); int i; - if (!pf) - return; - for (i = 0; i < ICE_MAX_RESET_WAIT; i++) { if (!ice_is_reset_in_progress(pf->state)) break; @@ -4773,6 +4788,8 @@ static const struct pci_device_id ice_pci_tbl[] = { { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_BACKPLANE), 0 }, + { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_QSFP), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810_XXV_SFP), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_BACKPLANE), 0 }, { PCI_VDEVICE(INTEL, ICE_DEV_ID_E823C_QSFP), 0 }, @@ -5252,6 +5269,9 @@ static int ice_up_complete(struct ice_vsi *vsi) netif_carrier_on(vsi->netdev); } + /* clear this now, and the first stats read will be used as baseline */ + vsi->stat_offsets_loaded = false; + ice_service_task_schedule(pf); return 0; @@ -6769,6 +6789,7 @@ ice_features_check(struct sk_buff *skb, struct net_device __always_unused *netdev, netdev_features_t features) { + bool gso = skb_is_gso(skb); size_t len; /* No point in doing any of this if neither checksum nor GSO are @@ -6781,24 +6802,32 @@ ice_features_check(struct sk_buff *skb, /* We cannot support GSO if the MSS is going to be less than * 64 bytes. If it is then we need to drop support for GSO. */ - if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64)) + if (gso && (skb_shinfo(skb)->gso_size < ICE_TXD_CTX_MIN_MSS)) features &= ~NETIF_F_GSO_MASK; - len = skb_network_header(skb) - skb->data; + len = skb_network_offset(skb); if (len > ICE_TXD_MACLEN_MAX || len & 0x1) goto out_rm_features; - len = skb_transport_header(skb) - skb_network_header(skb); + len = skb_network_header_len(skb); if (len > ICE_TXD_IPLEN_MAX || len & 0x1) goto out_rm_features; if (skb->encapsulation) { - len = skb_inner_network_header(skb) - skb_transport_header(skb); - if (len > ICE_TXD_L4LEN_MAX || len & 0x1) - goto out_rm_features; + /* this must work for VXLAN frames AND IPIP/SIT frames, and in + * the case of IPIP frames, the transport header pointer is + * after the inner header! So check to make sure that this + * is a GRE or UDP_TUNNEL frame before doing that math. + */ + if (gso && (skb_shinfo(skb)->gso_type & + (SKB_GSO_GRE | SKB_GSO_UDP_TUNNEL))) { + len = skb_inner_network_header(skb) - + skb_transport_header(skb); + if (len > ICE_TXD_L4LEN_MAX || len & 0x1) + goto out_rm_features; + } - len = skb_inner_transport_header(skb) - - skb_inner_network_header(skb); + len = skb_inner_network_header_len(skb); if (len > ICE_TXD_IPLEN_MAX || len & 0x1) goto out_rm_features; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index c9f82fd3cf48d85968609308ac63b262912a5d80..5134342ff70fc3d188e91e8ca6ea96276eb2a8df 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -360,21 +360,26 @@ void ice_free_vfs(struct ice_pf *pf) else dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n"); - /* Avoid wait time by stopping all VFs at the same time */ - ice_for_each_vf(pf, i) - if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[i].vf_states)) - ice_dis_vf_qs(&pf->vf[i]); - tmp = pf->num_alloc_vfs; pf->num_qps_per_vf = 0; pf->num_alloc_vfs = 0; for (i = 0; i < tmp; i++) { - if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) { + struct ice_vf *vf = &pf->vf[i]; + + mutex_lock(&vf->cfg_lock); + + ice_dis_vf_qs(vf); + + if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) { /* disable VF qp mappings and set VF disable state */ - ice_dis_vf_mappings(&pf->vf[i]); - set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states); - ice_free_vf_res(&pf->vf[i]); + ice_dis_vf_mappings(vf); + set_bit(ICE_VF_STATE_DIS, vf->vf_states); + ice_free_vf_res(vf); } + + mutex_unlock(&vf->cfg_lock); + + mutex_destroy(&vf->cfg_lock); } if (ice_sriov_free_msix_res(pf)) @@ -1222,9 +1227,13 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) ice_for_each_vf(pf, v) { vf = &pf->vf[v]; + mutex_lock(&vf->cfg_lock); + ice_vf_pre_vsi_rebuild(vf); ice_vf_rebuild_vsi(vf); ice_vf_post_vsi_rebuild(vf); + + mutex_unlock(&vf->cfg_lock); } ice_flush(hw); @@ -1271,6 +1280,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) u32 reg; int i; + lockdep_assert_held(&vf->cfg_lock); + dev = ice_pf_to_dev(pf); if (test_bit(__ICE_VF_RESETS_DISABLED, pf->state)) { @@ -1291,8 +1302,7 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) vsi = pf->vsi[vf->lan_vsi_idx]; - if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) - ice_dis_vf_qs(vf); + ice_dis_vf_qs(vf); /* Call Disable LAN Tx queue AQ whether or not queues are * enabled. This is needed for successful completion of VFR. @@ -1520,6 +1530,8 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf) set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps); vf->spoofchk = true; vf->num_vf_qs = pf->num_qps_per_vf; + + mutex_init(&vf->cfg_lock); } } @@ -1723,9 +1735,12 @@ void ice_process_vflr_event(struct ice_pf *pf) bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32; /* read GLGEN_VFLRSTAT register to find out the flr VFs */ reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx)); - if (reg & BIT(bit_idx)) + if (reg & BIT(bit_idx)) { /* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */ + mutex_lock(&vf->cfg_lock); ice_reset_vf(vf, true); + mutex_unlock(&vf->cfg_lock); + } } } @@ -1802,7 +1817,9 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) if (!vf) return; + mutex_lock(&vf->cfg_lock); ice_vc_reset_vf(vf); + mutex_unlock(&vf->cfg_lock); } /** @@ -1832,24 +1849,6 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode, dev = ice_pf_to_dev(pf); - /* single place to detect unsuccessful return values */ - if (v_retval) { - vf->num_inval_msgs++; - dev_info(dev, "VF %d failed opcode %d, retval: %d\n", vf->vf_id, - v_opcode, v_retval); - if (vf->num_inval_msgs > ICE_DFLT_NUM_INVAL_MSGS_ALLOWED) { - dev_err(dev, "Number of invalid messages exceeded for VF %d\n", - vf->vf_id); - dev_err(dev, "Use PF Control I/F to enable the VF\n"); - set_bit(ICE_VF_STATE_DIS, vf->vf_states); - return -EIO; - } - } else { - vf->num_valid_msgs++; - /* reset the invalid counter, if a valid message is received. */ - vf->num_inval_msgs = 0; - } - aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval, msg, msglen, NULL); if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) { @@ -3068,6 +3067,7 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) { struct device *dev = ice_pf_to_dev(vf->pf); enum ice_status status; + int ret = 0; /* default unicast MAC already added */ if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr)) @@ -3080,13 +3080,18 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) status = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI); if (status == ICE_ERR_ALREADY_EXISTS) { - dev_err(dev, "MAC %pM already exists for VF %d\n", mac_addr, + dev_dbg(dev, "MAC %pM already exists for VF %d\n", mac_addr, vf->vf_id); - return -EEXIST; + /* don't return since we might need to update + * the primary MAC in ice_vfhw_mac_add() below + */ + ret = -EEXIST; } else if (status) { dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %s\n", mac_addr, vf->vf_id, ice_stat_str(status)); return -EIO; + } else { + vf->num_mac++; } /* Set the default LAN address to the latest unicast MAC address added @@ -3096,9 +3101,7 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr) if (is_unicast_ether_addr(mac_addr)) ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr); - vf->num_mac++; - - return 0; + return ret; } /** @@ -3343,6 +3346,8 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, return 0; } + mutex_lock(&vf->cfg_lock); + vf->port_vlan_info = vlanprio; if (vf->port_vlan_info) @@ -3352,6 +3357,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id); ice_vc_reset_vf(vf); + mutex_unlock(&vf->cfg_lock); return 0; } @@ -3717,6 +3723,15 @@ error_handler: return; } + /* VF is being configured in another context that triggers a VFR, so no + * need to process this message + */ + if (!mutex_trylock(&vf->cfg_lock)) { + dev_info(dev, "VF %u is being configured in another context that will trigger a VFR, so there is no need to handle this message\n", + vf->vf_id); + return; + } + switch (v_opcode) { case VIRTCHNL_OP_VERSION: err = ice_vc_get_ver_msg(vf, msg); @@ -3793,6 +3808,8 @@ error_handler: dev_info(dev, "PF failed to honor VF %d, opcode %d, error %d\n", vf_id, v_opcode, err); } + + mutex_unlock(&vf->cfg_lock); } /** @@ -3907,6 +3924,8 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) return -EINVAL; } + mutex_lock(&vf->cfg_lock); + /* VF is notified of its new MAC via the PF's response to the * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset */ @@ -3924,6 +3943,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) } ice_vc_reset_vf(vf); + mutex_unlock(&vf->cfg_lock); return 0; } @@ -3953,11 +3973,15 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) if (trusted == vf->trusted) return 0; + mutex_lock(&vf->cfg_lock); + vf->trusted = trusted; ice_vc_reset_vf(vf); dev_info(ice_pf_to_dev(pf), "VF %u is now %strusted\n", vf_id, trusted ? "" : "un"); + mutex_unlock(&vf->cfg_lock); + return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 0f519fba3770d3d3b4f250b69158c53f1170293b..d2e935c678a147e14037ea451a4b5f0e020c0640 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -13,7 +13,6 @@ #define ICE_MAX_MACADDR_PER_VF 18 /* Malicious Driver Detection */ -#define ICE_DFLT_NUM_INVAL_MSGS_ALLOWED 10 #define ICE_MDD_EVENTS_THRESHOLD 30 /* Static VF transaction/status register def */ @@ -68,6 +67,11 @@ struct ice_mdd_vf_events { struct ice_vf { struct ice_pf *pf; + /* Used during virtchnl message handling and NDO ops against the VF + * that will trigger a VFR + */ + struct mutex cfg_lock; + u16 vf_id; /* VF ID in the PF space */ u16 lan_vsi_idx; /* index into PF struct */ /* first vector index of this VF in the PF space */ @@ -92,8 +96,6 @@ struct ice_vf { unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ - u64 num_inval_msgs; /* number of continuous invalid msgs */ - u64 num_valid_msgs; /* number of valid msgs detected */ unsigned long vf_caps; /* VF's adv. capabilities */ u8 num_req_qs; /* num of queue pairs requested by VF */ u16 num_mac; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index e24fb122c03a27047c2f531a6f9957059a5e9a0e..f854d41c6c94d15edbd4a31cb145460b43e9d5df 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -7654,6 +7654,20 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, struct vf_mac_filter *entry = NULL; int ret = 0; + if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && + !vf_data->trusted) { + dev_warn(&pdev->dev, + "VF %d requested MAC filter but is administratively denied\n", + vf); + return -EINVAL; + } + if (!is_valid_ether_addr(addr)) { + dev_warn(&pdev->dev, + "VF %d attempted to set invalid MAC filter\n", + vf); + return -EINVAL; + } + switch (info) { case E1000_VF_MAC_FILTER_CLR: /* remove all unicast MAC filters related to the current VF */ @@ -7667,20 +7681,6 @@ static int igb_set_vf_mac_filter(struct igb_adapter *adapter, const int vf, } break; case E1000_VF_MAC_FILTER_ADD: - if ((vf_data->flags & IGB_VF_FLAG_PF_SET_MAC) && - !vf_data->trusted) { - dev_warn(&pdev->dev, - "VF %d requested MAC filter but is administratively denied\n", - vf); - return -EINVAL; - } - if (!is_valid_ether_addr(addr)) { - dev_warn(&pdev->dev, - "VF %d attempted to set invalid MAC filter\n", - vf); - return -EINVAL; - } - /* try to find empty slot in the list */ list_for_each(pos, &adapter->vf_macs.l) { entry = list_entry(pos, struct vf_mac_filter, l); @@ -8032,7 +8032,7 @@ static int igb_poll(struct napi_struct *napi, int budget) if (likely(napi_complete_done(napi, work_done))) igb_ring_irq_enable(q_vector); - return min(work_done, budget - 1); + return work_done; } /** @@ -9260,7 +9260,7 @@ static int __maybe_unused igb_suspend(struct device *dev) return __igb_shutdown(to_pci_dev(dev), NULL, 0); } -static int __maybe_unused igb_resume(struct device *dev) +static int __maybe_unused __igb_resume(struct device *dev, bool rpm) { struct pci_dev *pdev = to_pci_dev(dev); struct net_device *netdev = pci_get_drvdata(pdev); @@ -9303,17 +9303,24 @@ static int __maybe_unused igb_resume(struct device *dev) wr32(E1000_WUS, ~0); - rtnl_lock(); + if (!rpm) + rtnl_lock(); if (!err && netif_running(netdev)) err = __igb_open(netdev, true); if (!err) netif_device_attach(netdev); - rtnl_unlock(); + if (!rpm) + rtnl_unlock(); return err; } +static int __maybe_unused igb_resume(struct device *dev) +{ + return __igb_resume(dev, false); +} + static int __maybe_unused igb_runtime_idle(struct device *dev) { struct net_device *netdev = dev_get_drvdata(dev); @@ -9332,7 +9339,7 @@ static int __maybe_unused igb_runtime_suspend(struct device *dev) static int __maybe_unused igb_runtime_resume(struct device *dev) { - return igb_resume(dev); + return __igb_resume(dev, true); } static void igb_shutdown(struct pci_dev *pdev) @@ -9448,7 +9455,7 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, * @pdev: Pointer to PCI device * * Restart the card from scratch, as if from a cold-boot. Implementation - * resembles the first-half of the igb_resume routine. + * resembles the first-half of the __igb_resume routine. **/ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) { @@ -9488,7 +9495,7 @@ static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation. Implementation resembles the - * second-half of the igb_resume routine. + * second-half of the __igb_resume routine. */ static void igb_io_resume(struct pci_dev *pdev) { diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 07c9e9e0546f50f5867b92976095b6932feee914..fe8c0a26b72013063e4f7729073fe5ea7b9263a4 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2873,6 +2873,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_hw_init: + netif_napi_del(&adapter->rx_ring->napi); kfree(adapter->tx_ring); kfree(adapter->rx_ring); err_sw_init: diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index 7ec04e48860c6a52e1c6a09f35a31d3ad3507473..553d6bc78e6bd7242dc24609b7cf01ca0697a402 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -636,7 +636,7 @@ s32 igc_set_ltr_i225(struct igc_hw *hw, bool link) ltrv = rd32(IGC_LTRMAXV); if (ltr_max != (ltrv & IGC_LTRMAXV_LTRV_MASK)) { ltrv = IGC_LTRMAXV_LSNP_REQ | ltr_max | - (scale_min << IGC_LTRMAXV_SCALE_SHIFT); + (scale_max << IGC_LTRMAXV_SCALE_SHIFT); wr32(IGC_LTRMAXV, ltrv); } } diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index cae090a072524268b98329c68686bf817398f6a7..61cebb7df6bcbb60bc055f2c8e40c140a5274ad8 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -4422,6 +4422,9 @@ static irqreturn_t igc_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; @@ -4465,6 +4468,9 @@ static irqreturn_t igc_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } + if (icr & IGC_ICR_TS) + igc_tsync_interrupt(adapter); + napi_schedule(&q_vector->napi); return IRQ_HANDLED; diff --git a/drivers/net/ethernet/intel/igc/igc_phy.c b/drivers/net/ethernet/intel/igc/igc_phy.c index 8e1799508edc42438a128c38be2f62cf8eeae083..e380b7a3ea63b19468d976ac56a4ff29116b5bbb 100644 --- a/drivers/net/ethernet/intel/igc/igc_phy.c +++ b/drivers/net/ethernet/intel/igc/igc_phy.c @@ -748,8 +748,6 @@ s32 igc_write_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 data) if (ret_val) return ret_val; ret_val = igc_write_phy_reg_mdic(hw, offset, data); - if (ret_val) - return ret_val; hw->phy.ops.release(hw); } else { ret_val = igc_write_xmdio_reg(hw, (u16)offset, dev_addr, @@ -781,8 +779,6 @@ s32 igc_read_phy_reg_gpy(struct igc_hw *hw, u32 offset, u16 *data) if (ret_val) return ret_val; ret_val = igc_read_phy_reg_mdic(hw, offset, data); - if (ret_val) - return ret_val; hw->phy.ops.release(hw); } else { ret_val = igc_read_xmdio_reg(hw, (u16)offset, dev_addr, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index a280aa34ca1dfdd5ecfb696e9633697dba303ba9..55983904b6df15fcf9df8e0e9119b0b565e17ee8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -3216,7 +3216,7 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter) max_combined = ixgbe_max_rss_indices(adapter); } - return max_combined; + return min_t(int, max_combined, num_online_cpus()); } static void ixgbe_get_channels(struct net_device *dev, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 37439b76fcb5ec7fea75dca8878ac6196a7139fe..a3a02e2f92f646506169af9a90b14f2d22243a7c 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5532,6 +5532,10 @@ static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw) if (!speed && hw->mac.ops.get_link_capabilities) { ret = hw->mac.ops.get_link_capabilities(hw, &speed, &autoneg); + /* remove NBASE-T speeds from default autonegotiation + * to accommodate broken network switches in the field + * which cannot cope with advertised NBASE-T speeds + */ speed &= ~(IXGBE_LINK_SPEED_5GB_FULL | IXGBE_LINK_SPEED_2_5GB_FULL); } @@ -10123,6 +10127,7 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) struct ixgbe_adapter *adapter = netdev_priv(dev); struct bpf_prog *old_prog; bool need_reset; + int num_queues; if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) return -EINVAL; @@ -10172,11 +10177,14 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) /* Kick start the NAPI context if there is an AF_XDP socket open * on that queue id. This so that receiving will start. */ - if (need_reset && prog) - for (i = 0; i < adapter->num_rx_queues; i++) + if (need_reset && prog) { + num_queues = min_t(int, adapter->num_rx_queues, + adapter->num_xdp_queues); + for (i = 0; i < num_queues; i++) if (adapter->xdp_ring[i]->xsk_pool) (void)ixgbe_xsk_wakeup(adapter->netdev, i, XDP_WAKEUP_RX); + } return 0; } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 5e339afa682a62ed3da06d1ab2a5b5f1d5b6fedb..37f2bc6de4b65732016e6b638e2a15d33e2da276 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -3405,6 +3405,9 @@ static s32 ixgbe_reset_hw_X550em(struct ixgbe_hw *hw) /* flush pending Tx transactions */ ixgbe_clear_tx_pending(hw); + /* set MDIO speed before talking to the PHY in case it's the 1st time */ + ixgbe_set_mdio_speed(hw); + /* PHY ops must be identified and initialized prior to reset */ status = hw->phy.ops.init(hw); if (status == IXGBE_ERR_SFP_NOT_SUPPORTED || diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index d60da7a89092e82f8195b961ba67938af6739722..ca1a428b278e0df4dcceaf3bd048b79054b3faf5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -391,12 +391,14 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget) u32 cmd_type; while (budget-- > 0) { - if (unlikely(!ixgbe_desc_unused(xdp_ring)) || - !netif_carrier_ok(xdp_ring->netdev)) { + if (unlikely(!ixgbe_desc_unused(xdp_ring))) { work_done = false; break; } + if (!netif_carrier_ok(xdp_ring->netdev)) + break; + if (!xsk_tx_peek_desc(pool, &desc)) break; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index a7d0a459969a2ec7c77a58597ec931c87ce94436..2d6ac61d7a3e62fe3d8a0b6355eac8d9c3923895 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1992,14 +1992,15 @@ static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) return; - set_ring_build_skb_enabled(rx_ring); + if (PAGE_SIZE < 8192) + if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB) + set_ring_uses_large_buffer(rx_ring); - if (PAGE_SIZE < 8192) { - if (max_frame <= IXGBEVF_MAX_FRAME_BUILD_SKB) - return; + /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */ + if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring)) + return; - set_ring_uses_large_buffer(rx_ring); - } + set_ring_build_skb_enabled(rx_ring); } /** diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 072075bc60ee9ac36d311b219b8a653c09927d4d..500511b72ac60b4b16575f847b074cd3eb9ede1d 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -209,7 +209,7 @@ static int xrx200_hw_receive(struct xrx200_chan *ch) skb->protocol = eth_type_trans(skb, net_dev); netif_receive_skb(skb); net_dev->stats.rx_packets++; - net_dev->stats.rx_bytes += len - ETH_FCS_LEN; + net_dev->stats.rx_bytes += len; return 0; } diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index ec9b6c564300e8c4f9dffc3f99c156973d6718d2..542cd6f2c9bd45c0f46e4477bf087556a8bfa259 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -2607,11 +2607,11 @@ static int mvpp2_rxq_init(struct mvpp2_port *port, mvpp2_rxq_status_update(port, rxq->id, 0, rxq->size); if (priv->percpu_pools) { - err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->id); + err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->logic_rxq); if (err < 0) goto err_free_dma; - err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->id); + err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->logic_rxq); if (err < 0) goto err_unregister_rxq_short; @@ -4652,11 +4652,13 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu) mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8); } + if (port->xdp_prog && mtu > MVPP2_MAX_RX_BUF_SIZE) { + netdev_err(dev, "Illegal MTU value %d (> %d) for XDP mode\n", + mtu, (int)MVPP2_MAX_RX_BUF_SIZE); + return -EINVAL; + } + if (MVPP2_RX_PKT_SIZE(mtu) > MVPP2_BM_LONG_PKT_SIZE) { - if (port->xdp_prog) { - netdev_err(dev, "Jumbo frames are not supported with XDP\n"); - return -EINVAL; - } if (priv->percpu_pools) { netdev_warn(dev, "mtu %d too high, switching to shared buffers", mtu); mvpp2_bm_switch_buffers(priv, false); @@ -4942,8 +4944,8 @@ static int mvpp2_xdp_setup(struct mvpp2_port *port, struct netdev_bpf *bpf) bool running = netif_running(port->dev); bool reset = !prog != !port->xdp_prog; - if (port->dev->mtu > ETH_DATA_LEN) { - NL_SET_ERR_MSG_MOD(bpf->extack, "XDP is not supported with jumbo frames enabled"); + if (port->dev->mtu > MVPP2_MAX_RX_BUF_SIZE) { + NL_SET_ERR_MSG_MOD(bpf->extack, "MTU too large for XDP"); return -EOPNOTSUPP; } @@ -6916,7 +6918,7 @@ static int mvpp2_probe(struct platform_device *pdev) shared = num_present_cpus() - priv->nthreads; if (shared > 0) - bitmap_fill(&priv->lock_map, + bitmap_set(&priv->lock_map, 0, min_t(int, shared, MVPP2_MAX_THREADS)); for (i = 0; i < MVPP2_MAX_THREADS; i++) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index bc870bff14df7e89b8c14bfd10e1255f087a2809..5205796859f6cb0116ccb59ba6bfa9c378d570e8 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -139,18 +139,85 @@ static const struct file_operations rvu_dbg_##name##_fops = { \ static void print_nix_qsize(struct seq_file *filp, struct rvu_pfvf *pfvf); +static void get_lf_str_list(struct rvu_block block, int pcifunc, + char *lfs) +{ + int lf = 0, seq = 0, len = 0, prev_lf = block.lf.max; + + for_each_set_bit(lf, block.lf.bmap, block.lf.max) { + if (lf >= block.lf.max) + break; + + if (block.fn_map[lf] != pcifunc) + continue; + + if (lf == prev_lf + 1) { + prev_lf = lf; + seq = 1; + continue; + } + + if (seq) + len += sprintf(lfs + len, "-%d,%d", prev_lf, lf); + else + len += (len ? sprintf(lfs + len, ",%d", lf) : + sprintf(lfs + len, "%d", lf)); + + prev_lf = lf; + seq = 0; + } + + if (seq) + len += sprintf(lfs + len, "-%d", prev_lf); + + lfs[len] = '\0'; +} + +static int get_max_column_width(struct rvu *rvu) +{ + int index, pf, vf, lf_str_size = 12, buf_size = 256; + struct rvu_block block; + u16 pcifunc; + char *buf; + + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (pf = 0; pf < rvu->hw->total_pfs; pf++) { + for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { + pcifunc = pf << 10 | vf; + if (!pcifunc) + continue; + + for (index = 0; index < BLK_COUNT; index++) { + block = rvu->hw->block[index]; + if (!strlen(block.name)) + continue; + + get_lf_str_list(block, pcifunc, buf); + if (lf_str_size <= strlen(buf)) + lf_str_size = strlen(buf) + 1; + } + } + } + + kfree(buf); + return lf_str_size; +} + /* Dumps current provisioning status of all RVU block LFs */ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { - int index, off = 0, flag = 0, go_back = 0, len = 0; + int index, off = 0, flag = 0, len = 0, i = 0; struct rvu *rvu = filp->private_data; - int lf, pf, vf, pcifunc; + int bytes_not_copied = 0; struct rvu_block block; - int bytes_not_copied; - int lf_str_size = 12; + int pf, vf, pcifunc; int buf_size = 2048; + int lf_str_size; char *lfs; char *buf; @@ -162,6 +229,9 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, if (!buf) return -ENOSPC; + /* Get the maximum width of a column */ + lf_str_size = get_max_column_width(rvu); + lfs = kzalloc(lf_str_size, GFP_KERNEL); if (!lfs) { kfree(buf); @@ -175,65 +245,69 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, "%-*s", lf_str_size, rvu->hw->block[index].name); } + off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); + bytes_not_copied = copy_to_user(buffer + (i * off), buf, off); + if (bytes_not_copied) + goto out; + + i++; + *ppos += off; for (pf = 0; pf < rvu->hw->total_pfs; pf++) { for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { + off = 0; + flag = 0; pcifunc = pf << 10 | vf; if (!pcifunc) continue; if (vf) { sprintf(lfs, "PF%d:VF%d", pf, vf - 1); - go_back = scnprintf(&buf[off], - buf_size - 1 - off, - "%-*s", lf_str_size, lfs); + off = scnprintf(&buf[off], + buf_size - 1 - off, + "%-*s", lf_str_size, lfs); } else { sprintf(lfs, "PF%d", pf); - go_back = scnprintf(&buf[off], - buf_size - 1 - off, - "%-*s", lf_str_size, lfs); + off = scnprintf(&buf[off], + buf_size - 1 - off, + "%-*s", lf_str_size, lfs); } - off += go_back; - for (index = 0; index < BLKTYPE_MAX; index++) { + for (index = 0; index < BLK_COUNT; index++) { block = rvu->hw->block[index]; if (!strlen(block.name)) continue; len = 0; lfs[len] = '\0'; - for (lf = 0; lf < block.lf.max; lf++) { - if (block.fn_map[lf] != pcifunc) - continue; + get_lf_str_list(block, pcifunc, lfs); + if (strlen(lfs)) flag = 1; - len += sprintf(&lfs[len], "%d,", lf); - } - if (flag) - len--; - lfs[len] = '\0'; off += scnprintf(&buf[off], buf_size - 1 - off, "%-*s", lf_str_size, lfs); - if (!strlen(lfs)) - go_back += lf_str_size; } - if (!flag) - off -= go_back; - else - flag = 0; - off--; - off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); + if (flag) { + off += scnprintf(&buf[off], + buf_size - 1 - off, "\n"); + bytes_not_copied = copy_to_user(buffer + + (i * off), + buf, off); + if (bytes_not_copied) + goto out; + + i++; + *ppos += off; + } } } - bytes_not_copied = copy_to_user(buffer, buf, off); +out: kfree(lfs); kfree(buf); - if (bytes_not_copied) return -EFAULT; - *ppos = off; - return off; + return *ppos; } RVU_DEBUG_FOPS(rsrc_status, rsrc_attach_status, NULL); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 044a5b1196acbc2515b788cedeb4d16dbee79879..161174be51c31932b99af4725ca63fb2b67f9502 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -386,7 +386,12 @@ static int otx2_forward_vf_mbox_msgs(struct otx2_nic *pf, dst_mdev->msg_size = mbox_hdr->msg_size; dst_mdev->num_msgs = num_msgs; err = otx2_sync_mbox_msg(dst_mbox); - if (err) { + /* Error code -EIO indicate there is a communication failure + * to the AF. Rest of the error codes indicate that AF processed + * VF messages and set the error codes in response messages + * (if any) so simply forward responses to VF. + */ + if (err == -EIO) { dev_warn(pf->dev, "AF not responding to VF%d messages\n", vf); /* restore PF mbase and exit */ diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index feb69fcd908e3ed7fb128fbfe87ab0ad65426f09..f406f5b517b021d734a0a2a7b724484c9fbf2fa8 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -50,12 +50,14 @@ int prestera_port_pvid_set(struct prestera_port *port, u16 vid) struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw, u32 dev_id, u32 hw_id) { - struct prestera_port *port = NULL; + struct prestera_port *port = NULL, *tmp; read_lock(&sw->port_list_lock); - list_for_each_entry(port, &sw->port_list, list) { - if (port->dev_id == dev_id && port->hw_id == hw_id) + list_for_each_entry(tmp, &sw->port_list, list) { + if (tmp->dev_id == dev_id && tmp->hw_id == hw_id) { + port = tmp; break; + } } read_unlock(&sw->port_list_lock); @@ -64,12 +66,14 @@ struct prestera_port *prestera_port_find_by_hwid(struct prestera_switch *sw, struct prestera_port *prestera_find_port(struct prestera_switch *sw, u32 id) { - struct prestera_port *port = NULL; + struct prestera_port *port = NULL, *tmp; read_lock(&sw->port_list_lock); - list_for_each_entry(port, &sw->port_list, list) { - if (port->id == id) + list_for_each_entry(tmp, &sw->port_list, list) { + if (tmp->id == id) { + port = tmp; break; + } } read_unlock(&sw->port_list_lock); diff --git a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c index 7d83e1f91ef17493ea1d04239205decdfb10dd24..9101d00e96b9db110ea6d1814545b74abddacb0c 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_switchdev.c @@ -439,8 +439,8 @@ static int prestera_port_bridge_join(struct prestera_port *port, br_port = prestera_bridge_port_add(bridge, port->dev); if (IS_ERR(br_port)) { - err = PTR_ERR(br_port); - goto err_brport_create; + prestera_bridge_put(bridge); + return PTR_ERR(br_port); } if (bridge->vlan_enabled) @@ -454,8 +454,6 @@ static int prestera_port_bridge_join(struct prestera_port *port, err_port_join: prestera_bridge_port_put(br_port); -err_brport_create: - prestera_bridge_put(bridge); return err; } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index a2d3f04a9ff222fe4ebccd4607873e39ae0ca33d..7d7dc0754a3a1335df5458fb50b15344c274d600 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -215,7 +215,7 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, phylink_config); struct mtk_eth *eth = mac->hw; u32 mcr_cur, mcr_new, sid, i; - int val, ge_mode, err; + int val, ge_mode, err = 0; /* MT76x8 has no hardware settings between for the MAC */ if (!MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628) && diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 3616b77caa0adb407288c51297e580a7f415bb13..01275c376721ce3afec777402cd5af27e761de4b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -663,7 +663,7 @@ void __init mlx4_en_init_ptys2ethtool_map(void) MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_T, SPEED_1000, ETHTOOL_LINK_MODE_1000baseT_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_CX_SGMII, SPEED_1000, - ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + ETHTOOL_LINK_MODE_1000baseX_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_1000BASE_KX, SPEED_1000, ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_T, SPEED_10000, @@ -675,9 +675,9 @@ void __init mlx4_en_init_ptys2ethtool_map(void) MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_KR, SPEED_10000, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_CR, SPEED_10000, - ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_10GBASE_SR, SPEED_10000, - ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT); MLX4_BUILD_PTYS2ETHTOOL_CONFIG(MLX4_20GBASE_KR2, SPEED_20000, ETHTOOL_LINK_MODE_20000baseMLD2_Full_BIT, ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 8999e9ce4f08e3ccff90e209689da103ef989229..a0551d6c2ffd6bd76991e37cbcf15f876a2f3034 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2276,9 +2276,14 @@ int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv, bool carry_xdp_prog) { struct bpf_prog *xdp_prog; - int i, t; + int i, t, ret; - mlx4_en_copy_priv(tmp, priv, prof); + ret = mlx4_en_copy_priv(tmp, priv, prof); + if (ret) { + en_warn(priv, "%s: mlx4_en_copy_priv() failed, return\n", + __func__); + return ret; + } if (mlx4_en_alloc_resources(tmp)) { en_warn(priv, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 2e55e0088871540e1f139ab970660e5f184f8ec1..94426d29025eb4e8a3bf2d6ea3b73d8445768453 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -130,11 +130,8 @@ static int cmd_alloc_index(struct mlx5_cmd *cmd) static void cmd_free_index(struct mlx5_cmd *cmd, int idx) { - unsigned long flags; - - spin_lock_irqsave(&cmd->alloc_lock, flags); + lockdep_assert_held(&cmd->alloc_lock); set_bit(idx, &cmd->bitmask); - spin_unlock_irqrestore(&cmd->alloc_lock, flags); } static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) @@ -144,13 +141,21 @@ static void cmd_ent_get(struct mlx5_cmd_work_ent *ent) static void cmd_ent_put(struct mlx5_cmd_work_ent *ent) { + struct mlx5_cmd *cmd = ent->cmd; + unsigned long flags; + + spin_lock_irqsave(&cmd->alloc_lock, flags); if (!refcount_dec_and_test(&ent->refcnt)) - return; + goto out; - if (ent->idx >= 0) - cmd_free_index(ent->cmd, ent->idx); + if (ent->idx >= 0) { + cmd_free_index(cmd, ent->idx); + up(ent->page_queue ? &cmd->pages_sem : &cmd->sem); + } cmd_free_ent(ent); +out: + spin_unlock_irqrestore(&cmd->alloc_lock, flags); } static struct mlx5_cmd_layout *get_inst(struct mlx5_cmd *cmd, int idx) @@ -883,25 +888,6 @@ static bool opcode_allowed(struct mlx5_cmd *cmd, u16 opcode) return cmd->allowed_opcode == opcode; } -static int cmd_alloc_index_retry(struct mlx5_cmd *cmd) -{ - unsigned long alloc_end = jiffies + msecs_to_jiffies(1000); - int idx; - -retry: - idx = cmd_alloc_index(cmd); - if (idx < 0 && time_before(jiffies, alloc_end)) { - /* Index allocation can fail on heavy load of commands. This is a temporary - * situation as the current command already holds the semaphore, meaning that - * another command completion is being handled and it is expected to release - * the entry index soon. - */ - cpu_relax(); - goto retry; - } - return idx; -} - bool mlx5_cmd_is_down(struct mlx5_core_dev *dev) { return pci_channel_offline(dev->pdev) || @@ -926,7 +912,7 @@ static void cmd_work_handler(struct work_struct *work) sem = ent->page_queue ? &cmd->pages_sem : &cmd->sem; down(sem); if (!ent->page_queue) { - alloc_ret = cmd_alloc_index_retry(cmd); + alloc_ret = cmd_alloc_index(cmd); if (alloc_ret < 0) { mlx5_core_err_rl(dev, "failed to allocate command entry\n"); if (ent->callback) { @@ -1582,8 +1568,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force vector = vec & 0xffffffff; for (i = 0; i < (1 << cmd->log_sz); i++) { if (test_bit(i, &vector)) { - struct semaphore *sem; - ent = cmd->ent_arr[i]; /* if we already completed the command, ignore it */ @@ -1606,10 +1590,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) cmd_ent_put(ent); - if (ent->page_queue) - sem = &cmd->pages_sem; - else - sem = &cmd->sem; ent->ts2 = ktime_get_ns(); memcpy(ent->out->first.data, ent->lay->out, sizeof(ent->lay->out)); dump_command(dev, ent, 0); @@ -1663,7 +1643,6 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force */ complete(&ent->done); } - up(sem); } } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 360e093874d4fc00ddf68aecf8353d204011224a..68d7ca17b6f51490374cf48a5c56887b91895220 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -154,6 +154,8 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {}; int err; + mlx5_debug_cq_remove(dev, cq); + mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq); mlx5_eq_del_cq(&cq->eq->core, cq); @@ -165,8 +167,6 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) return err; synchronize_irq(cq->irqn); - - mlx5_debug_cq_remove(dev, cq); mlx5_cq_put(cq); wait_for_completion(&cq->free); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 07c8d9811bc811249ef42ff583cf9c15827a5829..10d195042ab554029304343e76922967f632b55a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -507,6 +507,8 @@ void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) if (!mlx5_debugfs_root) return; - if (cq->dbg) + if (cq->dbg) { rem_res_tree(cq->dbg); + cq->dbg = NULL; + } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 9da34f82d466815d2b4a52197970a9d130dca9f7..73060b30fece312f011543dc615bffe70ce075f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -916,9 +916,6 @@ void mlx5e_deactivate_rq(struct mlx5e_rq *rq); void mlx5e_close_rq(struct mlx5e_rq *rq); struct mlx5e_sq_param; -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq); -void mlx5e_close_icosq(struct mlx5e_icosq *sq); int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, struct mlx5e_sq_param *param, struct xsk_buff_pool *xsk_pool, struct mlx5e_xdpsq *sq, bool is_redirect); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c index 9c076aa20306a7d2bd116b85dfac000bcafaa3f0..b6f5c1bcdbcd493f4ec4ea9bae6eb904454ccce5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c @@ -183,18 +183,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw, static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev) { - struct mlx5e_rep_priv *rpriv; - struct mlx5e_priv *priv; - - /* A given netdev is not a representor or not a slave of LAG configuration */ - if (!mlx5e_eswitch_rep(netdev) || !netif_is_lag_port(netdev)) - return false; - - priv = netdev_priv(netdev); - rpriv = priv->ppriv; - - /* Egress acl forward to vport is supported only non-uplink representor */ - return rpriv->rep->vport != MLX5_VPORT_UPLINK; + return netif_is_lag_port(netdev) && mlx5e_eswitch_vf_rep(netdev); } static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr) @@ -210,9 +199,6 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt u16 fwd_vport_num; int err; - if (!mlx5e_rep_is_lag_netdev(netdev)) - return; - info = ptr; lag_info = info->lower_state_info; /* This is not an event of a representor becoming active slave */ @@ -266,9 +252,6 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr) struct net_device *lag_dev; struct mlx5e_priv *priv; - if (!mlx5e_rep_is_lag_netdev(netdev)) - return; - priv = netdev_priv(netdev); rpriv = priv->ppriv; lag_dev = info->upper_dev; @@ -293,6 +276,19 @@ static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + struct mlx5e_rep_priv *rpriv; + struct mlx5e_rep_bond *bond; + struct mlx5e_priv *priv; + + if (!mlx5e_rep_is_lag_netdev(netdev)) + return NOTIFY_DONE; + + bond = container_of(nb, struct mlx5e_rep_bond, nb); + priv = netdev_priv(netdev); + rpriv = mlx5_eswitch_get_uplink_priv(priv->mdev->priv.eswitch, REP_ETH); + /* Verify VF representor is on the same device of the bond handling the netevent. */ + if (rpriv->uplink_priv.bond != bond) + return NOTIFY_DONE; switch (event) { case NETDEV_CHANGELOWERSTATE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 8be6eaa3eeb14c6ccd0452ea788f76b9c861de22..13dd34c571b9f5c7081d33c6173fe0d64920a8bb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -335,6 +335,14 @@ static int mlx5e_tx_reporter_dump_sq(struct mlx5e_priv *priv, struct devlink_fms return mlx5e_health_fmsg_named_obj_nest_end(fmsg); } +static int mlx5e_tx_reporter_timeout_dump(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg, + void *ctx) +{ + struct mlx5e_tx_timeout_ctx *to_ctx = ctx; + + return mlx5e_tx_reporter_dump_sq(priv, fmsg, to_ctx->sq); +} + static int mlx5e_tx_reporter_dump_all_sqs(struct mlx5e_priv *priv, struct devlink_fmsg *fmsg) { @@ -418,7 +426,7 @@ int mlx5e_reporter_tx_timeout(struct mlx5e_txqsq *sq) to_ctx.sq = sq; err_ctx.ctx = &to_ctx; err_ctx.recover = mlx5e_tx_reporter_timeout_recover; - err_ctx.dump = mlx5e_tx_reporter_dump_sq; + err_ctx.dump = mlx5e_tx_reporter_timeout_dump; snprintf(err_str, sizeof(err_str), "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x, usecs since last trans: %u", sq->channel->ix, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c index 71e8d66fa1509434f300c4447036fce13f281666..6692bc8333f73fa1723d0b2deede69a9dcb2889e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/pool.c @@ -11,13 +11,13 @@ static int mlx5e_xsk_map_pool(struct mlx5e_priv *priv, { struct device *dev = mlx5_core_dma_dev(priv->mdev); - return xsk_pool_dma_map(pool, dev, 0); + return xsk_pool_dma_map(pool, dev, DMA_ATTR_SKIP_CPU_SYNC); } static void mlx5e_xsk_unmap_pool(struct mlx5e_priv *priv, struct xsk_buff_pool *pool) { - return xsk_pool_dma_unmap(pool, 0); + return xsk_pool_dma_unmap(pool, DMA_ATTR_SKIP_CPU_SYNC); } static int mlx5e_xsk_get_pools(struct mlx5e_xsk *xsk) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index bc7c1962f9e66565ee186da96fe09b82f971c832..6a1b1363ac16a8d0049ade5da6ad5ccf8a955221 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1746,7 +1746,7 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, if (size_read < 0) { netdev_err(priv->netdev, "%s: mlx5_query_eeprom failed:0x%x\n", __func__, size_read); - return 0; + return size_read; } i += size_read; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6974090a7efacb8ea95a5186031feb7d89195c35..16e98ac47624c7c7496f8418c1db082c6f5d7add 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1051,9 +1051,20 @@ static void mlx5e_icosq_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_icosq_cqe_err(sq); } +static void mlx5e_async_icosq_err_cqe_work(struct work_struct *recover_work) +{ + struct mlx5e_icosq *sq = container_of(recover_work, struct mlx5e_icosq, + recover_work); + + /* Not implemented yet. */ + + netdev_warn(sq->channel->netdev, "async_icosq recovery is not implemented\n"); +} + static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5e_sq_param *param, - struct mlx5e_icosq *sq) + struct mlx5e_icosq *sq, + work_func_t recover_work_func) { void *sqc_wq = MLX5_ADDR_OF(sqc, param->sqc, wq); struct mlx5_core_dev *mdev = c->mdev; @@ -1073,7 +1084,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - INIT_WORK(&sq->recover_work, mlx5e_icosq_err_cqe_work); + INIT_WORK(&sq->recover_work, recover_work_func); return 0; @@ -1423,13 +1434,14 @@ static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work) mlx5e_reporter_tx_err_cqe(sq); } -int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, - struct mlx5e_sq_param *param, struct mlx5e_icosq *sq) +static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params, + struct mlx5e_sq_param *param, struct mlx5e_icosq *sq, + work_func_t recover_work_func) { struct mlx5e_create_sq_param csp = {}; int err; - err = mlx5e_alloc_icosq(c, param, sq); + err = mlx5e_alloc_icosq(c, param, sq, recover_work_func); if (err) return err; @@ -1459,7 +1471,7 @@ void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) synchronize_net(); /* Sync with NAPI. */ } -void mlx5e_close_icosq(struct mlx5e_icosq *sq) +static void mlx5e_close_icosq(struct mlx5e_icosq *sq) { struct mlx5e_channel *c = sq->channel; @@ -1862,11 +1874,13 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, spin_lock_init(&c->async_icosq_lock); - err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq); + err = mlx5e_open_icosq(c, params, &cparam->async_icosq, &c->async_icosq, + mlx5e_async_icosq_err_cqe_work); if (err) goto err_disable_napi; - err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->icosq, + mlx5e_icosq_err_cqe_work); if (err) goto err_close_async_icosq; @@ -3819,20 +3833,67 @@ static int set_feature_rx_all(struct net_device *netdev, bool enable) return mlx5_set_port_fcs(mdev, !enable); } +static int mlx5e_set_rx_port_ts(struct mlx5_core_dev *mdev, bool enable) +{ + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {}; + bool supported, curr_state; + int err; + + if (!MLX5_CAP_GEN(mdev, ports_check)) + return 0; + + err = mlx5_query_ports_check(mdev, in, sizeof(in)); + if (err) + return err; + + supported = MLX5_GET(pcmr_reg, in, rx_ts_over_crc_cap); + curr_state = MLX5_GET(pcmr_reg, in, rx_ts_over_crc); + + if (!supported || enable == curr_state) + return 0; + + MLX5_SET(pcmr_reg, in, local_port, 1); + MLX5_SET(pcmr_reg, in, rx_ts_over_crc, enable); + + return mlx5_set_ports_check(mdev, in, sizeof(in)); +} + static int set_feature_rx_fcs(struct net_device *netdev, bool enable) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5e_channels *chs = &priv->channels; + struct mlx5_core_dev *mdev = priv->mdev; int err; mutex_lock(&priv->state_lock); - priv->channels.params.scatter_fcs_en = enable; - err = mlx5e_modify_channels_scatter_fcs(&priv->channels, enable); - if (err) - priv->channels.params.scatter_fcs_en = !enable; + if (enable) { + err = mlx5e_set_rx_port_ts(mdev, false); + if (err) + goto out; - mutex_unlock(&priv->state_lock); + chs->params.scatter_fcs_en = true; + err = mlx5e_modify_channels_scatter_fcs(chs, true); + if (err) { + chs->params.scatter_fcs_en = false; + mlx5e_set_rx_port_ts(mdev, true); + } + } else { + chs->params.scatter_fcs_en = false; + err = mlx5e_modify_channels_scatter_fcs(chs, false); + if (err) { + chs->params.scatter_fcs_en = true; + goto out; + } + err = mlx5e_set_rx_port_ts(mdev, true); + if (err) { + mlx5_core_warn(mdev, "Failed to set RX port timestamp %d\n", err); + err = 0; + } + } +out: + mutex_unlock(&priv->state_lock); return err; } @@ -3874,12 +3935,11 @@ static int set_feature_arfs(struct net_device *netdev, bool enable) static int mlx5e_handle_feature(struct net_device *netdev, netdev_features_t *features, - netdev_features_t wanted_features, netdev_features_t feature, mlx5e_feature_handler feature_handler) { - netdev_features_t changes = wanted_features ^ netdev->features; - bool enable = !!(wanted_features & feature); + netdev_features_t changes = *features ^ netdev->features; + bool enable = !!(*features & feature); int err; if (!(changes & feature)) @@ -3887,22 +3947,22 @@ static int mlx5e_handle_feature(struct net_device *netdev, err = feature_handler(netdev, enable); if (err) { + MLX5E_SET_FEATURE(features, feature, !enable); netdev_err(netdev, "%s feature %pNF failed, err %d\n", enable ? "Enable" : "Disable", &feature, err); return err; } - MLX5E_SET_FEATURE(features, feature, enable); return 0; } int mlx5e_set_features(struct net_device *netdev, netdev_features_t features) { - netdev_features_t oper_features = netdev->features; + netdev_features_t oper_features = features; int err = 0; #define MLX5E_HANDLE_FEATURE(feature, handler) \ - mlx5e_handle_feature(netdev, &oper_features, features, feature, handler) + mlx5e_handle_feature(netdev, &oper_features, feature, handler) err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro); err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER, @@ -4964,9 +5024,13 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) } if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; - netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM; } if (mlx5e_tunnel_proto_supported(mdev, IPPROTO_GRE)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index f327b78261ec4eaf306115690163219e469ece04..b8637547800f98042433b588f1a1f7c7a935542f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -276,8 +276,8 @@ static inline int mlx5e_page_alloc_pool(struct mlx5e_rq *rq, if (unlikely(!dma_info->page)) return -ENOMEM; - dma_info->addr = dma_map_page(rq->pdev, dma_info->page, 0, - PAGE_SIZE, rq->buff.map_dir); + dma_info->addr = dma_map_page_attrs(rq->pdev, dma_info->page, 0, PAGE_SIZE, + rq->buff.map_dir, DMA_ATTR_SKIP_CPU_SYNC); if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { page_pool_recycle_direct(rq->page_pool, dma_info->page); dma_info->page = NULL; @@ -298,7 +298,8 @@ static inline int mlx5e_page_alloc(struct mlx5e_rq *rq, void mlx5e_page_dma_unmap(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { - dma_unmap_page(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir); + dma_unmap_page_attrs(rq->pdev, dma_info->addr, PAGE_SIZE, rq->buff.map_dir, + DMA_ATTR_SKIP_CPU_SYNC); } void mlx5e_page_release_dynamic(struct mlx5e_rq *rq, @@ -984,7 +985,8 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, } /* True when explicitly set via priv flag, or XDP prog is loaded */ - if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state)) + if (test_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state) || + get_cqe_tls_offload(cqe)) goto csum_unnecessary; /* CQE csum doesn't cover padding octets in short ethernet @@ -999,14 +1001,9 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, goto csum_unnecessary; if (likely(is_last_ethertype_ip(skb, &network_depth, &proto))) { - u8 ipproto = get_ip_proto(skb, network_depth, proto); - - if (unlikely(ipproto == IPPROTO_SCTP)) + if (unlikely(get_ip_proto(skb, network_depth, proto) == IPPROTO_SCTP)) goto csum_unnecessary; - if (unlikely(mlx5_ipsec_is_rx_flow(cqe))) - goto csum_none; - stats->csum_complete++; skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 3e19b1721303f53284379db71ec81fb389634bc5..b00c7d47833f3bbaf651265fd623a94807a7fe30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -79,12 +79,16 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, int dest_num = 0; int err = 0; - if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { + if (vport->egress.legacy.drop_counter) { + drop_counter = vport->egress.legacy.drop_counter; + } else if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) { drop_counter = mlx5_fc_create(esw->dev, false); - if (IS_ERR(drop_counter)) + if (IS_ERR(drop_counter)) { esw_warn(esw->dev, "vport[%d] configure egress drop rule counter err(%ld)\n", vport->vport, PTR_ERR(drop_counter)); + drop_counter = NULL; + } vport->egress.legacy.drop_counter = drop_counter; } @@ -123,7 +127,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; /* Attach egress drop flow counter */ - if (!IS_ERR_OR_NULL(drop_counter)) { + if (drop_counter) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter); @@ -162,7 +166,7 @@ void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, esw_acl_egress_table_destroy(vport); clean_drop_counter: - if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) { + if (vport->egress.legacy.drop_counter) { mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter); vport->egress.legacy.drop_counter = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c index d64fad2823e73cd097a7e85b5641ae292c21340e..45570d0a58d2fa9dc055720e27e7819a5022b00c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c @@ -160,7 +160,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, esw_acl_ingress_lgcy_rules_destroy(vport); - if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { + if (vport->ingress.legacy.drop_counter) { + counter = vport->ingress.legacy.drop_counter; + } else if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) { counter = mlx5_fc_create(esw->dev, false); if (IS_ERR(counter)) { esw_warn(esw->dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 401b2f5128dd43dab666eb52e28c938f57069051..78cc6f0bbc72b173bd80db84c5e356784f350b3b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1663,7 +1663,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) if (!ESW_ALLOWED(esw)) return 0; - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); if (esw->mode == MLX5_ESWITCH_NONE) { ret = mlx5_eswitch_enable_locked(esw, MLX5_ESWITCH_LEGACY, num_vfs); } else { @@ -1675,7 +1675,7 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) if (!ret) esw->esw_funcs.num_vfs = num_vfs; } - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return ret; } @@ -1719,10 +1719,10 @@ void mlx5_eswitch_disable(struct mlx5_eswitch *esw, bool clear_vf) if (!ESW_ALLOWED(esw)) return; - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); mlx5_eswitch_disable_locked(esw, clear_vf); esw->esw_funcs.num_vfs = 0; - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); } int mlx5_eswitch_init(struct mlx5_core_dev *dev) @@ -1778,7 +1778,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) atomic64_set(&esw->offloads.num_flows, 0); ida_init(&esw->offloads.vport_metadata_ida); mutex_init(&esw->state_lock); - mutex_init(&esw->mode_lock); + init_rwsem(&esw->mode_lock); mlx5_esw_for_all_vports(esw, i, vport) { vport->vport = mlx5_eswitch_index_to_vport_num(esw, i); @@ -1813,7 +1813,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); esw_offloads_cleanup_reps(esw); - mutex_destroy(&esw->mode_lock); mutex_destroy(&esw->state_lock); ida_destroy(&esw->offloads.vport_metadata_ida); mlx5e_mod_hdr_tbl_destroy(&esw->offloads.mod_hdr); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index cf87de94418ff7d74319e2c936f92f5c8e6f3fcf..59c674f157a8c10e706615840a8d77eb83b5a66e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -262,7 +262,7 @@ struct mlx5_eswitch { /* Protects eswitch mode change that occurs via one or more * user commands, i.e. sriov state change, devlink commands. */ - struct mutex mode_lock; + struct rw_semaphore mode_lock; struct { bool enabled; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 5801f55ff077195d13694e78905688530e170624..ccc7dd3e738a48ede918113f769a0bed7b84300b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -2037,10 +2037,6 @@ esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw) if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev, flow_source)) return false; - if (mlx5_core_is_ecpf_esw_manager(esw->dev) || - mlx5_ecpf_vport_exists(esw->dev)) - return false; - return true; } @@ -2508,7 +2504,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL; - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); cur_mlx5_mode = esw->mode; if (cur_mlx5_mode == mlx5_mode) goto unlock; @@ -2521,7 +2517,7 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, err = -EINVAL; unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; } @@ -2534,14 +2530,14 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) if (IS_ERR(esw)) return PTR_ERR(esw); - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; err = esw_mode_to_devlink(esw->mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; } @@ -2557,7 +2553,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, if (IS_ERR(esw)) return PTR_ERR(esw); - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto out; @@ -2599,7 +2595,7 @@ int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode, } esw->offloads.inline_mode = mlx5_mode; - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0; revert_inline_mode: @@ -2609,7 +2605,7 @@ revert_inline_mode: vport, esw->offloads.inline_mode); out: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; } @@ -2622,14 +2618,14 @@ int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode) if (IS_ERR(esw)) return PTR_ERR(esw); - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; } @@ -2645,7 +2641,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, if (IS_ERR(esw)) return PTR_ERR(esw); - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; @@ -2691,7 +2687,7 @@ int mlx5_devlink_eswitch_encap_mode_set(struct devlink *devlink, } unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; } @@ -2706,15 +2702,15 @@ int mlx5_devlink_eswitch_encap_mode_get(struct devlink *devlink, return PTR_ERR(esw); - mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; *encap = esw->offloads.encap; unlock: - mutex_unlock(&esw->mode_lock); - return 0; + up_write(&esw->mode_lock); + return err; } static bool diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 0ff034b0866e2e9da7334fee35531f7f9a1830a3..55772f0cbbf8fef911b7e233605b0bd9564fd286 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -2034,6 +2034,8 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) fte->node.del_hw_func = NULL; up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); + } else { + up_write_ref_node(&fte->node, false); } kfree(handle); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index ee710ce0079504466f34fd9910c481a0537fadd9..9b472e793ee361acf907286584842d993c8db075 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -131,7 +131,7 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; - del_timer(&fw_reset->timer); + del_timer_sync(&fw_reset->timer); } static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index fe5476a76464fbc53e2083909f0869fbeec74f05..11cc3ea5010aa2db68543f55fe30f692bc313539 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -365,6 +365,7 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, bool is_bonded, is_in_lag, mode_supported; int bond_status = 0; int num_slaves = 0; + int changed = 0; int idx; if (!netif_is_lag_master(upper)) @@ -401,27 +402,27 @@ static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, */ is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; - if (!mlx5_lag_is_ready(ldev) && is_in_lag) { - NL_SET_ERR_MSG_MOD(info->info.extack, - "Can't activate LAG offload, PF is configured with more than 64 VFs"); - return 0; - } - /* Lag mode must be activebackup or hash. */ mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; - if (is_in_lag && !mode_supported) - NL_SET_ERR_MSG_MOD(info->info.extack, - "Can't activate LAG offload, TX type isn't supported"); - is_bonded = is_in_lag && mode_supported; if (tracker->is_bonded != is_bonded) { tracker->is_bonded = is_bonded; - return 1; + changed = 1; } - return 0; + if (!is_in_lag) + return changed; + + if (!mlx5_lag_is_ready(ldev)) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, PF is configured with more than 64 VFs"); + else if (!mode_supported) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, TX type isn't supported"); + + return changed; } static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, @@ -464,9 +465,6 @@ static int mlx5_lag_netdev_event(struct notifier_block *this, ldev = container_of(this, struct mlx5_lag, nb); - if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE) - return NOTIFY_DONE; - tracker = ldev->tracker; switch (event) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 15c3a9058e72876fda3ac796fa78bf98dbc42255..c04413f449c509ac7e387b2a556d2ffa86176dfb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -123,6 +123,10 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, return; } + /* Handle multipath entry with lower priority value */ + if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority) + return; + /* Handle add/replace event */ nhs = fib_info_num_path(fi); if (nhs == 1) { @@ -132,12 +136,13 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); if (i < 0) - i = MLX5_LAG_NORMAL_AFFINITY; - else - ++i; + return; + i++; mlx5_lag_set_port_affinity(ldev, i); } + + mp->mfi = fi; return; } @@ -265,10 +270,8 @@ static int mlx5_lag_fib_event(struct notifier_block *nb, fen_info = container_of(info, struct fib_entry_notifier_info, info); fi = fen_info->fi; - if (fi->nh) { - NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported"); - return notifier_from_errno(-EINVAL); - } + if (fi->nh) + return NOTIFY_DONE; fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev; if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev && fib_dev != ldev->pf[MLX5_LAG_P2].netdev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c index 947f346bdc2d6ef373b6d1fd7ee1a824b84934fc..77c6287c90d55063cb61b49d8c2b298312f27bc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c @@ -292,7 +292,7 @@ static int create_chain_restore(struct fs_chain *chain) { struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch; - char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)]; + u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; struct mlx5_fs_chains *chains = chain->chains; enum mlx5e_tc_attr_to_reg chain_to_reg; struct mlx5_modify_hdr *mod_hdr; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c index 00d861361428f30532a225157e375c831658f6fb..16a7c7ec5e138b27367a943f4d125179154ac3d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include +#include #include "dr_types.h" #define DR_DOMAIN_SW_STEERING_SUPPORTED(dmn, dmn_type) \ @@ -69,9 +70,9 @@ static int dr_domain_init_resources(struct mlx5dr_domain *dmn) } dmn->uar = mlx5_get_uars_page(dmn->mdev); - if (!dmn->uar) { + if (IS_ERR(dmn->uar)) { mlx5dr_err(dmn, "Couldn't allocate UAR\n"); - ret = -ENOMEM; + ret = PTR_ERR(dmn->uar); goto clean_pd; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h index 5ffdfb532cb7f9e4a16f1423283648e8f85bce1a..91f68fb0b420ae5eee729cf3bdb094b20fec4618 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h +++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h @@ -905,6 +905,18 @@ static inline int mlxsw_cmd_sw2hw_rdq(struct mlxsw_core *mlxsw_core, */ MLXSW_ITEM32(cmd_mbox, sw2hw_dq, cq, 0x00, 24, 8); +enum mlxsw_cmd_mbox_sw2hw_dq_sdq_lp { + MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_WQE, + MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_IGNORE_WQE, +}; + +/* cmd_mbox_sw2hw_dq_sdq_lp + * SDQ local Processing + * 0: local processing by wqe.lp + * 1: local processing (ignoring wqe.lp) + */ +MLXSW_ITEM32(cmd_mbox, sw2hw_dq, sdq_lp, 0x00, 23, 1); + /* cmd_mbox_sw2hw_dq_sdq_tclass * SDQ: CPU Egress TClass * RDQ: Reserved diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 42e4437ac3c168d77413ae8f01906fa7a1b6fac0..7ec1d0ee9beeb1b570bfbe7a672d06c760630915 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -25,16 +25,8 @@ #define MLXSW_THERMAL_ZONE_MAX_NAME 16 #define MLXSW_THERMAL_TEMP_SCORE_MAX GENMASK(31, 0) #define MLXSW_THERMAL_MAX_STATE 10 +#define MLXSW_THERMAL_MIN_STATE 2 #define MLXSW_THERMAL_MAX_DUTY 255 -/* Minimum and maximum fan allowed speed in percent: from 20% to 100%. Values - * MLXSW_THERMAL_MAX_STATE + x, where x is between 2 and 10 are used for - * setting fan speed dynamic minimum. For example, if value is set to 14 (40%) - * cooling levels vector will be set to 4, 4, 4, 4, 4, 5, 6, 7, 8, 9, 10 to - * introduce PWM speed in percent: 40, 40, 40, 40, 40, 50, 60. 70, 80, 90, 100. - */ -#define MLXSW_THERMAL_SPEED_MIN (MLXSW_THERMAL_MAX_STATE + 2) -#define MLXSW_THERMAL_SPEED_MAX (MLXSW_THERMAL_MAX_STATE * 2) -#define MLXSW_THERMAL_SPEED_MIN_LEVEL 2 /* 20% */ /* External cooling devices, allowed for binding to mlxsw thermal zones. */ static char * const mlxsw_thermal_external_allowed_cdev[] = { @@ -635,49 +627,16 @@ static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev, struct mlxsw_thermal *thermal = cdev->devdata; struct device *dev = thermal->bus_info->dev; char mfsc_pl[MLXSW_REG_MFSC_LEN]; - unsigned long cur_state, i; int idx; - u8 duty; int err; + if (state > MLXSW_THERMAL_MAX_STATE) + return -EINVAL; + idx = mlxsw_get_cooling_device_idx(thermal, cdev); if (idx < 0) return idx; - /* Verify if this request is for changing allowed fan dynamical - * minimum. If it is - update cooling levels accordingly and update - * state, if current state is below the newly requested minimum state. - * For example, if current state is 5, and minimal state is to be - * changed from 4 to 6, thermal->cooling_levels[0 to 5] will be changed - * all from 4 to 6. And state 5 (thermal->cooling_levels[4]) should be - * overwritten. - */ - if (state >= MLXSW_THERMAL_SPEED_MIN && - state <= MLXSW_THERMAL_SPEED_MAX) { - state -= MLXSW_THERMAL_MAX_STATE; - for (i = 0; i <= MLXSW_THERMAL_MAX_STATE; i++) - thermal->cooling_levels[i] = max(state, i); - - mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0); - err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl); - if (err) - return err; - - duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl); - cur_state = mlxsw_duty_to_state(duty); - - /* If current fan state is lower than requested dynamical - * minimum, increase fan speed up to dynamical minimum. - */ - if (state < cur_state) - return 0; - - state = cur_state; - } - - if (state > MLXSW_THERMAL_MAX_STATE) - return -EINVAL; - /* Normalize the state to the valid speed range. */ state = thermal->cooling_levels[state]; mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state)); @@ -980,8 +939,7 @@ int mlxsw_thermal_init(struct mlxsw_core *core, /* Initialize cooling levels per PWM state. */ for (i = 0; i < MLXSW_THERMAL_MAX_STATE; i++) - thermal->cooling_levels[i] = max(MLXSW_THERMAL_SPEED_MIN_LEVEL, - i); + thermal->cooling_levels[i] = max(MLXSW_THERMAL_MIN_STATE, i); thermal->polling_delay = bus_info->low_frequency ? MLXSW_THERMAL_SLOW_POLL_INT : diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index c010db2c9dba99566a2f06dcc70adebc5cbb5c6e..443dc44452ef8b5fcf9df28df480b4c757cb1be9 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -234,6 +234,7 @@ static void mlxsw_m_port_remove(struct mlxsw_m *mlxsw_m, u8 local_port) static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u8 local_port, u8 *last_module) { + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_m->core); u8 module, width; int err; @@ -249,6 +250,9 @@ static int mlxsw_m_port_module_map(struct mlxsw_m *mlxsw_m, u8 local_port, if (module == *last_module) return 0; *last_module = module; + + if (WARN_ON_ONCE(module >= max_ports)) + return -EINVAL; mlxsw_m->module_to_port[module] = ++mlxsw_m->max_ports; return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 641cdd81882b87be67073d3d3f433c88610ea16e..dbb16ce25bdf32b92d8466adbd42801e33af17d8 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -285,6 +285,7 @@ static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, struct mlxsw_pci_queue *q) { int tclass; + int lp; int i; int err; @@ -292,9 +293,12 @@ static int mlxsw_pci_sdq_init(struct mlxsw_pci *mlxsw_pci, char *mbox, q->consumer_counter = 0; tclass = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_PCI_SDQ_EMAD_TC : MLXSW_PCI_SDQ_CTL_TC; + lp = q->num == MLXSW_PCI_SDQ_EMAD_INDEX ? MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_IGNORE_WQE : + MLXSW_CMD_MBOX_SW2HW_DQ_SDQ_LP_WQE; /* Set CQ of same number of this SDQ. */ mlxsw_cmd_mbox_sw2hw_dq_cq_set(mbox, q->num); + mlxsw_cmd_mbox_sw2hw_dq_sdq_lp_set(mbox, lp); mlxsw_cmd_mbox_sw2hw_dq_sdq_tclass_set(mbox, tclass); mlxsw_cmd_mbox_sw2hw_dq_log2_dq_sz_set(mbox, 3); /* 8 pages */ for (i = 0; i < MLXSW_PCI_AQ_PAGES; i++) { @@ -353,13 +357,10 @@ static int mlxsw_pci_rdq_skb_alloc(struct mlxsw_pci *mlxsw_pci, struct sk_buff *skb; int err; - elem_info->u.rdq.skb = NULL; skb = netdev_alloc_skb_ip_align(NULL, buf_len); if (!skb) return -ENOMEM; - /* Assume that wqe was previously zeroed. */ - err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data, buf_len, DMA_FROM_DEVICE); if (err) @@ -548,21 +549,26 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, struct pci_dev *pdev = mlxsw_pci->pdev; struct mlxsw_pci_queue_elem_info *elem_info; struct mlxsw_rx_info rx_info = {}; - char *wqe; + char wqe[MLXSW_PCI_WQE_SIZE]; struct sk_buff *skb; u16 byte_count; int err; elem_info = mlxsw_pci_queue_elem_info_consumer_get(q); - skb = elem_info->u.sdq.skb; - if (!skb) - return; - wqe = elem_info->elem; - mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + skb = elem_info->u.rdq.skb; + memcpy(wqe, elem_info->elem, MLXSW_PCI_WQE_SIZE); if (q->consumer_counter++ != consumer_counter_limit) dev_dbg_ratelimited(&pdev->dev, "Consumer counter does not match limit in RDQ\n"); + err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); + if (err) { + dev_err_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); + goto out; + } + + mlxsw_pci_wqe_frag_unmap(mlxsw_pci, wqe, 0, DMA_FROM_DEVICE); + if (mlxsw_pci_cqe_lag_get(cqe_v, cqe)) { rx_info.is_lag = true; rx_info.u.lag_id = mlxsw_pci_cqe_lag_id_get(cqe_v, cqe); @@ -594,10 +600,7 @@ static void mlxsw_pci_cqe_rdq_handle(struct mlxsw_pci *mlxsw_pci, skb_put(skb, byte_count); mlxsw_core_skb_receive(mlxsw_pci->core, skb, &rx_info); - memset(wqe, 0, q->elem_size); - err = mlxsw_pci_rdq_skb_alloc(mlxsw_pci, elem_info); - if (err) - dev_dbg_ratelimited(&pdev->dev, "Failed to alloc skb for RDQ\n"); +out: /* Everything is set up, ring doorbell to pass elem to HW */ q->producer_counter++; mlxsw_pci_queue_doorbell_producer_ring(mlxsw_pci, q); @@ -1600,7 +1603,7 @@ static int mlxsw_pci_skb_transmit(void *bus_priv, struct sk_buff *skb, wqe = elem_info->elem; mlxsw_pci_wqe_c_set(wqe, 1); /* always report completion */ - mlxsw_pci_wqe_lp_set(wqe, !!tx_info->is_emad); + mlxsw_pci_wqe_lp_set(wqe, 0); mlxsw_pci_wqe_type_set(wqe, MLXSW_PCI_WQE_TYPE_ETHERNET); err = mlxsw_pci_wqe_frag_map(mlxsw_pci, wqe, 0, skb->data, @@ -1901,6 +1904,7 @@ int mlxsw_pci_driver_register(struct pci_driver *pci_driver) { pci_driver->probe = mlxsw_pci_probe; pci_driver->remove = mlxsw_pci_remove; + pci_driver->shutdown = mlxsw_pci_remove; return pci_register_driver(pci_driver); } EXPORT_SYMBOL(mlxsw_pci_driver_register); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index b08853f71b2beae79e1458e2fc2cb77eb6d4dec5..4110e15c22c7995dd8121b2a5458786f42548f8d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2052,9 +2052,14 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg, struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_port *mlxsw_sp_port; enum mlxsw_reg_pude_oper_status status; + unsigned int max_ports; u8 local_port; + max_ports = mlxsw_core_max_ports(mlxsw_sp->core); local_port = mlxsw_reg_pude_local_port_get(pude_pl); + + if (WARN_ON_ONCE(!local_port || local_port >= max_ports)) + return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) return; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c index ca8090a28dec600fd4e82164755c98e74363adf1..50eca2daad8439f95222da1a179729eb1cab02cb 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c @@ -568,10 +568,13 @@ void mlxsw_sp1_ptp_got_timestamp(struct mlxsw_sp *mlxsw_sp, bool ingress, u8 domain_number, u16 sequence_id, u64 timestamp) { + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); struct mlxsw_sp_port *mlxsw_sp_port; struct mlxsw_sp1_ptp_key key; u8 types; + if (WARN_ON_ONCE(local_port >= max_ports)) + return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) return; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 4381f8c6c3fb718698091172d1567e3b8056d2ac..53128382fc2e05e25c655f8fc0cf5f076d98240d 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2177,6 +2177,7 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, char *rauhtd_pl, int ent_index) { + u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); struct net_device *dev; struct neighbour *n; __be32 dipn; @@ -2185,6 +2186,8 @@ static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp, mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip); + if (WARN_ON_ONCE(rif >= max_rifs)) + return; if (!mlxsw_sp->router->rifs[rif]) { dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n"); return; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 6501ce94ace586eb6c6742f179f97254aa1d3f05..368fa0e5ad3152e150e983ac632bb76cc78a1b17 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -2410,6 +2410,7 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, char *sfn_pl, int rec_index, bool adding) { + unsigned int max_ports = mlxsw_core_max_ports(mlxsw_sp->core); struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan; struct mlxsw_sp_bridge_device *bridge_device; struct mlxsw_sp_bridge_port *bridge_port; @@ -2422,6 +2423,9 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, int err; mlxsw_reg_sfn_mac_unpack(sfn_pl, rec_index, mac, &fid, &local_port); + + if (WARN_ON_ONCE(local_port >= max_ports)) + return; mlxsw_sp_port = mlxsw_sp->ports[local_port]; if (!mlxsw_sp_port) { dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect local port in FDB notification\n"); diff --git a/drivers/net/ethernet/micrel/Makefile b/drivers/net/ethernet/micrel/Makefile index 5cc00d22c708c46f37afbea48830223b53237cb9..6ecc4eb30e74b80582abde169a21afbacc577705 100644 --- a/drivers/net/ethernet/micrel/Makefile +++ b/drivers/net/ethernet/micrel/Makefile @@ -4,8 +4,6 @@ # obj-$(CONFIG_KS8842) += ks8842.o -obj-$(CONFIG_KS8851) += ks8851.o -ks8851-objs = ks8851_common.o ks8851_spi.o -obj-$(CONFIG_KS8851_MLL) += ks8851_mll.o -ks8851_mll-objs = ks8851_common.o ks8851_par.o +obj-$(CONFIG_KS8851) += ks8851_common.o ks8851_spi.o +obj-$(CONFIG_KS8851_MLL) += ks8851_common.o ks8851_par.o obj-$(CONFIG_KSZ884X_PCI) += ksz884x.o diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index d65872172229b80e47e9b7dc7d312b52231c2e08..f74eae8eed02fff309294b6ef68af993307bb879 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -1031,6 +1031,7 @@ int ks8851_suspend(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(ks8851_suspend); int ks8851_resume(struct device *dev) { @@ -1044,6 +1045,7 @@ int ks8851_resume(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(ks8851_resume); #endif int ks8851_probe_common(struct net_device *netdev, struct device *dev, @@ -1175,6 +1177,7 @@ err_reg: err_reg_io: return ret; } +EXPORT_SYMBOL_GPL(ks8851_probe_common); int ks8851_remove_common(struct device *dev) { @@ -1191,3 +1194,8 @@ int ks8851_remove_common(struct device *dev) return 0; } +EXPORT_SYMBOL_GPL(ks8851_remove_common); + +MODULE_DESCRIPTION("KS8851 Network driver"); +MODULE_AUTHOR("Ben Dooks "); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c index 3bab0cb2b1a56a52e12b4551beb8640e4b378f58..c7c99cc54ca11af24ed2ea44a77c008565072dc0 100644 --- a/drivers/net/ethernet/micrel/ks8851_par.c +++ b/drivers/net/ethernet/micrel/ks8851_par.c @@ -323,6 +323,8 @@ static int ks8851_probe_par(struct platform_device *pdev) return ret; netdev->irq = platform_get_irq(pdev, 0); + if (netdev->irq < 0) + return netdev->irq; return ks8851_probe_common(netdev, dev, msg_enable); } diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c index 796e46a5392699c932d1d9b94c09bd5749ad6b18..81a8ccca7e5e075de4e1c4d5dfc21cab6874a686 100644 --- a/drivers/net/ethernet/microchip/encx24j600-regmap.c +++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c @@ -497,13 +497,19 @@ static struct regmap_bus phymap_encx24j600 = { .reg_read = regmap_encx24j600_phy_reg_read, }; -void devm_regmap_init_encx24j600(struct device *dev, - struct encx24j600_context *ctx) +int devm_regmap_init_encx24j600(struct device *dev, + struct encx24j600_context *ctx) { mutex_init(&ctx->mutex); regcfg.lock_arg = ctx; ctx->regmap = devm_regmap_init(dev, ®map_encx24j600, ctx, ®cfg); + if (IS_ERR(ctx->regmap)) + return PTR_ERR(ctx->regmap); ctx->phymap = devm_regmap_init(dev, &phymap_encx24j600, ctx, &phycfg); + if (IS_ERR(ctx->phymap)) + return PTR_ERR(ctx->phymap); + + return 0; } EXPORT_SYMBOL_GPL(devm_regmap_init_encx24j600); diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c index 2c0dcd7acf3fd9be0abfff51ce76acc2c1c4a93a..c95e29ae6f20c56f0977c2daa9c1ca9ee08be237 100644 --- a/drivers/net/ethernet/microchip/encx24j600.c +++ b/drivers/net/ethernet/microchip/encx24j600.c @@ -1024,10 +1024,13 @@ static int encx24j600_spi_probe(struct spi_device *spi) priv->speed = SPEED_100; priv->ctx.spi = spi; - devm_regmap_init_encx24j600(&spi->dev, &priv->ctx); ndev->irq = spi->irq; ndev->netdev_ops = &encx24j600_netdev_ops; + ret = devm_regmap_init_encx24j600(&spi->dev, &priv->ctx); + if (ret) + goto out_free; + mutex_init(&priv->lock); /* Reset device and check if it is connected */ diff --git a/drivers/net/ethernet/microchip/encx24j600_hw.h b/drivers/net/ethernet/microchip/encx24j600_hw.h index f604a260ede798b93d9235087cb850250f40de2e..711147a159aa991f39b65268c5a6773c90be3303 100644 --- a/drivers/net/ethernet/microchip/encx24j600_hw.h +++ b/drivers/net/ethernet/microchip/encx24j600_hw.h @@ -15,8 +15,8 @@ struct encx24j600_context { int bank; }; -void devm_regmap_init_encx24j600(struct device *dev, - struct encx24j600_context *ctx); +int devm_regmap_init_encx24j600(struct device *dev, + struct encx24j600_context *ctx); /* Single-byte instructions */ #define BANK_SELECT(bank) (0xC0 | ((bank & (BANK_MASK >> BANK_SHIFT)) << 1)) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 8947c3a6281094d60226ae69772dfbf1ecc353a9..481f89d193f77657229029edacb6dfb429672db4 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -922,8 +922,7 @@ static int lan743x_phy_reset(struct lan743x_adapter *adapter) } static void lan743x_phy_update_flowcontrol(struct lan743x_adapter *adapter, - u8 duplex, u16 local_adv, - u16 remote_adv) + u16 local_adv, u16 remote_adv) { struct lan743x_phy *phy = &adapter->phy; u8 cap; @@ -951,7 +950,6 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) phy_print_status(phydev); if (phydev->state == PHY_RUNNING) { - struct ethtool_link_ksettings ksettings; int remote_advertisement = 0; int local_advertisement = 0; @@ -988,18 +986,14 @@ static void lan743x_phy_link_status_change(struct net_device *netdev) } lan743x_csr_write(adapter, MAC_CR, data); - memset(&ksettings, 0, sizeof(ksettings)); - phy_ethtool_get_link_ksettings(netdev, &ksettings); local_advertisement = linkmode_adv_to_mii_adv_t(phydev->advertising); remote_advertisement = linkmode_adv_to_mii_adv_t(phydev->lp_advertising); - lan743x_phy_update_flowcontrol(adapter, - ksettings.base.duplex, - local_advertisement, + lan743x_phy_update_flowcontrol(adapter, local_advertisement, remote_advertisement); - lan743x_ptp_update_latency(adapter, ksettings.base.speed); + lan743x_ptp_update_latency(adapter, phydev->speed); } } @@ -1280,7 +1274,7 @@ static void lan743x_tx_release_desc(struct lan743x_tx *tx, if (!(buffer_info->flags & TX_BUFFER_INFO_FLAG_ACTIVE)) goto done; - descriptor_type = (descriptor->data0) & + descriptor_type = le32_to_cpu(descriptor->data0) & TX_DESC_DATA0_DTYPE_MASK_; if (descriptor_type == TX_DESC_DATA0_DTYPE_DATA_) goto clean_up_data_descriptor; @@ -1340,7 +1334,7 @@ static int lan743x_tx_next_index(struct lan743x_tx *tx, int index) static void lan743x_tx_release_completed_descriptors(struct lan743x_tx *tx) { - while ((*tx->head_cpu_ptr) != (tx->last_head)) { + while (le32_to_cpu(*tx->head_cpu_ptr) != (tx->last_head)) { lan743x_tx_release_desc(tx, tx->last_head, false); tx->last_head = lan743x_tx_next_index(tx, tx->last_head); } @@ -1426,10 +1420,10 @@ static int lan743x_tx_frame_start(struct lan743x_tx *tx, if (dma_mapping_error(dev, dma_ptr)) return -ENOMEM; - tx_descriptor->data1 = DMA_ADDR_LOW32(dma_ptr); - tx_descriptor->data2 = DMA_ADDR_HIGH32(dma_ptr); - tx_descriptor->data3 = (frame_length << 16) & - TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_; + tx_descriptor->data1 = cpu_to_le32(DMA_ADDR_LOW32(dma_ptr)); + tx_descriptor->data2 = cpu_to_le32(DMA_ADDR_HIGH32(dma_ptr)); + tx_descriptor->data3 = cpu_to_le32((frame_length << 16) & + TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_); buffer_info->skb = NULL; buffer_info->dma_ptr = dma_ptr; @@ -1470,7 +1464,7 @@ static void lan743x_tx_frame_add_lso(struct lan743x_tx *tx, tx->frame_data0 |= TX_DESC_DATA0_IOC_; } tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; - tx_descriptor->data0 = tx->frame_data0; + tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); /* move to next descriptor */ tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); @@ -1514,7 +1508,7 @@ static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx, /* wrap up previous descriptor */ tx_descriptor = &tx->ring_cpu_ptr[tx->frame_tail]; - tx_descriptor->data0 = tx->frame_data0; + tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); /* move to next descriptor */ tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); @@ -1540,10 +1534,10 @@ static int lan743x_tx_frame_add_fragment(struct lan743x_tx *tx, return -ENOMEM; } - tx_descriptor->data1 = DMA_ADDR_LOW32(dma_ptr); - tx_descriptor->data2 = DMA_ADDR_HIGH32(dma_ptr); - tx_descriptor->data3 = (frame_length << 16) & - TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_; + tx_descriptor->data1 = cpu_to_le32(DMA_ADDR_LOW32(dma_ptr)); + tx_descriptor->data2 = cpu_to_le32(DMA_ADDR_HIGH32(dma_ptr)); + tx_descriptor->data3 = cpu_to_le32((frame_length << 16) & + TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_); buffer_info->skb = NULL; buffer_info->dma_ptr = dma_ptr; @@ -1587,7 +1581,7 @@ static void lan743x_tx_frame_end(struct lan743x_tx *tx, if (ignore_sync) buffer_info->flags |= TX_BUFFER_INFO_FLAG_IGNORE_SYNC; - tx_descriptor->data0 = tx->frame_data0; + tx_descriptor->data0 = cpu_to_le32(tx->frame_data0); tx->frame_tail = lan743x_tx_next_index(tx, tx->frame_tail); tx->last_tail = tx->frame_tail; @@ -1770,6 +1764,16 @@ static int lan743x_tx_ring_init(struct lan743x_tx *tx) ret = -EINVAL; goto cleanup; } + if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev, + DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&tx->adapter->pdev->dev, + DMA_BIT_MASK(32))) { + dev_warn(&tx->adapter->pdev->dev, + "lan743x_: No suitable DMA available\n"); + ret = -ENOMEM; + goto cleanup; + } + } ring_allocation_size = ALIGN(tx->ring_size * sizeof(struct lan743x_tx_descriptor), PAGE_SIZE); @@ -1953,13 +1957,13 @@ static int lan743x_rx_next_index(struct lan743x_rx *rx, int index) return ((++index) % rx->ring_size); } -static struct sk_buff *lan743x_rx_allocate_skb(struct lan743x_rx *rx) +static struct sk_buff *lan743x_rx_allocate_skb(struct lan743x_rx *rx, gfp_t gfp) { int length = 0; length = (LAN743X_MAX_FRAME_SIZE + ETH_HLEN + 4 + RX_HEAD_PADDING); return __netdev_alloc_skb(rx->adapter->netdev, - length, GFP_ATOMIC | GFP_DMA); + length, gfp); } static void lan743x_rx_update_tail(struct lan743x_rx *rx, int index) @@ -1994,11 +1998,11 @@ static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index, } buffer_info->buffer_length = length; - descriptor->data1 = DMA_ADDR_LOW32(buffer_info->dma_ptr); - descriptor->data2 = DMA_ADDR_HIGH32(buffer_info->dma_ptr); + descriptor->data1 = cpu_to_le32(DMA_ADDR_LOW32(buffer_info->dma_ptr)); + descriptor->data2 = cpu_to_le32(DMA_ADDR_HIGH32(buffer_info->dma_ptr)); descriptor->data3 = 0; - descriptor->data0 = (RX_DESC_DATA0_OWN_ | - (length & RX_DESC_DATA0_BUF_LENGTH_MASK_)); + descriptor->data0 = cpu_to_le32((RX_DESC_DATA0_OWN_ | + (length & RX_DESC_DATA0_BUF_LENGTH_MASK_))); skb_reserve(buffer_info->skb, RX_HEAD_PADDING); lan743x_rx_update_tail(rx, index); @@ -2013,12 +2017,12 @@ static void lan743x_rx_reuse_ring_element(struct lan743x_rx *rx, int index) descriptor = &rx->ring_cpu_ptr[index]; buffer_info = &rx->buffer_info[index]; - descriptor->data1 = DMA_ADDR_LOW32(buffer_info->dma_ptr); - descriptor->data2 = DMA_ADDR_HIGH32(buffer_info->dma_ptr); + descriptor->data1 = cpu_to_le32(DMA_ADDR_LOW32(buffer_info->dma_ptr)); + descriptor->data2 = cpu_to_le32(DMA_ADDR_HIGH32(buffer_info->dma_ptr)); descriptor->data3 = 0; - descriptor->data0 = (RX_DESC_DATA0_OWN_ | + descriptor->data0 = cpu_to_le32((RX_DESC_DATA0_OWN_ | ((buffer_info->buffer_length) & - RX_DESC_DATA0_BUF_LENGTH_MASK_)); + RX_DESC_DATA0_BUF_LENGTH_MASK_))); lan743x_rx_update_tail(rx, index); } @@ -2052,7 +2056,7 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) { struct skb_shared_hwtstamps *hwtstamps = NULL; int result = RX_PROCESS_RESULT_NOTHING_TO_DO; - int current_head_index = *rx->head_cpu_ptr; + int current_head_index = le32_to_cpu(*rx->head_cpu_ptr); struct lan743x_rx_buffer_info *buffer_info; struct lan743x_rx_descriptor *descriptor; int extension_index = -1; @@ -2067,14 +2071,14 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) if (rx->last_head != current_head_index) { descriptor = &rx->ring_cpu_ptr[rx->last_head]; - if (descriptor->data0 & RX_DESC_DATA0_OWN_) + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_OWN_) goto done; - if (!(descriptor->data0 & RX_DESC_DATA0_FS_)) + if (!(le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_FS_)) goto done; first_index = rx->last_head; - if (descriptor->data0 & RX_DESC_DATA0_LS_) { + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_LS_) { last_index = rx->last_head; } else { int index; @@ -2082,10 +2086,10 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) index = lan743x_rx_next_index(rx, first_index); while (index != current_head_index) { descriptor = &rx->ring_cpu_ptr[index]; - if (descriptor->data0 & RX_DESC_DATA0_OWN_) + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_OWN_) goto done; - if (descriptor->data0 & RX_DESC_DATA0_LS_) { + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_LS_) { last_index = index; break; } @@ -2094,17 +2098,17 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) } if (last_index >= 0) { descriptor = &rx->ring_cpu_ptr[last_index]; - if (descriptor->data0 & RX_DESC_DATA0_EXT_) { + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_EXT_) { /* extension is expected to follow */ int index = lan743x_rx_next_index(rx, last_index); if (index != current_head_index) { descriptor = &rx->ring_cpu_ptr[index]; - if (descriptor->data0 & + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_OWN_) { goto done; } - if (descriptor->data0 & + if (le32_to_cpu(descriptor->data0) & RX_DESC_DATA0_EXT_) { extension_index = index; } else { @@ -2131,7 +2135,8 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) struct sk_buff *new_skb = NULL; int packet_length; - new_skb = lan743x_rx_allocate_skb(rx); + new_skb = lan743x_rx_allocate_skb(rx, + GFP_ATOMIC | GFP_DMA); if (!new_skb) { /* failed to allocate next skb. * Memory is very low. @@ -2156,7 +2161,7 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) } buffer_info->skb = NULL; packet_length = RX_DESC_DATA0_FRAME_LENGTH_GET_ - (descriptor->data0); + (le32_to_cpu(descriptor->data0)); skb_put(skb, packet_length - 4); skb->protocol = eth_type_trans(skb, rx->adapter->netdev); @@ -2194,8 +2199,8 @@ process_extension: descriptor = &rx->ring_cpu_ptr[extension_index]; buffer_info = &rx->buffer_info[extension_index]; - ts_sec = descriptor->data1; - ts_nsec = (descriptor->data2 & + ts_sec = le32_to_cpu(descriptor->data1); + ts_nsec = (le32_to_cpu(descriptor->data2) & RX_DESC_DATA2_TS_NS_MASK_); lan743x_rx_reuse_ring_element(rx, extension_index); real_last_index = extension_index; @@ -2318,6 +2323,16 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) ret = -EINVAL; goto cleanup; } + if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev, + DMA_BIT_MASK(64))) { + if (dma_set_mask_and_coherent(&rx->adapter->pdev->dev, + DMA_BIT_MASK(32))) { + dev_warn(&rx->adapter->pdev->dev, + "lan743x_: No suitable DMA available\n"); + ret = -ENOMEM; + goto cleanup; + } + } ring_allocation_size = ALIGN(rx->ring_size * sizeof(struct lan743x_rx_descriptor), PAGE_SIZE); @@ -2357,7 +2372,8 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) rx->last_head = 0; for (index = 0; index < rx->ring_size; index++) { - struct sk_buff *new_skb = lan743x_rx_allocate_skb(rx); + struct sk_buff *new_skb = lan743x_rx_allocate_skb(rx, + GFP_KERNEL); ret = lan743x_rx_init_ring_element(rx, index, new_skb); if (ret) @@ -3066,6 +3082,8 @@ static int lan743x_pm_resume(struct device *dev) if (ret) { netif_err(adapter, probe, adapter->netdev, "lan743x_hardware_init returned %d\n", ret); + lan743x_pci_cleanup(adapter); + return ret; } /* open netdev when netdev is at running state while resume. diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h index a536f4a4994dfa98e46cc3ad13e15bb6db5e71ac..751f2bc9ce84ecfacab33659c98e981b0b8bf32e 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.h +++ b/drivers/net/ethernet/microchip/lan743x_main.h @@ -660,7 +660,7 @@ struct lan743x_tx { struct lan743x_tx_buffer_info *buffer_info; - u32 *head_cpu_ptr; + __le32 *head_cpu_ptr; dma_addr_t head_dma_ptr; int last_head; int last_tail; @@ -690,7 +690,7 @@ struct lan743x_rx { struct lan743x_rx_buffer_info *buffer_info; - u32 *head_cpu_ptr; + __le32 *head_cpu_ptr; dma_addr_t head_dma_ptr; u32 last_head; u32 last_tail; @@ -775,10 +775,10 @@ struct lan743x_adapter { #define TX_DESC_DATA3_FRAME_LENGTH_MSS_MASK_ (0x3FFF0000) struct lan743x_tx_descriptor { - u32 data0; - u32 data1; - u32 data2; - u32 data3; + __le32 data0; + __le32 data1; + __le32 data2; + __le32 data3; } __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING); #define TX_BUFFER_INFO_FLAG_ACTIVE BIT(0) @@ -813,10 +813,10 @@ struct lan743x_tx_buffer_info { #define RX_HEAD_PADDING NET_IP_ALIGN struct lan743x_rx_descriptor { - u32 data0; - u32 data1; - u32 data2; - u32 data3; + __le32 data0; + __le32 data1; + __le32 data2; + __le32 data3; } __aligned(DEFAULT_DMA_DESCRIPTOR_SPACING); #define RX_BUFFER_INFO_FLAG_ACTIVE BIT(0) diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 5bfc7acfd13a994675269ce08006b1cd6119b33a..a06466ecca12a90055016c3e1f6adafbea314f42 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -594,12 +594,12 @@ void ocelot_get_txtstamp(struct ocelot *ocelot) spin_unlock_irqrestore(&port->tx_skbs.lock, flags); + if (WARN_ON(!skb_match)) + continue; + /* Get the h/w timestamp */ ocelot_get_hwtimestamp(ocelot, &ts); - if (unlikely(!skb_match)) - continue; - /* Set the timestamp into the skb */ memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec); @@ -811,12 +811,6 @@ int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr) switch (cfg.rx_filter) { case HWTSTAMP_FILTER_NONE: break; - case HWTSTAMP_FILTER_ALL: - case HWTSTAMP_FILTER_SOME: - case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: - case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: - case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: - case HWTSTAMP_FILTER_NTP_ALL: case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: @@ -854,12 +848,11 @@ void ocelot_get_strings(struct ocelot *ocelot, int port, u32 sset, u8 *data) } EXPORT_SYMBOL(ocelot_get_strings); +/* Caller must hold &ocelot->stats_lock */ static void ocelot_update_stats(struct ocelot *ocelot) { int i, j; - mutex_lock(&ocelot->stats_lock); - for (i = 0; i < ocelot->num_phys_ports; i++) { /* Configure the port to read the stats from */ ocelot_write(ocelot, SYS_STAT_CFG_STAT_VIEW(i), SYS_STAT_CFG); @@ -878,8 +871,6 @@ static void ocelot_update_stats(struct ocelot *ocelot) ~(u64)U32_MAX) + val; } } - - mutex_unlock(&ocelot->stats_lock); } static void ocelot_check_stats_work(struct work_struct *work) @@ -888,7 +879,9 @@ static void ocelot_check_stats_work(struct work_struct *work) struct ocelot *ocelot = container_of(del_work, struct ocelot, stats_work); + mutex_lock(&ocelot->stats_lock); ocelot_update_stats(ocelot); + mutex_unlock(&ocelot->stats_lock); queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work, OCELOT_STATS_CHECK_DELAY); @@ -898,12 +891,16 @@ void ocelot_get_ethtool_stats(struct ocelot *ocelot, int port, u64 *data) { int i; + mutex_lock(&ocelot->stats_lock); + /* check and update now */ ocelot_update_stats(ocelot); /* Copy all counters */ for (i = 0; i < ocelot->num_stats; i++) *data++ = ocelot->stats[port * ocelot->num_stats + i]; + + mutex_unlock(&ocelot->stats_lock); } EXPORT_SYMBOL(ocelot_get_ethtool_stats); @@ -935,7 +932,10 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port, SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) | BIT(HWTSTAMP_TX_ONESTEP_SYNC); - info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); + info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT); return 0; } diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 3655503352928d9a7d368003ad530df20c3b1015..217e8333de6c66154cbe0d6b0af078b3200083ac 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -462,13 +462,6 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return -EOPNOTSUPP; } - if (filter->block_id == VCAP_IS1 && - !is_zero_ether_addr(match.mask->dst)) { - NL_SET_ERR_MSG_MOD(extack, - "Key type S1_NORMAL cannot match on destination MAC"); - return -EOPNOTSUPP; - } - /* The hw support mac matches only for MAC_ETYPE key, * therefore if other matches(port, tcp flags, etc) are added * then just bail out @@ -483,6 +476,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return -EOPNOTSUPP; flow_rule_match_eth_addrs(rule, &match); + + if (filter->block_id == VCAP_IS1 && + !is_zero_ether_addr(match.mask->dst)) { + NL_SET_ERR_MSG_MOD(extack, + "Key type S1_NORMAL cannot match on destination MAC"); + return -EOPNOTSUPP; + } + filter->key_type = OCELOT_VCAP_KEY_ETYPE; ether_addr_copy(filter->key.etype.dmac.value, match.key->dst); diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c index 28d9e98db81a8bbd82a4fdd0d19c524a00b95332..33f0014b3c4bb788941dff95de6d12f46b010148 100644 --- a/drivers/net/ethernet/natsemi/xtsonic.c +++ b/drivers/net/ethernet/natsemi/xtsonic.c @@ -120,7 +120,7 @@ static const struct net_device_ops xtsonic_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, }; -static int __init sonic_probe1(struct net_device *dev) +static int sonic_probe1(struct net_device *dev) { unsigned int silicon_revision; struct sonic_local *lp = netdev_priv(dev); diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index d13d92bf7447885434f5b13c750f80dee7c67e84..3cae8449fadb7a6fe2753a791b6115e9056655b4 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -8555,7 +8555,7 @@ static void s2io_io_resume(struct pci_dev *pdev) return; } - if (s2io_set_mac_addr(netdev, netdev->dev_addr) == FAILURE) { + if (do_s2io_prog_unicast(netdev, netdev->dev_addr) == FAILURE) { s2io_card_down(sp); pr_err("Can't restore mac addr after reset.\n"); return; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 11c83a99b0140dfb9c11e8246336d6aad0fe8278..f469950c726573fd1f3a40a162b9f0f03a3bc6a5 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -182,15 +182,21 @@ static int nfp_bpf_check_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) { struct nfp_net *nn = netdev_priv(netdev); - unsigned int max_mtu; + struct nfp_bpf_vnic *bv; + struct bpf_prog *prog; if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) return 0; - max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - if (new_mtu > max_mtu) { - nn_info(nn, "BPF offload active, MTU over %u not supported\n", - max_mtu); + if (nn->xdp_hw.prog) { + prog = nn->xdp_hw.prog; + } else { + bv = nn->app_priv; + prog = bv->tc_prog; + } + + if (nfp_bpf_offload_check_mtu(nn, prog, new_mtu)) { + nn_info(nn, "BPF offload active, potential packet access beyond hardware packet boundary"); return -EBUSY; } return 0; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index fac9c6f9e197b44882309b08522187ee0f5eba74..c74620fcc539c211019d7b0b63c60be973f912f1 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -560,6 +560,8 @@ bool nfp_is_subprog_start(struct nfp_insn_meta *meta); void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog); int nfp_bpf_jit(struct nfp_prog *prog); bool nfp_bpf_supported_opcode(u8 code); +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu); int nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 53851853562c63221afca3e0f2aa8be2aa40ec48..9d97cd281f18e4d055d8b95233b8e4566f051a7b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -481,19 +481,28 @@ int nfp_bpf_event_output(struct nfp_app_bpf *bpf, const void *data, return 0; } +bool nfp_bpf_offload_check_mtu(struct nfp_net *nn, struct bpf_prog *prog, + unsigned int mtu) +{ + unsigned int fw_mtu, pkt_off; + + fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + pkt_off = min(prog->aux->max_pkt_offset, mtu); + + return fw_mtu < pkt_off; +} + static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog, struct netlink_ext_ack *extack) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - unsigned int fw_mtu, pkt_off, max_stack, max_prog_len; + unsigned int max_stack, max_prog_len; dma_addr_t dma_addr; void *img; int err; - fw_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; - pkt_off = min(prog->aux->max_pkt_offset, nn->dp.netdev->mtu); - if (fw_mtu < pkt_off) { + if (nfp_bpf_offload_check_mtu(nn, prog, nn->dp.netdev->mtu)) { NL_SET_ERR_MSG_MOD(extack, "BPF offload not supported with potential packet access beyond HW packet split boundary"); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index c029950a81e202230ea8b8b4e427bf018643c258..ac1dcfa1d17908fae345db8555de8919fb40db14 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -830,10 +830,6 @@ static int nfp_flower_init(struct nfp_app *app) if (err) goto err_cleanup; - err = flow_indr_dev_register(nfp_flower_indr_setup_tc_cb, app); - if (err) - goto err_cleanup; - if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM) nfp_flower_qos_init(app); @@ -942,7 +938,20 @@ static int nfp_flower_start(struct nfp_app *app) return err; } - return nfp_tunnel_config_start(app); + err = flow_indr_dev_register(nfp_flower_indr_setup_tc_cb, app); + if (err) + return err; + + err = nfp_tunnel_config_start(app); + if (err) + goto err_tunnel_config; + + return 0; + +err_tunnel_config: + flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app, + nfp_flower_setup_indr_tc_release); + return err; } static void nfp_flower_stop(struct nfp_app *app) diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index d19c02e991145bed556f3500c6007e253057881e..088ceac07b80556be5b85ea5b1ad6ccb4f05391a 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -922,8 +922,8 @@ nfp_tunnel_add_shared_mac(struct nfp_app *app, struct net_device *netdev, int port, bool mod) { struct nfp_flower_priv *priv = app->priv; - int ida_idx = NFP_MAX_MAC_INDEX, err; struct nfp_tun_offloaded_mac *entry; + int ida_idx = -1, err; u16 nfp_mac_idx = 0; entry = nfp_tunnel_lookup_offloaded_macs(app, netdev->dev_addr); @@ -997,7 +997,7 @@ err_remove_hash: err_free_entry: kfree(entry); err_free_ida: - if (ida_idx != NFP_MAX_MAC_INDEX) + if (ida_idx != -1) ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); return err; @@ -1011,6 +1011,7 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, struct nfp_flower_repr_priv *repr_priv; struct nfp_tun_offloaded_mac *entry; struct nfp_repr *repr; + u16 nfp_mac_idx; int ida_idx; entry = nfp_tunnel_lookup_offloaded_macs(app, mac); @@ -1029,8 +1030,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, entry->bridge_count--; if (!entry->bridge_count && entry->ref_count) { - u16 nfp_mac_idx; - nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; if (__nfp_tunnel_offload_mac(app, mac, nfp_mac_idx, false)) { @@ -1046,7 +1045,6 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, /* If MAC is now used by 1 repr set the offloaded MAC index to port. */ if (entry->ref_count == 1 && list_is_singular(&entry->repr_list)) { - u16 nfp_mac_idx; int port, err; repr_priv = list_first_entry(&entry->repr_list, @@ -1074,8 +1072,14 @@ nfp_tunnel_del_shared_mac(struct nfp_app *app, struct net_device *netdev, WARN_ON_ONCE(rhashtable_remove_fast(&priv->tun.offloaded_macs, &entry->ht_node, offloaded_macs_params)); + + if (nfp_flower_is_supported_bridge(netdev)) + nfp_mac_idx = entry->index & ~NFP_TUN_PRE_TUN_IDX_BIT; + else + nfp_mac_idx = entry->index; + /* If MAC has global ID then extract and free the ida entry. */ - if (nfp_tunnel_is_mac_idx_global(entry->index)) { + if (nfp_tunnel_is_mac_idx_global(nfp_mac_idx)) { ida_idx = nfp_tunnel_get_ida_from_global_mac_idx(entry->index); ida_simple_remove(&priv->tun.mac_off_ids, ida_idx); } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index df5b748be068c891770c11cc44618be2aee74983..cc2ce452000a3b132e46062f7b5d6ba2e3cc7cfb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -557,7 +557,6 @@ struct nfp_net_dp { * @exn_name: Name for Exception interrupt * @shared_handler: Handler for shared interrupts * @shared_name: Name for shared interrupt - * @me_freq_mhz: ME clock_freq (MHz) * @reconfig_lock: Protects @reconfig_posted, @reconfig_timer_active, * @reconfig_sync_present and HW reconfiguration request * regs/machinery from async requests (sync must take @@ -640,8 +639,6 @@ struct nfp_net { irq_handler_t shared_handler; char shared_name[IFNAMSIZ + 8]; - u32 me_freq_mhz; - bool link_up; spinlock_t link_status_lock; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index c036a1d0f8de6fe2d4c0003f473bda0a77f9b89a..cd0c9623f7dd20733436ecc3b3ae40e8bafbe94e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1347,7 +1347,7 @@ static int nfp_net_set_coalesce(struct net_device *netdev, * ME timestamp ticks. There are 16 ME clock cycles for each timestamp * count. */ - factor = nn->me_freq_mhz / 16; + factor = nn->tlv_caps.me_freq_mhz / 16; /* Each pair of (usecs, max_frames) fields specifies that interrupts * should be coalesced until diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index 94994a939277b6f78790bf3b3a999eea3a72d3b7..6ef48eb3a77d43e4268feb536968841e8cb72cbe 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -803,8 +803,10 @@ int nfp_cpp_area_cache_add(struct nfp_cpp *cpp, size_t size) return -ENOMEM; cache = kzalloc(sizeof(*cache), GFP_KERNEL); - if (!cache) + if (!cache) { + nfp_cpp_area_free(area); return -ENOMEM; + } cache->id = 0; cache->addr = 0; diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c index d3cbb4215f5cf022186bbeb03abddef24620d383..a9a9bf2e065a5635880392e0c4099eed982bb381 100644 --- a/drivers/net/ethernet/nxp/lpc_eth.c +++ b/drivers/net/ethernet/nxp/lpc_eth.c @@ -1015,9 +1015,6 @@ static int lpc_eth_close(struct net_device *ndev) napi_disable(&pldat->napi); netif_stop_queue(ndev); - if (ndev->phydev) - phy_stop(ndev->phydev); - spin_lock_irqsave(&pldat->lock, flags); __lpc_eth_reset(pldat); netif_carrier_off(ndev); @@ -1025,6 +1022,8 @@ static int lpc_eth_close(struct net_device *ndev) writel(0, LPC_ENET_MAC2(pldat->net_base)); spin_unlock_irqrestore(&pldat->lock, flags); + if (ndev->phydev) + phy_stop(ndev->phydev); clk_disable_unprepare(pldat->clk); return 0; @@ -1469,6 +1468,7 @@ static int lpc_eth_drv_resume(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct netdata_local *pldat; + int ret; if (device_may_wakeup(&pdev->dev)) disable_irq_wake(ndev->irq); @@ -1478,7 +1478,9 @@ static int lpc_eth_drv_resume(struct platform_device *pdev) pldat = netdev_priv(ndev); /* Enable interface clock */ - clk_enable(pldat->clk); + ret = clk_enable(pldat->clk); + if (ret) + return ret; /* Reset and initialize */ __lpc_eth_reset(pldat); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 6dc7ce649448858d7e08a15bcede5249cd297cfa..e95c09dc2c30d2911888410621897edc9ee8f93f 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1096,6 +1096,10 @@ static int ionic_ndo_addr_add(struct net_device *netdev, const u8 *addr) static int ionic_addr_del(struct net_device *netdev, const u8 *addr) { + /* Don't delete our own address from the uc list */ + if (ether_addr_equal(addr, netdev->dev_addr)) + return 0; + return ionic_lif_addr(netdev_priv(netdev), addr, false, true); } @@ -2832,7 +2836,7 @@ int ionic_lif_init(struct ionic_lif *lif) return -EINVAL; } - lif->dbid_inuse = bitmap_alloc(lif->dbid_count, GFP_KERNEL); + lif->dbid_inuse = bitmap_zalloc(lif->dbid_count, GFP_KERNEL); if (!lif->dbid_inuse) { dev_err(dev, "Failed alloc doorbell id bitmap, aborting\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 6bb9ec98a12b53b954cd5003e2c1024400138538..41bc31e3f935613595c5792498a1330a2e21442d 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1295,6 +1295,7 @@ static int qed_slowpath_start(struct qed_dev *cdev, } else { DP_NOTICE(cdev, "Failed to acquire PTT for aRFS\n"); + rc = -EINVAL; goto err; } } diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index b8dc5c4591ef52e212776aeece736303092c4ff6..ef0ad4cf82e6060c9c6ccd07ccc7e27bd9a0bf2c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -3778,11 +3778,11 @@ bool qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs) return found; } -static void qed_iov_get_link(struct qed_hwfn *p_hwfn, - u16 vfid, - struct qed_mcp_link_params *p_params, - struct qed_mcp_link_state *p_link, - struct qed_mcp_link_capabilities *p_caps) +static int qed_iov_get_link(struct qed_hwfn *p_hwfn, + u16 vfid, + struct qed_mcp_link_params *p_params, + struct qed_mcp_link_state *p_link, + struct qed_mcp_link_capabilities *p_caps) { struct qed_vf_info *p_vf = qed_iov_get_vf_info(p_hwfn, vfid, @@ -3790,7 +3790,7 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn, struct qed_bulletin_content *p_bulletin; if (!p_vf) - return; + return -EINVAL; p_bulletin = p_vf->bulletin.p_virt; @@ -3800,6 +3800,7 @@ static void qed_iov_get_link(struct qed_hwfn *p_hwfn, __qed_vf_get_link_state(p_hwfn, p_link, p_bulletin); if (p_caps) __qed_vf_get_link_caps(p_hwfn, p_caps, p_bulletin); + return 0; } static int @@ -4658,6 +4659,7 @@ static int qed_get_vf_config(struct qed_dev *cdev, struct qed_public_vf_info *vf_info; struct qed_mcp_link_state link; u32 tx_rate; + int ret; /* Sanitize request */ if (IS_VF(cdev)) @@ -4671,7 +4673,9 @@ static int qed_get_vf_config(struct qed_dev *cdev, vf_info = qed_iov_get_public_vf_info(hwfn, vf_id, true); - qed_iov_get_link(hwfn, vf_id, NULL, &link, NULL); + ret = qed_iov_get_link(hwfn, vf_id, NULL, &link, NULL); + if (ret) + return ret; /* Fill information about VF */ ivi->vf = vf_id; diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 72a38d53d33f68c497c3f18b14c7874c6da713f4..e2a5a6a373cbe613ad06ef5d94196efc8174f5c7 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -513,6 +513,9 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn) p_iov->bulletin.size, &p_iov->bulletin.phys, GFP_KERNEL); + if (!p_iov->bulletin.p_virt) + goto free_pf2vf_reply; + DP_VERBOSE(p_hwfn, QED_MSG_IOV, "VF's bulletin Board [%p virt 0x%llx phys 0x%08x bytes]\n", p_iov->bulletin.p_virt, @@ -552,6 +555,10 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn) return rc; +free_pf2vf_reply: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(union pfvf_tlvs), + p_iov->pf2vf_reply, p_iov->pf2vf_reply_phys); free_vf2pf_request: dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(union vfpf_tlvs), diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c index ca0ee29a57b50ac4fd15d19be46ab6de0766a311..21c906200e791a538c6be100d6ecc7a112cce1c1 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_fp.c +++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c @@ -1659,6 +1659,13 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev) data_split = true; } } else { + if (unlikely(skb->len > ETH_TX_MAX_NON_LSO_PKT_LEN)) { + DP_ERR(edev, "Unexpected non LSO skb length = 0x%x\n", skb->len); + qede_free_failed_tx_pkt(txq, first_bd, 0, false); + qede_update_tx_producer(txq); + return NETDEV_TX_OK; + } + val |= ((skb->len & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) << ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT); } diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index c7923e22a4c426b1af33974abbc7585fe5124c70..c9f32fc50254fb86ecee9311edd8f08598116654 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3494,20 +3494,19 @@ static int ql_adapter_up(struct ql3_adapter *qdev) spin_lock_irqsave(&qdev->hw_lock, hw_flags); - err = ql_wait_for_drvr_lock(qdev); - if (err) { - err = ql_adapter_initialize(qdev); - if (err) { - netdev_err(ndev, "Unable to initialize adapter\n"); - goto err_init; - } - netdev_err(ndev, "Releasing driver lock\n"); - ql_sem_unlock(qdev, QL_DRVR_SEM_MASK); - } else { + if (!ql_wait_for_drvr_lock(qdev)) { netdev_err(ndev, "Could not acquire driver lock\n"); + err = -ENODEV; goto err_lock; } + err = ql_adapter_initialize(qdev); + if (err) { + netdev_err(ndev, "Unable to initialize adapter\n"); + goto err_init; + } + ql_sem_unlock(qdev, QL_DRVR_SEM_MASK); + spin_unlock_irqrestore(&qdev->hw_lock, hw_flags); set_bit(QL_ADAPTER_UP, &qdev->flags); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c index d51bac7ba5afadca6df37a8761838432fba08b1f..bd06076803295fb5a6a0946db8be6bf4ad901076 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c @@ -1077,8 +1077,14 @@ static int qlcnic_83xx_add_rings(struct qlcnic_adapter *adapter) sds_mbx_size = sizeof(struct qlcnic_sds_mbx); context_id = recv_ctx->context_id; num_sds = adapter->drv_sds_rings - QLCNIC_MAX_SDS_RINGS; - ahw->hw_ops->alloc_mbx_args(&cmd, adapter, - QLCNIC_CMD_ADD_RCV_RINGS); + err = ahw->hw_ops->alloc_mbx_args(&cmd, adapter, + QLCNIC_CMD_ADD_RCV_RINGS); + if (err) { + dev_err(&adapter->pdev->dev, + "Failed to alloc mbx args %d\n", err); + return err; + } + cmd.req.arg[1] = 0 | (num_sds << 8) | (context_id << 16); /* set up status rings, mbx 2-81 */ diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 7160b42f51ddd0ab11bfd8c94f77d506efd8efca..d0111cb3b40e1c5f1e0804e7d8b5dddcbc863541 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -201,7 +201,7 @@ int qlcnic_sriov_get_vf_vport_info(struct qlcnic_adapter *, struct qlcnic_info *, u16); int qlcnic_sriov_cfg_vf_guest_vlan(struct qlcnic_adapter *, u16, u8); void qlcnic_sriov_free_vlans(struct qlcnic_adapter *); -void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *); +int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *); bool qlcnic_sriov_check_any_vlan(struct qlcnic_vf_info *); void qlcnic_sriov_del_vlan_id(struct qlcnic_sriov *, struct qlcnic_vf_info *, u16); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 30e52f9697593fa178de62e87010675bf8644744..8367891bfb13926a543ae5500765a4bf3455d773 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -432,7 +432,7 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, struct qlcnic_cmd_args *cmd) { struct qlcnic_sriov *sriov = adapter->ahw->sriov; - int i, num_vlans; + int i, num_vlans, ret; u16 *vlans; if (sriov->allowed_vlans) @@ -443,7 +443,9 @@ static int qlcnic_sriov_set_guest_vlan_mode(struct qlcnic_adapter *adapter, dev_info(&adapter->pdev->dev, "Number of allowed Guest VLANs = %d\n", sriov->num_allowed_vlans); - qlcnic_sriov_alloc_vlans(adapter); + ret = qlcnic_sriov_alloc_vlans(adapter); + if (ret) + return ret; if (!sriov->any_vlan) return 0; @@ -2159,7 +2161,7 @@ static int qlcnic_sriov_vf_resume(struct qlcnic_adapter *adapter) return err; } -void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) +int qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) { struct qlcnic_sriov *sriov = adapter->ahw->sriov; struct qlcnic_vf_info *vf; @@ -2169,7 +2171,11 @@ void qlcnic_sriov_alloc_vlans(struct qlcnic_adapter *adapter) vf = &sriov->vf_info[i]; vf->sriov_vlans = kcalloc(sriov->num_allowed_vlans, sizeof(*vf->sriov_vlans), GFP_KERNEL); + if (!vf->sriov_vlans) + return -ENOMEM; } + + return 0; } void qlcnic_sriov_free_vlans(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index 447720b93e5ab3b04622429d0eb3d20b4f4edeb4..e90fa97c0ae6c36bf18fffb822d78ce4f77448f8 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -597,7 +597,9 @@ static int __qlcnic_pci_sriov_enable(struct qlcnic_adapter *adapter, if (err) goto del_flr_queue; - qlcnic_sriov_alloc_vlans(adapter); + err = qlcnic_sriov_alloc_vlans(adapter); + if (err) + goto del_flr_queue; return err; diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 30be18bac80636b13ce90ca1930b05a1b1460604..5eac3f494d9e9b78d7c4c37d948fa32ef9167a77 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -157,6 +157,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = { { PCI_VDEVICE(REALTEK, 0x8129) }, { PCI_VDEVICE(REALTEK, 0x8136), RTL_CFG_NO_GBIT }, { PCI_VDEVICE(REALTEK, 0x8161) }, + { PCI_VDEVICE(REALTEK, 0x8162) }, { PCI_VDEVICE(REALTEK, 0x8167) }, { PCI_VDEVICE(REALTEK, 0x8168) }, { PCI_VDEVICE(NCUBE, 0x8168) }, diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 7072b249c8bd60ebb302db31df468635d841acc6..8157666209798ffc73a1c97d2905d7be16c5584f 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2795,7 +2795,8 @@ static void ofdpa_fib4_abort(struct rocker *rocker) if (!ofdpa_port) continue; nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; - ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE, + ofdpa_flow_tbl_del(ofdpa_port, + OFDPA_OP_FLAG_REMOVE | OFDPA_OP_FLAG_NOWAIT, flow_entry); } spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, flags); diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index 971f1e54b6526e36223059f684292bbd95bc539f..b1dd6189638b3ee173366f0c0ab274f11ae5c420 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -2282,18 +2282,18 @@ static int __init sxgbe_cmdline_opt(char *str) char *opt; if (!str || !*str) - return -EINVAL; + return 1; while ((opt = strsep(&str, ",")) != NULL) { if (!strncmp(opt, "eee_timer:", 10)) { if (kstrtoint(opt + 10, 0, &eee_timer)) goto err; } } - return 0; + return 1; err: pr_err("%s: ERROR broken module parameter conversion\n", __func__); - return -EINVAL; + return 1; } __setup("sxgbeeth=", sxgbe_cmdline_opt); diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c index 3148fe7703564561fd5d29fcd44f04ba3b8a4373..cb6897c2193c23ca9f423df50e6f224b03d3c2b3 100644 --- a/drivers/net/ethernet/sfc/ef100_nic.c +++ b/drivers/net/ethernet/sfc/ef100_nic.c @@ -597,6 +597,9 @@ static size_t ef100_update_stats(struct efx_nic *efx, ef100_common_stat_mask(mask); ef100_ethtool_stat_mask(mask); + if (!mc_stats) + return 0; + efx_nic_copy_stats(efx, mc_stats); efx_nic_update_stats(ef100_stat_desc, EF100_STAT_COUNT, mask, stats, mc_stats, false); diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c index bf1443539a1a451dbe6aa6a6beced0225a8463a4..bd552c7dffcb1b2c30a92f7c15d1e985129472e1 100644 --- a/drivers/net/ethernet/sfc/ethtool_common.c +++ b/drivers/net/ethernet/sfc/ethtool_common.c @@ -563,20 +563,14 @@ int efx_ethtool_get_link_ksettings(struct net_device *net_dev, { struct efx_nic *efx = netdev_priv(net_dev); struct efx_link_state *link_state = &efx->link_state; - u32 supported; mutex_lock(&efx->mac_lock); efx_mcdi_phy_get_link_ksettings(efx, cmd); mutex_unlock(&efx->mac_lock); /* Both MACs support pause frames (bidirectional and respond-only) */ - ethtool_convert_link_mode_to_legacy_u32(&supported, - cmd->link_modes.supported); - - supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, - supported); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause); if (LOOPBACK_INTERNAL(efx)) { cmd->base.speed = link_state->speed; diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c index 966f13e7475ddb7e650b8c05516e90810862d02e..0c6cc219136938c4a6bdc19c4f6467442c2605e1 100644 --- a/drivers/net/ethernet/sfc/falcon/rx.c +++ b/drivers/net/ethernet/sfc/falcon/rx.c @@ -110,6 +110,8 @@ static struct page *ef4_reuse_page(struct ef4_rx_queue *rx_queue) struct ef4_rx_page_state *state; unsigned index; + if (unlikely(!rx_queue->page_ring)) + return NULL; index = rx_queue->page_remove & rx_queue->page_ptr_mask; page = rx_queue->page_ring[index]; if (page == NULL) @@ -293,6 +295,9 @@ static void ef4_recycle_rx_pages(struct ef4_channel *channel, { struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel); + if (unlikely(!rx_queue->page_ring)) + return; + do { ef4_recycle_rx_page(channel, rx_buf); rx_buf = ef4_rx_buf_next(rx_queue, rx_buf); @@ -728,7 +733,10 @@ static void ef4_init_rx_recycle_ring(struct ef4_nic *efx, efx->rx_bufs_per_page); rx_queue->page_ring = kcalloc(page_ring_size, sizeof(*rx_queue->page_ring), GFP_KERNEL); - rx_queue->page_ptr_mask = page_ring_size - 1; + if (!rx_queue->page_ring) + rx_queue->page_ptr_mask = 0; + else + rx_queue->page_ptr_mask = page_ring_size - 1; } void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue) diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index be6bfd6b7ec7576407f85ebb786eaed238f7d8be..50baf62b2cbc6808bad368e45d6e2efce7573f8c 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -163,9 +163,9 @@ static void efx_mcdi_send_request(struct efx_nic *efx, unsigned cmd, /* Serialise with efx_mcdi_ev_cpl() and efx_mcdi_ev_death() */ spin_lock_bh(&mcdi->iface_lock); ++mcdi->seqno; + seqno = mcdi->seqno & SEQ_MASK; spin_unlock_bh(&mcdi->iface_lock); - seqno = mcdi->seqno & SEQ_MASK; xflags = 0; if (mcdi->mode == MCDI_MODE_EVENTS) xflags |= MCDI_HEADER_XFLAGS_EVREQ; diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c index 4bd3ef8f3384eb2afef04622518540385b096350..c4fe3c48ac46ab4f4bae0081b52a63e10acc9f66 100644 --- a/drivers/net/ethernet/sfc/mcdi_port_common.c +++ b/drivers/net/ethernet/sfc/mcdi_port_common.c @@ -132,16 +132,27 @@ void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset) case MC_CMD_MEDIA_SFP_PLUS: case MC_CMD_MEDIA_QSFP_PLUS: SET_BIT(FIBRE); - if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) + if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN)) { SET_BIT(1000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) - SET_BIT(10000baseT_Full); - if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + SET_BIT(1000baseX_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN)) { + SET_BIT(10000baseCR_Full); + SET_BIT(10000baseLR_Full); + SET_BIT(10000baseSR_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) { SET_BIT(40000baseCR4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) + SET_BIT(40000baseSR4_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN)) { SET_BIT(100000baseCR4_Full); - if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) + SET_BIT(100000baseSR4_Full); + } + if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN)) { SET_BIT(25000baseCR_Full); + SET_BIT(25000baseSR_Full); + } if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN)) SET_BIT(50000baseCR2_Full); break; @@ -192,15 +203,19 @@ u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset) result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN); if (TEST_BIT(1000baseT_Half)) result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN); - if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full)) + if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full) || + TEST_BIT(1000baseX_Full)) result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN); - if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full)) + if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full) || + TEST_BIT(10000baseCR_Full) || TEST_BIT(10000baseLR_Full) || + TEST_BIT(10000baseSR_Full)) result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN); - if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full)) + if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full) || + TEST_BIT(40000baseSR4_Full)) result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN); - if (TEST_BIT(100000baseCR4_Full)) + if (TEST_BIT(100000baseCR4_Full) || TEST_BIT(100000baseSR4_Full)) result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN); - if (TEST_BIT(25000baseCR_Full)) + if (TEST_BIT(25000baseCR_Full) || TEST_BIT(25000baseSR_Full)) result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN); if (TEST_BIT(50000baseCR2_Full)) result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN); diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index a39c5143b3864d7dacfc81af09d860032651b337..797e51802ccbb5bfd6add5eeb2d9b164eed63a3a 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -648,7 +648,7 @@ static int efx_ptp_get_attributes(struct efx_nic *efx) } else if (rc == -EINVAL) { fmt = MC_CMD_PTP_OUT_GET_ATTRIBUTES_SECONDS_NANOSECONDS; } else if (rc == -EPERM) { - netif_info(efx, probe, efx->net_dev, "no PTP support\n"); + pci_info(efx->pci_dev, "no PTP support\n"); return rc; } else { efx_mcdi_display_error(efx, MC_CMD_PTP, sizeof(inbuf), @@ -824,7 +824,7 @@ static int efx_ptp_disable(struct efx_nic *efx) * should only have been called during probe. */ if (rc == -ENOSYS || rc == -EPERM) - netif_info(efx, probe, efx->net_dev, "no PTP support\n"); + pci_info(efx->pci_dev, "no PTP support\n"); else if (rc) efx_mcdi_display_error(efx, MC_CMD_PTP, MC_CMD_PTP_IN_DISABLE_LEN, diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 19cf7cac1e6e9e8005cc12cc95ddb024f5f770f1..e423b17e2a148ac8f7f2e1f2956b67d328e120f4 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -45,6 +45,8 @@ static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue) unsigned int index; struct page *page; + if (unlikely(!rx_queue->page_ring)) + return NULL; index = rx_queue->page_remove & rx_queue->page_ptr_mask; page = rx_queue->page_ring[index]; if (page == NULL) @@ -114,6 +116,9 @@ void efx_recycle_rx_pages(struct efx_channel *channel, { struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel); + if (unlikely(!rx_queue->page_ring)) + return; + do { efx_recycle_rx_page(channel, rx_buf); rx_buf = efx_rx_buf_next(rx_queue, rx_buf); @@ -150,7 +155,10 @@ static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue) efx->rx_bufs_per_page); rx_queue->page_ring = kcalloc(page_ring_size, sizeof(*rx_queue->page_ring), GFP_KERNEL); - rx_queue->page_ptr_mask = page_ring_size - 1; + if (!rx_queue->page_ring) + rx_queue->page_ptr_mask = 0; + else + rx_queue->page_ptr_mask = page_ring_size - 1; } static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue) diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c index 83dcfcae3d4b543e669eb69254383423662d57aa..441e7f3e5375155ce033f5fa26a2241e289af291 100644 --- a/drivers/net/ethernet/sfc/siena_sriov.c +++ b/drivers/net/ethernet/sfc/siena_sriov.c @@ -1057,7 +1057,7 @@ void efx_siena_sriov_probe(struct efx_nic *efx) return; if (efx_siena_sriov_cmd(efx, false, &efx->vi_scale, &count)) { - netif_info(efx, probe, efx->net_dev, "no SR-IOV VFs probed\n"); + pci_info(efx->pci_dev, "no SR-IOV VFs probed\n"); return; } if (count > 0 && count > max_vfs) diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c index 01069dfaf75c9c156d142fa19efb014820e12b37..288b420f88d4259881fbddcf88108deda7f586be 100644 --- a/drivers/net/ethernet/smsc/smc911x.c +++ b/drivers/net/ethernet/smsc/smc911x.c @@ -2069,6 +2069,11 @@ static int smc911x_drv_probe(struct platform_device *pdev) ndev->dma = (unsigned char)-1; ndev->irq = platform_get_irq(pdev, 0); + if (ndev->irq < 0) { + ret = ndev->irq; + goto release_both; + } + lp = netdev_priv(ndev); lp->netdev = ndev; #ifdef SMC_DYNAMIC_BUS_CONFIG diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c index fad503820e04033ffb3f7e1fa72afdbf3d1c5a69..b3365b34cac7c8ae52e3b72ac6bc20f224896b68 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c @@ -71,6 +71,7 @@ err_remove_config_dt: static const struct of_device_id dwmac_generic_match[] = { { .compatible = "st,spear600-gmac"}, + { .compatible = "snps,dwmac-3.40a"}, { .compatible = "snps,dwmac-3.50a"}, { .compatible = "snps,dwmac-3.610"}, { .compatible = "snps,dwmac-3.70a"}, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 6ef30252bfe0a7e9e7619b5a7174a5ec4ba323b3..e7fbc9b30bf964669db90327b88f3804ffb00467 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1336,7 +1336,6 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) return ret; } - pm_runtime_enable(dev); pm_runtime_get_sync(dev); if (bsp_priv->integrated_phy) @@ -1347,13 +1346,10 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) static void rk_gmac_powerdown(struct rk_priv_data *gmac) { - struct device *dev = &gmac->pdev->dev; - if (gmac->integrated_phy) rk_gmac_integrated_phy_powerdown(gmac); - pm_runtime_put_sync(dev); - pm_runtime_disable(dev); + pm_runtime_put_sync(&gmac->pdev->dev); phy_power_on(gmac, false); gmac_clk_enable(gmac, false); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 70d41783329dd4e9b8b25fcc2487c244ad44cc7a..f37b6d57b2fe22c0febda05c88213e484874ec9d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -485,8 +485,28 @@ static int socfpga_dwmac_resume(struct device *dev) } #endif /* CONFIG_PM_SLEEP */ -static SIMPLE_DEV_PM_OPS(socfpga_dwmac_pm_ops, stmmac_suspend, - socfpga_dwmac_resume); +static int __maybe_unused socfpga_dwmac_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + + stmmac_bus_clks_config(priv, false); + + return 0; +} + +static int __maybe_unused socfpga_dwmac_runtime_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + + return stmmac_bus_clks_config(priv, true); +} + +static const struct dev_pm_ops socfpga_dwmac_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(stmmac_suspend, socfpga_dwmac_resume) + SET_RUNTIME_PM_OPS(socfpga_dwmac_runtime_suspend, socfpga_dwmac_runtime_resume, NULL) +}; static const struct socfpga_dwmac_ops socfpga_gen5_ops = { .set_phy_mode = socfpga_gen5_set_phy_mode, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 9f5ccf1a0a54015c09687297ec6b269cad68b37e..cad6588840d8b1678697330af987ef96944f577f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -734,7 +734,7 @@ static int sun8i_dwmac_reset(struct stmmac_priv *priv) if (err) { dev_err(priv->device, "EMAC reset timeout\n"); - return -EFAULT; + return err; } return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c index 2bac49b49f739233e5e9f94d2d8bc37d46162c76..fbf2deafe8ba31c33cfc08acbc1396c646bab6cc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c @@ -218,11 +218,18 @@ static void dwmac1000_dump_dma_regs(void __iomem *ioaddr, u32 *reg_space) readl(ioaddr + DMA_BUS_MODE + i * 4); } -static void dwmac1000_get_hw_feature(void __iomem *ioaddr, - struct dma_features *dma_cap) +static int dwmac1000_get_hw_feature(void __iomem *ioaddr, + struct dma_features *dma_cap) { u32 hw_cap = readl(ioaddr + DMA_HW_FEATURE); + if (!hw_cap) { + /* 0x00000000 is the value read on old hardware that does not + * implement this register + */ + return -EOPNOTSUPP; + } + dma_cap->mbps_10_100 = (hw_cap & DMA_HW_FEAT_MIISEL); dma_cap->mbps_1000 = (hw_cap & DMA_HW_FEAT_GMIISEL) >> 1; dma_cap->half_duplex = (hw_cap & DMA_HW_FEAT_HDSEL) >> 2; @@ -252,6 +259,8 @@ static void dwmac1000_get_hw_feature(void __iomem *ioaddr, dma_cap->number_tx_channel = (hw_cap & DMA_HW_FEAT_TXCHCNT) >> 22; /* Alternate (enhanced) DESC mode */ dma_cap->enh_desc = (hw_cap & DMA_HW_FEAT_ENHDESSEL) >> 24; + + return 0; } static void dwmac1000_rx_watchdog(void __iomem *ioaddr, u32 riwt, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index a7249e4071f16210dbebd86b69768f52d435c996..935510cdc3ffd406a43aab0b2bd2a8d284e399b8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -337,8 +337,8 @@ static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode, writel(mtl_tx_op, ioaddr + MTL_CHAN_TX_OP_MODE(channel)); } -static void dwmac4_get_hw_feature(void __iomem *ioaddr, - struct dma_features *dma_cap) +static int dwmac4_get_hw_feature(void __iomem *ioaddr, + struct dma_features *dma_cap) { u32 hw_cap = readl(ioaddr + GMAC_HW_FEATURE0); @@ -425,6 +425,8 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr, dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11; dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10; dma_cap->dvlan = (hw_cap & GMAC_HW_FEAT_DVLAN) >> 5; + + return 0; } /* Enable/disable TSO feature and set MSS */ diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h index e5dbd0bc257e7a1d8136bca183f972be4f75740b..82889c363c7773cec228762ee688eada68c7850a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h @@ -130,6 +130,7 @@ #define NUM_DWMAC100_DMA_REGS 9 #define NUM_DWMAC1000_DMA_REGS 23 +#define NUM_DWMAC4_DMA_REGS 27 void dwmac_enable_dma_transmission(void __iomem *ioaddr); void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 77308c5c5d2947dc71b9d495f1b24202f909fd52..a5583d706b9f263aa447443daa9364224ab945fc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -365,8 +365,8 @@ static int dwxgmac2_dma_interrupt(void __iomem *ioaddr, return ret; } -static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, - struct dma_features *dma_cap) +static int dwxgmac2_get_hw_feature(void __iomem *ioaddr, + struct dma_features *dma_cap) { u32 hw_cap; @@ -439,6 +439,8 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr, dma_cap->frpes = (hw_cap & XGMAC_HWFEAT_FRPES) >> 11; dma_cap->frpbs = (hw_cap & XGMAC_HWFEAT_FRPPB) >> 9; dma_cap->frpsel = (hw_cap & XGMAC_HWFEAT_FRPSEL) >> 3; + + return 0; } static void dwxgmac2_rx_watchdog(void __iomem *ioaddr, u32 riwt, u32 nchan) diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index b0b84244ef107bb88adf9031d356e0af16f7760a..8b7ec2457eba233805f1595dbbfb5dcab69dd150 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -203,8 +203,8 @@ struct stmmac_dma_ops { int (*dma_interrupt) (void __iomem *ioaddr, struct stmmac_extra_stats *x, u32 chan); /* If supported then get the optional core features */ - void (*get_hw_feature)(void __iomem *ioaddr, - struct dma_features *dma_cap); + int (*get_hw_feature)(void __iomem *ioaddr, + struct dma_features *dma_cap); /* Program the HW RX Watchdog */ void (*rx_watchdog)(void __iomem *ioaddr, u32 riwt, u32 number_chan); void (*set_tx_ring_len)(void __iomem *ioaddr, u32 len, u32 chan); @@ -255,7 +255,7 @@ struct stmmac_dma_ops { #define stmmac_dma_interrupt_status(__priv, __args...) \ stmmac_do_callback(__priv, dma, dma_interrupt, __args) #define stmmac_get_hw_feature(__priv, __args...) \ - stmmac_do_void_callback(__priv, dma, get_hw_feature, __args) + stmmac_do_callback(__priv, dma, get_hw_feature, __args) #define stmmac_rx_watchdog(__priv, __args...) \ stmmac_do_void_callback(__priv, dma, rx_watchdog, __args) #define stmmac_set_tx_ring_len(__priv, __args...) \ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 727e68dfaf1c27db982996f2bb07fd10acfb95eb..617c960cfb5a59d8fa6fe1b987e7a02f3031c4e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -258,6 +258,7 @@ int stmmac_mdio_register(struct net_device *ndev); int stmmac_mdio_reset(struct mii_bus *mii); void stmmac_set_ethtool_ops(struct net_device *netdev); +int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags); void stmmac_ptp_register(struct stmmac_priv *priv); void stmmac_ptp_unregister(struct stmmac_priv *priv); int stmmac_resume(struct device *dev); @@ -270,6 +271,7 @@ void stmmac_disable_eee_mode(struct stmmac_priv *priv); bool stmmac_eee_init(struct stmmac_priv *priv); int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt); int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size); +int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled); #if IS_ENABLED(CONFIG_STMMAC_SELFTESTS) void stmmac_selftest_run(struct net_device *dev, diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 9e54f953634b7d7656b46cfece6e357e4c5b6cf9..0c0f01f490057c1dc3da829882c1720674796933 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -21,10 +21,18 @@ #include "dwxgmac2.h" #define REG_SPACE_SIZE 0x1060 +#define GMAC4_REG_SPACE_SIZE 0x116C #define MAC100_ETHTOOL_NAME "st_mac100" #define GMAC_ETHTOOL_NAME "st_gmac" #define XGMAC_ETHTOOL_NAME "st_xgmac" +/* Same as DMA_CHAN_BASE_ADDR defined in dwmac4_dma.h + * + * It is here because dwmac_dma.h and dwmac4_dam.h can not be included at the + * same time due to the conflicting macro names. + */ +#define GMAC4_DMA_CHAN_BASE_ADDR 0x00001100 + #define ETHTOOL_DMA_OFFSET 55 struct stmmac_stats { @@ -413,6 +421,8 @@ static int stmmac_ethtool_get_regs_len(struct net_device *dev) if (priv->plat->has_xgmac) return XGMAC_REGSIZE * 4; + else if (priv->plat->has_gmac4) + return GMAC4_REG_SPACE_SIZE; return REG_SPACE_SIZE; } @@ -425,8 +435,13 @@ static void stmmac_ethtool_gregs(struct net_device *dev, stmmac_dump_mac_regs(priv, priv->hw, reg_space); stmmac_dump_dma_regs(priv, priv->ioaddr, reg_space); - if (!priv->plat->has_xgmac) { - /* Copy DMA registers to where ethtool expects them */ + /* Copy DMA registers to where ethtool expects them */ + if (priv->plat->has_gmac4) { + /* GMAC4 dumps its DMA registers at its DMA_CHAN_BASE_ADDR */ + memcpy(®_space[ETHTOOL_DMA_OFFSET], + ®_space[GMAC4_DMA_CHAN_BASE_ADDR / 4], + NUM_DWMAC4_DMA_REGS * 4); + } else if (!priv->plat->has_xgmac) { memcpy(®_space[ETHTOOL_DMA_OFFSET], ®_space[DMA_BUS_MODE / 4], NUM_DWMAC1000_DMA_REGS * 4); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index d291612eeafb96b753da3a6e6b42fc2b16bc0d1e..07b1b8374cd26b8d9f2a52aa1a2d80d39b0b0c7f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -142,15 +142,20 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, static void get_systime(void __iomem *ioaddr, u64 *systime) { - u64 ns; - - /* Get the TSSS value */ - ns = readl(ioaddr + PTP_STNSR); - /* Get the TSS and convert sec time value to nanosecond */ - ns += readl(ioaddr + PTP_STSR) * 1000000000ULL; + u64 ns, sec0, sec1; + + /* Get the TSS value */ + sec1 = readl_relaxed(ioaddr + PTP_STSR); + do { + sec0 = sec1; + /* Get the TSSS value */ + ns = readl_relaxed(ioaddr + PTP_STNSR); + /* Get the TSS value */ + sec1 = readl_relaxed(ioaddr + PTP_STSR); + } while (sec0 != sec1); if (systime) - *systime = ns; + *systime = ns + (sec1 * 1000000000ULL); } const struct stmmac_hwtimestamp stmmac_ptp = { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 6133b2fe8a78a66609767b346f7d763af33a3157..a46c32257de42ba1e18c693a09999437a51d9c1c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_DEBUG_FS @@ -46,6 +47,13 @@ #include "dwxgmac2.h" #include "hwif.h" +/* As long as the interface is active, we keep the timestamping counter enabled + * with fine resolution and binary rollover. This avoid non-monotonic behavior + * (clock jumps) when changing timestamping settings at runtime. + */ +#define STMMAC_HWTS_ACTIVE (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | \ + PTP_TCR_TSCTRLSSR) + #define STMMAC_ALIGN(x) ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16) #define TSO_MAX_BUFF_SIZE (SZ_16K - 1) @@ -113,6 +121,28 @@ static void stmmac_exit_fs(struct net_device *dev); #define STMMAC_COAL_TIMER(x) (jiffies + usecs_to_jiffies(x)) +int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled) +{ + int ret = 0; + + if (enabled) { + ret = clk_prepare_enable(priv->plat->stmmac_clk); + if (ret) + return ret; + ret = clk_prepare_enable(priv->plat->pclk); + if (ret) { + clk_disable_unprepare(priv->plat->stmmac_clk); + return ret; + } + } else { + clk_disable_unprepare(priv->plat->stmmac_clk); + clk_disable_unprepare(priv->plat->pclk); + } + + return ret; +} +EXPORT_SYMBOL_GPL(stmmac_bus_clks_config); + /** * stmmac_verify_args - verify the driver parameters. * Description: it checks the driver parameters and set a default in case of @@ -485,8 +515,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) { struct stmmac_priv *priv = netdev_priv(dev); struct hwtstamp_config config; - struct timespec64 now; - u64 temp = 0; u32 ptp_v2 = 0; u32 tstamp_all = 0; u32 ptp_over_ipv4_udp = 0; @@ -495,11 +523,6 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) u32 snap_type_sel = 0; u32 ts_master_en = 0; u32 ts_event_en = 0; - u32 sec_inc = 0; - u32 value = 0; - bool xmac; - - xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; if (!(priv->dma_cap.time_stamp || priv->adv_ts)) { netdev_alert(priv->dev, "No support for HW time stamping\n"); @@ -605,7 +628,7 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; snap_type_sel = PTP_TCR_SNAPTYPSEL_1; - if (priv->synopsys_id != DWMAC_CORE_5_10) + if (priv->synopsys_id < DWMAC_CORE_4_10) ts_event_en = PTP_TCR_TSEVNTENA; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; @@ -661,42 +684,17 @@ static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) priv->hwts_rx_en = ((config.rx_filter == HWTSTAMP_FILTER_NONE) ? 0 : 1); priv->hwts_tx_en = config.tx_type == HWTSTAMP_TX_ON; - if (!priv->hwts_tx_en && !priv->hwts_rx_en) - stmmac_config_hw_tstamping(priv, priv->ptpaddr, 0); - else { - value = (PTP_TCR_TSENA | PTP_TCR_TSCFUPDT | PTP_TCR_TSCTRLSSR | - tstamp_all | ptp_v2 | ptp_over_ethernet | - ptp_over_ipv6_udp | ptp_over_ipv4_udp | ts_event_en | - ts_master_en | snap_type_sel); - stmmac_config_hw_tstamping(priv, priv->ptpaddr, value); - - /* program Sub Second Increment reg */ - stmmac_config_sub_second_increment(priv, - priv->ptpaddr, priv->plat->clk_ptp_rate, - xmac, &sec_inc); - temp = div_u64(1000000000ULL, sec_inc); - - /* Store sub second increment and flags for later use */ - priv->sub_second_inc = sec_inc; - priv->systime_flags = value; - - /* calculate default added value: - * formula is : - * addend = (2^32)/freq_div_ratio; - * where, freq_div_ratio = 1e9ns/sec_inc - */ - temp = (u64)(temp << 32); - priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate); - stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend); - - /* initialize system time */ - ktime_get_real_ts64(&now); + priv->systime_flags = STMMAC_HWTS_ACTIVE; - /* lower 32 bits of tv_sec are safe until y2106 */ - stmmac_init_systime(priv, priv->ptpaddr, - (u32)now.tv_sec, now.tv_nsec); + if (priv->hwts_tx_en || priv->hwts_rx_en) { + priv->systime_flags |= tstamp_all | ptp_v2 | + ptp_over_ethernet | ptp_over_ipv6_udp | + ptp_over_ipv4_udp | ts_event_en | + ts_master_en | snap_type_sel; } + stmmac_config_hw_tstamping(priv, priv->ptpaddr, priv->systime_flags); + memcpy(&priv->tstamp_config, &config, sizeof(config)); return copy_to_user(ifr->ifr_data, &config, @@ -724,6 +722,66 @@ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) sizeof(*config)) ? -EFAULT : 0; } +/** + * stmmac_init_tstamp_counter - init hardware timestamping counter + * @priv: driver private structure + * @systime_flags: timestamping flags + * Description: + * Initialize hardware counter for packet timestamping. + * This is valid as long as the interface is open and not suspended. + * Will be rerun after resuming from suspend, case in which the timestamping + * flags updated by stmmac_hwtstamp_set() also need to be restored. + */ +int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) +{ + bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; + struct timespec64 now; + u32 sec_inc = 0; + u64 temp = 0; + int ret; + + if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) + return -EOPNOTSUPP; + + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) { + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + return ret; + } + + stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags); + priv->systime_flags = systime_flags; + + /* program Sub Second Increment reg */ + stmmac_config_sub_second_increment(priv, priv->ptpaddr, + priv->plat->clk_ptp_rate, + xmac, &sec_inc); + temp = div_u64(1000000000ULL, sec_inc); + + /* Store sub second increment for later use */ + priv->sub_second_inc = sec_inc; + + /* calculate default added value: + * formula is : + * addend = (2^32)/freq_div_ratio; + * where, freq_div_ratio = 1e9ns/sec_inc + */ + temp = (u64)(temp << 32); + priv->default_addend = div_u64(temp, priv->plat->clk_ptp_rate); + stmmac_config_addend(priv, priv->ptpaddr, priv->default_addend); + + /* initialize system time */ + ktime_get_real_ts64(&now); + + /* lower 32 bits of tv_sec are safe until y2106 */ + stmmac_init_systime(priv, priv->ptpaddr, (u32)now.tv_sec, now.tv_nsec); + + return 0; +} +EXPORT_SYMBOL_GPL(stmmac_init_tstamp_counter); + /** * stmmac_init_ptp - init PTP * @priv: driver private structure @@ -734,9 +792,11 @@ static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) static int stmmac_init_ptp(struct stmmac_priv *priv) { bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; + int ret; - if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) - return -EOPNOTSUPP; + ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE); + if (ret) + return ret; priv->adv_ts = 0; /* Check if adv_ts can be enabled for dwmac 4.x / xgmac core */ @@ -756,8 +816,6 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) priv->hwts_tx_en = 0; priv->hwts_rx_en = 0; - stmmac_ptp_register(priv); - return 0; } @@ -2631,7 +2689,7 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) /** * stmmac_hw_setup - setup mac in a usable state. * @dev : pointer to the device structure. - * @init_ptp: initialize PTP if set + * @ptp_register: register PTP if set * Description: * this is the main function to setup the HW in a usable state because the * dma engine is reset, the core registers are configured (e.g. AXI, @@ -2641,7 +2699,7 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) * 0 on success and an appropriate (-)ve integer as defined in errno.h * file on failure. */ -static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) +static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) { struct stmmac_priv *priv = netdev_priv(dev); u32 rx_cnt = priv->plat->rx_queues_to_use; @@ -2697,17 +2755,13 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) stmmac_mmc_setup(priv); - if (init_ptp) { - ret = clk_prepare_enable(priv->plat->clk_ptp_ref); - if (ret < 0) - netdev_warn(priv->dev, "failed to enable PTP reference clock: %d\n", ret); - - ret = stmmac_init_ptp(priv); - if (ret == -EOPNOTSUPP) - netdev_warn(priv->dev, "PTP not supported by HW\n"); - else if (ret) - netdev_warn(priv->dev, "PTP init failed\n"); - } + ret = stmmac_init_ptp(priv); + if (ret == -EOPNOTSUPP) + netdev_warn(priv->dev, "PTP not supported by HW\n"); + else if (ret) + netdev_warn(priv->dev, "PTP init failed\n"); + else if (ptp_register) + stmmac_ptp_register(priv); priv->eee_tw_timer = STMMAC_DEFAULT_TWT_LS; @@ -2792,6 +2846,12 @@ static int stmmac_open(struct net_device *dev) u32 chan; int ret; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI && priv->hw->xpcs == NULL) { @@ -2800,7 +2860,7 @@ static int stmmac_open(struct net_device *dev) netdev_err(priv->dev, "%s: Cannot attach to PHY (error: %d)\n", __func__, ret); - return ret; + goto init_phy_error; } } @@ -2915,6 +2975,8 @@ init_error: free_dma_desc_resources(priv); dma_desc_error: phylink_disconnect_phy(priv->phylink); +init_phy_error: + pm_runtime_put(priv->device); return ret; } @@ -2965,6 +3027,8 @@ static int stmmac_release(struct net_device *dev) stmmac_release_ptp(priv); + pm_runtime_put(priv->device); + return 0; } @@ -4291,12 +4355,21 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr) struct stmmac_priv *priv = netdev_priv(ndev); int ret = 0; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + ret = eth_mac_addr(ndev, addr); if (ret) - return ret; + goto set_mac_error; stmmac_set_umac_addr(priv, priv->hw, ndev->dev_addr, 0); +set_mac_error: + pm_runtime_put(priv->device); + return ret; } @@ -4616,6 +4689,12 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi bool is_double = false; int ret; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + if (be16_to_cpu(proto) == ETH_P_8021AD) is_double = true; @@ -4624,10 +4703,15 @@ static int stmmac_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vi if (priv->hw->num_vlan) { ret = stmmac_del_hw_vlan_rx_fltr(priv, ndev, priv->hw, proto, vid); if (ret) - return ret; + goto del_vlan_error; } - return stmmac_vlan_update(priv, is_double); + ret = stmmac_vlan_update(priv, is_double); + +del_vlan_error: + pm_runtime_put(priv->device); + + return ret; } static const struct net_device_ops stmmac_netdev_ops = { @@ -5066,6 +5150,10 @@ int stmmac_dvr_probe(struct device *device, stmmac_check_pcs_mode(priv); + pm_runtime_get_noresume(device); + pm_runtime_set_active(device); + pm_runtime_enable(device); + if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) { /* MDIO bus Registration */ @@ -5103,6 +5191,11 @@ int stmmac_dvr_probe(struct device *device, stmmac_init_fs(ndev); #endif + /* Let pm_runtime_put() disable the clocks. + * If CONFIG_PM is not enabled, the clocks will stay powered. + */ + pm_runtime_put(device); + return ret; error_serdes_powerup: @@ -5152,8 +5245,8 @@ int stmmac_dvr_remove(struct device *dev) phylink_destroy(priv->phylink); if (priv->plat->stmmac_rst) reset_control_assert(priv->plat->stmmac_rst); - clk_disable_unprepare(priv->plat->pclk); - clk_disable_unprepare(priv->plat->stmmac_clk); + pm_runtime_put(dev); + pm_runtime_disable(dev); if (priv->hw->pcs != STMMAC_PCS_TBI && priv->hw->pcs != STMMAC_PCS_RTBI) stmmac_mdio_unregister(ndev); @@ -5217,10 +5310,6 @@ int stmmac_suspend(struct device *dev) stmmac_mac_set(priv, priv->ioaddr, false); pinctrl_pm_select_sleep_state(priv->device); - /* Disable clock in case of PWM is off */ - clk_disable_unprepare(priv->plat->clk_ptp_ref); - clk_disable_unprepare(priv->plat->pclk); - clk_disable_unprepare(priv->plat->stmmac_clk); } mutex_unlock(&priv->lock); @@ -5285,11 +5374,6 @@ int stmmac_resume(struct device *dev) priv->irq_wake = 0; } else { pinctrl_pm_select_default_state(priv->device); - /* enable the clk previously disabled */ - clk_prepare_enable(priv->plat->stmmac_clk); - clk_prepare_enable(priv->plat->pclk); - if (priv->plat->clk_ptp_ref) - clk_prepare_enable(priv->plat->clk_ptp_ref); /* reset the phy so that it's ready */ if (priv->mii) stmmac_mdio_reset(priv->mii); @@ -5344,7 +5428,7 @@ static int __init stmmac_cmdline_opt(char *str) char *opt; if (!str || !*str) - return -EINVAL; + return 1; while ((opt = strsep(&str, ",")) != NULL) { if (!strncmp(opt, "debug:", 6)) { if (kstrtoint(opt + 6, 0, &debug)) @@ -5375,11 +5459,11 @@ static int __init stmmac_cmdline_opt(char *str) goto err; } } - return 0; + return 1; err: pr_err("%s: ERROR broken module parameter conversion", __func__); - return -EINVAL; + return 1; } __setup("stmmaceth=", stmmac_cmdline_opt); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 678726c62a8af5cde782f7474624429382b90d92..7c1a14b256da3f66f922fd01eea3ae4aa60fb878 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -87,21 +88,29 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) u32 tmp, addr, value = MII_XGMAC_BUSY; int ret; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) - return -EBUSY; + !(tmp & MII_XGMAC_BUSY), 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } if (phyreg & MII_ADDR_C45) { phyreg &= ~MII_ADDR_C45; ret = stmmac_xgmac2_c45_format(priv, phyaddr, phyreg, &addr); if (ret) - return ret; + goto err_disable_clks; } else { ret = stmmac_xgmac2_c22_format(priv, phyaddr, phyreg, &addr); if (ret) - return ret; + goto err_disable_clks; value |= MII_XGMAC_SADDR; } @@ -112,8 +121,10 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) - return -EBUSY; + !(tmp & MII_XGMAC_BUSY), 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } /* Set the MII address register to read */ writel(addr, priv->ioaddr + mii_address); @@ -121,11 +132,18 @@ static int stmmac_xgmac2_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) - return -EBUSY; + !(tmp & MII_XGMAC_BUSY), 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } /* Read the data from the MII data register */ - return readl(priv->ioaddr + mii_data) & GENMASK(15, 0); + ret = (int)readl(priv->ioaddr + mii_data) & GENMASK(15, 0); + +err_disable_clks: + pm_runtime_put(priv->device); + + return ret; } static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr, @@ -138,21 +156,29 @@ static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr, u32 addr, tmp, value = MII_XGMAC_BUSY; int ret; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) - return -EBUSY; + !(tmp & MII_XGMAC_BUSY), 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } if (phyreg & MII_ADDR_C45) { phyreg &= ~MII_ADDR_C45; ret = stmmac_xgmac2_c45_format(priv, phyaddr, phyreg, &addr); if (ret) - return ret; + goto err_disable_clks; } else { ret = stmmac_xgmac2_c22_format(priv, phyaddr, phyreg, &addr); if (ret) - return ret; + goto err_disable_clks; value |= MII_XGMAC_SADDR; } @@ -164,16 +190,23 @@ static int stmmac_xgmac2_mdio_write(struct mii_bus *bus, int phyaddr, /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) - return -EBUSY; + !(tmp & MII_XGMAC_BUSY), 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } /* Set the MII address register to write */ writel(addr, priv->ioaddr + mii_address); writel(value, priv->ioaddr + mii_data); /* Wait until any existing MII operation is complete */ - return readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000); + ret = readl_poll_timeout(priv->ioaddr + mii_data, tmp, + !(tmp & MII_XGMAC_BUSY), 100, 10000); + +err_disable_clks: + pm_runtime_put(priv->device); + + return ret; } /** @@ -196,6 +229,12 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) int data = 0; u32 v; + data = pm_runtime_get_sync(priv->device); + if (data < 0) { + pm_runtime_put_noidle(priv->device); + return data; + } + value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; @@ -216,19 +255,26 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg) } if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), - 100, 10000)) - return -EBUSY; + 100, 10000)) { + data = -EBUSY; + goto err_disable_clks; + } writel(data, priv->ioaddr + mii_data); writel(value, priv->ioaddr + mii_address); if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), - 100, 10000)) - return -EBUSY; + 100, 10000)) { + data = -EBUSY; + goto err_disable_clks; + } /* Read the data from the MII data register */ data = (int)readl(priv->ioaddr + mii_data) & MII_DATA_MASK; +err_disable_clks: + pm_runtime_put(priv->device); + return data; } @@ -247,10 +293,16 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, struct stmmac_priv *priv = netdev_priv(ndev); unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; + int ret, data = phydata; u32 value = MII_BUSY; - int data = phydata; u32 v; + ret = pm_runtime_get_sync(priv->device); + if (ret < 0) { + pm_runtime_put_noidle(priv->device); + return ret; + } + value |= (phyaddr << priv->hw->mii.addr_shift) & priv->hw->mii.addr_mask; value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; @@ -275,16 +327,23 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg, /* Wait until any existing MII operation is complete */ if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), - 100, 10000)) - return -EBUSY; + 100, 10000)) { + ret = -EBUSY; + goto err_disable_clks; + } /* Set the MII address register to write */ writel(data, priv->ioaddr + mii_data); writel(value, priv->ioaddr + mii_address); /* Wait until any existing MII operation is complete */ - return readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), - 100, 10000); + ret = readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), + 100, 10000); + +err_disable_clks: + pm_runtime_put(priv->device); + + return ret; } /** diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 53be8fc1d125fa99c7ec86e741df88b8966e85ac..3183d8826981e14cc54289be667a3d5c036d544b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -9,6 +9,7 @@ *******************************************************************************/ #include +#include #include #include #include @@ -508,6 +509,14 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) plat->pmt = 1; } + if (of_device_is_compatible(np, "snps,dwmac-3.40a")) { + plat->has_gmac = 1; + plat->enh_desc = 1; + plat->tx_coe = 1; + plat->bugged_jumbo = 1; + plat->pmt = 1; + } + if (of_device_is_compatible(np, "snps,dwmac-4.00") || of_device_is_compatible(np, "snps,dwmac-4.10a") || of_device_is_compatible(np, "snps,dwmac-4.20a") || @@ -712,7 +721,6 @@ int stmmac_pltfr_remove(struct platform_device *pdev) } EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); -#ifdef CONFIG_PM_SLEEP /** * stmmac_pltfr_suspend * @dev: device pointer @@ -720,7 +728,7 @@ EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); * call the main suspend function and then, if required, on some platform, it * can call an exit helper. */ -static int stmmac_pltfr_suspend(struct device *dev) +static int __maybe_unused stmmac_pltfr_suspend(struct device *dev) { int ret; struct net_device *ndev = dev_get_drvdata(dev); @@ -741,7 +749,7 @@ static int stmmac_pltfr_suspend(struct device *dev) * the main resume function, on some platforms, it can call own init helper * if required. */ -static int stmmac_pltfr_resume(struct device *dev) +static int __maybe_unused stmmac_pltfr_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct stmmac_priv *priv = netdev_priv(ndev); @@ -752,10 +760,72 @@ static int stmmac_pltfr_resume(struct device *dev) return stmmac_resume(dev); } -#endif /* CONFIG_PM_SLEEP */ -SIMPLE_DEV_PM_OPS(stmmac_pltfr_pm_ops, stmmac_pltfr_suspend, - stmmac_pltfr_resume); +static int __maybe_unused stmmac_runtime_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + + stmmac_bus_clks_config(priv, false); + + return 0; +} + +static int __maybe_unused stmmac_runtime_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + + return stmmac_bus_clks_config(priv, true); +} + +static int __maybe_unused stmmac_pltfr_noirq_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret; + + if (!netif_running(ndev)) + return 0; + + if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + /* Disable clock in case of PWM is off */ + clk_disable_unprepare(priv->plat->clk_ptp_ref); + + ret = pm_runtime_force_suspend(dev); + if (ret) + return ret; + } + + return 0; +} + +static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret; + + if (!netif_running(ndev)) + return 0; + + if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + /* enable the clk previously disabled */ + ret = pm_runtime_force_resume(dev); + if (ret) + return ret; + + stmmac_init_tstamp_counter(priv, priv->systime_flags); + } + + return 0; +} + +const struct dev_pm_ops stmmac_pltfr_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_suspend, stmmac_pltfr_resume) + SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL) + SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_noirq_suspend, stmmac_pltfr_noirq_resume) +}; EXPORT_SYMBOL_GPL(stmmac_pltfr_pm_ops); MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet platform support"); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 639980306115889ecc8cff0ec8d72a0e346360b8..43165c662740dc668a74b903ecd17166f2d86ea3 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -679,8 +679,6 @@ static int tc_setup_taprio(struct stmmac_priv *priv, goto disable; if (qopt->num_entries >= dep) return -EINVAL; - if (!qopt->base_time) - return -ERANGE; if (!qopt->cycle_time) return -ERANGE; diff --git a/drivers/net/ethernet/sun/Kconfig b/drivers/net/ethernet/sun/Kconfig index 309de38a7530432e6d0bf56337002ff4d694683b..b0d3f9a2950c023333a88dca8b8e44614af547f0 100644 --- a/drivers/net/ethernet/sun/Kconfig +++ b/drivers/net/ethernet/sun/Kconfig @@ -73,6 +73,7 @@ config CASSINI config SUNVNET_COMMON tristate "Common routines to support Sun Virtual Networking" depends on SUN_LDOMS + depends on INET default m config SUNVNET diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 424e644724e4608acfe6f2f631ac71e5b7d22f57..e74f2e95a46eb8ab8f01384f0718ebbc4b7a6504 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -1144,7 +1144,7 @@ int cpsw_fill_rx_channels(struct cpsw_priv *priv) static struct page_pool *cpsw_create_page_pool(struct cpsw_common *cpsw, int size) { - struct page_pool_params pp_params; + struct page_pool_params pp_params = {}; struct page_pool *pool; pp_params.order = 0; diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c index 43222a34cba069b9bc10750cbd9a4bcdfc8b6228..f9514518700ebb7f70009b46583f66af968ec417 100644 --- a/drivers/net/ethernet/ti/cpts.c +++ b/drivers/net/ethernet/ti/cpts.c @@ -568,7 +568,9 @@ int cpts_register(struct cpts *cpts) for (i = 0; i < CPTS_MAX_EVENTS; i++) list_add(&cpts->pool_data[i].list, &cpts->pool); - clk_enable(cpts->refclk); + err = clk_enable(cpts->refclk); + if (err) + return err; cpts_write32(cpts, CPTS_EN, control); cpts_write32(cpts, TS_PEND_EN, int_enable); diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 03055c96f07604eaa5b752024a309c9a8d4bbb8a..ad5293571af4d0afc16794754da225eefa33e718 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -412,8 +412,20 @@ static int emac_set_coalesce(struct net_device *ndev, u32 int_ctrl, num_interrupts = 0; u32 prescale = 0, addnl_dvdr = 1, coal_intvl = 0; - if (!coal->rx_coalesce_usecs) - return -EINVAL; + if (!coal->rx_coalesce_usecs) { + priv->coal_intvl = 0; + + switch (priv->version) { + case EMAC_VERSION_2: + emac_ctrl_write(EMAC_DM646X_CMINTCTRL, 0); + break; + default: + emac_ctrl_write(EMAC_CTRL_EWINTTCNT, 0); + break; + } + + return 0; + } coal_intvl = coal->rx_coalesce_usecs; diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 650ffb93796f1a50a77ea41056d1e9ad4289dfa3..130f4b707bdc4455face8f93cc98710d4abfe246 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1421,6 +1421,8 @@ static int temac_probe(struct platform_device *pdev) lp->indirect_lock = devm_kmalloc(&pdev->dev, sizeof(*lp->indirect_lock), GFP_KERNEL); + if (!lp->indirect_lock) + return -ENOMEM; spin_lock_init(lp->indirect_lock); } diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 69c79cc24e6e4c019bc13f5382971da81715446e..0baf85122f5acebf183d1b2c2b5dc1ec1c4d7ee4 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -41,8 +41,9 @@ #include "xilinx_axienet.h" /* Descriptors defines for Tx and Rx DMA */ -#define TX_BD_NUM_DEFAULT 64 +#define TX_BD_NUM_DEFAULT 128 #define RX_BD_NUM_DEFAULT 1024 +#define TX_BD_NUM_MIN (MAX_SKB_FRAGS + 1) #define TX_BD_NUM_MAX 4096 #define RX_BD_NUM_MAX 4096 @@ -496,7 +497,8 @@ static void axienet_setoptions(struct net_device *ndev, u32 options) static int __axienet_device_reset(struct axienet_local *lp) { - u32 timeout; + u32 value; + int ret; /* Reset Axi DMA. This would reset Axi Ethernet core as well. The reset * process of Axi DMA takes a while to complete as all pending @@ -506,15 +508,23 @@ static int __axienet_device_reset(struct axienet_local *lp) * they both reset the entire DMA core, so only one needs to be used. */ axienet_dma_out32(lp, XAXIDMA_TX_CR_OFFSET, XAXIDMA_CR_RESET_MASK); - timeout = DELAY_OF_ONE_MILLISEC; - while (axienet_dma_in32(lp, XAXIDMA_TX_CR_OFFSET) & - XAXIDMA_CR_RESET_MASK) { - udelay(1); - if (--timeout == 0) { - netdev_err(lp->ndev, "%s: DMA reset timeout!\n", - __func__); - return -ETIMEDOUT; - } + ret = read_poll_timeout(axienet_dma_in32, value, + !(value & XAXIDMA_CR_RESET_MASK), + DELAY_OF_ONE_MILLISEC, 50000, false, lp, + XAXIDMA_TX_CR_OFFSET); + if (ret) { + dev_err(lp->dev, "%s: DMA reset timeout!\n", __func__); + return ret; + } + + /* Wait for PhyRstCmplt bit to be set, indicating the PHY reset has finished */ + ret = read_poll_timeout(axienet_ior, value, + value & XAE_INT_PHYRSTCMPLT_MASK, + DELAY_OF_ONE_MILLISEC, 50000, false, lp, + XAE_IS_OFFSET); + if (ret) { + dev_err(lp->dev, "%s: timeout waiting for PhyRstCmplt\n", __func__); + return ret; } return 0; @@ -623,6 +633,8 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, if (nr_bds == -1 && !(status & XAXIDMA_BD_STS_COMPLETE_MASK)) break; + /* Ensure we see complete descriptor update */ + dma_rmb(); phys = desc_get_phys_addr(lp, cur_p); dma_unmap_single(ndev->dev.parent, phys, (cur_p->cntrl & XAXIDMA_BD_CTRL_LENGTH_MASK), @@ -631,13 +643,15 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, if (cur_p->skb && (status & XAXIDMA_BD_STS_COMPLETE_MASK)) dev_consume_skb_irq(cur_p->skb); - cur_p->cntrl = 0; cur_p->app0 = 0; cur_p->app1 = 0; cur_p->app2 = 0; cur_p->app4 = 0; - cur_p->status = 0; cur_p->skb = NULL; + /* ensure our transmit path and device don't prematurely see status cleared */ + wmb(); + cur_p->cntrl = 0; + cur_p->status = 0; if (sizep) *sizep += status & XAXIDMA_BD_STS_ACTUAL_LEN_MASK; @@ -646,6 +660,32 @@ static int axienet_free_tx_chain(struct net_device *ndev, u32 first_bd, return i; } +/** + * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy + * @lp: Pointer to the axienet_local structure + * @num_frag: The number of BDs to check for + * + * Return: 0, on success + * NETDEV_TX_BUSY, if any of the descriptors are not free + * + * This function is invoked before BDs are allocated and transmission starts. + * This function returns 0 if a BD or group of BDs can be allocated for + * transmission. If the BD or any of the BDs are not free the function + * returns a busy status. This is invoked from axienet_start_xmit. + */ +static inline int axienet_check_tx_bd_space(struct axienet_local *lp, + int num_frag) +{ + struct axidma_bd *cur_p; + + /* Ensure we see all descriptor updates from device or TX IRQ path */ + rmb(); + cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num]; + if (cur_p->cntrl) + return NETDEV_TX_BUSY; + return 0; +} + /** * axienet_start_xmit_done - Invoked once a transmit is completed by the * Axi DMA Tx channel. @@ -675,30 +715,8 @@ static void axienet_start_xmit_done(struct net_device *ndev) /* Matches barrier in axienet_start_xmit */ smp_mb(); - netif_wake_queue(ndev); -} - -/** - * axienet_check_tx_bd_space - Checks if a BD/group of BDs are currently busy - * @lp: Pointer to the axienet_local structure - * @num_frag: The number of BDs to check for - * - * Return: 0, on success - * NETDEV_TX_BUSY, if any of the descriptors are not free - * - * This function is invoked before BDs are allocated and transmission starts. - * This function returns 0 if a BD or group of BDs can be allocated for - * transmission. If the BD or any of the BDs are not free the function - * returns a busy status. This is invoked from axienet_start_xmit. - */ -static inline int axienet_check_tx_bd_space(struct axienet_local *lp, - int num_frag) -{ - struct axidma_bd *cur_p; - cur_p = &lp->tx_bd_v[(lp->tx_bd_tail + num_frag) % lp->tx_bd_num]; - if (cur_p->status & XAXIDMA_BD_STS_ALL_MASK) - return NETDEV_TX_BUSY; - return 0; + if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) + netif_wake_queue(ndev); } /** @@ -730,20 +748,15 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) num_frag = skb_shinfo(skb)->nr_frags; cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; - if (axienet_check_tx_bd_space(lp, num_frag)) { - if (netif_queue_stopped(ndev)) - return NETDEV_TX_BUSY; - + if (axienet_check_tx_bd_space(lp, num_frag + 1)) { + /* Should not happen as last start_xmit call should have + * checked for sufficient space and queue should only be + * woken when sufficient space is available. + */ netif_stop_queue(ndev); - - /* Matches barrier in axienet_start_xmit_done */ - smp_mb(); - - /* Space might have just been freed - check again */ - if (axienet_check_tx_bd_space(lp, num_frag)) - return NETDEV_TX_BUSY; - - netif_wake_queue(ndev); + if (net_ratelimit()) + netdev_warn(ndev, "TX ring unexpectedly full\n"); + return NETDEV_TX_BUSY; } if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -804,6 +817,18 @@ axienet_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (++lp->tx_bd_tail >= lp->tx_bd_num) lp->tx_bd_tail = 0; + /* Stop queue if next transmit may not have space */ + if (axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) { + netif_stop_queue(ndev); + + /* Matches barrier in axienet_start_xmit_done */ + smp_mb(); + + /* Space might have just been freed - check again */ + if (!axienet_check_tx_bd_space(lp, MAX_SKB_FRAGS + 1)) + netif_wake_queue(ndev); + } + return NETDEV_TX_OK; } @@ -834,6 +859,8 @@ static void axienet_recv(struct net_device *ndev) tail_p = lp->rx_bd_p + sizeof(*lp->rx_bd_v) * lp->rx_bd_ci; + /* Ensure we see complete descriptor update */ + dma_rmb(); phys = desc_get_phys_addr(lp, cur_p); dma_unmap_single(ndev->dev.parent, phys, lp->max_frm_size, DMA_FROM_DEVICE); @@ -1355,7 +1382,8 @@ static int axienet_ethtools_set_ringparam(struct net_device *ndev, if (ering->rx_pending > RX_BD_NUM_MAX || ering->rx_mini_pending || ering->rx_jumbo_pending || - ering->rx_pending > TX_BD_NUM_MAX) + ering->tx_pending < TX_BD_NUM_MIN || + ering->tx_pending > TX_BD_NUM_MAX) return -EINVAL; if (netif_running(ndev)) @@ -2015,6 +2043,11 @@ static int axienet_probe(struct platform_device *pdev) lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD; lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD; + /* Reset core now that clocks are enabled, prior to accessing MDIO */ + ret = __axienet_device_reset(lp); + if (ret) + goto cleanup_clk; + lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (lp->phy_node) { ret = axienet_mdio_setup(lp); diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c index 962831cdde4db0860673e24d51f2305fda0cf333..4bd44fbc6ecfa92030021750b0637555d255e74b 100644 --- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c +++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c @@ -1187,7 +1187,7 @@ static int xemaclite_of_probe(struct platform_device *ofdev) if (rc) { dev_err(dev, "Cannot register network device, aborting\n"); - goto error; + goto put_node; } dev_info(dev, @@ -1195,6 +1195,8 @@ static int xemaclite_of_probe(struct platform_device *ofdev) (unsigned int __force)ndev->mem_start, lp->base_addr, ndev->irq); return 0; +put_node: + of_node_put(lp->phy_node); error: free_netdev(ndev); return rc; diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index e449d946612259cf77ac78ea87449f0b3ebb0b40..2a569eea4ee8feccdcc014be07ef888e23fc8af6 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -1269,6 +1269,11 @@ static int fjes_probe(struct platform_device *plat_dev) hw->hw_res.start = res->start; hw->hw_res.size = resource_size(res); hw->hw_res.irq = platform_get_irq(plat_dev, 0); + if (hw->hw_res.irq < 0) { + err = hw->hw_res.irq; + goto err_free_control_wq; + } + err = fjes_hw_init(&adapter->hw); if (err) goto err_free_control_wq; diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 920e9f888cc353a0be7b84e76cf55977657cd5e0..049264a7d9611b5697741b67e8ea5f10dd6c535e 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -31,6 +31,8 @@ #define AX_MTU 236 +/* some arch define END as assembly function ending, just undef it */ +#undef END /* SLIP/KISS protocol characters. */ #define END 0300 /* indicates end of frame */ #define ESC 0333 /* indicates byte stuffing */ @@ -792,13 +794,14 @@ static void mkiss_close(struct tty_struct *tty) */ netif_stop_queue(ax->dev); - /* Free all AX25 frame buffers. */ + unregister_netdev(ax->dev); + + /* Free all AX25 frame buffers after unreg. */ kfree(ax->rbuff); kfree(ax->xbuff); ax->tty = NULL; - unregister_netdev(ax->dev); free_netdev(ax->dev); } diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c index 5ab53e9942f3052b16f9e1f867e63901341b67b1..5d30b3e1806ab91fe39f0fa95586e79591035b28 100644 --- a/drivers/net/hamradio/yam.c +++ b/drivers/net/hamradio/yam.c @@ -951,9 +951,7 @@ static int yam_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(struct yamdrv_ioctl_mcs)); if (IS_ERR(ym)) return PTR_ERR(ym); - if (ym->cmd != SIOCYAMSMCS) - return -EINVAL; - if (ym->bitrate > YAM_MAXBITRATE) { + if (ym->cmd != SIOCYAMSMCS || ym->bitrate > YAM_MAXBITRATE) { kfree(ym); return -EINVAL; } diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 7d67f41387f5577959839f902128b1777b4b51ee..4f5ef8a9a9a8796db21f270bbb707eb5204e10f1 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -100,6 +100,7 @@ struct at86rf230_local { unsigned long cal_timeout; bool is_tx; bool is_tx_from_off; + bool was_tx; u8 tx_retry; struct sk_buff *tx_skb; struct at86rf230_state_change tx; @@ -343,7 +344,11 @@ at86rf230_async_error_recover_complete(void *context) if (ctx->free) kfree(ctx); - ieee802154_wake_queue(lp->hw); + if (lp->was_tx) { + lp->was_tx = 0; + dev_kfree_skb_any(lp->tx_skb); + ieee802154_wake_queue(lp->hw); + } } static void @@ -352,7 +357,11 @@ at86rf230_async_error_recover(void *context) struct at86rf230_state_change *ctx = context; struct at86rf230_local *lp = ctx->lp; - lp->is_tx = 0; + if (lp->is_tx) { + lp->was_tx = 1; + lp->is_tx = 0; + } + at86rf230_async_state_change(lp, ctx, STATE_RX_AACK_ON, at86rf230_async_error_recover_complete); } diff --git a/drivers/net/ieee802154/atusb.c b/drivers/net/ieee802154/atusb.c index 23ee0b14cbfa1f39f5d3a828d2cad130b456bffd..2f5e7b31032aa130f808491b85930b7be07577b9 100644 --- a/drivers/net/ieee802154/atusb.c +++ b/drivers/net/ieee802154/atusb.c @@ -93,7 +93,9 @@ static int atusb_control_msg(struct atusb *atusb, unsigned int pipe, ret = usb_control_msg(usb_dev, pipe, request, requesttype, value, index, data, size, timeout); - if (ret < 0) { + if (ret < size) { + ret = ret < 0 ? ret : -ENODATA; + atusb->err = ret; dev_err(&usb_dev->dev, "%s: req 0x%02x val 0x%x idx 0x%x, error %d\n", @@ -861,9 +863,9 @@ static int atusb_get_and_show_build(struct atusb *atusb) if (!build) return -ENOMEM; - ret = atusb_control_msg(atusb, usb_rcvctrlpipe(usb_dev, 0), - ATUSB_BUILD, ATUSB_REQ_FROM_DEV, 0, 0, - build, ATUSB_BUILD_SIZE, 1000); + /* We cannot call atusb_control_msg() here, since this request may read various length data */ + ret = usb_control_msg(atusb->usb_dev, usb_rcvctrlpipe(usb_dev, 0), ATUSB_BUILD, + ATUSB_REQ_FROM_DEV, 0, 0, build, ATUSB_BUILD_SIZE, 1000); if (ret >= 0) { build[ret] = 0; dev_info(&usb_dev->dev, "Firmware: build %s\n", build); diff --git a/drivers/net/ieee802154/ca8210.c b/drivers/net/ieee802154/ca8210.c index 4eb64709d44cb09865d23d9e34ea5d53a9cfe6b4..fd9f33c833fa367afd5a5a69e8c165f37ab3d70f 100644 --- a/drivers/net/ieee802154/ca8210.c +++ b/drivers/net/ieee802154/ca8210.c @@ -1771,6 +1771,7 @@ static int ca8210_async_xmit_complete( status ); if (status != MAC_TRANSACTION_OVERFLOW) { + dev_kfree_skb_any(priv->tx_skb); ieee802154_wake_queue(priv->hw); return 0; } @@ -2976,8 +2977,8 @@ static void ca8210_hw_setup(struct ieee802154_hw *ca8210_hw) ca8210_hw->phy->cca.opt = NL802154_CCA_OPT_ENERGY_CARRIER_AND; ca8210_hw->phy->cca_ed_level = -9800; ca8210_hw->phy->symbol_duration = 16; - ca8210_hw->phy->lifs_period = 40; - ca8210_hw->phy->sifs_period = 12; + ca8210_hw->phy->lifs_period = 40 * ca8210_hw->phy->symbol_duration; + ca8210_hw->phy->sifs_period = 12 * ca8210_hw->phy->symbol_duration; ca8210_hw->flags = IEEE802154_HW_AFILT | IEEE802154_HW_OMIT_CKSUM | diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index 080b15fc00601ef832c5c1090c8872c211037e4c..97981cf7661ad759d901ff667174fe63bfea0a1f 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -786,6 +786,7 @@ static int hwsim_add_one(struct genl_info *info, struct device *dev, goto err_pib; } + pib->channel = 13; rcu_assign_pointer(phy->pib, pib); phy->idx = idx; INIT_LIST_HEAD(&phy->edges); diff --git a/drivers/net/ieee802154/mcr20a.c b/drivers/net/ieee802154/mcr20a.c index 8dc04e2590b18b1b41239c4ebf8ac1382942a8d5..383231b85464252a9dfc6a20f938247e5b546959 100644 --- a/drivers/net/ieee802154/mcr20a.c +++ b/drivers/net/ieee802154/mcr20a.c @@ -976,8 +976,8 @@ static void mcr20a_hw_setup(struct mcr20a_local *lp) dev_dbg(printdev(lp), "%s\n", __func__); phy->symbol_duration = 16; - phy->lifs_period = 40; - phy->sifs_period = 12; + phy->lifs_period = 40 * phy->symbol_duration; + phy->sifs_period = 12 * phy->symbol_duration; hw->flags = IEEE802154_HW_TX_OMIT_CKSUM | IEEE802154_HW_AFILT | diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 7fe306e76281d926ab228dda471790a9b3634d61..db3a9b93d4db7ff736d9aeb41337b998144e1288 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -76,7 +76,9 @@ static void ifb_ri_tasklet(unsigned long _txp) while ((skb = __skb_dequeue(&txp->tq)) != NULL) { skb->redirected = 0; +#ifdef CONFIG_NET_CLS_ACT skb->tc_skip_classify = 1; +#endif u64_stats_update_begin(&txp->tsync); txp->tx_packets++; diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c index b40b711cf4bd5911a365f8ccdaf5f19853f3afba..621648ce750b7efd60c328152d27cef3f90c0f5c 100644 --- a/drivers/net/ipa/ipa_endpoint.c +++ b/drivers/net/ipa/ipa_endpoint.c @@ -703,6 +703,7 @@ static void ipa_endpoint_init_hol_block_timer(struct ipa_endpoint *endpoint, u32 offset; u32 val; + /* This should only be changed when HOL_BLOCK_EN is disabled */ offset = IPA_REG_ENDP_INIT_HOL_BLOCK_TIMER_N_OFFSET(endpoint_id); val = ipa_reg_init_hol_block_timer_val(ipa, microseconds); iowrite32(val, ipa->reg_virt + offset); @@ -730,6 +731,7 @@ void ipa_endpoint_modem_hol_block_clear_all(struct ipa *ipa) if (endpoint->toward_ipa || endpoint->ee_id != GSI_EE_MODEM) continue; + ipa_endpoint_init_hol_block_enable(endpoint, false); ipa_endpoint_init_hol_block_timer(endpoint, 0); ipa_endpoint_init_hol_block_enable(endpoint, true); } @@ -899,27 +901,35 @@ static void ipa_endpoint_replenish(struct ipa_endpoint *endpoint, u32 count) struct gsi *gsi; u32 backlog; - if (!endpoint->replenish_enabled) { + if (!test_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags)) { if (count) atomic_add(count, &endpoint->replenish_saved); return; } + /* If already active, just update the backlog */ + if (test_and_set_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags)) { + if (count) + atomic_add(count, &endpoint->replenish_backlog); + return; + } while (atomic_dec_not_zero(&endpoint->replenish_backlog)) if (ipa_endpoint_replenish_one(endpoint)) goto try_again_later; + + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); + if (count) atomic_add(count, &endpoint->replenish_backlog); return; try_again_later: - /* The last one didn't succeed, so fix the backlog */ - backlog = atomic_inc_return(&endpoint->replenish_backlog); + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); - if (count) - atomic_add(count, &endpoint->replenish_backlog); + /* The last one didn't succeed, so fix the backlog */ + backlog = atomic_add_return(count + 1, &endpoint->replenish_backlog); /* Whenever a receive buffer transaction completes we'll try to * replenish again. It's unlikely, but if we fail to supply even @@ -939,7 +949,7 @@ static void ipa_endpoint_replenish_enable(struct ipa_endpoint *endpoint) u32 max_backlog; u32 saved; - endpoint->replenish_enabled = true; + set_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); while ((saved = atomic_xchg(&endpoint->replenish_saved, 0))) atomic_add(saved, &endpoint->replenish_backlog); @@ -953,7 +963,7 @@ static void ipa_endpoint_replenish_disable(struct ipa_endpoint *endpoint) { u32 backlog; - endpoint->replenish_enabled = false; + clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); while ((backlog = atomic_xchg(&endpoint->replenish_backlog, 0))) atomic_add(backlog, &endpoint->replenish_saved); } @@ -1470,7 +1480,8 @@ static void ipa_endpoint_setup_one(struct ipa_endpoint *endpoint) /* RX transactions require a single TRE, so the maximum * backlog is the same as the maximum outstanding TREs. */ - endpoint->replenish_enabled = false; + clear_bit(IPA_REPLENISH_ENABLED, endpoint->replenish_flags); + clear_bit(IPA_REPLENISH_ACTIVE, endpoint->replenish_flags); atomic_set(&endpoint->replenish_saved, gsi_channel_tre_max(gsi, endpoint->channel_id)); atomic_set(&endpoint->replenish_backlog, 0); diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h index 58a245de488e86298f2bf5eb515032cf3f00a6e9..823c4a1296587c225f65239e624fe1911b707a52 100644 --- a/drivers/net/ipa/ipa_endpoint.h +++ b/drivers/net/ipa/ipa_endpoint.h @@ -39,6 +39,19 @@ enum ipa_endpoint_name { #define IPA_ENDPOINT_MAX 32 /* Max supported by driver */ +/** + * enum ipa_replenish_flag: RX buffer replenish flags + * + * @IPA_REPLENISH_ENABLED: Whether receive buffer replenishing is enabled + * @IPA_REPLENISH_ACTIVE: Whether replenishing is underway + * @IPA_REPLENISH_COUNT: Number of defined replenish flags + */ +enum ipa_replenish_flag { + IPA_REPLENISH_ENABLED, + IPA_REPLENISH_ACTIVE, + IPA_REPLENISH_COUNT, /* Number of flags (must be last) */ +}; + /** * struct ipa_endpoint - IPA endpoint information * @channel_id: EP's GSI channel @@ -60,7 +73,7 @@ struct ipa_endpoint { struct net_device *netdev; /* Receive buffer replenishing for RX endpoints */ - bool replenish_enabled; + DECLARE_BITMAP(replenish_flags, IPA_REPLENISH_COUNT); u32 replenish_ready; atomic_t replenish_saved; atomic_t replenish_backlog; diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index c601d3df272202bd52d23888c59312cbf2e87b4e..789a124809e3c2b118bb376b9b011eabfabf64b1 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3869,6 +3869,18 @@ static void macsec_common_dellink(struct net_device *dev, struct list_head *head struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; + /* If h/w offloading is available, propagate to the device */ + if (macsec_is_offloaded(macsec)) { + const struct macsec_ops *ops; + struct macsec_context ctx; + + ops = macsec_get_ops(netdev_priv(dev), &ctx); + if (ops) { + ctx.secy = &macsec->secy; + macsec_offload(ops->mdo_del_secy, &ctx); + } + } + unregister_netdevice_queue(dev, head); list_del_rcu(&macsec->secys); macsec_del_dev(macsec); @@ -3883,18 +3895,6 @@ static void macsec_dellink(struct net_device *dev, struct list_head *head) struct net_device *real_dev = macsec->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); - /* If h/w offloading is available, propagate to the device */ - if (macsec_is_offloaded(macsec)) { - const struct macsec_ops *ops; - struct macsec_context ctx; - - ops = macsec_get_ops(netdev_priv(dev), &ctx); - if (ops) { - ctx.secy = &macsec->secy; - macsec_offload(ops->mdo_del_secy, &ctx); - } - } - macsec_common_dellink(dev, head); if (list_empty(&rxd->secys)) { @@ -4017,6 +4017,15 @@ static int macsec_newlink(struct net *net, struct net_device *dev, !macsec_check_offload(macsec->offload, macsec)) return -EOPNOTSUPP; + /* send_sci must be set to true when transmit sci explicitly is set */ + if ((data && data[IFLA_MACSEC_SCI]) && + (data && data[IFLA_MACSEC_INC_SCI])) { + u8 send_sci = !!nla_get_u8(data[IFLA_MACSEC_INC_SCI]); + + if (!send_sci) + return -EINVAL; + } + if (data && data[IFLA_MACSEC_ICV_LEN]) icv_len = nla_get_u8(data[IFLA_MACSEC_ICV_LEN]); mtu = real_dev->mtu - icv_len - macsec_extra_len(true); diff --git a/drivers/net/mdio/mdio-aspeed.c b/drivers/net/mdio/mdio-aspeed.c index cad820568f75119f70c234d9efe15cb54847bd7f..e2273588c75b6f3dd212269611d7f41a8769ed16 100644 --- a/drivers/net/mdio/mdio-aspeed.c +++ b/drivers/net/mdio/mdio-aspeed.c @@ -61,6 +61,13 @@ static int aspeed_mdio_read(struct mii_bus *bus, int addr, int regnum) iowrite32(ctrl, ctx->base + ASPEED_MDIO_CTRL); + rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_CTRL, ctrl, + !(ctrl & ASPEED_MDIO_CTRL_FIRE), + ASPEED_MDIO_INTERVAL_US, + ASPEED_MDIO_TIMEOUT_US); + if (rc < 0) + return rc; + rc = readl_poll_timeout(ctx->base + ASPEED_MDIO_DATA, data, data & ASPEED_MDIO_DATA_IDLE, ASPEED_MDIO_INTERVAL_US, @@ -141,6 +148,7 @@ static const struct of_device_id aspeed_mdio_of_match[] = { { .compatible = "aspeed,ast2600-mdio", }, { }, }; +MODULE_DEVICE_TABLE(of, aspeed_mdio_of_match); static struct platform_driver aspeed_mdio_driver = { .driver = { diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 90aafb56f1409ed7be70dd5c4e8b4a8cce7f7dda..a438202129323860798fc23708d1105d1ad5d446 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -514,6 +514,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) goto err_free; key = nmap->entry[i].key; *key = i; + memset(nmap->entry[i].value, 0, offmap->map.value_size); } } diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 15812001b3ff0b3b25165938df10c4374443ea7e..115044e21c742c390985af3d8cfab62490b81d53 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -27,7 +27,12 @@ #define MII_BCM7XXX_SHD_2_ADDR_CTRL 0xe #define MII_BCM7XXX_SHD_2_CTRL_STAT 0xf #define MII_BCM7XXX_SHD_2_BIAS_TRIM 0x1a +#define MII_BCM7XXX_SHD_3_PCS_CTRL 0x0 +#define MII_BCM7XXX_SHD_3_PCS_STATUS 0x1 +#define MII_BCM7XXX_SHD_3_EEE_CAP 0x2 #define MII_BCM7XXX_SHD_3_AN_EEE_ADV 0x3 +#define MII_BCM7XXX_SHD_3_EEE_LP 0x4 +#define MII_BCM7XXX_SHD_3_EEE_WK_ERR 0x5 #define MII_BCM7XXX_SHD_3_PCS_CTRL_2 0x6 #define MII_BCM7XXX_PCS_CTRL_2_DEF 0x4400 #define MII_BCM7XXX_SHD_3_AN_STAT 0xb @@ -216,25 +221,37 @@ static int bcm7xxx_28nm_resume(struct phy_device *phydev) return genphy_config_aneg(phydev); } -static int phy_set_clr_bits(struct phy_device *dev, int location, - int set_mask, int clr_mask) +static int __phy_set_clr_bits(struct phy_device *dev, int location, + int set_mask, int clr_mask) { int v, ret; - v = phy_read(dev, location); + v = __phy_read(dev, location); if (v < 0) return v; v &= ~clr_mask; v |= set_mask; - ret = phy_write(dev, location, v); + ret = __phy_write(dev, location, v); if (ret < 0) return ret; return v; } +static int phy_set_clr_bits(struct phy_device *dev, int location, + int set_mask, int clr_mask) +{ + int ret; + + mutex_lock(&dev->mdio.bus->mdio_lock); + ret = __phy_set_clr_bits(dev, location, set_mask, clr_mask); + mutex_unlock(&dev->mdio.bus->mdio_lock); + + return ret; +} + static int bcm7xxx_28nm_ephy_01_afe_config_init(struct phy_device *phydev) { int ret; @@ -398,6 +415,93 @@ static int bcm7xxx_28nm_ephy_config_init(struct phy_device *phydev) return bcm7xxx_28nm_ephy_apd_enable(phydev); } +#define MII_BCM7XXX_REG_INVALID 0xff + +static u8 bcm7xxx_28nm_ephy_regnum_to_shd(u16 regnum) +{ + switch (regnum) { + case MDIO_CTRL1: + return MII_BCM7XXX_SHD_3_PCS_CTRL; + case MDIO_STAT1: + return MII_BCM7XXX_SHD_3_PCS_STATUS; + case MDIO_PCS_EEE_ABLE: + return MII_BCM7XXX_SHD_3_EEE_CAP; + case MDIO_AN_EEE_ADV: + return MII_BCM7XXX_SHD_3_AN_EEE_ADV; + case MDIO_AN_EEE_LPABLE: + return MII_BCM7XXX_SHD_3_EEE_LP; + case MDIO_PCS_EEE_WK_ERR: + return MII_BCM7XXX_SHD_3_EEE_WK_ERR; + default: + return MII_BCM7XXX_REG_INVALID; + } +} + +static bool bcm7xxx_28nm_ephy_dev_valid(int devnum) +{ + return devnum == MDIO_MMD_AN || devnum == MDIO_MMD_PCS; +} + +static int bcm7xxx_28nm_ephy_read_mmd(struct phy_device *phydev, + int devnum, u16 regnum) +{ + u8 shd = bcm7xxx_28nm_ephy_regnum_to_shd(regnum); + int ret; + + if (!bcm7xxx_28nm_ephy_dev_valid(devnum) || + shd == MII_BCM7XXX_REG_INVALID) + return -EOPNOTSUPP; + + /* set shadow mode 2 */ + ret = __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, + MII_BCM7XXX_SHD_MODE_2, 0); + if (ret < 0) + return ret; + + /* Access the desired shadow register address */ + ret = __phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, shd); + if (ret < 0) + goto reset_shadow_mode; + + ret = __phy_read(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT); + +reset_shadow_mode: + /* reset shadow mode 2 */ + __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, + MII_BCM7XXX_SHD_MODE_2); + return ret; +} + +static int bcm7xxx_28nm_ephy_write_mmd(struct phy_device *phydev, + int devnum, u16 regnum, u16 val) +{ + u8 shd = bcm7xxx_28nm_ephy_regnum_to_shd(regnum); + int ret; + + if (!bcm7xxx_28nm_ephy_dev_valid(devnum) || + shd == MII_BCM7XXX_REG_INVALID) + return -EOPNOTSUPP; + + /* set shadow mode 2 */ + ret = __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, + MII_BCM7XXX_SHD_MODE_2, 0); + if (ret < 0) + return ret; + + /* Access the desired shadow register address */ + ret = __phy_write(phydev, MII_BCM7XXX_SHD_2_ADDR_CTRL, shd); + if (ret < 0) + goto reset_shadow_mode; + + /* Write the desired value in the shadow register */ + __phy_write(phydev, MII_BCM7XXX_SHD_2_CTRL_STAT, val); + +reset_shadow_mode: + /* reset shadow mode 2 */ + return __phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, + MII_BCM7XXX_SHD_MODE_2); +} + static int bcm7xxx_28nm_ephy_resume(struct phy_device *phydev) { int ret; @@ -595,6 +699,8 @@ static void bcm7xxx_28nm_remove(struct phy_device *phydev) .get_stats = bcm7xxx_28nm_get_phy_stats, \ .probe = bcm7xxx_28nm_probe, \ .remove = bcm7xxx_28nm_remove, \ + .read_mmd = bcm7xxx_28nm_ephy_read_mmd, \ + .write_mmd = bcm7xxx_28nm_ephy_write_mmd, \ } #define BCM7XXX_40NM_EPHY(_oui, _name) \ diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index dbed15dc0fe777468c8e0a92514d53c6df9a2484..644861366d5442c7aaca5c29c7801ae73a70e61e 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -789,6 +789,7 @@ static struct phy_driver broadcom_drivers[] = { .phy_id_mask = 0xfffffff0, .name = "Broadcom BCM54616S", /* PHY_GBIT_FEATURES */ + .soft_reset = genphy_soft_reset, .config_init = bcm54xx_config_init, .config_aneg = bcm54616s_config_aneg, .ack_interrupt = bcm_phy_ack_intr, diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index 7bf43031cea8c6c533b05e68a8ecaf6fb0029500..3d75b98f3051d9b5a1c49db8f2aa7e791220aa32 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -289,7 +289,7 @@ static int dp83822_config_intr(struct phy_device *phydev) if (err < 0) return err; - err = phy_write(phydev, MII_DP83822_MISR1, 0); + err = phy_write(phydev, MII_DP83822_MISR2, 0); if (err < 0) return err; diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 91616182c311f9733dc63bf2ff86f48caad4706f..cb9d1852a75c8c0f23d68583f3309230bb84c8e8 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -515,9 +515,9 @@ static int m88e1121_config_aneg_rgmii_delays(struct phy_device *phydev) else mscr = 0; - return phy_modify_paged(phydev, MII_MARVELL_MSCR_PAGE, - MII_88E1121_PHY_MSCR_REG, - MII_88E1121_PHY_MSCR_DELAY_MASK, mscr); + return phy_modify_paged_changed(phydev, MII_MARVELL_MSCR_PAGE, + MII_88E1121_PHY_MSCR_REG, + MII_88E1121_PHY_MSCR_DELAY_MASK, mscr); } static int m88e1121_config_aneg(struct phy_device *phydev) @@ -531,11 +531,13 @@ static int m88e1121_config_aneg(struct phy_device *phydev) return err; } + changed = err; + err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; - changed = err; + changed |= err; err = genphy_config_aneg(phydev); if (err < 0) @@ -1059,16 +1061,15 @@ static int m88e1118_config_aneg(struct phy_device *phydev) { int err; - err = genphy_soft_reset(phydev); + err = marvell_set_polarity(phydev, phydev->mdix_ctrl); if (err < 0) return err; - err = marvell_set_polarity(phydev, phydev->mdix_ctrl); + err = genphy_config_aneg(phydev); if (err < 0) return err; - err = genphy_config_aneg(phydev); - return 0; + return genphy_soft_reset(phydev); } static int m88e1118_config_init(struct phy_device *phydev) @@ -1090,6 +1091,12 @@ static int m88e1118_config_init(struct phy_device *phydev) if (err < 0) return err; + if (phy_interface_is_rgmii(phydev)) { + err = m88e1121_config_aneg_rgmii_delays(phydev); + if (err < 0) + return err; + } + /* Adjust LED Control */ if (phydev->dev_flags & MARVELL_PHY_M1118_DNS323_LEDS) err = phy_write(phydev, 0x10, 0x1100); diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index b848439fa837c607d52f3c01162d3134ad08d1a5..c416ab1d2b008899d85955435298acb31d14ca4f 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -534,6 +534,13 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) bus->dev.groups = NULL; dev_set_name(&bus->dev, "%s", bus->id); + /* We need to set state to MDIOBUS_UNREGISTERED to correctly release + * the device in mdiobus_free() + * + * State will be updated later in this function in case of success + */ + bus->state = MDIOBUS_UNREGISTERED; + err = device_register(&bus->dev); if (err) { pr_err("mii_bus %s failed to register\n", bus->id); @@ -581,7 +588,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner) mdiobus_setup_mdiodev_from_board_info(bus, mdiobus_create_device); bus->state = MDIOBUS_REGISTERED; - pr_info("%s: probed\n", bus->name); + dev_dbg(&bus->dev, "probed\n"); return 0; error: diff --git a/drivers/net/phy/mdio_device.c b/drivers/net/phy/mdio_device.c index 0837319a52d755a8c124435b9a05d730425d5bbc..797c41f5590efe6e557e4b0babf69a1ecfc08135 100644 --- a/drivers/net/phy/mdio_device.c +++ b/drivers/net/phy/mdio_device.c @@ -179,6 +179,16 @@ static int mdio_remove(struct device *dev) return 0; } +static void mdio_shutdown(struct device *dev) +{ + struct mdio_device *mdiodev = to_mdio_device(dev); + struct device_driver *drv = mdiodev->dev.driver; + struct mdio_driver *mdiodrv = to_mdio_driver(drv); + + if (mdiodrv->shutdown) + mdiodrv->shutdown(mdiodev); +} + /** * mdio_driver_register - register an mdio_driver with the MDIO layer * @drv: new mdio_driver to register @@ -193,6 +203,7 @@ int mdio_driver_register(struct mdio_driver *drv) mdiodrv->driver.bus = &mdio_bus_type; mdiodrv->driver.probe = mdio_probe; mdiodrv->driver.remove = mdio_remove; + mdiodrv->driver.shutdown = mdio_shutdown; retval = driver_register(&mdiodrv->driver); if (retval) { diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 69b20a466c61c1f1bc7ff933b00b0b916b86b9ad..92e94ac94a342cacac58dc56c6e98ce55e000bbe 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -732,9 +732,9 @@ static int ksz9031_config_init(struct phy_device *phydev) MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4, tx_data_skews, 4, &update); - if (update && phydev->interface != PHY_INTERFACE_MODE_RGMII) + if (update && !phy_interface_is_rgmii(phydev)) phydev_warn(phydev, - "*-skew-ps values should be used only with phy-mode = \"rgmii\"\n"); + "*-skew-ps values should be used only with RGMII PHY modes\n"); /* Silicon Errata Sheet (DS80000691D or DS80000692D): * When the device links in the 1000BASE-T slave mode only, @@ -1216,8 +1216,9 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, - .resume = genphy_resume, + /* No suspend/resume callbacks because of errata DS80000700A, + * receiver error following software power down. + */ }, { .phy_id = PHY_ID_KSZ8041RNLI, .phy_id_mask = MICREL_PHY_ID_MASK, diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 8d333d3084ed32a7c345656555b2b59376bb8f98..cccb83dae673ba1db8db6ea226fb51df49dd9c9a 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -161,11 +161,11 @@ static const struct phy_setting settings[] = { PHY_SETTING( 2500, FULL, 2500baseT_Full ), PHY_SETTING( 2500, FULL, 2500baseX_Full ), /* 1G */ - PHY_SETTING( 1000, FULL, 1000baseKX_Full ), PHY_SETTING( 1000, FULL, 1000baseT_Full ), PHY_SETTING( 1000, HALF, 1000baseT_Half ), PHY_SETTING( 1000, FULL, 1000baseT1_Full ), PHY_SETTING( 1000, FULL, 1000baseX_Full ), + PHY_SETTING( 1000, FULL, 1000baseKX_Full ), /* 100M */ PHY_SETTING( 100, FULL, 100baseT_Full ), PHY_SETTING( 100, FULL, 100baseT1_Full ), diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 28ddaad721ed1a4827e36e0015c8266965f3a24b..db7866b6f75259e7de9fd07c0490ccc69e211336 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -260,62 +260,10 @@ static void phy_sanitize_settings(struct phy_device *phydev) } } -int phy_ethtool_ksettings_set(struct phy_device *phydev, - const struct ethtool_link_ksettings *cmd) -{ - __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); - u8 autoneg = cmd->base.autoneg; - u8 duplex = cmd->base.duplex; - u32 speed = cmd->base.speed; - - if (cmd->base.phy_address != phydev->mdio.addr) - return -EINVAL; - - linkmode_copy(advertising, cmd->link_modes.advertising); - - /* We make sure that we don't pass unsupported values in to the PHY */ - linkmode_and(advertising, advertising, phydev->supported); - - /* Verify the settings we care about. */ - if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) - return -EINVAL; - - if (autoneg == AUTONEG_ENABLE && linkmode_empty(advertising)) - return -EINVAL; - - if (autoneg == AUTONEG_DISABLE && - ((speed != SPEED_1000 && - speed != SPEED_100 && - speed != SPEED_10) || - (duplex != DUPLEX_HALF && - duplex != DUPLEX_FULL))) - return -EINVAL; - - phydev->autoneg = autoneg; - - if (autoneg == AUTONEG_DISABLE) { - phydev->speed = speed; - phydev->duplex = duplex; - } - - linkmode_copy(phydev->advertising, advertising); - - linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, - phydev->advertising, autoneg == AUTONEG_ENABLE); - - phydev->master_slave_set = cmd->base.master_slave_cfg; - phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; - - /* Restart the PHY */ - phy_start_aneg(phydev); - - return 0; -} -EXPORT_SYMBOL(phy_ethtool_ksettings_set); - void phy_ethtool_ksettings_get(struct phy_device *phydev, struct ethtool_link_ksettings *cmd) { + mutex_lock(&phydev->lock); linkmode_copy(cmd->link_modes.supported, phydev->supported); linkmode_copy(cmd->link_modes.advertising, phydev->advertising); linkmode_copy(cmd->link_modes.lp_advertising, phydev->lp_advertising); @@ -334,6 +282,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, cmd->base.autoneg = phydev->autoneg; cmd->base.eth_tp_mdix_ctrl = phydev->mdix_ctrl; cmd->base.eth_tp_mdix = phydev->mdix; + mutex_unlock(&phydev->lock); } EXPORT_SYMBOL(phy_ethtool_ksettings_get); @@ -767,7 +716,7 @@ static int phy_check_link_status(struct phy_device *phydev) } /** - * phy_start_aneg - start auto-negotiation for this PHY device + * _phy_start_aneg - start auto-negotiation for this PHY device * @phydev: the phy_device struct * * Description: Sanitizes the settings (if we're not autonegotiating @@ -775,25 +724,43 @@ static int phy_check_link_status(struct phy_device *phydev) * If the PHYCONTROL Layer is operating, we change the state to * reflect the beginning of Auto-negotiation or forcing. */ -int phy_start_aneg(struct phy_device *phydev) +static int _phy_start_aneg(struct phy_device *phydev) { int err; + lockdep_assert_held(&phydev->lock); + if (!phydev->drv) return -EIO; - mutex_lock(&phydev->lock); - if (AUTONEG_DISABLE == phydev->autoneg) phy_sanitize_settings(phydev); err = phy_config_aneg(phydev); if (err < 0) - goto out_unlock; + return err; if (phy_is_started(phydev)) err = phy_check_link_status(phydev); -out_unlock: + + return err; +} + +/** + * phy_start_aneg - start auto-negotiation for this PHY device + * @phydev: the phy_device struct + * + * Description: Sanitizes the settings (if we're not autonegotiating + * them), and then calls the driver's config_aneg function. + * If the PHYCONTROL Layer is operating, we change the state to + * reflect the beginning of Auto-negotiation or forcing. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + mutex_lock(&phydev->lock); + err = _phy_start_aneg(phydev); mutex_unlock(&phydev->lock); return err; @@ -816,6 +783,66 @@ static int phy_poll_aneg_done(struct phy_device *phydev) return ret < 0 ? ret : 0; } +int phy_ethtool_ksettings_set(struct phy_device *phydev, + const struct ethtool_link_ksettings *cmd) +{ + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertising); + u8 autoneg = cmd->base.autoneg; + u8 duplex = cmd->base.duplex; + u32 speed = cmd->base.speed; + + if (cmd->base.phy_address != phydev->mdio.addr) + return -EINVAL; + + linkmode_copy(advertising, cmd->link_modes.advertising); + + /* We make sure that we don't pass unsupported values in to the PHY */ + linkmode_and(advertising, advertising, phydev->supported); + + /* Verify the settings we care about. */ + if (autoneg != AUTONEG_ENABLE && autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (autoneg == AUTONEG_ENABLE && linkmode_empty(advertising)) + return -EINVAL; + + if (autoneg == AUTONEG_DISABLE && + ((speed != SPEED_1000 && + speed != SPEED_100 && + speed != SPEED_10) || + (duplex != DUPLEX_HALF && + duplex != DUPLEX_FULL))) + return -EINVAL; + + mutex_lock(&phydev->lock); + phydev->autoneg = autoneg; + + if (autoneg == AUTONEG_DISABLE) { + phydev->speed = speed; + phydev->duplex = duplex; + } + + linkmode_copy(phydev->advertising, advertising); + + linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, + phydev->advertising, autoneg == AUTONEG_ENABLE); + + phydev->master_slave_set = cmd->base.master_slave_cfg; + phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; + + /* Restart the PHY */ + if (phy_is_started(phydev)) { + phydev->state = PHY_UP; + phy_trigger_machine(phydev); + } else { + _phy_start_aneg(phydev); + } + + mutex_unlock(&phydev->lock); + return 0; +} +EXPORT_SYMBOL(phy_ethtool_ksettings_set); + /** * phy_speed_down - set speed to lowest speed supported by both link partners * @phydev: the phy_device struct diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 85f3cde5ffd09d9bc05956b47f23339b9c1f891c..d2f6d8107595a673d11b17e42ed98dcf2c27bfdc 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1682,6 +1682,9 @@ void phy_detach(struct phy_device *phydev) phy_driver_is_genphy_10g(phydev)) device_release_driver(&phydev->mdio.dev); + /* Assert the reset signal */ + phy_device_reset(phydev, 1); + /* * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. @@ -1693,9 +1696,6 @@ void phy_detach(struct phy_device *phydev) ndev_owner = dev->dev.parent->driver->owner; if (ndev_owner != bus->owner) module_put(bus->owner); - - /* Assert the reset signal */ - phy_device_reset(phydev, 1); } EXPORT_SYMBOL(phy_detach); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 025c3246f33967dceea18b782aa1a9184ea69dab..57b1b138522e0c3e079900fcb49efef6a3eda5db 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -644,6 +644,7 @@ static void phylink_resolve(struct work_struct *w) struct phylink_link_state link_state; struct net_device *ndev = pl->netdev; bool mac_config = false; + bool retrigger = false; bool cur_link_state; mutex_lock(&pl->state_mutex); @@ -657,6 +658,7 @@ static void phylink_resolve(struct work_struct *w) link_state.link = false; } else if (pl->mac_link_dropped) { link_state.link = false; + retrigger = true; } else { switch (pl->cur_link_an_mode) { case MLO_AN_PHY: @@ -673,6 +675,19 @@ static void phylink_resolve(struct work_struct *w) case MLO_AN_INBAND: phylink_mac_pcs_get_state(pl, &link_state); + /* The PCS may have a latching link-fail indicator. + * If the link was up, bring the link down and + * re-trigger the resolve. Otherwise, re-read the + * PCS state to get the current status of the link. + */ + if (!link_state.link) { + if (cur_link_state) + retrigger = true; + else + phylink_mac_pcs_get_state(pl, + &link_state); + } + /* If we have a phy, the "up" state is the union of * both the PHY and the MAC */ if (pl->phydev) @@ -680,6 +695,15 @@ static void phylink_resolve(struct work_struct *w) /* Only update if the PHY link is up */ if (pl->phydev && pl->phy_state.link) { + /* If the interface has changed, force a + * link down event if the link isn't already + * down, and re-resolve. + */ + if (link_state.interface != + pl->phy_state.interface) { + retrigger = true; + link_state.link = false; + } link_state.interface = pl->phy_state.interface; /* If we have a PHY, we need to update with @@ -721,7 +745,7 @@ static void phylink_resolve(struct work_struct *w) else phylink_link_up(pl, link_state); } - if (!link_state.link && pl->mac_link_dropped) { + if (!link_state.link && retrigger) { pl->mac_link_dropped = false; queue_work(system_power_efficient_wq, &pl->resolve); } @@ -1610,7 +1634,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl, return -EOPNOTSUPP; if (!phylink_test(pl->supported, Asym_Pause) && - !pause->autoneg && pause->rx_pause != pause->tx_pause) + pause->rx_pause != pause->tx_pause) return -EINVAL; pause_state = 0; diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 4cf874fb5c5b414bc76b190d1d355a75df5aff76..a05d8372669c15b179fcd6abb55e794b9ce4909c 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -609,6 +609,11 @@ struct sfp_bus *sfp_bus_find_fwnode(struct fwnode_handle *fwnode) else if (ret < 0) return ERR_PTR(ret); + if (!fwnode_device_is_available(ref.fwnode)) { + fwnode_handle_put(ref.fwnode); + return NULL; + } + bus = sfp_bus_get(ref.fwnode); fwnode_handle_put(ref.fwnode); if (!bus) diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 2fff62695455dbe6ef248fa1b0212e61ea21abd5..efffa65f8214313efde976e20ed95d85f191359e 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -133,7 +133,7 @@ static const char * const sm_state_strings[] = { [SFP_S_LINK_UP] = "link_up", [SFP_S_TX_FAULT] = "tx_fault", [SFP_S_REINIT] = "reinit", - [SFP_S_TX_DISABLE] = "rx_disable", + [SFP_S_TX_DISABLE] = "tx_disable", }; static const char *sm_state_to_str(unsigned short sm_state) @@ -1589,17 +1589,20 @@ static int sfp_sm_probe_for_phy(struct sfp *sfp) static int sfp_module_parse_power(struct sfp *sfp) { u32 power_mW = 1000; + bool supports_a2; if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_POWER_DECL)) power_mW = 1500; if (sfp->id.ext.options & cpu_to_be16(SFP_OPTIONS_HIGH_POWER_LEVEL)) power_mW = 2000; + supports_a2 = sfp->id.ext.sff8472_compliance != + SFP_SFF8472_COMPLIANCE_NONE || + sfp->id.ext.diagmon & SFP_DIAGMON_DDM; + if (power_mW > sfp->max_power_mW) { /* Module power specification exceeds the allowed maximum. */ - if (sfp->id.ext.sff8472_compliance == - SFP_SFF8472_COMPLIANCE_NONE && - !(sfp->id.ext.diagmon & SFP_DIAGMON_DDM)) { + if (!supports_a2) { /* The module appears not to implement bus address * 0xa2, so assume that the module powers up in the * indicated mode. @@ -1616,11 +1619,25 @@ static int sfp_module_parse_power(struct sfp *sfp) } } + if (power_mW <= 1000) { + /* Modules below 1W do not require a power change sequence */ + sfp->module_power_mW = power_mW; + return 0; + } + + if (!supports_a2) { + /* The module power level is below the host maximum and the + * module appears not to implement bus address 0xa2, so assume + * that the module powers up in the indicated mode. + */ + return 0; + } + /* If the module requires a higher power mode, but also requires * an address change sequence, warn the user that the module may * not be functional. */ - if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE && power_mW > 1000) { + if (sfp->id.ext.diagmon & SFP_DIAGMON_ADDRMODE) { dev_warn(sfp->dev, "Address Change Sequence not supported but module requires %u.%uW, module may not be functional\n", power_mW / 1000, (power_mW / 100) % 10); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index 33b2e0fb68bbbae1f2c27564e425764672a14a3c..2b9815ec4a622ff9b03805ae06f5daec40fa151e 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -69,6 +69,8 @@ #define MPHDRLEN 6 /* multilink protocol header length */ #define MPHDRLEN_SSN 4 /* ditto with short sequence numbers */ +#define PPP_PROTO_LEN 2 + /* * An instance of /dev/ppp can be associated with either a ppp * interface unit or a ppp channel. In both cases, file->private_data @@ -496,6 +498,9 @@ static ssize_t ppp_write(struct file *file, const char __user *buf, if (!pf) return -ENXIO; + /* All PPP packets should start with the 2-byte protocol */ + if (count < PPP_PROTO_LEN) + return -EINVAL; ret = -ENOMEM; skb = alloc_skb(count + pf->hdrlen, GFP_KERNEL); if (!skb) @@ -1632,7 +1637,7 @@ ppp_send_frame(struct ppp *ppp, struct sk_buff *skb) } ++ppp->stats64.tx_packets; - ppp->stats64.tx_bytes += skb->len - 2; + ppp->stats64.tx_bytes += skb->len - PPP_PROTO_LEN; switch (proto) { case PPP_IP: diff --git a/drivers/net/tun.c b/drivers/net/tun.c index c671d8e257741f3c66c7697d8e5f38f931fe5369..ffbc7eda95eed1614edaa916761c6fd870f6c599 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1021,6 +1021,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); int txq = skb->queue_mapping; + struct netdev_queue *queue; struct tun_file *tfile; int len = skb->len; @@ -1065,6 +1066,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) if (ptr_ring_produce(&tfile->tx_ring, skb)) goto drop; + /* NETIF_F_LLTX requires to do our own update of trans_start */ + queue = netdev_get_tx_queue(dev, txq); + queue->trans_start = jiffies; + /* Notify and wake up reader process */ if (tfile->flags & TUN_FASYNC) kill_fasync(&tfile->fasync, SIGIO, POLL_IN); diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index b46993d5f9978bd787d7ba3e29389e9daa7c9782..867ff2ee8ecf378a9bea3b8831385ca541f9a1f1 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -99,6 +99,10 @@ config USB_RTL8150 config USB_RTL8152 tristate "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters" select MII + select CRC32 + select CRYPTO + select CRYPTO_HASH + select CRYPTO_SHA256 help This option adds support for Realtek RTL8152 based USB 2.0 10/100 Ethernet adapters and RTL8153 based USB 3.0 10/100/1000 @@ -113,6 +117,7 @@ config USB_LAN78XX select PHYLIB select MICROCHIP_PHY select FIXED_PHY + select CRC32 help This option adds support for Microchip LAN78XX based USB 2 & USB 3 10/100/1000 Ethernet adapters. diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index b77b0a33d697dd3295b44a5543707db21649d627..0b0cbcee1920bdabe037345450c89e22e3853fa2 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1467,58 +1467,68 @@ static int ax88179_rx_fixup(struct usbnet *dev, struct sk_buff *skb) u16 hdr_off; u32 *pkt_hdr; - /* This check is no longer done by usbnet */ - if (skb->len < dev->net->hard_header_len) + /* At the end of the SKB, there's a header telling us how many packets + * are bundled into this buffer and where we can find an array of + * per-packet metadata (which contains elements encoded into u16). + */ + if (skb->len < 4) return 0; - skb_trim(skb, skb->len - 4); rx_hdr = get_unaligned_le32(skb_tail_pointer(skb)); - pkt_cnt = (u16)rx_hdr; hdr_off = (u16)(rx_hdr >> 16); + + if (pkt_cnt == 0) + return 0; + + /* Make sure that the bounds of the metadata array are inside the SKB + * (and in front of the counter at the end). + */ + if (pkt_cnt * 2 + hdr_off > skb->len) + return 0; pkt_hdr = (u32 *)(skb->data + hdr_off); - while (pkt_cnt--) { + /* Packets must not overlap the metadata array */ + skb_trim(skb, hdr_off); + + for (; ; pkt_cnt--, pkt_hdr++) { u16 pkt_len; le32_to_cpus(pkt_hdr); pkt_len = (*pkt_hdr >> 16) & 0x1fff; - /* Check CRC or runt packet */ - if ((*pkt_hdr & AX_RXHDR_CRC_ERR) || - (*pkt_hdr & AX_RXHDR_DROP_ERR)) { - skb_pull(skb, (pkt_len + 7) & 0xFFF8); - pkt_hdr++; - continue; - } - - if (pkt_cnt == 0) { - skb->len = pkt_len; - /* Skip IP alignment pseudo header */ - skb_pull(skb, 2); - skb_set_tail_pointer(skb, skb->len); - skb->truesize = pkt_len + sizeof(struct sk_buff); - ax88179_rx_checksum(skb, pkt_hdr); - return 1; - } + if (pkt_len > skb->len) + return 0; - ax_skb = skb_clone(skb, GFP_ATOMIC); - if (ax_skb) { + /* Check CRC or runt packet */ + if (((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) == 0) && + pkt_len >= 2 + ETH_HLEN) { + bool last = (pkt_cnt == 0); + + if (last) { + ax_skb = skb; + } else { + ax_skb = skb_clone(skb, GFP_ATOMIC); + if (!ax_skb) + return 0; + } ax_skb->len = pkt_len; /* Skip IP alignment pseudo header */ skb_pull(ax_skb, 2); skb_set_tail_pointer(ax_skb, ax_skb->len); ax_skb->truesize = pkt_len + sizeof(struct sk_buff); ax88179_rx_checksum(ax_skb, pkt_hdr); + + if (last) + return 1; + usbnet_skb_return(dev, ax_skb); - } else { - return 0; } - skb_pull(skb, (pkt_len + 7) & 0xFFF8); - pkt_hdr++; + /* Trim this packet away from the SKB */ + if (!skb_pull(skb, (pkt_len + 7) & 0xFFF8)) + return 0; } - return 1; } static struct sk_buff * diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 6aaa0675c28a397c30991375f22256e9209bb2a3..43ddbe61dc58e140015e43317efcd7893fdb1693 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -570,6 +570,11 @@ static const struct usb_device_id products[] = { .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \ .bInterfaceProtocol = USB_CDC_PROTO_NONE +#define ZAURUS_FAKE_INTERFACE \ + .bInterfaceClass = USB_CLASS_COMM, \ + .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \ + .bInterfaceProtocol = USB_CDC_PROTO_NONE + /* SA-1100 based Sharp Zaurus ("collie"), or compatible; * wire-incompatible with true CDC Ethernet implementations. * (And, it seems, needlessly so...) @@ -623,6 +628,13 @@ static const struct usb_device_id products[] = { .idProduct = 0x9032, /* SL-6000 */ ZAURUS_MASTER_INTERFACE, .driver_info = 0, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x9032, /* SL-6000 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = 0, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, diff --git a/drivers/net/usb/cdc_mbim.c b/drivers/net/usb/cdc_mbim.c index 77ac5a721e7b6e6d8e1b31b3fc11547bb793a2d3..414341c9cf5ae17dc4f0ff14f02f37468d57c78a 100644 --- a/drivers/net/usb/cdc_mbim.c +++ b/drivers/net/usb/cdc_mbim.c @@ -658,6 +658,11 @@ static const struct usb_device_id mbim_devs[] = { .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, }, + /* Telit FN990 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x1bc7, 0x1071, USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), + .driver_info = (unsigned long)&cdc_mbim_info_avoid_altsetting_toggle, + }, + /* default entry */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_MBIM, USB_CDC_PROTO_NONE), .driver_info = (unsigned long)&cdc_mbim_info_zlp, diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 016d058f6967f5ece88fd0e2a920338e8f071fc1..913d91381f23cc63bc009f378d4489c1d3d21147 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -181,6 +181,8 @@ static u32 cdc_ncm_check_tx_max(struct usbnet *dev, u32 new_tx) min = ctx->max_datagram_size + ctx->max_ndp_size + sizeof(struct usb_cdc_ncm_nth32); max = min_t(u32, CDC_NCM_NTB_MAX_SIZE_TX, le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)); + if (max == 0) + max = CDC_NCM_NTB_MAX_SIZE_TX; /* dwNtbOutMaxSize not set */ /* some devices set dwNtbOutMaxSize too low for the above default */ min = min(min, max); @@ -1700,10 +1702,10 @@ int cdc_ncm_rx_fixup(struct usbnet *dev, struct sk_buff *skb_in) { struct sk_buff *skb; struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; - int len; + unsigned int len; int nframes; int x; - int offset; + unsigned int offset; union { struct usb_cdc_ncm_ndp16 *ndp16; struct usb_cdc_ncm_ndp32 *ndp32; @@ -1775,8 +1777,8 @@ next_ndp: break; } - /* sanity checking */ - if (((offset + len) > skb_in->len) || + /* sanity checking - watch out for integer wrap*/ + if ((offset > skb_in->len) || (len > skb_in->len - offset) || (len > ctx->rx_max) || (len < ETH_HLEN)) { netif_dbg(dev, rx_err, dev->net, "invalid frame detected (ignored) offset[%u]=%u, length=%u, skb=%p\n", diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index df8d4c1e5be74e6a655ef540a66c87373d401c07..db484215a78c890a7a3f0dd5501334df102e7a1e 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2354,7 +2354,7 @@ static int remove_net_device(struct hso_device *hso_dev) } /* Frees our network device */ -static void hso_free_net_device(struct hso_device *hso_dev, bool bailout) +static void hso_free_net_device(struct hso_device *hso_dev) { int i; struct hso_net *hso_net = dev2net(hso_dev); @@ -2377,7 +2377,7 @@ static void hso_free_net_device(struct hso_device *hso_dev, bool bailout) kfree(hso_net->mux_bulk_tx_buf); hso_net->mux_bulk_tx_buf = NULL; - if (hso_net->net && !bailout) + if (hso_net->net) free_netdev(hso_net->net); kfree(hso_dev); @@ -3137,7 +3137,7 @@ static void hso_free_interface(struct usb_interface *interface) rfkill_unregister(rfk); rfkill_destroy(rfk); } - hso_free_net_device(network_table[i], false); + hso_free_net_device(network_table[i]); } } } diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 207e59e74935aaf4b23add136b611d3138a6349b..06d9f19ca142a69a7c70d4200cd0d7455cbb7d4e 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -121,7 +121,7 @@ static int ipheth_alloc_urbs(struct ipheth_device *iphone) if (tx_buf == NULL) goto free_rx_urb; - rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE, + rx_buf = usb_alloc_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, GFP_KERNEL, &rx_urb->transfer_dma); if (rx_buf == NULL) goto free_tx_buf; @@ -146,7 +146,7 @@ error_nomem: static void ipheth_free_urbs(struct ipheth_device *iphone) { - usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->rx_buf, + usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, iphone->rx_buf, iphone->rx_urb->transfer_dma); usb_free_coherent(iphone->udev, IPHETH_BUF_SIZE, iphone->tx_buf, iphone->tx_urb->transfer_dma); @@ -317,7 +317,7 @@ static int ipheth_rx_submit(struct ipheth_device *dev, gfp_t mem_flags) usb_fill_bulk_urb(dev->rx_urb, udev, usb_rcvbulkpipe(udev, dev->bulk_in), - dev->rx_buf, IPHETH_BUF_SIZE, + dev->rx_buf, IPHETH_BUF_SIZE + IPHETH_IP_ALIGN, ipheth_rcvbulk_callback, dev); dev->rx_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index a5cd42bae962174f6d17bbf8c6891eac1fb09b22..6f7b70522d926b610d273a36a57e83efa032f15a 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -64,6 +64,8 @@ #define LAN7801_USB_PRODUCT_ID (0x7801) #define LAN78XX_EEPROM_MAGIC (0x78A5) #define LAN78XX_OTP_MAGIC (0x78F3) +#define AT29M2AF_USB_VENDOR_ID (0x07C9) +#define AT29M2AF_USB_PRODUCT_ID (0x0012) #define MII_READ 1 #define MII_WRITE 0 @@ -2128,7 +2130,7 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) if (dev->domain_data.phyirq > 0) phydev->irq = dev->domain_data.phyirq; else - phydev->irq = 0; + phydev->irq = PHY_POLL; netdev_dbg(dev->net, "phydev->irq = %d\n", phydev->irq); /* set to AUTOMDIX */ @@ -3745,6 +3747,12 @@ static int lan78xx_probe(struct usb_interface *intf, dev->maxpacket = usb_maxpacket(dev->udev, dev->pipe_out, 1); + /* Reject broken descriptors. */ + if (dev->maxpacket == 0) { + ret = -ENODEV; + goto out4; + } + /* driver requires remote-wakeup capability during autosuspend. */ intf->needs_remote_wakeup = 1; @@ -4136,6 +4144,10 @@ static const struct usb_device_id products[] = { /* LAN7801 USB Gigabit Ethernet Device */ USB_DEVICE(LAN78XX_USB_VENDOR_ID, LAN7801_USB_PRODUCT_ID), }, + { + /* ATM2-AF USB Gigabit Ethernet Device */ + USB_DEVICE(AT29M2AF_USB_VENDOR_ID, AT29M2AF_USB_PRODUCT_ID), + }, {}, }; MODULE_DEVICE_TABLE(usb, products); diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index 09bfa6a4dfbc17ecd40bc1fd37f23a8e88b75afe..7e40e2e2f37230c43ac875997485e23741abb420 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -108,8 +108,16 @@ static const char driver_name[] = "MOSCHIP usb-ethernet driver"; static int mcs7830_get_reg(struct usbnet *dev, u16 index, u16 size, void *data) { - return usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ, - 0x0000, index, data, size); + int ret; + + ret = usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ, + 0x0000, index, data, size); + if (ret < 0) + return ret; + else if (ret < size) + return -ENODATA; + + return ret; } static int mcs7830_set_reg(struct usbnet *dev, u16 index, u16 size, const void *data) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 2a748a924f838dad5e32e48a87c80df07899994c..138279bbb544b35aa5c1abbd9d9e83e86fef2d51 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -518,11 +518,11 @@ static void read_bulk_callback(struct urb *urb) goto goon; rx_status = buf[count - 2]; - if (rx_status & 0x1e) { + if (rx_status & 0x1c) { netif_dbg(pegasus, rx_err, net, "RX packet error %x\n", rx_status); net->stats.rx_errors++; - if (rx_status & 0x06) /* long or runt */ + if (rx_status & 0x04) /* runt */ net->stats.rx_length_errors++; if (rx_status & 0x08) net->stats.rx_crc_errors++; diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 6e033ba717030e9c26a366b21f0bb74e2c078bc3..597766d14563e07242d76a5ae19b2321d0f5f3c6 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1333,6 +1333,8 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x413c, 0x81d7, 0)}, /* Dell Wireless 5821e */ {QMI_FIXED_INTF(0x413c, 0x81d7, 1)}, /* Dell Wireless 5821e preproduction config */ {QMI_FIXED_INTF(0x413c, 0x81e0, 0)}, /* Dell Wireless 5821e with eSIM support*/ + {QMI_FIXED_INTF(0x413c, 0x81e4, 0)}, /* Dell Wireless 5829e with eSIM support*/ + {QMI_FIXED_INTF(0x413c, 0x81e6, 0)}, /* Dell Wireless 5829e */ {QMI_FIXED_INTF(0x03f0, 0x4e1d, 8)}, /* HP lt4111 LTE/EV-DO/HSPA+ Gobi 4G Module */ {QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)}, /* HP lt4120 Snapdragon X5 LTE */ {QMI_FIXED_INTF(0x22de, 0x9061, 3)}, /* WeTelecom WPD-600N */ diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index f9b359d4e29398379e95063dbd88b3d44e9acbc3..1505fe3f87ed395a62b2a1d71e60fe98581a6c24 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -608,6 +608,11 @@ static const struct usb_device_id products [] = { USB_DEVICE_AND_INTERFACE_INFO(0x1630, 0x0042, USB_CLASS_COMM, 2 /* ACM */, 0x0ff), .driver_info = (unsigned long) &rndis_poll_status_info, +}, { + /* Hytera Communications DMR radios' "Radio to PC Network" */ + USB_VENDOR_AND_INTERFACE_INFO(0x238b, + USB_CLASS_COMM, 2 /* ACM */, 0x0ff), + .driver_info = (unsigned long)&rndis_info, }, { /* RNDIS is MSFT's un-official variant of CDC ACM */ USB_INTERFACE_INFO(USB_CLASS_COMM, 2 /* ACM */, 0x0ff), diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index ea0d5f04dc3a80c2703c03c92a57bd0de903b626..465e11dcdf12938d938a966b560a2fbbe20d6bd5 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -1178,7 +1178,10 @@ static void smsc95xx_unbind(struct usbnet *dev, struct usb_interface *intf) static void smsc95xx_handle_link_change(struct net_device *net) { + struct usbnet *dev = netdev_priv(net); + phy_print_status(net->phydev); + usbnet_defer_kevent(dev, EVENT_LINK_CHANGE); } static int smsc95xx_start_phy(struct usbnet *dev) diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index e04c8054c2cf3c7c3f5c3a065b48e99d299b5501..fce6713e970badda0337a49a518c07d88ecd5fa9 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -410,7 +410,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* ignore the CRC length */ len = (skb->data[1] | (skb->data[2] << 8)) - 4; - if (len > ETH_FRAME_LEN) + if (len > ETH_FRAME_LEN || len > skb->len) return 0; /* the last packet of current skb */ diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 6062dc27870e728ceb0a2ae559bae9782bee237b..402390b1a66b5ba5420df24259a8dac073da679a 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -1755,6 +1755,11 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod) if (!dev->rx_urb_size) dev->rx_urb_size = dev->hard_mtu; dev->maxpacket = usb_maxpacket (dev->udev, dev->out, 1); + if (dev->maxpacket == 0) { + /* that is a broken device */ + status = -ENODEV; + goto out4; + } /* let userspace know we have a random address */ if (ether_addr_equal(net->dev_addr, node_id)) diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c index 8e717a0b559b3aac42e94c0ea1c776737c79527b..7984f2157d222dbe2971702221dac46b2a370721 100644 --- a/drivers/net/usb/zaurus.c +++ b/drivers/net/usb/zaurus.c @@ -256,6 +256,11 @@ static const struct usb_device_id products [] = { .bInterfaceSubClass = USB_CDC_SUBCLASS_ETHERNET, \ .bInterfaceProtocol = USB_CDC_PROTO_NONE +#define ZAURUS_FAKE_INTERFACE \ + .bInterfaceClass = USB_CLASS_COMM, \ + .bInterfaceSubClass = USB_CDC_SUBCLASS_MDLM, \ + .bInterfaceProtocol = USB_CDC_PROTO_NONE + /* SA-1100 based Sharp Zaurus ("collie"), or compatible. */ { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO @@ -313,6 +318,13 @@ static const struct usb_device_id products [] = { .idProduct = 0x9032, /* SL-6000 */ ZAURUS_MASTER_INTERFACE, .driver_info = ZAURUS_PXA_INFO, +}, { + .match_flags = USB_DEVICE_ID_MATCH_INT_INFO + | USB_DEVICE_ID_MATCH_DEVICE, + .idVendor = 0x04DD, + .idProduct = 0x9032, /* SL-6000 */ + ZAURUS_FAKE_INTERFACE, + .driver_info = (unsigned long)&bogus_mdlm_info, }, { .match_flags = USB_DEVICE_ID_MATCH_INT_INFO | USB_DEVICE_ID_MATCH_DEVICE, diff --git a/drivers/net/veth.c b/drivers/net/veth.c index be18b243642f0f1c4ac4f296a59f2ec9f07a410f..f7e3eb309a26ede481f7d257bb4e10e94f53a331 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -256,9 +256,10 @@ static void __veth_xdp_flush(struct veth_rq *rq) { /* Write ptr_ring before reading rx_notify_masked */ smp_mb(); - if (!rq->rx_notify_masked) { - rq->rx_notify_masked = true; - napi_schedule(&rq->xdp_napi); + if (!READ_ONCE(rq->rx_notify_masked) && + napi_schedule_prep(&rq->xdp_napi)) { + WRITE_ONCE(rq->rx_notify_masked, true); + __napi_schedule(&rq->xdp_napi); } } @@ -301,7 +302,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) if (rxq < rcv->real_num_rx_queues) { rq = &rcv_priv->rq[rxq]; rcv_xdp = rcu_access_pointer(rq->xdp_prog); - skb_record_rx_queue(skb, rxq); } skb_tx_timestamp(skb); @@ -853,8 +853,10 @@ static int veth_poll(struct napi_struct *napi, int budget) /* Write rx_notify_masked before reading ptr_ring */ smp_store_mb(rq->rx_notify_masked, false); if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { - rq->rx_notify_masked = true; - napi_schedule(&rq->xdp_napi); + if (napi_schedule_prep(&rq->xdp_napi)) { + WRITE_ONCE(rq->rx_notify_masked, true); + __napi_schedule(&rq->xdp_napi); + } } } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 336504b7531d9099eac648abf483511c696e4061..932a39945cc625fea6b5f4e65dae96f4b65f81f5 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -3765,7 +3765,6 @@ vmxnet3_suspend(struct device *device) vmxnet3_free_intr_resources(adapter); netif_device_detach(netdev); - netif_tx_stop_all_queues(netdev); /* Create wake-up filters. */ pmConf = adapter->pm_conf; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index d406da82b4fb86f84eb270fdd8c615462b91598e..8ab0b5a8dfeff378da9e9d3fa70c91e24cdfbaab 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -34,6 +34,7 @@ #include #include #include +#include #define DRV_NAME "vrf" #define DRV_VERSION "1.1" @@ -423,12 +424,26 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, return NETDEV_TX_OK; } +static void vrf_nf_set_untracked(struct sk_buff *skb) +{ + if (skb_get_nfct(skb) == 0) + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); +} + +static void vrf_nf_reset_ct(struct sk_buff *skb) +{ + if (skb_get_nfct(skb) == IP_CT_UNTRACKED) + nf_reset_ct(skb); +} + #if IS_ENABLED(CONFIG_IPV6) static int vrf_ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; + vrf_nf_reset_ct(skb); + err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); @@ -482,6 +497,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); ret = vrf_ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(ret))) dev->stats.tx_errors++; @@ -508,6 +524,8 @@ static int vrf_ip_local_out(struct net *net, struct sock *sk, { int err; + vrf_nf_reset_ct(skb); + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); if (likely(err == 1)) @@ -563,6 +581,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, RT_SCOPE_LINK); } + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; @@ -627,8 +646,7 @@ static void vrf_finish_direct(struct sk_buff *skb) skb_pull(skb, ETH_HLEN); } - /* reset skb device */ - nf_reset_ct(skb); + vrf_nf_reset_ct(skb); } #if IS_ENABLED(CONFIG_IPV6) @@ -642,7 +660,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk, struct neighbour *neigh; int ret; - nf_reset_ct(skb); + vrf_nf_reset_ct(skb); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; @@ -773,6 +791,8 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; + vrf_nf_set_untracked(skb); + if (qdisc_tx_is_default(vrf_dev) || IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); @@ -860,7 +880,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s bool is_v6gw = false; int ret = -EINVAL; - nf_reset_ct(skb); + vrf_nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { @@ -1009,6 +1029,8 @@ static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; + vrf_nf_set_untracked(skb); + if (qdisc_tx_is_default(vrf_dev) || IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); @@ -1313,8 +1335,6 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); - nf_reset_ct(skb); - /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. * For strict packets with a source LLA, determine the dst using the @@ -1371,8 +1391,6 @@ static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; - nf_reset_ct(skb); - if (ipv4_is_multicast(ip_hdr(skb)->daddr)) goto out; diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index b7197e80f2264053d4e4e28bdf69a33038335294..9a4c8ff32d9dd9407ec50591a44008570f4e7411 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -163,7 +163,7 @@ static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key, return exact; } -static inline void connect_node(struct allowedips_node **parent, u8 bit, struct allowedips_node *node) +static inline void connect_node(struct allowedips_node __rcu **parent, u8 bit, struct allowedips_node *node) { node->parent_bit_packed = (unsigned long)parent | bit; rcu_assign_pointer(*parent, node); diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index e01ab0742738a369e847aa55bc8b9f02e10b00d5..e189eb95678d81567eb26b22bf382ad347f7c259 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -98,6 +98,7 @@ static int wg_stop(struct net_device *dev) { struct wg_device *wg = netdev_priv(dev); struct wg_peer *peer; + struct sk_buff *skb; mutex_lock(&wg->device_update_lock); list_for_each_entry(peer, &wg->peer_list, peer_list) { @@ -108,7 +109,9 @@ static int wg_stop(struct net_device *dev) wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake); } mutex_unlock(&wg->device_update_lock); - skb_queue_purge(&wg->incoming_handshakes); + while ((skb = ptr_ring_consume(&wg->handshake_queue.ring)) != NULL) + kfree_skb(skb); + atomic_set(&wg->handshake_queue_len, 0); wg_socket_reinit(wg, NULL, NULL); return 0; } @@ -235,14 +238,13 @@ static void wg_destruct(struct net_device *dev) destroy_workqueue(wg->handshake_receive_wq); destroy_workqueue(wg->handshake_send_wq); destroy_workqueue(wg->packet_crypt_wq); - wg_packet_queue_free(&wg->decrypt_queue); - wg_packet_queue_free(&wg->encrypt_queue); + wg_packet_queue_free(&wg->handshake_queue, true); + wg_packet_queue_free(&wg->decrypt_queue, false); + wg_packet_queue_free(&wg->encrypt_queue, false); rcu_barrier(); /* Wait for all the peers to be actually freed. */ wg_ratelimiter_uninit(); memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); - skb_queue_purge(&wg->incoming_handshakes); free_percpu(dev->tstats); - free_percpu(wg->incoming_handshakes_worker); kvfree(wg->index_hashtable); kvfree(wg->peer_hashtable); mutex_unlock(&wg->device_update_lock); @@ -298,7 +300,6 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, init_rwsem(&wg->static_identity.lock); mutex_init(&wg->socket_update_lock); mutex_init(&wg->device_update_lock); - skb_queue_head_init(&wg->incoming_handshakes); wg_allowedips_init(&wg->peer_allowedips); wg_cookie_checker_init(&wg->cookie_checker, wg); INIT_LIST_HEAD(&wg->peer_list); @@ -316,16 +317,10 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, if (!dev->tstats) goto err_free_index_hashtable; - wg->incoming_handshakes_worker = - wg_packet_percpu_multicore_worker_alloc( - wg_packet_handshake_receive_worker, wg); - if (!wg->incoming_handshakes_worker) - goto err_free_tstats; - wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s", WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name); if (!wg->handshake_receive_wq) - goto err_free_incoming_handshakes; + goto err_free_tstats; wg->handshake_send_wq = alloc_workqueue("wg-kex-%s", WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name); @@ -347,10 +342,15 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, if (ret < 0) goto err_free_encrypt_queue; - ret = wg_ratelimiter_init(); + ret = wg_packet_queue_init(&wg->handshake_queue, wg_packet_handshake_receive_worker, + MAX_QUEUED_INCOMING_HANDSHAKES); if (ret < 0) goto err_free_decrypt_queue; + ret = wg_ratelimiter_init(); + if (ret < 0) + goto err_free_handshake_queue; + ret = register_netdevice(dev); if (ret < 0) goto err_uninit_ratelimiter; @@ -367,18 +367,18 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, err_uninit_ratelimiter: wg_ratelimiter_uninit(); +err_free_handshake_queue: + wg_packet_queue_free(&wg->handshake_queue, false); err_free_decrypt_queue: - wg_packet_queue_free(&wg->decrypt_queue); + wg_packet_queue_free(&wg->decrypt_queue, false); err_free_encrypt_queue: - wg_packet_queue_free(&wg->encrypt_queue); + wg_packet_queue_free(&wg->encrypt_queue, false); err_destroy_packet_crypt: destroy_workqueue(wg->packet_crypt_wq); err_destroy_handshake_send: destroy_workqueue(wg->handshake_send_wq); err_destroy_handshake_receive: destroy_workqueue(wg->handshake_receive_wq); -err_free_incoming_handshakes: - free_percpu(wg->incoming_handshakes_worker); err_free_tstats: free_percpu(dev->tstats); err_free_index_hashtable: @@ -398,6 +398,7 @@ static struct rtnl_link_ops link_ops __read_mostly = { static void wg_netns_pre_exit(struct net *net) { struct wg_device *wg; + struct wg_peer *peer; rtnl_lock(); list_for_each_entry(wg, &device_list, device_list) { @@ -407,6 +408,8 @@ static void wg_netns_pre_exit(struct net *net) mutex_lock(&wg->device_update_lock); rcu_assign_pointer(wg->creating_net, NULL); wg_socket_reinit(wg, NULL, NULL); + list_for_each_entry(peer, &wg->peer_list, peer_list) + wg_socket_clear_peer_endpoint_src(peer); mutex_unlock(&wg->device_update_lock); } } diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h index 854bc3d97150e1c1dab3befbe64966add4f65746..43c7cebbf50b08f2a1868f0017d0bee8aee700f8 100644 --- a/drivers/net/wireguard/device.h +++ b/drivers/net/wireguard/device.h @@ -39,21 +39,18 @@ struct prev_queue { struct wg_device { struct net_device *dev; - struct crypt_queue encrypt_queue, decrypt_queue; + struct crypt_queue encrypt_queue, decrypt_queue, handshake_queue; struct sock __rcu *sock4, *sock6; struct net __rcu *creating_net; struct noise_static_identity static_identity; - struct workqueue_struct *handshake_receive_wq, *handshake_send_wq; - struct workqueue_struct *packet_crypt_wq; - struct sk_buff_head incoming_handshakes; - int incoming_handshake_cpu; - struct multicore_worker __percpu *incoming_handshakes_worker; + struct workqueue_struct *packet_crypt_wq,*handshake_receive_wq, *handshake_send_wq; struct cookie_checker cookie_checker; struct pubkey_hashtable *peer_hashtable; struct index_hashtable *index_hashtable; struct allowedips peer_allowedips; struct mutex device_update_lock, socket_update_lock; struct list_head device_list, peer_list; + atomic_t handshake_queue_len; unsigned int num_peers, device_update_gen; u32 fwmark; u16 incoming_port; diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c index 48e7b982a30736bc147712ce52957517e3e862af..1de413b19e3424a2ace2edcbcf0d0d49c4be6167 100644 --- a/drivers/net/wireguard/queueing.c +++ b/drivers/net/wireguard/queueing.c @@ -38,11 +38,11 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, return 0; } -void wg_packet_queue_free(struct crypt_queue *queue) +void wg_packet_queue_free(struct crypt_queue *queue, bool purge) { free_percpu(queue->worker); - WARN_ON(!__ptr_ring_empty(&queue->ring)); - ptr_ring_cleanup(&queue->ring, NULL); + WARN_ON(!purge && !__ptr_ring_empty(&queue->ring)); + ptr_ring_cleanup(&queue->ring, purge ? (void(*)(void*))kfree_skb : NULL); } #define NEXT(skb) ((skb)->prev) diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index 4ef2944a68bc906ebec5167d1e17e281ea67be61..e2388107f7fdc9c040841adfc164459e0c777d7d 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -23,7 +23,7 @@ struct sk_buff; /* queueing.c APIs: */ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, unsigned int len); -void wg_packet_queue_free(struct crypt_queue *queue); +void wg_packet_queue_free(struct crypt_queue *queue, bool purge); struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c index 3fedd1d21f5ee019917a7280294cfc7398e65b11..dd55e5c26f468f71518cc5956f4b84480af5d9c8 100644 --- a/drivers/net/wireguard/ratelimiter.c +++ b/drivers/net/wireguard/ratelimiter.c @@ -176,12 +176,12 @@ int wg_ratelimiter_init(void) (1U << 14) / sizeof(struct hlist_head))); max_entries = table_size * 8; - table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL); + table_v4 = kvcalloc(table_size, sizeof(*table_v4), GFP_KERNEL); if (unlikely(!table_v4)) goto err_kmemcache; #if IS_ENABLED(CONFIG_IPV6) - table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL); + table_v6 = kvcalloc(table_size, sizeof(*table_v6), GFP_KERNEL); if (unlikely(!table_v6)) { kvfree(table_v4); goto err_kmemcache; diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 7dc84bcca26139991be00759c0228d3126df2671..7b8df406c7737398f0270361afcb196af4b6a76e 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -116,8 +116,8 @@ static void wg_receive_handshake_packet(struct wg_device *wg, return; } - under_load = skb_queue_len(&wg->incoming_handshakes) >= - MAX_QUEUED_INCOMING_HANDSHAKES / 8; + under_load = atomic_read(&wg->handshake_queue_len) >= + MAX_QUEUED_INCOMING_HANDSHAKES / 8; if (under_load) { last_under_load = ktime_get_coarse_boottime_ns(); } else if (last_under_load) { @@ -212,13 +212,14 @@ static void wg_receive_handshake_packet(struct wg_device *wg, void wg_packet_handshake_receive_worker(struct work_struct *work) { - struct wg_device *wg = container_of(work, struct multicore_worker, - work)->ptr; + struct crypt_queue *queue = container_of(work, struct multicore_worker, work)->ptr; + struct wg_device *wg = container_of(queue, struct wg_device, handshake_queue); struct sk_buff *skb; - while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) { + while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) { wg_receive_handshake_packet(wg, skb); dev_kfree_skb(skb); + atomic_dec(&wg->handshake_queue_len); cond_resched(); } } @@ -553,22 +554,28 @@ void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb) case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): { - int cpu; - - if (skb_queue_len(&wg->incoming_handshakes) > - MAX_QUEUED_INCOMING_HANDSHAKES || - unlikely(!rng_is_initialized())) { + int cpu, ret = -EBUSY; + + if (unlikely(!rng_is_initialized())) + goto drop; + if (atomic_read(&wg->handshake_queue_len) > MAX_QUEUED_INCOMING_HANDSHAKES / 2) { + if (spin_trylock_bh(&wg->handshake_queue.ring.producer_lock)) { + ret = __ptr_ring_produce(&wg->handshake_queue.ring, skb); + spin_unlock_bh(&wg->handshake_queue.ring.producer_lock); + } + } else + ret = ptr_ring_produce_bh(&wg->handshake_queue.ring, skb); + if (ret) { + drop: net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n", wg->dev->name, skb); goto err; } - skb_queue_tail(&wg->incoming_handshakes, skb); - /* Queues up a call to packet_process_queued_handshake_ - * packets(skb): - */ - cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu); + atomic_inc(&wg->handshake_queue_len); + cpu = wg_cpumask_next_online(&wg->handshake_queue.last_cpu); + /* Queues up a call to packet_process_queued_handshake_packets(skb): */ queue_work_on(cpu, wg->handshake_receive_wq, - &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work); + &per_cpu_ptr(wg->handshake_queue.worker, cpu)->work); break; } case cpu_to_le32(MESSAGE_DATA): diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c index c8cd385d233b6785ac17eee3483bcaea40e53c42..52b9bc83abcbcca9007795fee738f1ad564e84e3 100644 --- a/drivers/net/wireguard/socket.c +++ b/drivers/net/wireguard/socket.c @@ -308,7 +308,7 @@ void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer) { write_lock_bh(&peer->endpoint_lock); memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6)); - dst_cache_reset(&peer->endpoint_cache); + dst_cache_reset_now(&peer->endpoint_cache); write_unlock_bh(&peer->endpoint_lock); } diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 49cc4b7ed5163a2de35cd84c6987e813573aae90..1baec4b412c8dc7f0c3b1edbe4c1f445e5074704 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -153,6 +153,10 @@ static void ar5523_cmd_rx_cb(struct urb *urb) ar5523_err(ar, "Invalid reply to WDCMSG_TARGET_START"); return; } + if (!cmd->odata) { + ar5523_err(ar, "Unexpected WDCMSG_TARGET_START reply"); + return; + } memcpy(cmd->odata, hdr + 1, sizeof(u32)); cmd->olen = sizeof(u32); cmd->res = 0; diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index d73ad60b571c2c41653cf8723f998a010bcebec8..d0967bb1f38718a9efbc2280fdec3e0f3c92e2f6 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -89,6 +89,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = true, }, { @@ -123,6 +124,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = true, }, { @@ -158,6 +160,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, { @@ -187,6 +190,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .num_wds_entries = 0x20, .uart_pin_workaround = true, .tx_stats_over_pktlog = false, + .credit_size_workaround = false, .bmi_large_size_download = true, .supports_peer_stats_info = true, }, @@ -222,6 +226,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, { @@ -256,6 +261,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, { @@ -290,6 +296,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, { @@ -327,6 +334,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = true, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, .supports_peer_stats_info = true, }, @@ -368,6 +376,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, { @@ -415,6 +424,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, { @@ -459,6 +469,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, { @@ -493,6 +504,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, { @@ -529,6 +541,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = true, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, { @@ -557,6 +570,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .ast_skid_limit = 0x10, .num_wds_entries = 0x20, .uart_pin_workaround = true, + .credit_size_workaround = true, }, { .id = QCA4019_HW_1_0_DEV_VERSION, @@ -597,6 +611,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = false, .hw_filter_reset_required = true, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, { @@ -624,6 +639,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .rri_on_ddr = true, .hw_filter_reset_required = false, .fw_diag_ce_download = false, + .credit_size_workaround = false, .tx_stats_over_pktlog = false, }, }; @@ -697,6 +713,7 @@ static void ath10k_send_suspend_complete(struct ath10k *ar) static int ath10k_init_sdio(struct ath10k *ar, enum ath10k_firmware_mode mode) { + bool mtu_workaround = ar->hw_params.credit_size_workaround; int ret; u32 param = 0; @@ -714,7 +731,7 @@ static int ath10k_init_sdio(struct ath10k *ar, enum ath10k_firmware_mode mode) param |= HI_ACS_FLAGS_SDIO_REDUCE_TX_COMPL_SET; - if (mode == ATH10K_FIRMWARE_MODE_NORMAL) + if (mode == ATH10K_FIRMWARE_MODE_NORMAL && !mtu_workaround) param |= HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE; else param &= ~HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE; diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c index 1fc0a312ab587337c54ecf3eb7d9f11b648fd653..5f67da47036cf1be24ec705c8d56ef03f796cd4e 100644 --- a/drivers/net/wireless/ath/ath10k/htt_tx.c +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c @@ -147,6 +147,9 @@ void ath10k_htt_tx_dec_pending(struct ath10k_htt *htt) htt->num_pending_tx--; if (htt->num_pending_tx == htt->max_num_pending_tx - 1) ath10k_mac_tx_unlock(htt->ar, ATH10K_TX_PAUSE_Q_FULL); + + if (htt->num_pending_tx == 0) + wake_up(&htt->empty_tx_wq); } int ath10k_htt_tx_inc_pending(struct ath10k_htt *htt) diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index c6ded21f5ed69bbb5a846115a12593a14291dfe4..d3ef83ad577dada610f557a757389518383d11a7 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -618,6 +618,9 @@ struct ath10k_hw_params { */ bool uart_pin_workaround; + /* Workaround for the credit size calculation */ + bool credit_size_workaround; + /* tx stats support over pktlog */ bool tx_stats_over_pktlog; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 36183fdfb7f03308a28cfa226f2a1e1108b24783..b59d482d9c23ef86a453d6887410f16a62a1434b 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -982,8 +982,12 @@ static void ath10k_mac_vif_beacon_cleanup(struct ath10k_vif *arvif) ath10k_mac_vif_beacon_free(arvif); if (arvif->beacon_buf) { - dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, - arvif->beacon_buf, arvif->beacon_paddr); + if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) + kfree(arvif->beacon_buf); + else + dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, + arvif->beacon_buf, + arvif->beacon_paddr); arvif->beacon_buf = NULL; } } @@ -1037,7 +1041,7 @@ static int ath10k_monitor_vdev_start(struct ath10k *ar, int vdev_id) arg.channel.min_power = 0; arg.channel.max_power = channel->max_power * 2; arg.channel.max_reg_power = channel->max_reg_power * 2; - arg.channel.max_antenna_gain = channel->max_antenna_gain * 2; + arg.channel.max_antenna_gain = channel->max_antenna_gain; reinit_completion(&ar->vdev_setup_done); reinit_completion(&ar->vdev_delete_done); @@ -1483,7 +1487,7 @@ static int ath10k_vdev_start_restart(struct ath10k_vif *arvif, arg.channel.min_power = 0; arg.channel.max_power = chandef->chan->max_power * 2; arg.channel.max_reg_power = chandef->chan->max_reg_power * 2; - arg.channel.max_antenna_gain = chandef->chan->max_antenna_gain * 2; + arg.channel.max_antenna_gain = chandef->chan->max_antenna_gain; if (arvif->vdev_type == WMI_VDEV_TYPE_AP) { arg.ssid = arvif->u.ap.ssid; @@ -3254,7 +3258,7 @@ static int ath10k_update_channel_list(struct ath10k *ar) ch->min_power = 0; ch->max_power = channel->max_power * 2; ch->max_reg_power = channel->max_reg_power * 2; - ch->max_antenna_gain = channel->max_antenna_gain * 2; + ch->max_antenna_gain = channel->max_antenna_gain; ch->reg_class_id = 0; /* FIXME */ /* FIXME: why use only legacy modes, why not any @@ -5466,10 +5470,25 @@ static int ath10k_add_interface(struct ieee80211_hw *hw, if (vif->type == NL80211_IFTYPE_ADHOC || vif->type == NL80211_IFTYPE_MESH_POINT || vif->type == NL80211_IFTYPE_AP) { - arvif->beacon_buf = dma_alloc_coherent(ar->dev, - IEEE80211_MAX_FRAME_LEN, - &arvif->beacon_paddr, - GFP_ATOMIC); + if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) { + arvif->beacon_buf = kmalloc(IEEE80211_MAX_FRAME_LEN, + GFP_KERNEL); + + /* Using a kernel pointer in place of a dma_addr_t + * token can lead to undefined behavior if that + * makes it into cache management functions. Use a + * known-invalid address token instead, which + * avoids the warning and makes it easier to catch + * bugs if it does end up getting used. + */ + arvif->beacon_paddr = DMA_MAPPING_ERROR; + } else { + arvif->beacon_buf = + dma_alloc_coherent(ar->dev, + IEEE80211_MAX_FRAME_LEN, + &arvif->beacon_paddr, + GFP_ATOMIC); + } if (!arvif->beacon_buf) { ret = -ENOMEM; ath10k_warn(ar, "failed to allocate beacon buffer: %d\n", @@ -5684,8 +5703,12 @@ err_vdev_delete: err: if (arvif->beacon_buf) { - dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, - arvif->beacon_buf, arvif->beacon_paddr); + if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) + kfree(arvif->beacon_buf); + else + dma_free_coherent(ar->dev, IEEE80211_MAX_FRAME_LEN, + arvif->beacon_buf, + arvif->beacon_paddr); arvif->beacon_buf = NULL; } diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index 81ddaafb6721c1dd19f1b750cfc0bfac4e9372f3..0fe639710a8bb4576ff0bc3026d3aefa7534693d 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -1363,8 +1363,11 @@ static void ath10k_rx_indication_async_work(struct work_struct *work) ep->ep_ops.ep_rx_complete(ar, skb); } - if (test_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags)) + if (test_bit(ATH10K_FLAG_CORE_REGISTERED, &ar->dev_flags)) { + local_bh_disable(); napi_schedule(&ar->napi); + local_bh_enable(); + } } static int ath10k_sdio_read_rtc_state(struct ath10k_sdio *ar_sdio, unsigned char *state) diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c index aefe1f7f906c07d5eacaf918e069f187e5bbb59f..f51f1cf2c6a40eec433e5d415aec40ab165a3206 100644 --- a/drivers/net/wireless/ath/ath10k/txrx.c +++ b/drivers/net/wireless/ath/ath10k/txrx.c @@ -82,8 +82,6 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt, flags = skb_cb->flags; ath10k_htt_tx_free_msdu_id(htt, tx_done->msdu_id); ath10k_htt_tx_dec_pending(htt); - if (htt->num_pending_tx == 0) - wake_up(&htt->empty_tx_wq); spin_unlock_bh(&htt->tx_lock); rcu_read_lock(); diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c index 19b9c27e30e20977f0a6b4b65dcda8eddad2bcc8..3d98f19c6ec8aaffdf3aa65b7f3309892b18ab34 100644 --- a/drivers/net/wireless/ath/ath10k/usb.c +++ b/drivers/net/wireless/ath/ath10k/usb.c @@ -525,7 +525,7 @@ static int ath10k_usb_submit_ctrl_in(struct ath10k *ar, req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, buf, - size, 2 * HZ); + size, 2000); if (ret < 0) { ath10k_warn(ar, "Failed to read usb control message: %d\n", @@ -853,6 +853,11 @@ static int ath10k_usb_setup_pipe_resources(struct ath10k *ar, le16_to_cpu(endpoint->wMaxPacketSize), endpoint->bInterval); } + + /* Ignore broken descriptors. */ + if (usb_endpoint_maxp(endpoint) == 0) + continue; + urbcount = 0; pipe_num = diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index 37b53af760d766cb22a793c3f7440401cddf94ee..85fe855ece09738dddbae234ae387f670650103a 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -2610,6 +2610,10 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) if (ieee80211_is_beacon(hdr->frame_control)) ath10k_mac_handle_beacon(ar, skb); + if (ieee80211_is_beacon(hdr->frame_control) || + ieee80211_is_probe_resp(hdr->frame_control)) + status->boottime_ns = ktime_get_boottime_ns(); + ath10k_dbg(ar, ATH10K_DBG_MGMT, "event mgmt rx skb %pK len %d ftype %02x stype %02x\n", skb, skb->len, diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 66ecf09068c1974151f18e73a00a101d29ff7930..e244b7038e60621241dcfcfbb90897597ac322ba 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -2066,7 +2066,9 @@ struct wmi_channel { union { __le32 reginfo1; struct { + /* note: power unit is 1 dBm */ u8 antenna_max; + /* note: power unit is 0.5 dBm */ u8 max_tx_power; } __packed; } __packed; @@ -2086,6 +2088,7 @@ struct wmi_channel_arg { u32 min_power; u32 max_power; u32 max_reg_power; + /* note: power unit is 1 dBm */ u32 max_antenna_gain; u32 reg_class_id; enum wmi_phy_mode mode; diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index 430723c64adce6d03db67ca2196348d5801affef..9ff6e685331424df67c8ea1ae10d1bdacb1c7988 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -175,8 +175,11 @@ static void __ath11k_ahb_ext_irq_disable(struct ath11k_base *ab) ath11k_ahb_ext_grp_disable(irq_grp); - napi_synchronize(&irq_grp->napi); - napi_disable(&irq_grp->napi); + if (irq_grp->napi_enabled) { + napi_synchronize(&irq_grp->napi); + napi_disable(&irq_grp->napi); + irq_grp->napi_enabled = false; + } } } @@ -206,13 +209,13 @@ static void ath11k_ahb_clearbit32(struct ath11k_base *ab, u8 bit, u32 offset) static void ath11k_ahb_ce_irq_enable(struct ath11k_base *ab, u16 ce_id) { - const struct ce_pipe_config *ce_config; + const struct ce_attr *ce_attr; - ce_config = &ab->hw_params.target_ce_config[ce_id]; - if (__le32_to_cpu(ce_config->pipedir) & PIPEDIR_OUT) + ce_attr = &ab->hw_params.host_ce_config[ce_id]; + if (ce_attr->src_nentries) ath11k_ahb_setbit32(ab, ce_id, CE_HOST_IE_ADDRESS); - if (__le32_to_cpu(ce_config->pipedir) & PIPEDIR_IN) { + if (ce_attr->dest_nentries) { ath11k_ahb_setbit32(ab, ce_id, CE_HOST_IE_2_ADDRESS); ath11k_ahb_setbit32(ab, ce_id + CE_HOST_IE_3_SHIFT, CE_HOST_IE_3_ADDRESS); @@ -221,13 +224,13 @@ static void ath11k_ahb_ce_irq_enable(struct ath11k_base *ab, u16 ce_id) static void ath11k_ahb_ce_irq_disable(struct ath11k_base *ab, u16 ce_id) { - const struct ce_pipe_config *ce_config; + const struct ce_attr *ce_attr; - ce_config = &ab->hw_params.target_ce_config[ce_id]; - if (__le32_to_cpu(ce_config->pipedir) & PIPEDIR_OUT) + ce_attr = &ab->hw_params.host_ce_config[ce_id]; + if (ce_attr->src_nentries) ath11k_ahb_clearbit32(ab, ce_id, CE_HOST_IE_ADDRESS); - if (__le32_to_cpu(ce_config->pipedir) & PIPEDIR_IN) { + if (ce_attr->dest_nentries) { ath11k_ahb_clearbit32(ab, ce_id, CE_HOST_IE_2_ADDRESS); ath11k_ahb_clearbit32(ab, ce_id + CE_HOST_IE_3_SHIFT, CE_HOST_IE_3_ADDRESS); @@ -300,7 +303,10 @@ static void ath11k_ahb_ext_irq_enable(struct ath11k_base *ab) for (i = 0; i < ATH11K_EXT_IRQ_GRP_NUM_MAX; i++) { struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i]; - napi_enable(&irq_grp->napi); + if (!irq_grp->napi_enabled) { + napi_enable(&irq_grp->napi); + irq_grp->napi_enabled = true; + } ath11k_ahb_ext_grp_enable(irq_grp); } } diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h index c8e36251068c9861bb75521be380f0dabfd2bed4..d2f2898d17b498967e646289bdd61d772d62f528 100644 --- a/drivers/net/wireless/ath/ath11k/core.h +++ b/drivers/net/wireless/ath/ath11k/core.h @@ -124,6 +124,7 @@ struct ath11k_ext_irq_grp { u32 num_irq; u32 grp_id; u64 timestamp; + bool napi_enabled; struct napi_struct napi; struct net_device napi_ndev; }; @@ -687,7 +688,6 @@ struct ath11k_base { u32 wlan_init_status; int irq_num[ATH11K_IRQ_NUM_MAX]; struct ath11k_ext_irq_grp ext_irq_grp[ATH11K_EXT_IRQ_GRP_NUM_MAX]; - struct napi_struct *napi; struct ath11k_targ_cap target_caps; u32 ext_service_bitmap[WMI_SERVICE_EXT_BM_SIZE]; bool pdevs_macaddr_valid; diff --git a/drivers/net/wireless/ath/ath11k/dbring.c b/drivers/net/wireless/ath/ath11k/dbring.c index 5e1f5437b4185d786c15848cf505bd8165541a00..fd98ba5b1130b47231ca65abae015dce21536b56 100644 --- a/drivers/net/wireless/ath/ath11k/dbring.c +++ b/drivers/net/wireless/ath/ath11k/dbring.c @@ -8,8 +8,7 @@ static int ath11k_dbring_bufs_replenish(struct ath11k *ar, struct ath11k_dbring *ring, - struct ath11k_dbring_element *buff, - gfp_t gfp) + struct ath11k_dbring_element *buff) { struct ath11k_base *ab = ar->ab; struct hal_srng *srng; @@ -35,7 +34,7 @@ static int ath11k_dbring_bufs_replenish(struct ath11k *ar, goto err; spin_lock_bh(&ring->idr_lock); - buf_id = idr_alloc(&ring->bufs_idr, buff, 0, ring->bufs_max, gfp); + buf_id = idr_alloc(&ring->bufs_idr, buff, 0, ring->bufs_max, GFP_ATOMIC); spin_unlock_bh(&ring->idr_lock); if (buf_id < 0) { ret = -ENOBUFS; @@ -72,8 +71,7 @@ err: } static int ath11k_dbring_fill_bufs(struct ath11k *ar, - struct ath11k_dbring *ring, - gfp_t gfp) + struct ath11k_dbring *ring) { struct ath11k_dbring_element *buff; struct hal_srng *srng; @@ -92,11 +90,11 @@ static int ath11k_dbring_fill_bufs(struct ath11k *ar, size = sizeof(*buff) + ring->buf_sz + align - 1; while (num_remain > 0) { - buff = kzalloc(size, gfp); + buff = kzalloc(size, GFP_ATOMIC); if (!buff) break; - ret = ath11k_dbring_bufs_replenish(ar, ring, buff, gfp); + ret = ath11k_dbring_bufs_replenish(ar, ring, buff); if (ret) { ath11k_warn(ar->ab, "failed to replenish db ring num_remain %d req_ent %d\n", num_remain, req_entries); @@ -176,7 +174,7 @@ int ath11k_dbring_buf_setup(struct ath11k *ar, ring->hp_addr = ath11k_hal_srng_get_hp_addr(ar->ab, srng); ring->tp_addr = ath11k_hal_srng_get_tp_addr(ar->ab, srng); - ret = ath11k_dbring_fill_bufs(ar, ring, GFP_KERNEL); + ret = ath11k_dbring_fill_bufs(ar, ring); return ret; } @@ -322,7 +320,7 @@ int ath11k_dbring_buffer_release_event(struct ath11k_base *ab, } memset(buff, 0, size); - ath11k_dbring_bufs_replenish(ar, ring, buff, GFP_ATOMIC); + ath11k_dbring_bufs_replenish(ar, ring, buff); } spin_unlock_bh(&srng->lock); diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h index ee8db812589b3cdecdfda17ba796e36b75faa0a9..c4972233149f40aaac7a4377852e95f3bdecd892 100644 --- a/drivers/net/wireless/ath/ath11k/dp.h +++ b/drivers/net/wireless/ath/ath11k/dp.h @@ -514,7 +514,8 @@ struct htt_ppdu_stats_cfg_cmd { } __packed; #define HTT_PPDU_STATS_CFG_MSG_TYPE GENMASK(7, 0) -#define HTT_PPDU_STATS_CFG_PDEV_ID GENMASK(15, 8) +#define HTT_PPDU_STATS_CFG_SOC_STATS BIT(8) +#define HTT_PPDU_STATS_CFG_PDEV_ID GENMASK(15, 9) #define HTT_PPDU_STATS_CFG_TLV_TYPE_BITMASK GENMASK(31, 16) enum htt_ppdu_stats_tag_type { diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 2bff8eb507d4d141cba3fa35b2135e0a14cb79db..2e77dca6b1ad62d2c06b1acb6f6311e86f4985bc 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -2303,8 +2303,10 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc, channel_num = ath11k_dp_rx_h_msdu_start_freq(rx_desc); center_freq = ath11k_dp_rx_h_msdu_start_freq(rx_desc) >> 16; - if (center_freq >= 5935 && center_freq <= 7105) { + if (center_freq >= ATH11K_MIN_6G_FREQ && + center_freq <= ATH11K_MAX_6G_FREQ) { rx_status->band = NL80211_BAND_6GHZ; + rx_status->freq = center_freq; } else if (channel_num >= 1 && channel_num <= 14) { rx_status->band = NL80211_BAND_2GHZ; } else if (channel_num >= 36 && channel_num <= 173) { @@ -2322,8 +2324,9 @@ static void ath11k_dp_rx_h_ppdu(struct ath11k *ar, struct hal_rx_desc *rx_desc, rx_desc, sizeof(struct hal_rx_desc)); } - rx_status->freq = ieee80211_channel_to_frequency(channel_num, - rx_status->band); + if (rx_status->band != NL80211_BAND_6GHZ) + rx_status->freq = ieee80211_channel_to_frequency(channel_num, + rx_status->band); ath11k_dp_rx_h_rate(ar, rx_desc, rx_status); } @@ -3273,7 +3276,7 @@ static int ath11k_dp_rx_h_defrag_reo_reinject(struct ath11k *ar, struct dp_rx_ti paddr = dma_map_single(ab->dev, defrag_skb->data, defrag_skb->len + skb_tailroom(defrag_skb), - DMA_FROM_DEVICE); + DMA_TO_DEVICE); if (dma_mapping_error(ab->dev, paddr)) return -ENOMEM; @@ -3338,7 +3341,7 @@ err_free_idr: spin_unlock_bh(&rx_refill_ring->idr_lock); err_unmap_dma: dma_unmap_single(ab->dev, paddr, defrag_skb->len + skb_tailroom(defrag_skb), - DMA_FROM_DEVICE); + DMA_TO_DEVICE); return ret; } diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c index 21dfd08d3debb6b489e3647aeb2d02ec0d47dc0a..092eee735da29d2bf5a465a264d13fafb9bd90fa 100644 --- a/drivers/net/wireless/ath/ath11k/dp_tx.c +++ b/drivers/net/wireless/ath/ath11k/dp_tx.c @@ -894,7 +894,7 @@ int ath11k_dp_tx_htt_h2t_ppdu_stats_req(struct ath11k *ar, u32 mask) cmd->msg = FIELD_PREP(HTT_PPDU_STATS_CFG_MSG_TYPE, HTT_H2T_MSG_TYPE_PPDU_STATS_CFG); - pdev_mask = 1 << (i + 1); + pdev_mask = 1 << (ar->pdev_idx + i); cmd->msg |= FIELD_PREP(HTT_PPDU_STATS_CFG_PDEV_ID, pdev_mask); cmd->msg |= FIELD_PREP(HTT_PPDU_STATS_CFG_TLV_TYPE_BITMASK, mask); diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c index 9904c0eb758754ebbe076ff116e4a973bd75443a..f3b9108ab6bd073e695c40851f3fff4520e64695 100644 --- a/drivers/net/wireless/ath/ath11k/hal.c +++ b/drivers/net/wireless/ath/ath11k/hal.c @@ -991,6 +991,7 @@ int ath11k_hal_srng_setup(struct ath11k_base *ab, enum hal_ring_type type, srng->msi_data = params->msi_data; srng->initialized = 1; spin_lock_init(&srng->lock); + lockdep_set_class(&srng->lock, hal->srng_key + ring_id); for (i = 0; i < HAL_SRNG_NUM_REG_GRP; i++) { srng->hwreg_base[i] = srng_config->reg_start[i] + @@ -1237,6 +1238,24 @@ static int ath11k_hal_srng_create_config(struct ath11k_base *ab) return 0; } +static void ath11k_hal_register_srng_key(struct ath11k_base *ab) +{ + struct ath11k_hal *hal = &ab->hal; + u32 ring_id; + + for (ring_id = 0; ring_id < HAL_SRNG_RING_ID_MAX; ring_id++) + lockdep_register_key(hal->srng_key + ring_id); +} + +static void ath11k_hal_unregister_srng_key(struct ath11k_base *ab) +{ + struct ath11k_hal *hal = &ab->hal; + u32 ring_id; + + for (ring_id = 0; ring_id < HAL_SRNG_RING_ID_MAX; ring_id++) + lockdep_unregister_key(hal->srng_key + ring_id); +} + int ath11k_hal_srng_init(struct ath11k_base *ab) { struct ath11k_hal *hal = &ab->hal; @@ -1256,6 +1275,8 @@ int ath11k_hal_srng_init(struct ath11k_base *ab) if (ret) goto err_free_cont_rdp; + ath11k_hal_register_srng_key(ab); + return 0; err_free_cont_rdp: @@ -1270,6 +1291,7 @@ void ath11k_hal_srng_deinit(struct ath11k_base *ab) { struct ath11k_hal *hal = &ab->hal; + ath11k_hal_unregister_srng_key(ab); ath11k_hal_free_cont_rdp(ab); ath11k_hal_free_cont_wrp(ab); kfree(hal->srng_config); diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h index 1f1b29cd0aa399f078a8c9a020ff81e7461736d6..5fbfded8d546c4193daa9d8dbb7a10963a1f2d9d 100644 --- a/drivers/net/wireless/ath/ath11k/hal.h +++ b/drivers/net/wireless/ath/ath11k/hal.h @@ -888,6 +888,8 @@ struct ath11k_hal { /* shadow register configuration */ u32 shadow_reg_addr[HAL_SHADOW_NUM_REGS]; int num_shadow_reg_configured; + + struct lock_class_key srng_key[HAL_SRNG_RING_ID_MAX]; }; u32 ath11k_hal_reo_qdesc_size(u32 ba_window_size, u8 tid); diff --git a/drivers/net/wireless/ath/ath11k/hw.c b/drivers/net/wireless/ath/ath11k/hw.c index 66331da3501291f08039073604089c32e10bfa86..f6282e8702923f27e7ff3a1c12a514886517d332 100644 --- a/drivers/net/wireless/ath/ath11k/hw.c +++ b/drivers/net/wireless/ath/ath11k/hw.c @@ -246,8 +246,6 @@ const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_ipq8074 = { const struct ath11k_hw_ring_mask ath11k_hw_ring_mask_qca6390 = { .tx = { ATH11K_TX_RING_MASK_0, - ATH11K_TX_RING_MASK_1, - ATH11K_TX_RING_MASK_2, }, .rx_mon_status = { 0, 0, 0, 0, diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index 63d70aecbd0f1d49ea9c113aada52f7453720830..cc9122f420243069939f08cd12276fdb7e091a0c 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: BSD-3-Clause-Clear /* * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved. + * Copyright (c) 2021 Qualcomm Innovation Center, Inc. All rights reserved. */ #include @@ -792,11 +793,15 @@ static int ath11k_mac_setup_bcn_tmpl(struct ath11k_vif *arvif) if (cfg80211_find_ie(WLAN_EID_RSN, ies, (skb_tail_pointer(bcn) - ies))) arvif->rsnie_present = true; + else + arvif->rsnie_present = false; if (cfg80211_find_vendor_ie(WLAN_OUI_MICROSOFT, WLAN_OUI_TYPE_MICROSOFT_WPA, ies, (skb_tail_pointer(bcn) - ies))) arvif->wpaie_present = true; + else + arvif->wpaie_present = false; ret = ath11k_wmi_bcn_tmpl(ar, arvif->vdev_id, &offs, bcn); @@ -2316,9 +2321,12 @@ static int ath11k_mac_op_hw_scan(struct ieee80211_hw *hw, arg.scan_id = ATH11K_SCAN_ID; if (req->ie_len) { + arg.extraie.ptr = kmemdup(req->ie, req->ie_len, GFP_KERNEL); + if (!arg.extraie.ptr) { + ret = -ENOMEM; + goto exit; + } arg.extraie.len = req->ie_len; - arg.extraie.ptr = kzalloc(req->ie_len, GFP_KERNEL); - memcpy(arg.extraie.ptr, req->ie, req->ie_len); } if (req->n_ssids) { @@ -2395,9 +2403,7 @@ static int ath11k_install_key(struct ath11k_vif *arvif, return 0; if (cmd == DISABLE_KEY) { - /* TODO: Check if FW expects value other than NONE for del */ - /* arg.key_cipher = WMI_CIPHER_NONE; */ - arg.key_len = 0; + arg.key_cipher = WMI_CIPHER_NONE; arg.key_data = NULL; goto install; } @@ -2529,7 +2535,7 @@ static int ath11k_mac_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, /* flush the fragments cache during key (re)install to * ensure all frags in the new frag list belong to the same key. */ - if (peer && cmd == SET_KEY) + if (peer && sta && cmd == SET_KEY) ath11k_peer_frags_flush(ar, peer); spin_unlock_bh(&ab->base_lock); @@ -3878,23 +3884,32 @@ static int __ath11k_set_antenna(struct ath11k *ar, u32 tx_ant, u32 rx_ant) return 0; } -int ath11k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx) +static void ath11k_mac_tx_mgmt_free(struct ath11k *ar, int buf_id) { - struct sk_buff *msdu = skb; + struct sk_buff *msdu; struct ieee80211_tx_info *info; - struct ath11k *ar = ctx; - struct ath11k_base *ab = ar->ab; spin_lock_bh(&ar->txmgmt_idr_lock); - idr_remove(&ar->txmgmt_idr, buf_id); + msdu = idr_remove(&ar->txmgmt_idr, buf_id); spin_unlock_bh(&ar->txmgmt_idr_lock); - dma_unmap_single(ab->dev, ATH11K_SKB_CB(msdu)->paddr, msdu->len, + + if (!msdu) + return; + + dma_unmap_single(ar->ab->dev, ATH11K_SKB_CB(msdu)->paddr, msdu->len, DMA_TO_DEVICE); info = IEEE80211_SKB_CB(msdu); memset(&info->status, 0, sizeof(info->status)); ieee80211_free_txskb(ar->hw, msdu); +} + +int ath11k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx) +{ + struct ath11k *ar = ctx; + + ath11k_mac_tx_mgmt_free(ar, buf_id); return 0; } @@ -3903,17 +3918,10 @@ static int ath11k_mac_vif_txmgmt_idr_remove(int buf_id, void *skb, void *ctx) { struct ieee80211_vif *vif = ctx; struct ath11k_skb_cb *skb_cb = ATH11K_SKB_CB((struct sk_buff *)skb); - struct sk_buff *msdu = skb; struct ath11k *ar = skb_cb->ar; - struct ath11k_base *ab = ar->ab; - if (skb_cb->vif == vif) { - spin_lock_bh(&ar->txmgmt_idr_lock); - idr_remove(&ar->txmgmt_idr, buf_id); - spin_unlock_bh(&ar->txmgmt_idr_lock); - dma_unmap_single(ab->dev, skb_cb->paddr, msdu->len, - DMA_TO_DEVICE); - } + if (skb_cb->vif == vif) + ath11k_mac_tx_mgmt_free(ar, buf_id); return 0; } @@ -3928,6 +3936,8 @@ static int ath11k_mac_mgmt_tx_wmi(struct ath11k *ar, struct ath11k_vif *arvif, int buf_id; int ret; + ATH11K_SKB_CB(skb)->ar = ar; + spin_lock_bh(&ar->txmgmt_idr_lock); buf_id = idr_alloc(&ar->txmgmt_idr, skb, 0, ATH11K_TX_MGMT_NUM_PENDING_MAX, GFP_ATOMIC); @@ -6320,7 +6330,7 @@ static int __ath11k_mac_register(struct ath11k *ar) ar->hw->wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MONITOR); /* Apply the regd received during initialization */ - ret = ath11k_regd_update(ar, true); + ret = ath11k_regd_update(ar); if (ret) { ath11k_err(ar->ab, "ath11k regd update failed: %d\n", ret); goto err_unregister_hw; diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index d7eb6b7160bb4ff204c4a2c29819f73b90b92cd5..105e344240c100f1d9e31de57e27b467750cd485 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -416,8 +416,11 @@ static void __ath11k_pci_ext_irq_disable(struct ath11k_base *sc) ath11k_pci_ext_grp_disable(irq_grp); - napi_synchronize(&irq_grp->napi); - napi_disable(&irq_grp->napi); + if (irq_grp->napi_enabled) { + napi_synchronize(&irq_grp->napi); + napi_disable(&irq_grp->napi); + irq_grp->napi_enabled = false; + } } } @@ -436,7 +439,10 @@ static void ath11k_pci_ext_irq_enable(struct ath11k_base *ab) for (i = 0; i < ATH11K_EXT_IRQ_GRP_NUM_MAX; i++) { struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i]; - napi_enable(&irq_grp->napi); + if (!irq_grp->napi_enabled) { + napi_enable(&irq_grp->napi); + irq_grp->napi_enabled = true; + } ath11k_pci_ext_grp_enable(irq_grp); } } diff --git a/drivers/net/wireless/ath/ath11k/qmi.c b/drivers/net/wireless/ath/ath11k/qmi.c index 2ae7c6bf091e9bd8e119b58df021af3bfbe27609..c842e275d1adff300d8644dbc009700156bca702 100644 --- a/drivers/net/wireless/ath/ath11k/qmi.c +++ b/drivers/net/wireless/ath/ath11k/qmi.c @@ -2616,8 +2616,10 @@ static void ath11k_qmi_driver_event_work(struct work_struct *work) list_del(&event->list); spin_unlock(&qmi->event_lock); - if (test_bit(ATH11K_FLAG_UNREGISTERING, &ab->dev_flags)) + if (test_bit(ATH11K_FLAG_UNREGISTERING, &ab->dev_flags)) { + kfree(event); return; + } switch (event->type) { case ATH11K_QMI_EVENT_SERVER_ARRIVE: diff --git a/drivers/net/wireless/ath/ath11k/reg.c b/drivers/net/wireless/ath/ath11k/reg.c index 678d0885fcee7ebc5fc98e92c845011be171f4a2..e34311516b958a121464f36716f642a6c85d52ba 100644 --- a/drivers/net/wireless/ath/ath11k/reg.c +++ b/drivers/net/wireless/ath/ath11k/reg.c @@ -198,7 +198,7 @@ static void ath11k_copy_regd(struct ieee80211_regdomain *regd_orig, sizeof(struct ieee80211_reg_rule)); } -int ath11k_regd_update(struct ath11k *ar, bool init) +int ath11k_regd_update(struct ath11k *ar) { struct ieee80211_regdomain *regd, *regd_copy = NULL; int ret, regd_len, pdev_id; @@ -209,7 +209,10 @@ int ath11k_regd_update(struct ath11k *ar, bool init) spin_lock_bh(&ab->base_lock); - if (init) { + /* Prefer the latest regd update over default if it's available */ + if (ab->new_regd[pdev_id]) { + regd = ab->new_regd[pdev_id]; + } else { /* Apply the regd received during init through * WMI_REG_CHAN_LIST_CC event. In case of failure to * receive the regd, initialize with a default world @@ -222,8 +225,6 @@ int ath11k_regd_update(struct ath11k *ar, bool init) "failed to receive default regd during init\n"); regd = (struct ieee80211_regdomain *)&ath11k_world_regd; } - } else { - regd = ab->new_regd[pdev_id]; } if (!regd) { @@ -455,6 +456,9 @@ ath11k_reg_adjust_bw(u16 start_freq, u16 end_freq, u16 max_bw) { u16 bw; + if (end_freq <= start_freq) + return 0; + bw = end_freq - start_freq; bw = min_t(u16, bw, max_bw); @@ -462,8 +466,10 @@ ath11k_reg_adjust_bw(u16 start_freq, u16 end_freq, u16 max_bw) bw = 80; else if (bw >= 40 && bw < 80) bw = 40; - else if (bw < 40) + else if (bw >= 20 && bw < 40) bw = 20; + else + bw = 0; return bw; } @@ -487,73 +493,77 @@ ath11k_reg_update_weather_radar_band(struct ath11k_base *ab, struct cur_reg_rule *reg_rule, u8 *rule_idx, u32 flags, u16 max_bw) { + u32 start_freq; u32 end_freq; u16 bw; u8 i; i = *rule_idx; + /* there might be situations when even the input rule must be dropped */ + i--; + + /* frequencies below weather radar */ bw = ath11k_reg_adjust_bw(reg_rule->start_freq, ETSI_WEATHER_RADAR_BAND_LOW, max_bw); + if (bw > 0) { + i++; - ath11k_reg_update_rule(regd->reg_rules + i, reg_rule->start_freq, - ETSI_WEATHER_RADAR_BAND_LOW, bw, - reg_rule->ant_gain, reg_rule->reg_power, - flags); + ath11k_reg_update_rule(regd->reg_rules + i, + reg_rule->start_freq, + ETSI_WEATHER_RADAR_BAND_LOW, bw, + reg_rule->ant_gain, reg_rule->reg_power, + flags); - ath11k_dbg(ab, ATH11K_DBG_REG, - "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n", - i + 1, reg_rule->start_freq, ETSI_WEATHER_RADAR_BAND_LOW, - bw, reg_rule->ant_gain, reg_rule->reg_power, - regd->reg_rules[i].dfs_cac_ms, - flags); - - if (reg_rule->end_freq > ETSI_WEATHER_RADAR_BAND_HIGH) - end_freq = ETSI_WEATHER_RADAR_BAND_HIGH; - else - end_freq = reg_rule->end_freq; + ath11k_dbg(ab, ATH11K_DBG_REG, + "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n", + i + 1, reg_rule->start_freq, + ETSI_WEATHER_RADAR_BAND_LOW, bw, reg_rule->ant_gain, + reg_rule->reg_power, regd->reg_rules[i].dfs_cac_ms, + flags); + } - bw = ath11k_reg_adjust_bw(ETSI_WEATHER_RADAR_BAND_LOW, end_freq, - max_bw); + /* weather radar frequencies */ + start_freq = max_t(u32, reg_rule->start_freq, + ETSI_WEATHER_RADAR_BAND_LOW); + end_freq = min_t(u32, reg_rule->end_freq, ETSI_WEATHER_RADAR_BAND_HIGH); - i++; + bw = ath11k_reg_adjust_bw(start_freq, end_freq, max_bw); + if (bw > 0) { + i++; - ath11k_reg_update_rule(regd->reg_rules + i, - ETSI_WEATHER_RADAR_BAND_LOW, end_freq, bw, - reg_rule->ant_gain, reg_rule->reg_power, - flags); + ath11k_reg_update_rule(regd->reg_rules + i, start_freq, + end_freq, bw, reg_rule->ant_gain, + reg_rule->reg_power, flags); - regd->reg_rules[i].dfs_cac_ms = ETSI_WEATHER_RADAR_BAND_CAC_TIMEOUT; + regd->reg_rules[i].dfs_cac_ms = ETSI_WEATHER_RADAR_BAND_CAC_TIMEOUT; - ath11k_dbg(ab, ATH11K_DBG_REG, - "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n", - i + 1, ETSI_WEATHER_RADAR_BAND_LOW, end_freq, - bw, reg_rule->ant_gain, reg_rule->reg_power, - regd->reg_rules[i].dfs_cac_ms, - flags); - - if (end_freq == reg_rule->end_freq) { - regd->n_reg_rules--; - *rule_idx = i; - return; + ath11k_dbg(ab, ATH11K_DBG_REG, + "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n", + i + 1, start_freq, end_freq, bw, + reg_rule->ant_gain, reg_rule->reg_power, + regd->reg_rules[i].dfs_cac_ms, flags); } + /* frequencies above weather radar */ bw = ath11k_reg_adjust_bw(ETSI_WEATHER_RADAR_BAND_HIGH, reg_rule->end_freq, max_bw); + if (bw > 0) { + i++; - i++; - - ath11k_reg_update_rule(regd->reg_rules + i, ETSI_WEATHER_RADAR_BAND_HIGH, - reg_rule->end_freq, bw, - reg_rule->ant_gain, reg_rule->reg_power, - flags); + ath11k_reg_update_rule(regd->reg_rules + i, + ETSI_WEATHER_RADAR_BAND_HIGH, + reg_rule->end_freq, bw, + reg_rule->ant_gain, reg_rule->reg_power, + flags); - ath11k_dbg(ab, ATH11K_DBG_REG, - "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n", - i + 1, ETSI_WEATHER_RADAR_BAND_HIGH, reg_rule->end_freq, - bw, reg_rule->ant_gain, reg_rule->reg_power, - regd->reg_rules[i].dfs_cac_ms, - flags); + ath11k_dbg(ab, ATH11K_DBG_REG, + "\t%d. (%d - %d @ %d) (%d, %d) (%d ms) (FLAGS %d)\n", + i + 1, ETSI_WEATHER_RADAR_BAND_HIGH, + reg_rule->end_freq, bw, reg_rule->ant_gain, + reg_rule->reg_power, regd->reg_rules[i].dfs_cac_ms, + flags); + } *rule_idx = i; } @@ -680,7 +690,7 @@ void ath11k_regd_update_work(struct work_struct *work) regd_update_work); int ret; - ret = ath11k_regd_update(ar, false); + ret = ath11k_regd_update(ar); if (ret) { /* Firmware has already moved to the new regd. We need * to maintain channel consistency across FW, Host driver diff --git a/drivers/net/wireless/ath/ath11k/reg.h b/drivers/net/wireless/ath/ath11k/reg.h index 39b7fc9435415e4a3a07a767ab61b35692d45c6f..7dbbba9fae1d2185da841a901eb94c8a01d95a86 100644 --- a/drivers/net/wireless/ath/ath11k/reg.h +++ b/drivers/net/wireless/ath/ath11k/reg.h @@ -30,6 +30,6 @@ void ath11k_regd_update_work(struct work_struct *work); struct ieee80211_regdomain * ath11k_reg_build_regd(struct ath11k_base *ab, struct cur_regulatory_info *reg_info, bool intersect); -int ath11k_regd_update(struct ath11k *ar, bool init); +int ath11k_regd_update(struct ath11k *ar); int ath11k_reg_update_chan_list(struct ath11k *ar); #endif diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index eca86225a3413c4dd1f691e6e4633246b97ba3b7..53846dc9a5c5abe92de8f591eed7ed0a48593eb3 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -1333,6 +1333,7 @@ int ath11k_wmi_pdev_bss_chan_info_request(struct ath11k *ar, WMI_TAG_PDEV_BSS_CHAN_INFO_REQUEST) | FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE); cmd->req_type = type; + cmd->pdev_id = ar->pdev->pdev_id; ath11k_dbg(ar->ab, ATH11K_DBG_WMI, "WMI bss chan info req type %d\n", type); @@ -1664,7 +1665,8 @@ int ath11k_wmi_vdev_install_key(struct ath11k *ar, tlv = (struct wmi_tlv *)(skb->data + sizeof(*cmd)); tlv->header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ARRAY_BYTE) | FIELD_PREP(WMI_TLV_LEN, key_len_aligned); - memcpy(tlv->value, (u8 *)arg->key_data, key_len_aligned); + if (arg->key_data) + memcpy(tlv->value, (u8 *)arg->key_data, key_len_aligned); ret = ath11k_wmi_cmd_send(wmi, skb, WMI_VDEV_INSTALL_KEY_CMDID); if (ret) { @@ -2035,7 +2037,7 @@ int ath11k_wmi_send_scan_start_cmd(struct ath11k *ar, void *ptr; int i, ret, len; u32 *tmp_ptr; - u8 extraie_len_with_pad = 0; + u16 extraie_len_with_pad = 0; struct hint_short_ssid *s_ssid = NULL; struct hint_bssid *hint_bssid = NULL; @@ -2054,7 +2056,7 @@ int ath11k_wmi_send_scan_start_cmd(struct ath11k *ar, len += sizeof(*bssid) * params->num_bssid; len += TLV_HDR_SIZE; - if (params->extraie.len) + if (params->extraie.len && params->extraie.len <= 0xFFFF) extraie_len_with_pad = roundup(params->extraie.len, sizeof(u32)); len += extraie_len_with_pad; @@ -2161,7 +2163,7 @@ int ath11k_wmi_send_scan_start_cmd(struct ath11k *ar, FIELD_PREP(WMI_TLV_LEN, len); ptr += TLV_HDR_SIZE; - if (params->extraie.len) + if (extraie_len_with_pad) memcpy(ptr, params->extraie.ptr, params->extraie.len); @@ -5361,6 +5363,17 @@ static int ath11k_reg_chan_list_event(struct ath11k_base *ab, struct sk_buff *sk pdev_idx = reg_info->phy_id; + /* Avoid default reg rule updates sent during FW recovery if + * it is already available + */ + spin_lock(&ab->base_lock); + if (test_bit(ATH11K_FLAG_RECOVERY, &ab->dev_flags) && + ab->default_regd[pdev_idx]) { + spin_unlock(&ab->base_lock); + goto mem_free; + } + spin_unlock(&ab->base_lock); + if (pdev_idx >= ab->num_radios) { /* Process the event for phy0 only if single_pdev_only * is true. If pdev_idx is valid but not 0, discard the @@ -5398,10 +5411,10 @@ static int ath11k_reg_chan_list_event(struct ath11k_base *ab, struct sk_buff *sk } spin_lock(&ab->base_lock); - if (test_bit(ATH11K_FLAG_REGISTERED, &ab->dev_flags)) { - /* Once mac is registered, ar is valid and all CC events from - * fw is considered to be received due to user requests - * currently. + if (ab->default_regd[pdev_idx]) { + /* The initial rules from FW after WMI Init is to build + * the default regd. From then on, any rules updated for + * the pdev could be due to user reg changes. * Free previously built regd before assigning the newly * generated regd to ar. NULL pointer handling will be * taken care by kfree itself. @@ -5409,15 +5422,11 @@ static int ath11k_reg_chan_list_event(struct ath11k_base *ab, struct sk_buff *sk ar = ab->pdevs[pdev_idx].ar; kfree(ab->new_regd[pdev_idx]); ab->new_regd[pdev_idx] = regd; - ieee80211_queue_work(ar->hw, &ar->regd_update_work); + queue_work(ab->workqueue, &ar->regd_update_work); } else { - /* Multiple events for the same *ar is not expected. But we - * can still clear any previously stored default_regd if we - * are receiving this event for the same radio by mistake. - * NULL pointer handling will be taken care by kfree itself. + /* This regd would be applied during mac registration and is + * held constant throughout for regd intersection purpose */ - kfree(ab->default_regd[pdev_idx]); - /* This regd would be applied during mac registration */ ab->default_regd[pdev_idx] = regd; } ab->dfs_region = reg_info->dfs_region; @@ -5660,8 +5669,10 @@ static void ath11k_mgmt_rx_event(struct ath11k_base *ab, struct sk_buff *skb) if (rx_ev.status & WMI_RX_STATUS_ERR_MIC) status->flag |= RX_FLAG_MMIC_ERROR; - if (rx_ev.chan_freq >= ATH11K_MIN_6G_FREQ) { + if (rx_ev.chan_freq >= ATH11K_MIN_6G_FREQ && + rx_ev.chan_freq <= ATH11K_MAX_6G_FREQ) { status->band = NL80211_BAND_6GHZ; + status->freq = rx_ev.chan_freq; } else if (rx_ev.channel >= 1 && rx_ev.channel <= 14) { status->band = NL80211_BAND_2GHZ; } else if (rx_ev.channel >= 36 && rx_ev.channel <= ATH11K_MAX_5G_CHAN) { @@ -5682,8 +5693,10 @@ static void ath11k_mgmt_rx_event(struct ath11k_base *ab, struct sk_buff *skb) sband = &ar->mac.sbands[status->band]; - status->freq = ieee80211_channel_to_frequency(rx_ev.channel, - status->band); + if (status->band != NL80211_BAND_6GHZ) + status->freq = ieee80211_channel_to_frequency(rx_ev.channel, + status->band); + status->signal = rx_ev.snr + ATH11K_DEFAULT_NOISE_FLOOR; status->rate_idx = ath11k_mac_bitrate_to_idx(sband, rx_ev.rate / 100); @@ -5844,6 +5857,8 @@ static void ath11k_scan_event(struct ath11k_base *ab, struct sk_buff *skb) ath11k_wmi_event_scan_start_failed(ar); break; case WMI_SCAN_EVENT_DEQUEUED: + __ath11k_mac_scan_finish(ar); + break; case WMI_SCAN_EVENT_PREEMPTED: case WMI_SCAN_EVENT_RESTARTED: case WMI_SCAN_EVENT_FOREIGN_CHAN_EXIT: diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h index 5a32ba0eb4f5749b00b85e0dadccef37dd382dcd..c47adaab7918b581616125928283dc1e5c73c043 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.h +++ b/drivers/net/wireless/ath/ath11k/wmi.h @@ -2935,6 +2935,7 @@ struct wmi_pdev_bss_chan_info_req_cmd { u32 tlv_header; /* ref wmi_bss_chan_info_req_type */ u32 req_type; + u32 pdev_id; } __packed; struct wmi_ap_ps_peer_cmd { @@ -4028,7 +4029,6 @@ struct wmi_vdev_stopped_event { } __packed; struct wmi_pdev_bss_chan_info_event { - u32 pdev_id; u32 freq; /* Units in MHz */ u32 noise_floor; /* units are dBm */ /* rx clear - how often the channel was unused */ @@ -4046,6 +4046,7 @@ struct wmi_pdev_bss_chan_info_event { /*rx_cycle cnt for my bss in 64bits format */ u32 rx_bss_cycle_count_low; u32 rx_bss_cycle_count_high; + u32 pdev_id; } __packed; #define WMI_VDEV_INSTALL_KEY_COMPL_STATUS_SUCCESS 0 diff --git a/drivers/net/wireless/ath/ath5k/Kconfig b/drivers/net/wireless/ath/ath5k/Kconfig index f35cd8de228e49f4db2c3fef795b131f5c30ad1a..6914b37bb0fbcf4e35334ec5e13c4c8d3b738f88 100644 --- a/drivers/net/wireless/ath/ath5k/Kconfig +++ b/drivers/net/wireless/ath/ath5k/Kconfig @@ -3,9 +3,7 @@ config ATH5K tristate "Atheros 5xxx wireless cards support" depends on (PCI || ATH25) && MAC80211 select ATH_COMMON - select MAC80211_LEDS - select LEDS_CLASS - select NEW_LEDS + select MAC80211_LEDS if LEDS_CLASS=y || LEDS_CLASS=MAC80211 select ATH5K_AHB if ATH25 select ATH5K_PCI if !ATH25 help diff --git a/drivers/net/wireless/ath/ath5k/led.c b/drivers/net/wireless/ath/ath5k/led.c index 6a2a168567630fdbf8ea47818c3a336876b04df4..33e9928af36354dda8848a50f93ea53523a9e694 100644 --- a/drivers/net/wireless/ath/ath5k/led.c +++ b/drivers/net/wireless/ath/ath5k/led.c @@ -89,7 +89,8 @@ static const struct pci_device_id ath5k_led_devices[] = { void ath5k_led_enable(struct ath5k_hw *ah) { - if (test_bit(ATH_STAT_LEDSOFT, ah->status)) { + if (IS_ENABLED(CONFIG_MAC80211_LEDS) && + test_bit(ATH_STAT_LEDSOFT, ah->status)) { ath5k_hw_set_gpio_output(ah, ah->led_pin); ath5k_led_off(ah); } @@ -104,7 +105,8 @@ static void ath5k_led_on(struct ath5k_hw *ah) void ath5k_led_off(struct ath5k_hw *ah) { - if (!test_bit(ATH_STAT_LEDSOFT, ah->status)) + if (!IS_ENABLED(CONFIG_MAC80211_LEDS) || + !test_bit(ATH_STAT_LEDSOFT, ah->status)) return; ath5k_hw_set_gpio(ah, ah->led_pin, !ah->led_on); } @@ -146,7 +148,7 @@ ath5k_register_led(struct ath5k_hw *ah, struct ath5k_led *led, static void ath5k_unregister_led(struct ath5k_led *led) { - if (!led->ah) + if (!IS_ENABLED(CONFIG_MAC80211_LEDS) || !led->ah) return; led_classdev_unregister(&led->led_dev); ath5k_led_off(led->ah); @@ -169,7 +171,7 @@ int ath5k_init_leds(struct ath5k_hw *ah) char name[ATH5K_LED_MAX_NAME_LEN + 1]; const struct pci_device_id *match; - if (!ah->pdev) + if (!IS_ENABLED(CONFIG_MAC80211_LEDS) || !ah->pdev) return 0; #ifdef CONFIG_ATH5K_AHB diff --git a/drivers/net/wireless/ath/ath6kl/usb.c b/drivers/net/wireless/ath/ath6kl/usb.c index 5372e948e761d2b2f8f373a55a33e9c87969201f..aba70f35e574b96092f4bd068b76208b535443c5 100644 --- a/drivers/net/wireless/ath/ath6kl/usb.c +++ b/drivers/net/wireless/ath/ath6kl/usb.c @@ -340,6 +340,11 @@ static int ath6kl_usb_setup_pipe_resources(struct ath6kl_usb *ar_usb) le16_to_cpu(endpoint->wMaxPacketSize), endpoint->bInterval); } + + /* Ignore broken descriptors. */ + if (usb_endpoint_maxp(endpoint) == 0) + continue; + urbcount = 0; pipe_num = @@ -907,7 +912,7 @@ static int ath6kl_usb_submit_ctrl_in(struct ath6kl_usb *ar_usb, req, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, value, index, buf, - size, 2 * HZ); + size, 2000); if (ret < 0) { ath6kl_warn("Failed to read usb control message: %d\n", ret); diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c index 860da13bfb6ac91c55925babbd8150458d4b97a5..f06eec99de688c87e42fb8379d339b7a7aa1d4d8 100644 --- a/drivers/net/wireless/ath/ath9k/hif_usb.c +++ b/drivers/net/wireless/ath/ath9k/hif_usb.c @@ -590,6 +590,13 @@ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev, return; } + if (pkt_len > 2 * MAX_RX_BUF_SIZE) { + dev_err(&hif_dev->udev->dev, + "ath9k_htc: invalid pkt_len (%x)\n", pkt_len); + RX_STAT_INC(skb_dropped); + return; + } + pad_len = 4 - (pkt_len & 0x3); if (pad_len == 4) pad_len = 0; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 5739c1dbf1661c0493c3f4b818c1b4df00592adb..af367696fd92ff97d5748f710a666c5576d49139 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -533,8 +533,10 @@ irqreturn_t ath_isr(int irq, void *dev) ath9k_debug_sync_cause(sc, sync_cause); status &= ah->imask; /* discard unasked-for bits */ - if (test_bit(ATH_OP_HW_RESET, &common->op_flags)) + if (test_bit(ATH_OP_HW_RESET, &common->op_flags)) { + ath9k_hw_kill_interrupts(sc->sc_ah); return IRQ_HANDLED; + } /* * If there are no status bits set, then this interrupt was not diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c index 0813473793df1d1d76206221c1485f5ee7ed5b71..87369073098c878b72c2bebced33f45560a0680f 100644 --- a/drivers/net/wireless/ath/dfs_pattern_detector.c +++ b/drivers/net/wireless/ath/dfs_pattern_detector.c @@ -182,10 +182,12 @@ static void channel_detector_exit(struct dfs_pattern_detector *dpd, if (cd == NULL) return; list_del(&cd->head); - for (i = 0; i < dpd->num_radar_types; i++) { - struct pri_detector *de = cd->detectors[i]; - if (de != NULL) - de->exit(de); + if (cd->detectors) { + for (i = 0; i < dpd->num_radar_types; i++) { + struct pri_detector *de = cd->detectors[i]; + if (de != NULL) + de->exit(de); + } } kfree(cd->detectors); kfree(cd); diff --git a/drivers/net/wireless/ath/wcn36xx/dxe.c b/drivers/net/wireless/ath/wcn36xx/dxe.c index 63079231e48ec9955250f4990aab42035b957179..6c62ffc799a2b5e07d5632461888cf2887664177 100644 --- a/drivers/net/wireless/ath/wcn36xx/dxe.c +++ b/drivers/net/wireless/ath/wcn36xx/dxe.c @@ -272,6 +272,21 @@ static int wcn36xx_dxe_enable_ch_int(struct wcn36xx *wcn, u16 wcn_ch) return 0; } +static void wcn36xx_dxe_disable_ch_int(struct wcn36xx *wcn, u16 wcn_ch) +{ + int reg_data = 0; + + wcn36xx_dxe_read_register(wcn, + WCN36XX_DXE_INT_MASK_REG, + ®_data); + + reg_data &= ~wcn_ch; + + wcn36xx_dxe_write_register(wcn, + WCN36XX_DXE_INT_MASK_REG, + (int)reg_data); +} + static int wcn36xx_dxe_fill_skb(struct device *dev, struct wcn36xx_dxe_ctl *ctl, gfp_t gfp) @@ -403,8 +418,21 @@ static void reap_tx_dxes(struct wcn36xx *wcn, struct wcn36xx_dxe_ch *ch) dma_unmap_single(wcn->dev, ctl->desc->src_addr_l, ctl->skb->len, DMA_TO_DEVICE); info = IEEE80211_SKB_CB(ctl->skb); - if (!(info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS)) { - /* Keep frame until TX status comes */ + if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) { + if (info->flags & IEEE80211_TX_CTL_NO_ACK) { + info->flags |= IEEE80211_TX_STAT_NOACK_TRANSMITTED; + ieee80211_tx_status_irqsafe(wcn->hw, ctl->skb); + } else { + /* Wait for the TX ack indication or timeout... */ + spin_lock(&wcn->dxe_lock); + if (WARN_ON(wcn->tx_ack_skb)) + ieee80211_free_txskb(wcn->hw, wcn->tx_ack_skb); + wcn->tx_ack_skb = ctl->skb; /* Tracking ref */ + mod_timer(&wcn->tx_ack_timer, jiffies + HZ / 10); + spin_unlock(&wcn->dxe_lock); + } + /* do not free, ownership transferred to mac80211 status cb */ + } else { ieee80211_free_txskb(wcn->hw, ctl->skb); } @@ -426,7 +454,6 @@ static irqreturn_t wcn36xx_irq_tx_complete(int irq, void *dev) { struct wcn36xx *wcn = (struct wcn36xx *)dev; int int_src, int_reason; - bool transmitted = false; wcn36xx_dxe_read_register(wcn, WCN36XX_DXE_INT_SRC_RAW_REG, &int_src); @@ -466,7 +493,6 @@ static irqreturn_t wcn36xx_irq_tx_complete(int irq, void *dev) if (int_reason & (WCN36XX_CH_STAT_INT_DONE_MASK | WCN36XX_CH_STAT_INT_ED_MASK)) { reap_tx_dxes(wcn, &wcn->dxe_tx_h_ch); - transmitted = true; } } @@ -479,7 +505,6 @@ static irqreturn_t wcn36xx_irq_tx_complete(int irq, void *dev) WCN36XX_DXE_0_INT_CLR, WCN36XX_INT_MASK_CHAN_TX_L); - if (int_reason & WCN36XX_CH_STAT_INT_ERR_MASK ) { wcn36xx_dxe_write_register(wcn, WCN36XX_DXE_0_INT_ERR_CLR, @@ -507,25 +532,8 @@ static irqreturn_t wcn36xx_irq_tx_complete(int irq, void *dev) if (int_reason & (WCN36XX_CH_STAT_INT_DONE_MASK | WCN36XX_CH_STAT_INT_ED_MASK)) { reap_tx_dxes(wcn, &wcn->dxe_tx_l_ch); - transmitted = true; - } - } - - spin_lock(&wcn->dxe_lock); - if (wcn->tx_ack_skb && transmitted) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(wcn->tx_ack_skb); - - /* TX complete, no need to wait for 802.11 ack indication */ - if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS && - info->flags & IEEE80211_TX_CTL_NO_ACK) { - info->flags |= IEEE80211_TX_STAT_NOACK_TRANSMITTED; - del_timer(&wcn->tx_ack_timer); - ieee80211_tx_status_irqsafe(wcn->hw, wcn->tx_ack_skb); - wcn->tx_ack_skb = NULL; - ieee80211_wake_queues(wcn->hw); } } - spin_unlock(&wcn->dxe_lock); return IRQ_HANDLED; } @@ -613,6 +621,10 @@ static int wcn36xx_rx_handle_packets(struct wcn36xx *wcn, dxe = ctl->desc; while (!(READ_ONCE(dxe->ctrl) & WCN36xx_DXE_CTRL_VLD)) { + /* do not read until we own DMA descriptor */ + dma_rmb(); + + /* read/modify DMA descriptor */ skb = ctl->skb; dma_addr = dxe->dst_addr_l; ret = wcn36xx_dxe_fill_skb(wcn->dev, ctl, GFP_ATOMIC); @@ -623,9 +635,15 @@ static int wcn36xx_rx_handle_packets(struct wcn36xx *wcn, dma_unmap_single(wcn->dev, dma_addr, WCN36XX_PKT_SIZE, DMA_FROM_DEVICE); wcn36xx_rx_skb(wcn, skb); - } /* else keep old skb not submitted and use it for rx DMA */ + } + /* else keep old skb not submitted and reuse it for rx DMA + * (dropping the packet that it contained) + */ + /* flush descriptor changes before re-marking as valid */ + dma_wmb(); dxe->ctrl = ctrl; + ctl = ctl->next; dxe = ctl->desc; } @@ -866,7 +884,6 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn) WCN36XX_DXE_WQ_TX_L); wcn36xx_dxe_read_register(wcn, WCN36XX_DXE_REG_CH_EN, ®_data); - wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_L); /***************************************/ /* Init descriptors for TX HIGH channel */ @@ -890,9 +907,6 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn) wcn36xx_dxe_read_register(wcn, WCN36XX_DXE_REG_CH_EN, ®_data); - /* Enable channel interrupts */ - wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_H); - /***************************************/ /* Init descriptors for RX LOW channel */ /***************************************/ @@ -902,7 +916,6 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn) goto out_err_rxl_ch; } - /* For RX we need to preallocated buffers */ wcn36xx_dxe_ch_alloc_skb(wcn, &wcn->dxe_rx_l_ch); @@ -925,9 +938,6 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn) WCN36XX_DXE_REG_CTL_RX_L, WCN36XX_DXE_CH_DEFAULT_CTL_RX_L); - /* Enable channel interrupts */ - wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_L); - /***************************************/ /* Init descriptors for RX HIGH channel */ /***************************************/ @@ -959,15 +969,18 @@ int wcn36xx_dxe_init(struct wcn36xx *wcn) WCN36XX_DXE_REG_CTL_RX_H, WCN36XX_DXE_CH_DEFAULT_CTL_RX_H); - /* Enable channel interrupts */ - wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_H); - ret = wcn36xx_dxe_request_irqs(wcn); if (ret < 0) goto out_err_irq; timer_setup(&wcn->tx_ack_timer, wcn36xx_dxe_tx_timer, 0); + /* Enable channel interrupts */ + wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_L); + wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_H); + wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_L); + wcn36xx_dxe_enable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_H); + return 0; out_err_irq: @@ -984,6 +997,14 @@ out_err_txh_ch: void wcn36xx_dxe_deinit(struct wcn36xx *wcn) { + int reg_data = 0; + + /* Disable channel interrupts */ + wcn36xx_dxe_disable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_H); + wcn36xx_dxe_disable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_RX_L); + wcn36xx_dxe_disable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_H); + wcn36xx_dxe_disable_ch_int(wcn, WCN36XX_INT_MASK_CHAN_TX_L); + free_irq(wcn->tx_irq, wcn); free_irq(wcn->rx_irq, wcn); del_timer(&wcn->tx_ack_timer); @@ -993,6 +1014,15 @@ void wcn36xx_dxe_deinit(struct wcn36xx *wcn) wcn->tx_ack_skb = NULL; } + /* Put the DXE block into reset before freeing memory */ + reg_data = WCN36XX_DXE_REG_RESET; + wcn36xx_dxe_write_register(wcn, WCN36XX_DXE_REG_CSR_RESET, reg_data); + wcn36xx_dxe_ch_free_skbs(wcn, &wcn->dxe_rx_l_ch); wcn36xx_dxe_ch_free_skbs(wcn, &wcn->dxe_rx_h_ch); + + wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_tx_l_ch); + wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_tx_h_ch); + wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_rx_l_ch); + wcn36xx_dxe_deinit_descs(wcn->dev, &wcn->dxe_rx_h_ch); } diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c index 6bed619535427e43d84ca898790f10e46e29e8f0..9aaf6f74733336cbe842625821667068813323e9 100644 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@ -134,7 +134,9 @@ static struct ieee80211_supported_band wcn_band_2ghz = { .cap = IEEE80211_HT_CAP_GRN_FLD | IEEE80211_HT_CAP_SGI_20 | IEEE80211_HT_CAP_DSSSCCK40 | - IEEE80211_HT_CAP_LSIG_TXOP_PROT, + IEEE80211_HT_CAP_LSIG_TXOP_PROT | + IEEE80211_HT_CAP_SGI_40 | + IEEE80211_HT_CAP_SUP_WIDTH_20_40, .ht_supported = true, .ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K, .ampdu_density = IEEE80211_HT_MPDU_DENSITY_16, @@ -395,6 +397,7 @@ static void wcn36xx_change_opchannel(struct wcn36xx *wcn, int ch) static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed) { struct wcn36xx *wcn = hw->priv; + int ret; wcn36xx_dbg(WCN36XX_DBG_MAC, "mac config changed 0x%08x\n", changed); @@ -410,17 +413,31 @@ static int wcn36xx_config(struct ieee80211_hw *hw, u32 changed) * want to receive/transmit regular data packets, then * simply stop the scan session and exit PS mode. */ - wcn36xx_smd_finish_scan(wcn, HAL_SYS_MODE_SCAN, - wcn->sw_scan_vif); - wcn->sw_scan_channel = 0; + if (wcn->sw_scan_channel) + wcn36xx_smd_end_scan(wcn, wcn->sw_scan_channel); + if (wcn->sw_scan_init) { + wcn36xx_smd_finish_scan(wcn, HAL_SYS_MODE_SCAN, + wcn->sw_scan_vif); + } } else if (wcn->sw_scan) { /* A scan is ongoing, do not change the operating * channel, but start a scan session on the channel. */ - wcn36xx_smd_init_scan(wcn, HAL_SYS_MODE_SCAN, - wcn->sw_scan_vif); + if (wcn->sw_scan_channel) + wcn36xx_smd_end_scan(wcn, wcn->sw_scan_channel); + if (!wcn->sw_scan_init) { + /* This can fail if we are unable to notify the + * operating channel. + */ + ret = wcn36xx_smd_init_scan(wcn, + HAL_SYS_MODE_SCAN, + wcn->sw_scan_vif); + if (ret) { + mutex_unlock(&wcn->conf_mutex); + return -EIO; + } + } wcn36xx_smd_start_scan(wcn, ch); - wcn->sw_scan_channel = ch; } else { wcn36xx_change_opchannel(wcn, ch); } @@ -566,12 +583,14 @@ static int wcn36xx_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (IEEE80211_KEY_FLAG_PAIRWISE & key_conf->flags) { sta_priv->is_data_encrypted = true; /* Reconfigure bss with encrypt_type */ - if (NL80211_IFTYPE_STATION == vif->type) + if (NL80211_IFTYPE_STATION == vif->type) { wcn36xx_smd_config_bss(wcn, vif, sta, sta->addr, true); + wcn36xx_smd_config_sta(wcn, vif, sta); + } wcn36xx_smd_set_stakey(wcn, vif_priv->encrypt_type, @@ -601,15 +620,6 @@ static int wcn36xx_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } } } - /* FIXME: Only enable bmps support when encryption is enabled. - * For any reasons, when connected to open/no-security BSS, - * the wcn36xx controller in bmps mode does not forward - * 'wake-up' beacons despite AP sends DTIM with station AID. - * It could be due to a firmware issue or to the way driver - * configure the station. - */ - if (vif->type == NL80211_IFTYPE_STATION) - vif_priv->allow_bmps = true; break; case DISABLE_KEY: if (!(IEEE80211_KEY_FLAG_PAIRWISE & key_conf->flags)) { @@ -714,7 +724,12 @@ static void wcn36xx_sw_scan_complete(struct ieee80211_hw *hw, struct wcn36xx *wcn = hw->priv; /* ensure that any scan session is finished */ - wcn36xx_smd_finish_scan(wcn, HAL_SYS_MODE_SCAN, wcn->sw_scan_vif); + if (wcn->sw_scan_channel) + wcn36xx_smd_end_scan(wcn, wcn->sw_scan_channel); + if (wcn->sw_scan_init) { + wcn36xx_smd_finish_scan(wcn, HAL_SYS_MODE_SCAN, + wcn->sw_scan_vif); + } wcn->sw_scan = false; wcn->sw_scan_opchannel = 0; } @@ -909,7 +924,6 @@ static void wcn36xx_bss_info_changed(struct ieee80211_hw *hw, vif->addr, bss_conf->aid); vif_priv->sta_assoc = false; - vif_priv->allow_bmps = false; wcn36xx_smd_set_link_st(wcn, bss_conf->bssid, vif->addr, diff --git a/drivers/net/wireless/ath/wcn36xx/pmc.c b/drivers/net/wireless/ath/wcn36xx/pmc.c index 2d0780fefd477efe00cde748667237654821a2e2..2936aaf532738405cb57d91ff5371e0f46aa5671 100644 --- a/drivers/net/wireless/ath/wcn36xx/pmc.c +++ b/drivers/net/wireless/ath/wcn36xx/pmc.c @@ -23,10 +23,7 @@ int wcn36xx_pmc_enter_bmps_state(struct wcn36xx *wcn, { int ret = 0; struct wcn36xx_vif *vif_priv = wcn36xx_vif_to_priv(vif); - - if (!vif_priv->allow_bmps) - return -ENOTSUPP; - + /* TODO: Make sure the TX chain clean */ ret = wcn36xx_smd_enter_bmps(wcn, vif); if (!ret) { wcn36xx_dbg(WCN36XX_DBG_PMC, "Entered BMPS\n"); diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c index 766400f7b61c152d18e231ebe9a37e43a96cac16..7f00cb6f5e16b3dbb0aa281587a1d6a8756c053b 100644 --- a/drivers/net/wireless/ath/wcn36xx/smd.c +++ b/drivers/net/wireless/ath/wcn36xx/smd.c @@ -730,6 +730,7 @@ int wcn36xx_smd_init_scan(struct wcn36xx *wcn, enum wcn36xx_hal_sys_mode mode, wcn36xx_err("hal_init_scan response failed err=%d\n", ret); goto out; } + wcn->sw_scan_init = true; out: mutex_unlock(&wcn->hal_mutex); return ret; @@ -760,6 +761,7 @@ int wcn36xx_smd_start_scan(struct wcn36xx *wcn, u8 scan_channel) wcn36xx_err("hal_start_scan response failed err=%d\n", ret); goto out; } + wcn->sw_scan_channel = scan_channel; out: mutex_unlock(&wcn->hal_mutex); return ret; @@ -790,6 +792,7 @@ int wcn36xx_smd_end_scan(struct wcn36xx *wcn, u8 scan_channel) wcn36xx_err("hal_end_scan response failed err=%d\n", ret); goto out; } + wcn->sw_scan_channel = 0; out: mutex_unlock(&wcn->hal_mutex); return ret; @@ -831,6 +834,7 @@ int wcn36xx_smd_finish_scan(struct wcn36xx *wcn, wcn36xx_err("hal_finish_scan response failed err=%d\n", ret); goto out; } + wcn->sw_scan_init = false; out: mutex_unlock(&wcn->hal_mutex); return ret; @@ -2603,7 +2607,7 @@ static int wcn36xx_smd_missed_beacon_ind(struct wcn36xx *wcn, wcn36xx_dbg(WCN36XX_DBG_HAL, "beacon missed bss_index %d\n", tmp->bss_index); vif = wcn36xx_priv_to_vif(tmp); - ieee80211_connection_loss(vif); + ieee80211_beacon_loss(vif); } return 0; } @@ -2618,7 +2622,7 @@ static int wcn36xx_smd_missed_beacon_ind(struct wcn36xx *wcn, wcn36xx_dbg(WCN36XX_DBG_HAL, "beacon missed bss_index %d\n", rsp->bss_index); vif = wcn36xx_priv_to_vif(tmp); - ieee80211_connection_loss(vif); + ieee80211_beacon_loss(vif); return 0; } } @@ -2632,30 +2636,52 @@ static int wcn36xx_smd_delete_sta_context_ind(struct wcn36xx *wcn, size_t len) { struct wcn36xx_hal_delete_sta_context_ind_msg *rsp = buf; - struct wcn36xx_vif *tmp; + struct wcn36xx_vif *vif_priv; + struct ieee80211_vif *vif; + struct ieee80211_bss_conf *bss_conf; struct ieee80211_sta *sta; + bool found = false; if (len != sizeof(*rsp)) { wcn36xx_warn("Corrupted delete sta indication\n"); return -EIO; } - wcn36xx_dbg(WCN36XX_DBG_HAL, "delete station indication %pM index %d\n", - rsp->addr2, rsp->sta_id); + wcn36xx_dbg(WCN36XX_DBG_HAL, + "delete station indication %pM index %d reason %d\n", + rsp->addr2, rsp->sta_id, rsp->reason_code); - list_for_each_entry(tmp, &wcn->vif_list, list) { + list_for_each_entry(vif_priv, &wcn->vif_list, list) { rcu_read_lock(); - sta = ieee80211_find_sta(wcn36xx_priv_to_vif(tmp), rsp->addr2); - if (sta) - ieee80211_report_low_ack(sta, 0); + vif = wcn36xx_priv_to_vif(vif_priv); + + if (vif->type == NL80211_IFTYPE_STATION) { + /* We could call ieee80211_find_sta too, but checking + * bss_conf is clearer. + */ + bss_conf = &vif->bss_conf; + if (vif_priv->sta_assoc && + !memcmp(bss_conf->bssid, rsp->addr2, ETH_ALEN)) { + found = true; + wcn36xx_dbg(WCN36XX_DBG_HAL, + "connection loss bss_index %d\n", + vif_priv->bss_index); + ieee80211_connection_loss(vif); + } + } else { + sta = ieee80211_find_sta(vif, rsp->addr2); + if (sta) { + found = true; + ieee80211_report_low_ack(sta, 0); + } + } + rcu_read_unlock(); - if (sta) + if (found) return 0; } - wcn36xx_warn("STA with addr %pM and index %d not found\n", - rsp->addr2, - rsp->sta_id); + wcn36xx_warn("BSS or STA with addr %pM not found\n", rsp->addr2); return -ENOENT; } diff --git a/drivers/net/wireless/ath/wcn36xx/txrx.c b/drivers/net/wireless/ath/wcn36xx/txrx.c index cab196bb38cd4c92f6cf2c40d9df180e13789ca3..f33e7228a1010f4e81b3211f7b33fd319960718f 100644 --- a/drivers/net/wireless/ath/wcn36xx/txrx.c +++ b/drivers/net/wireless/ath/wcn36xx/txrx.c @@ -31,6 +31,13 @@ struct wcn36xx_rate { enum rate_info_bw bw; }; +/* Buffer descriptor rx_ch field is limited to 5-bit (4+1), a mapping is used + * for 11A Channels. + */ +static const u8 ab_rx_ch_map[] = { 36, 40, 44, 48, 52, 56, 60, 64, 100, 104, + 108, 112, 116, 120, 124, 128, 132, 136, 140, + 149, 153, 157, 161, 165, 144 }; + static const struct wcn36xx_rate wcn36xx_rate_table[] = { /* 11b rates */ { 10, 0, RX_ENC_LEGACY, 0, RATE_INFO_BW_20 }, @@ -230,7 +237,6 @@ int wcn36xx_rx_skb(struct wcn36xx *wcn, struct sk_buff *skb) const struct wcn36xx_rate *rate; struct ieee80211_hdr *hdr; struct wcn36xx_rx_bd *bd; - struct ieee80211_supported_band *sband; u16 fc, sn; /* @@ -252,8 +258,6 @@ int wcn36xx_rx_skb(struct wcn36xx *wcn, struct sk_buff *skb) fc = __le16_to_cpu(hdr->frame_control); sn = IEEE80211_SEQ_TO_SN(__le16_to_cpu(hdr->seq_ctrl)); - status.freq = WCN36XX_CENTER_FREQ(wcn); - status.band = WCN36XX_BAND(wcn); status.mactime = 10; status.signal = -get_rssi0(bd); status.antenna = 1; @@ -265,18 +269,36 @@ int wcn36xx_rx_skb(struct wcn36xx *wcn, struct sk_buff *skb) wcn36xx_dbg(WCN36XX_DBG_RX, "status.flags=%x\n", status.flag); + if (bd->scan_learn) { + /* If packet originate from hardware scanning, extract the + * band/channel from bd descriptor. + */ + u8 hwch = (bd->reserved0 << 4) + bd->rx_ch; + + if (bd->rf_band != 1 && hwch <= sizeof(ab_rx_ch_map) && hwch >= 1) { + status.band = NL80211_BAND_5GHZ; + status.freq = ieee80211_channel_to_frequency(ab_rx_ch_map[hwch - 1], + status.band); + } else { + status.band = NL80211_BAND_2GHZ; + status.freq = ieee80211_channel_to_frequency(hwch, status.band); + } + } else { + status.band = WCN36XX_BAND(wcn); + status.freq = WCN36XX_CENTER_FREQ(wcn); + } + if (bd->rate_id < ARRAY_SIZE(wcn36xx_rate_table)) { rate = &wcn36xx_rate_table[bd->rate_id]; status.encoding = rate->encoding; status.enc_flags = rate->encoding_flags; status.bw = rate->bw; status.rate_idx = rate->mcs_or_legacy_index; - sband = wcn->hw->wiphy->bands[status.band]; status.nss = 1; if (status.band == NL80211_BAND_5GHZ && status.encoding == RX_ENC_LEGACY && - status.rate_idx >= sband->n_bitrates) { + status.rate_idx >= 4) { /* no dsss rates in 5Ghz rates table */ status.rate_idx -= 4; } @@ -321,8 +343,6 @@ static void wcn36xx_set_tx_pdu(struct wcn36xx_tx_bd *bd, bd->pdu.mpdu_header_off; bd->pdu.mpdu_len = len; bd->pdu.tid = tid; - /* Use seq number generated by mac80211 */ - bd->pdu.bd_ssn = WCN36XX_TXBD_SSN_FILL_HOST; } static inline struct wcn36xx_vif *get_vif_by_addr(struct wcn36xx *wcn, @@ -419,6 +439,9 @@ static void wcn36xx_set_tx_data(struct wcn36xx_tx_bd *bd, tid = ieee80211_get_tid(hdr); /* TID->QID is one-to-one mapping */ bd->queue_id = tid; + bd->pdu.bd_ssn = WCN36XX_TXBD_SSN_FILL_DPU_QOS; + } else { + bd->pdu.bd_ssn = WCN36XX_TXBD_SSN_FILL_DPU_NON_QOS; } if (info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT || @@ -429,6 +452,9 @@ static void wcn36xx_set_tx_data(struct wcn36xx_tx_bd *bd, if (ieee80211_is_any_nullfunc(hdr->frame_control)) { /* Don't use a regular queue for null packet (no ampdu) */ bd->queue_id = WCN36XX_TX_U_WQ_ID; + bd->bd_rate = WCN36XX_BD_RATE_CTRL; + if (ieee80211_is_qos_nullfunc(hdr->frame_control)) + bd->pdu.bd_ssn = WCN36XX_TXBD_SSN_FILL_HOST; } if (bcast) { @@ -488,6 +514,8 @@ static void wcn36xx_set_tx_mgmt(struct wcn36xx_tx_bd *bd, bd->queue_id = WCN36XX_TX_U_WQ_ID; *vif_priv = __vif_priv; + bd->pdu.bd_ssn = WCN36XX_TXBD_SSN_FILL_DPU_NON_QOS; + wcn36xx_set_tx_pdu(bd, ieee80211_is_data_qos(hdr->frame_control) ? sizeof(struct ieee80211_qos_hdr) : @@ -502,10 +530,11 @@ int wcn36xx_start_tx(struct wcn36xx *wcn, struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; struct wcn36xx_vif *vif_priv = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - unsigned long flags; bool is_low = ieee80211_is_data(hdr->frame_control); bool bcast = is_broadcast_ether_addr(hdr->addr1) || is_multicast_ether_addr(hdr->addr1); + bool ack_ind = (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) && + !(info->flags & IEEE80211_TX_CTL_NO_ACK); struct wcn36xx_tx_bd bd; int ret; @@ -521,30 +550,16 @@ int wcn36xx_start_tx(struct wcn36xx *wcn, bd.dpu_rf = WCN36XX_BMU_WQ_TX; - if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) { + if (unlikely(ack_ind)) { wcn36xx_dbg(WCN36XX_DBG_DXE, "TX_ACK status requested\n"); - spin_lock_irqsave(&wcn->dxe_lock, flags); - if (wcn->tx_ack_skb) { - spin_unlock_irqrestore(&wcn->dxe_lock, flags); - wcn36xx_warn("tx_ack_skb already set\n"); - return -EINVAL; - } - - wcn->tx_ack_skb = skb; - spin_unlock_irqrestore(&wcn->dxe_lock, flags); - /* Only one at a time is supported by fw. Stop the TX queues * until the ack status gets back. */ ieee80211_stop_queues(wcn->hw); - /* TX watchdog if no TX irq or ack indication received */ - mod_timer(&wcn->tx_ack_timer, jiffies + HZ / 10); - /* Request ack indication from the firmware */ - if (!(info->flags & IEEE80211_TX_CTL_NO_ACK)) - bd.tx_comp = 1; + bd.tx_comp = 1; } /* Data frames served first*/ @@ -558,14 +573,8 @@ int wcn36xx_start_tx(struct wcn36xx *wcn, bd.tx_bd_sign = 0xbdbdbdbd; ret = wcn36xx_dxe_tx_frame(wcn, vif_priv, &bd, skb, is_low); - if (ret && (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS)) { - /* If the skb has not been transmitted, - * don't keep a reference to it. - */ - spin_lock_irqsave(&wcn->dxe_lock, flags); - wcn->tx_ack_skb = NULL; - spin_unlock_irqrestore(&wcn->dxe_lock, flags); - + if (unlikely(ret && ack_ind)) { + /* If the skb has not been transmitted, resume TX queue */ ieee80211_wake_queues(wcn->hw); } diff --git a/drivers/net/wireless/ath/wcn36xx/txrx.h b/drivers/net/wireless/ath/wcn36xx/txrx.h index 032216e82b2bee6067ff0d9d72c8f115650eec93..b54311ffde9c533b1c0ee12f5ebaa2813bc99e82 100644 --- a/drivers/net/wireless/ath/wcn36xx/txrx.h +++ b/drivers/net/wireless/ath/wcn36xx/txrx.h @@ -110,7 +110,8 @@ struct wcn36xx_rx_bd { /* 0x44 */ u32 exp_seq_num:12; u32 cur_seq_num:12; - u32 fr_type_subtype:8; + u32 rf_band:2; + u32 fr_type_subtype:6; /* 0x48 */ u32 msdu_size:16; diff --git a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h index d0fcce86903aeb2a373557f7390330be9643d3cc..5c40d0bdee2451245f0442da5077c5eed34699cc 100644 --- a/drivers/net/wireless/ath/wcn36xx/wcn36xx.h +++ b/drivers/net/wireless/ath/wcn36xx/wcn36xx.h @@ -127,7 +127,6 @@ struct wcn36xx_vif { enum wcn36xx_hal_bss_type bss_type; /* Power management */ - bool allow_bmps; enum wcn36xx_power_state pw_state; u8 bss_index; @@ -232,6 +231,7 @@ struct wcn36xx { struct cfg80211_scan_request *scan_req; bool sw_scan; u8 sw_scan_opchannel; + bool sw_scan_init; u8 sw_scan_channel; struct ieee80211_vif *sw_scan_vif; struct mutex scan_lock; diff --git a/drivers/net/wireless/broadcom/b43/phy_g.c b/drivers/net/wireless/broadcom/b43/phy_g.c index d5a1a5c582366e30e4d2946df3b9dc1e800fe5ce..ac72ca39e409b29eee5250256b803a1ab313617e 100644 --- a/drivers/net/wireless/broadcom/b43/phy_g.c +++ b/drivers/net/wireless/broadcom/b43/phy_g.c @@ -2297,7 +2297,7 @@ static u8 b43_gphy_aci_scan(struct b43_wldev *dev) b43_phy_mask(dev, B43_PHY_G_CRS, 0x7FFF); b43_set_all_gains(dev, 3, 8, 1); - start = (channel - 5 > 0) ? channel - 5 : 1; + start = (channel > 5) ? channel - 5 : 1; end = (channel + 5 < 14) ? channel + 5 : 13; for (i = start; i <= end; i++) { diff --git a/drivers/net/wireless/broadcom/b43legacy/radio.c b/drivers/net/wireless/broadcom/b43legacy/radio.c index 06891b4f837b9285377c1229f2cc8e56c0ec14dd..fdf78c10a05c2cad59940ad1e3b4f0cdc3cacdbc 100644 --- a/drivers/net/wireless/broadcom/b43legacy/radio.c +++ b/drivers/net/wireless/broadcom/b43legacy/radio.c @@ -283,7 +283,7 @@ u8 b43legacy_radio_aci_scan(struct b43legacy_wldev *dev) & 0x7FFF); b43legacy_set_all_gains(dev, 3, 8, 1); - start = (channel - 5 > 0) ? channel - 5 : 1; + start = (channel > 5) ? channel - 5 : 1; end = (channel + 5 < 14) ? channel + 5 : 13; for (i = start; i <= end; i++) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c index 6d5188b78f2de06b46eb0170fe8c2abc34adbb68..0af452dca7664e5d9c14ede702278e8bace35fd2 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c @@ -75,6 +75,16 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&acepc_t8_data, }, + { + /* Cyberbook T116 rugged tablet */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "20170531"), + }, + /* The factory image nvram file is identical to the ACEPC T8 one */ + .driver_data = (void *)&acepc_t8_data, + }, { /* Match for the GPDwin which unfortunately uses somewhat * generic dmi strings, which is why we test for 4 strings. diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 9dcd2e990c9c17d8ad4fbbcd48b8e44eeb21ff78..ab84ac3f8f03f382a3fa60edf344e4a6d1a60891 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -185,6 +185,9 @@ static void iwl_dealloc_ucode(struct iwl_drv *drv) for (i = 0; i < IWL_UCODE_TYPE_MAX; i++) iwl_free_fw_img(drv, drv->fw.img + i); + + /* clear the data for the aborted load case */ + memset(&drv->fw, 0, sizeof(drv->fw)); } static int iwl_alloc_fw_desc(struct iwl_drv *drv, struct fw_desc *desc, @@ -1303,23 +1306,31 @@ _iwl_op_mode_start(struct iwl_drv *drv, struct iwlwifi_opmode_table *op) const struct iwl_op_mode_ops *ops = op->ops; struct dentry *dbgfs_dir = NULL; struct iwl_op_mode *op_mode = NULL; + int retry, max_retry = !!iwlwifi_mod_params.fw_restart * IWL_MAX_INIT_RETRY; + + for (retry = 0; retry <= max_retry; retry++) { #ifdef CONFIG_IWLWIFI_DEBUGFS - drv->dbgfs_op_mode = debugfs_create_dir(op->name, - drv->dbgfs_drv); - dbgfs_dir = drv->dbgfs_op_mode; + drv->dbgfs_op_mode = debugfs_create_dir(op->name, + drv->dbgfs_drv); + dbgfs_dir = drv->dbgfs_op_mode; #endif - op_mode = ops->start(drv->trans, drv->trans->cfg, &drv->fw, dbgfs_dir); + op_mode = ops->start(drv->trans, drv->trans->cfg, + &drv->fw, dbgfs_dir); + + if (op_mode) + return op_mode; + + IWL_ERR(drv, "retry init count %d\n", retry); #ifdef CONFIG_IWLWIFI_DEBUGFS - if (!op_mode) { debugfs_remove_recursive(drv->dbgfs_op_mode); drv->dbgfs_op_mode = NULL; - } #endif + } - return op_mode; + return NULL; } static void _iwl_op_mode_stop(struct iwl_drv *drv) @@ -1357,6 +1368,7 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) int i; bool load_module = false; bool usniffer_images = false; + bool failure = true; fw->ucode_capa.max_probe_length = IWL_DEFAULT_MAX_PROBE_LENGTH; fw->ucode_capa.standard_phy_calibration_size = @@ -1617,15 +1629,9 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) * else from proceeding if the module fails to load * or hangs loading. */ - if (load_module) { + if (load_module) request_module("%s", op->name); -#ifdef CONFIG_IWLWIFI_OPMODE_MODULAR - if (err) - IWL_ERR(drv, - "failed to load module %s (error %d), is dynamic loading enabled?\n", - op->name, err); -#endif - } + failure = false; goto free; try_again: @@ -1640,7 +1646,12 @@ static void iwl_req_fw_callback(const struct firmware *ucode_raw, void *context) out_unbind: complete(&drv->request_firmware_complete); device_release_driver(drv->trans->dev); + /* drv has just been freed by the release */ + failure = false; free: + if (failure) + iwl_dealloc_ucode(drv); + if (pieces) { for (i = 0; i < ARRAY_SIZE(pieces->img); i++) kfree(pieces->img[i].sec); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h index 8938a64679963cf14c34cf1acc73e33bc556146b..a6e9bc56f7ddd7129387ab349c95f2c055f7594c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h @@ -144,4 +144,7 @@ void iwl_drv_stop(struct iwl_drv *drv); #define IWL_EXPORT_SYMBOL(sym) #endif +/* max retry for init flow */ +#define IWL_MAX_INIT_RETRY 2 + #endif /* __iwl_drv_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index cbde21e772b17a328e3b170d20364862cbf8ceb4..b862cfbcd6e79ccf1c0e0e1992521c836135b950 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -587,8 +587,7 @@ static struct ieee80211_sband_iftype_data iwl_he_capa[] = { .has_he = true, .he_cap_elem = { .mac_cap_info[0] = - IEEE80211_HE_MAC_CAP0_HTC_HE | - IEEE80211_HE_MAC_CAP0_TWT_REQ, + IEEE80211_HE_MAC_CAP0_HTC_HE, .mac_cap_info[1] = IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US | IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index a0ce761d0c59b423ec150878615aa79a81eadfa4..b1335fe3b01a29ced68eba4c6f9ca4522ce8c180 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -967,7 +967,7 @@ static void iwl_mvm_ftm_rtt_smoothing(struct iwl_mvm *mvm, overshoot = IWL_MVM_FTM_INITIATOR_SMOOTH_OVERSHOOT; alpha = IWL_MVM_FTM_INITIATOR_SMOOTH_ALPHA; - rtt_avg = (alpha * rtt + (100 - alpha) * resp->rtt_avg) / 100; + rtt_avg = div_s64(alpha * rtt + (100 - alpha) * resp->rtt_avg, 100); IWL_DEBUG_INFO(mvm, "%pM: prev rtt_avg=%lld, new rtt_avg=%lld, rtt=%lld\n", diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 6f301ac8cce209310563da7f7ecbfcbe36114512..d2c6fdb7027320ac7bbeff2a638ad62d735d47f3 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -71,6 +71,7 @@ #include #include +#include "iwl-drv.h" #include "iwl-op-mode.h" #include "iwl-io.h" #include "mvm.h" @@ -349,7 +350,6 @@ static const u8 he_if_types_ext_capa_sta[] = { [0] = WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING, [2] = WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT, [7] = WLAN_EXT_CAPA8_OPMODE_NOTIF, - [9] = WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT, }; static const struct wiphy_iftype_ext_capab he_iftypes_ext_capa[] = { @@ -1163,9 +1163,30 @@ static int iwl_mvm_mac_start(struct ieee80211_hw *hw) { struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw); int ret; + int retry, max_retry = 0; mutex_lock(&mvm->mutex); - ret = __iwl_mvm_mac_start(mvm); + + /* we are starting the mac not in error flow, and restart is enabled */ + if (!test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status) && + iwlwifi_mod_params.fw_restart) { + max_retry = IWL_MAX_INIT_RETRY; + /* + * This will prevent mac80211 recovery flows to trigger during + * init failures + */ + set_bit(IWL_MVM_STATUS_STARTING, &mvm->status); + } + + for (retry = 0; retry <= max_retry; retry++) { + ret = __iwl_mvm_mac_start(mvm); + if (!ret) + break; + + IWL_ERR(mvm, "mac start retry %d\n", retry); + } + clear_bit(IWL_MVM_STATUS_STARTING, &mvm->status); + mutex_unlock(&mvm->mutex); return ret; @@ -1717,6 +1738,7 @@ static void iwl_mvm_recalc_multicast(struct iwl_mvm *mvm) struct iwl_mvm_mc_iter_data iter_data = { .mvm = mvm, }; + int ret; lockdep_assert_held(&mvm->mutex); @@ -1726,6 +1748,22 @@ static void iwl_mvm_recalc_multicast(struct iwl_mvm *mvm) ieee80211_iterate_active_interfaces_atomic( mvm->hw, IEEE80211_IFACE_ITER_NORMAL, iwl_mvm_mc_iface_iterator, &iter_data); + + /* + * Send a (synchronous) ech command so that we wait for the + * multiple asynchronous MCAST_FILTER_CMD commands sent by + * the interface iterator. Otherwise, we might get here over + * and over again (by userspace just sending a lot of these) + * and the CPU can send them faster than the firmware can + * process them. + * Note that the CPU is still faster - but with this we'll + * actually send fewer commands overall because the CPU will + * not schedule the work in mac80211 as frequently if it's + * still running when rescheduled (possibly multiple times). + */ + ret = iwl_mvm_send_cmd_pdu(mvm, ECHO_CMD, 0, 0, NULL); + if (ret) + IWL_ERR(mvm, "Failed to synchronize multicast groups update\n"); } static u64 iwl_mvm_prepare_multicast(struct ieee80211_hw *hw, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 7159d1da3e7726fad76ff27ff6d2cab32fc44f9b..64f5a4cb3d3ac83ae433d95b3b3a684d29808bfe 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -1162,6 +1162,8 @@ struct iwl_mvm { * @IWL_MVM_STATUS_FIRMWARE_RUNNING: firmware is running * @IWL_MVM_STATUS_NEED_FLUSH_P2P: need to flush P2P bcast STA * @IWL_MVM_STATUS_IN_D3: in D3 (or at least about to go into it) + * @IWL_MVM_STATUS_STARTING: starting mac, + * used to disable restart flow while in STARTING state */ enum iwl_mvm_status { IWL_MVM_STATUS_HW_RFKILL, @@ -1173,6 +1175,7 @@ enum iwl_mvm_status { IWL_MVM_STATUS_FIRMWARE_RUNNING, IWL_MVM_STATUS_NEED_FLUSH_P2P, IWL_MVM_STATUS_IN_D3, + IWL_MVM_STATUS_STARTING, }; /* Keep track of completed init configuration */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 0be8ff30b13e620356380c2664619df4942cae25..7c61d179895b34e1c93337d12ef8ef881cc13c0b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -1295,6 +1295,9 @@ void iwl_mvm_nic_restart(struct iwl_mvm *mvm, bool fw_error) */ if (!mvm->fw_restart && fw_error) { iwl_fw_error_collect(&mvm->fwrt); + } else if (test_bit(IWL_MVM_STATUS_STARTING, + &mvm->status)) { + IWL_ERR(mvm, "Starting mac, retry will be triggered anyway\n"); } else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { struct iwl_mvm_reprobe *reprobe; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 838734fec50234ed253e752e86b9b39ea82c6804..86b3fb321dfdd82b0871bfb7920eac724c78e09e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -177,12 +177,39 @@ static int iwl_mvm_create_skb(struct iwl_mvm *mvm, struct sk_buff *skb, struct iwl_rx_mpdu_desc *desc = (void *)pkt->data; unsigned int headlen, fraglen, pad_len = 0; unsigned int hdrlen = ieee80211_hdrlen(hdr->frame_control); + u8 mic_crc_len = u8_get_bits(desc->mac_flags1, + IWL_RX_MPDU_MFLG1_MIC_CRC_LEN_MASK) << 1; if (desc->mac_flags2 & IWL_RX_MPDU_MFLG2_PAD) { len -= 2; pad_len = 2; } + /* + * For non monitor interface strip the bytes the RADA might not have + * removed. As monitor interface cannot exist with other interfaces + * this removal is safe. + */ + if (mic_crc_len && !ieee80211_hw_check(mvm->hw, RX_INCLUDES_FCS)) { + u32 pkt_flags = le32_to_cpu(pkt->len_n_flags); + + /* + * If RADA was not enabled then decryption was not performed so + * the MIC cannot be removed. + */ + if (!(pkt_flags & FH_RSCSR_RADA_EN)) { + if (WARN_ON(crypt_len > mic_crc_len)) + return -EINVAL; + + mic_crc_len -= crypt_len; + } + + if (WARN_ON(mic_crc_len > len)) + return -EINVAL; + + len -= mic_crc_len; + } + /* If frame is small enough to fit in skb->head, pull it completely. * If not, only pull ieee80211_hdr (including crypto if present, and * an additional 8 bytes for SNAP/ethertype, see below) so that diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index a5d90e028833c7e420a2c85e2ffddce7980b903a..46255d2c555b6d97dbcd3b5213af86c63234c364 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -2157,7 +2157,7 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) return -EIO; } -#define SCAN_TIMEOUT 20000 +#define SCAN_TIMEOUT 30000 void iwl_mvm_scan_timeout_wk(struct work_struct *work) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 394598b14a17335ca0cae9c1057edb8cdde73cc0..3f081cdea09caf0012fbecdd460e3be5f8121f38 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -98,14 +98,13 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk) struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, roc_done_wk); /* - * Clear the ROC_RUNNING /ROC_AUX_RUNNING status bit. + * Clear the ROC_RUNNING status bit. * This will cause the TX path to drop offchannel transmissions. * That would also be done by mac80211, but it is racy, in particular * in the case that the time event actually completed in the firmware * (which is handled in iwl_mvm_te_handle_notif). */ clear_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status); - clear_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status); synchronize_net(); @@ -131,9 +130,19 @@ void iwl_mvm_roc_done_wk(struct work_struct *wk) mvmvif = iwl_mvm_vif_from_mac80211(mvm->p2p_device_vif); iwl_mvm_flush_sta(mvm, &mvmvif->bcast_sta, true); } - } else { + } + + /* + * Clear the ROC_AUX_RUNNING status bit. + * This will cause the TX path to drop offchannel transmissions. + * That would also be done by mac80211, but it is racy, in particular + * in the case that the time event actually completed in the firmware + * (which is handled in iwl_mvm_te_handle_notif). + */ + if (test_and_clear_bit(IWL_MVM_STATUS_ROC_AUX_RUNNING, &mvm->status)) { /* do the same in case of hot spot 2.0 */ iwl_mvm_flush_sta(mvm, &mvm->aux_sta, true); + /* In newer version of this command an aux station is added only * in cases of dedicated tx queue and need to be removed in end * of use */ @@ -1157,15 +1166,10 @@ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm, cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id, mvmvif->color)), .action = cpu_to_le32(FW_CTXT_ACTION_ADD), + .conf_id = cpu_to_le32(SESSION_PROTECT_CONF_ASSOC), .duration_tu = cpu_to_le32(MSEC_TO_TU(duration)), }; - /* The time_event_data.id field is reused to save session - * protection's configuration. - */ - mvmvif->time_event_data.id = SESSION_PROTECT_CONF_ASSOC; - cmd.conf_id = cpu_to_le32(mvmvif->time_event_data.id); - lockdep_assert_held(&mvm->mutex); spin_lock_bh(&mvm->time_event_lock); @@ -1179,6 +1183,11 @@ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm, } iwl_mvm_te_clear_data(mvm, te_data); + /* + * The time_event_data.id field is reused to save session + * protection's configuration. + */ + te_data->id = le32_to_cpu(cmd.conf_id); te_data->duration = le32_to_cpu(cmd.duration_tu); spin_unlock_bh(&mvm->time_event_lock); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 3123036978a59be0e6f8cb9b0c0c2b6cee213c54..caf38ef64d3ce69872ec7a685b61a8e63f15abec 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -741,6 +741,9 @@ bool iwl_mvm_rx_diversity_allowed(struct iwl_mvm *mvm) lockdep_assert_held(&mvm->mutex); + if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM) + return false; + if (num_of_ant(iwl_mvm_get_valid_rx_ant(mvm)) == 1) return false; diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 90b12e201795cc0c1d03982b1316b4045832748f..4e43efd5d1ea15d13edfbbafe562af5269e9edba 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -635,6 +635,8 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_DEV_INFO(0x43F0, 0x0074, iwl_ax201_cfg_qu_hr, NULL), IWL_DEV_INFO(0x43F0, 0x0078, iwl_ax201_cfg_qu_hr, NULL), IWL_DEV_INFO(0x43F0, 0x007C, iwl_ax201_cfg_qu_hr, NULL), + IWL_DEV_INFO(0x43F0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0, iwl_ax201_killer_1650s_name), + IWL_DEV_INFO(0x43F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0, iwl_ax201_killer_1650i_name), IWL_DEV_INFO(0x43F0, 0x2074, iwl_ax201_cfg_qu_hr, NULL), IWL_DEV_INFO(0x43F0, 0x4070, iwl_ax201_cfg_qu_hr, NULL), IWL_DEV_INFO(0xA0F0, 0x0070, iwl_ax201_cfg_qu_hr, NULL), diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 2c13fa8f282009a74ee5ec9d502fc8d4f5ab5396..6aedf5762571df54ab615a66ce944e1efc580aca 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -2260,7 +2260,12 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id) } } - if (inta_hw & MSIX_HW_INT_CAUSES_REG_WAKEUP) { + /* + * In some rare cases when the HW is in a bad state, we may + * get this interrupt too early, when prph_info is still NULL. + * So make sure that it's not NULL to prevent crashing. + */ + if (inta_hw & MSIX_HW_INT_CAUSES_REG_WAKEUP && trans_pcie->prph_info) { u32 sleep_notif = le32_to_cpu(trans_pcie->prph_info->sleep_notif); if (sleep_notif == IWL_D3_SLEEP_STATUS_SUSPEND || diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c index b031e9304983c40ed32abf549576923e99150304..b2991582189c20f1b1c5c60e072d3561d40ef2ae 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c @@ -320,8 +320,7 @@ int iwl_trans_pcie_gen2_start_fw(struct iwl_trans *trans, /* This may fail if AMT took ownership of the device */ if (iwl_pcie_prepare_card_hw(trans)) { IWL_WARN(trans, "Exit HW not ready\n"); - ret = -EIO; - goto out; + return -EIO; } iwl_enable_rfkill_int(trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 082768ec8aa808b46c41e5df5a8bdd8c6f77cd00..daec61a60fec5f39f5fc4d375568058f7e315593 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -1313,8 +1313,7 @@ static int iwl_trans_pcie_start_fw(struct iwl_trans *trans, /* This may fail if AMT took ownership of the device */ if (iwl_pcie_prepare_card_hw(trans)) { IWL_WARN(trans, "Exit HW not ready\n"); - ret = -EIO; - goto out; + return -EIO; } iwl_enable_rfkill_int(trans); diff --git a/drivers/net/wireless/intel/iwlwifi/queue/tx.c b/drivers/net/wireless/intel/iwlwifi/queue/tx.c index 9181221a2434d89f318d58b87d9c4dc011dca146..0136df00ff6a65454e0b4aecb9f7575c569780a4 100644 --- a/drivers/net/wireless/intel/iwlwifi/queue/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/queue/tx.c @@ -1148,6 +1148,7 @@ int iwl_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num, return 0; err_free_tfds: dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr); + txq->tfds = NULL; error: if (txq->entries && cmd_queue) for (i = 0; i < slots_num; i++) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 79ee470fe566adf5b7daeefd7322b630c5c13b6f..6911e588e578703b392bc4ecc9dc5acd7718b0e2 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1799,8 +1799,8 @@ mac80211_hwsim_beacon(struct hrtimer *timer) bcn_int -= data->bcn_delta; data->bcn_delta = 0; } - hrtimer_forward(&data->beacon_timer, hrtimer_get_expires(timer), - ns_to_ktime(bcn_int * NSEC_PER_USEC)); + hrtimer_forward_now(&data->beacon_timer, + ns_to_ktime(bcn_int * NSEC_PER_USEC)); return HRTIMER_RESTART; } @@ -2268,6 +2268,15 @@ static void hw_scan_work(struct work_struct *work) if (req->ie_len) skb_put_data(probe, req->ie, req->ie_len); + if (!ieee80211_tx_prepare_skb(hwsim->hw, + hwsim->hw_scan_vif, + probe, + hwsim->tmp_chan->band, + NULL)) { + kfree_skb(probe); + continue; + } + local_bh_disable(); mac80211_hwsim_tx_frame(hwsim->hw, probe, hwsim->tmp_chan); @@ -3573,6 +3582,10 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, } txi->flags |= IEEE80211_TX_STAT_ACK; } + + if (hwsim_flags & HWSIM_TX_CTL_NO_ACK) + txi->flags |= IEEE80211_TX_STAT_NOACK_TRANSMITTED; + ieee80211_tx_status_irqsafe(data2->hw, skb); return 0; out: diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 20436a289d5cd926bd2aed5cef99176bf7e4873c..5d6dc1dd050d4a9b3b02d42f1b7f64e9c22c13db 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -292,6 +292,7 @@ err_add_card: if_usb_reset_device(cardp); dealloc: if_usb_free(cardp); + kfree(cardp); error: return r; @@ -316,6 +317,7 @@ static void if_usb_disconnect(struct usb_interface *intf) /* Unlink and free urb */ if_usb_free(cardp); + kfree(cardp); usb_set_intfdata(intf, NULL); usb_put_dev(interface_to_usbdev(intf)); diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c index a92916dc81a96c8a4139048010cf7479eaf4a403..ecce8b56f8a2835c3e96e79ccefb42b1fa550d84 100644 --- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c +++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c @@ -230,6 +230,7 @@ static int if_usb_probe(struct usb_interface *intf, dealloc: if_usb_free(cardp); + kfree(cardp); error: lbtf_deb_leave(LBTF_DEB_MAIN); return -ENOMEM; @@ -254,6 +255,7 @@ static void if_usb_disconnect(struct usb_interface *intf) /* Unlink and free urb */ if_usb_free(cardp); + kfree(cardp); usb_set_intfdata(intf, NULL); usb_put_dev(interface_to_usbdev(intf)); diff --git a/drivers/net/wireless/marvell/mwifiex/11n.c b/drivers/net/wireless/marvell/mwifiex/11n.c index 6696bce561786290109f70589b14d0e8c7499450..cf08a4af84d6d7fa82b1ba925eb6c34026095220 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.c +++ b/drivers/net/wireless/marvell/mwifiex/11n.c @@ -657,14 +657,15 @@ int mwifiex_send_delba(struct mwifiex_private *priv, int tid, u8 *peer_mac, uint16_t del_ba_param_set; memset(&delba, 0, sizeof(delba)); - delba.del_ba_param_set = cpu_to_le16(tid << DELBA_TID_POS); - del_ba_param_set = le16_to_cpu(delba.del_ba_param_set); + del_ba_param_set = tid << DELBA_TID_POS; + if (initiator) del_ba_param_set |= IEEE80211_DELBA_PARAM_INITIATOR_MASK; else del_ba_param_set &= ~IEEE80211_DELBA_PARAM_INITIATOR_MASK; + delba.del_ba_param_set = cpu_to_le16(del_ba_param_set); memcpy(&delba.peer_mac_addr, peer_mac, ETH_ALEN); /* We don't wait for the response of this command */ diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index a6b9dc6700b1429284728d3196149e2c9096b8b7..3d1b5d3d295aedef24ea817ff3a6bf26eea380fc 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -908,16 +908,20 @@ mwifiex_init_new_priv_params(struct mwifiex_private *priv, switch (type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: - priv->bss_role = MWIFIEX_BSS_ROLE_STA; + priv->bss_role = MWIFIEX_BSS_ROLE_STA; + priv->bss_type = MWIFIEX_BSS_TYPE_STA; break; case NL80211_IFTYPE_P2P_CLIENT: - priv->bss_role = MWIFIEX_BSS_ROLE_STA; + priv->bss_role = MWIFIEX_BSS_ROLE_STA; + priv->bss_type = MWIFIEX_BSS_TYPE_P2P; break; case NL80211_IFTYPE_P2P_GO: - priv->bss_role = MWIFIEX_BSS_ROLE_UAP; + priv->bss_role = MWIFIEX_BSS_ROLE_UAP; + priv->bss_type = MWIFIEX_BSS_TYPE_P2P; break; case NL80211_IFTYPE_AP: priv->bss_role = MWIFIEX_BSS_ROLE_UAP; + priv->bss_type = MWIFIEX_BSS_TYPE_UAP; break; default: mwifiex_dbg(adapter, ERROR, @@ -1229,29 +1233,15 @@ mwifiex_cfg80211_change_virtual_intf(struct wiphy *wiphy, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: + if (mwifiex_cfg80211_deinit_p2p(priv)) + return -EFAULT; + switch (type) { - case NL80211_IFTYPE_STATION: - if (mwifiex_cfg80211_deinit_p2p(priv)) - return -EFAULT; - priv->adapter->curr_iface_comb.p2p_intf--; - priv->adapter->curr_iface_comb.sta_intf++; - dev->ieee80211_ptr->iftype = type; - if (mwifiex_deinit_priv_params(priv)) - return -1; - if (mwifiex_init_new_priv_params(priv, dev, type)) - return -1; - if (mwifiex_sta_init_cmd(priv, false, false)) - return -1; - break; case NL80211_IFTYPE_ADHOC: - if (mwifiex_cfg80211_deinit_p2p(priv)) - return -EFAULT; + case NL80211_IFTYPE_STATION: return mwifiex_change_vif_to_sta_adhoc(dev, curr_iftype, type, params); - break; case NL80211_IFTYPE_AP: - if (mwifiex_cfg80211_deinit_p2p(priv)) - return -EFAULT; return mwifiex_change_vif_to_ap(dev, curr_iftype, type, params); case NL80211_IFTYPE_UNSPECIFIED: diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index b2de8d03c5fac352778a3ee91e64e674b847aa22..7c137eba8cda778f0db9de6928495d6c353687b2 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -17,6 +17,7 @@ * this warranty disclaimer. */ +#include #include #include "decl.h" @@ -637,11 +638,15 @@ static void mwifiex_delay_for_sleep_cookie(struct mwifiex_adapter *adapter, "max count reached while accessing sleep cookie\n"); } +#define N_WAKEUP_TRIES_SHORT_INTERVAL 15 +#define N_WAKEUP_TRIES_LONG_INTERVAL 35 + /* This function wakes up the card by reading fw_status register. */ static int mwifiex_pm_wakeup_card(struct mwifiex_adapter *adapter) { struct pcie_service_card *card = adapter->card; const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; + int retval; mwifiex_dbg(adapter, EVENT, "event: Wakeup device...\n"); @@ -649,11 +654,24 @@ static int mwifiex_pm_wakeup_card(struct mwifiex_adapter *adapter) if (reg->sleep_cookie) mwifiex_pcie_dev_wakeup_delay(adapter); - /* Accessing fw_status register will wakeup device */ - if (mwifiex_write_reg(adapter, reg->fw_status, FIRMWARE_READY_PCIE)) { - mwifiex_dbg(adapter, ERROR, - "Writing fw_status register failed\n"); - return -1; + /* The 88W8897 PCIe+USB firmware (latest version 15.68.19.p21) sometimes + * appears to ignore or miss our wakeup request, so we continue trying + * until we receive an interrupt from the card. + */ + if (read_poll_timeout(mwifiex_write_reg, retval, + READ_ONCE(adapter->int_status) != 0, + 500, 500 * N_WAKEUP_TRIES_SHORT_INTERVAL, + false, + adapter, reg->fw_status, FIRMWARE_READY_PCIE)) { + if (read_poll_timeout(mwifiex_write_reg, retval, + READ_ONCE(adapter->int_status) != 0, + 10000, 10000 * N_WAKEUP_TRIES_LONG_INTERVAL, + false, + adapter, reg->fw_status, FIRMWARE_READY_PCIE)) { + mwifiex_dbg(adapter, ERROR, + "Firmware didn't wake up\n"); + return -EIO; + } } if (reg->sleep_cookie) { @@ -1480,6 +1498,14 @@ mwifiex_pcie_send_data(struct mwifiex_adapter *adapter, struct sk_buff *skb, ret = -1; goto done_unmap; } + + /* The firmware (latest version 15.68.19.p21) of the 88W8897 PCIe+USB card + * seems to crash randomly after setting the TX ring write pointer when + * ASPM powersaving is enabled. A workaround seems to be keeping the bus + * busy by reading a random register afterwards. + */ + mwifiex_read_reg(adapter, PCI_VENDOR_ID, &rx_val); + if ((mwifiex_pcie_txbd_not_full(card)) && tx_param->next_pkt_len) { /* have more packets and TxBD still can hold more */ diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index bc79ca4cb803cc987cc0b9d33316950cd7f07520..753458628f86a18707b0b20acd9923bacd8706a2 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -364,10 +364,12 @@ static void mwifiex_process_uap_tx_pause(struct mwifiex_private *priv, sta_ptr = mwifiex_get_sta_entry(priv, tp->peermac); if (sta_ptr && sta_ptr->tx_pause != tp->tx_pause) { sta_ptr->tx_pause = tp->tx_pause; + spin_unlock_bh(&priv->sta_list_spinlock); mwifiex_update_ralist_tx_pause(priv, tp->peermac, tp->tx_pause); + } else { + spin_unlock_bh(&priv->sta_list_spinlock); } - spin_unlock_bh(&priv->sta_list_spinlock); } } @@ -399,11 +401,13 @@ static void mwifiex_process_sta_tx_pause(struct mwifiex_private *priv, sta_ptr = mwifiex_get_sta_entry(priv, tp->peermac); if (sta_ptr && sta_ptr->tx_pause != tp->tx_pause) { sta_ptr->tx_pause = tp->tx_pause; + spin_unlock_bh(&priv->sta_list_spinlock); mwifiex_update_ralist_tx_pause(priv, tp->peermac, tp->tx_pause); + } else { + spin_unlock_bh(&priv->sta_list_spinlock); } - spin_unlock_bh(&priv->sta_list_spinlock); } } } diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 426e39d4ccf0f0bbee4c5f78922039eccb57bea4..8f01fcbe93961a03a8c78924440ba7382489aea4 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -130,7 +130,8 @@ static int mwifiex_usb_recv(struct mwifiex_adapter *adapter, default: mwifiex_dbg(adapter, ERROR, "unknown recv_type %#x\n", recv_type); - return -1; + ret = -1; + goto exit_restore_skb; } break; case MWIFIEX_USB_EP_DATA: @@ -505,6 +506,22 @@ static int mwifiex_usb_probe(struct usb_interface *intf, } } + switch (card->usb_boot_state) { + case USB8XXX_FW_DNLD: + /* Reject broken descriptors. */ + if (!card->rx_cmd_ep || !card->tx_cmd_ep) + return -ENODEV; + if (card->bulk_out_maxpktsize == 0) + return -ENODEV; + break; + case USB8XXX_FW_READY: + /* Assume the driver can handle missing endpoints for now. */ + break; + default: + WARN_ON(1); + return -ENODEV; + } + usb_set_intfdata(intf, card); ret = mwifiex_add_card(card, &card->fw_done, &usb_ops, diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c index 27b7d4b779e0b258c078aaa88503f05a04390332..dc91ac8cbd48b41582efa2495cbea0804d58bfd0 100644 --- a/drivers/net/wireless/marvell/mwl8k.c +++ b/drivers/net/wireless/marvell/mwl8k.c @@ -5796,8 +5796,8 @@ static void mwl8k_fw_state_machine(const struct firmware *fw, void *context) fail: priv->fw_state = FW_STATE_ERROR; complete(&priv->firmware_loading_complete); - device_release_driver(&priv->pdev->dev); mwl8k_release_firmware(priv); + device_release_driver(&priv->pdev->dev); } #define MAX_RESTART_ATTEMPTS 1 diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index f44f478bb970ea202d0adeff3f37b1a8d2047fd8..424be103093c6a61473c8971fa676e5f99c733cc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -672,12 +672,15 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi, if (info->flags & IEEE80211_TX_CTL_NO_ACK) txwi[3] |= cpu_to_le32(MT_TXD3_NO_ACK); - txwi[7] = FIELD_PREP(MT_TXD7_TYPE, fc_type) | - FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype) | - FIELD_PREP(MT_TXD7_SPE_IDX, 0x18); - if (!is_mmio) - txwi[8] = FIELD_PREP(MT_TXD8_L_TYPE, fc_type) | - FIELD_PREP(MT_TXD8_L_SUB_TYPE, fc_stype); + val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | + FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype) | + FIELD_PREP(MT_TXD7_SPE_IDX, 0x18); + txwi[7] = cpu_to_le32(val); + if (!is_mmio) { + val = FIELD_PREP(MT_TXD8_L_TYPE, fc_type) | + FIELD_PREP(MT_TXD8_L_SUB_TYPE, fc_stype); + txwi[8] = cpu_to_le32(val); + } return 0; } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index da6d3f51f6d47811cc7b7d64416ffd49ae7c4a0a..677082d8659a6892620df8de3a340613ba588957 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -176,7 +176,7 @@ void mt76x02_mac_wcid_set_drop(struct mt76x02_dev *dev, u8 idx, bool drop) mt76_wr(dev, MT_WCID_DROP(idx), (val & ~bit) | (bit * drop)); } -static __le16 +static u16 mt76x02_mac_tx_rate_val(struct mt76x02_dev *dev, const struct ieee80211_tx_rate *rate, u8 *nss_val) { @@ -222,14 +222,14 @@ mt76x02_mac_tx_rate_val(struct mt76x02_dev *dev, rateval |= MT_RXWI_RATE_SGI; *nss_val = nss; - return cpu_to_le16(rateval); + return rateval; } void mt76x02_mac_wcid_set_rate(struct mt76x02_dev *dev, struct mt76_wcid *wcid, const struct ieee80211_tx_rate *rate) { s8 max_txpwr_adj = mt76x02_tx_get_max_txpwr_adj(dev, rate); - __le16 rateval; + u16 rateval; u32 tx_info; s8 nss; @@ -342,7 +342,7 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi, struct ieee80211_key_conf *key = info->control.hw_key; u32 wcid_tx_info; u16 rate_ht_mask = FIELD_PREP(MT_RXWI_RATE_PHY, BIT(1) | BIT(2)); - u16 txwi_flags = 0; + u16 txwi_flags = 0, rateval; u8 nss; s8 txpwr_adj, max_txpwr_adj; u8 ccmp_pn[8], nstreams = dev->chainmask & 0xf; @@ -380,14 +380,15 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi, if (wcid && (rate->idx < 0 || !rate->count)) { wcid_tx_info = wcid->tx_info; - txwi->rate = FIELD_GET(MT_WCID_TX_INFO_RATE, wcid_tx_info); + rateval = FIELD_GET(MT_WCID_TX_INFO_RATE, wcid_tx_info); max_txpwr_adj = FIELD_GET(MT_WCID_TX_INFO_TXPWR_ADJ, wcid_tx_info); nss = FIELD_GET(MT_WCID_TX_INFO_NSS, wcid_tx_info); } else { - txwi->rate = mt76x02_mac_tx_rate_val(dev, rate, &nss); + rateval = mt76x02_mac_tx_rate_val(dev, rate, &nss); max_txpwr_adj = mt76x02_tx_get_max_txpwr_adj(dev, rate); } + txwi->rate = cpu_to_le16(rateval); txpwr_adj = mt76x02_tx_get_txpwr_adj(dev, dev->txpower_conf, max_txpwr_adj); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index ea71409751519f8aa7704423cac7dd723b3ed7b0..9a7f317a098fc805ba661803d8eb456f051b807c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -182,7 +182,7 @@ mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif, if (ht_cap->ht_supported) mode |= PHY_MODE_GN; - if (he_cap->has_he) + if (he_cap && he_cap->has_he) mode |= PHY_MODE_AX_24G; } else if (band == NL80211_BAND_5GHZ) { mode |= PHY_MODE_A; @@ -193,7 +193,7 @@ mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif, if (vht_cap->vht_supported) mode |= PHY_MODE_AC; - if (he_cap->has_he) + if (he_cap && he_cap->has_he) mode |= PHY_MODE_AX_5G; } @@ -631,7 +631,7 @@ mt7915_mcu_alloc_sta_req(struct mt7915_dev *dev, struct mt7915_vif *mvif, .bss_idx = mvif->idx, .wlan_idx_lo = msta ? to_wcid_lo(msta->wcid.idx) : 0, .wlan_idx_hi = msta ? to_wcid_hi(msta->wcid.idx) : 0, - .muar_idx = msta ? mvif->omac_idx : 0, + .muar_idx = msta && msta->wcid.sta ? mvif->omac_idx : 0xe, .is_tlv_append = 1, }; struct sk_buff *skb; @@ -667,7 +667,7 @@ mt7915_mcu_alloc_wtbl_req(struct mt7915_dev *dev, struct mt7915_sta *msta, } if (sta_hdr) - sta_hdr->len = cpu_to_le16(sizeof(hdr)); + le16_add_cpu(&sta_hdr->len, sizeof(hdr)); return skb_put_data(nskb, &hdr, sizeof(hdr)); } @@ -830,7 +830,7 @@ static void mt7915_check_he_obss_narrow_bw_ru_iter(struct wiphy *wiphy, elem = ieee80211_bss_get_elem(bss, WLAN_EID_EXT_CAPABILITY); - if (!elem || elem->datalen < 10 || + if (!elem || elem->datalen <= 10 || !(elem->data[10] & WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT)) data->tolerated = false; @@ -2648,7 +2648,7 @@ out: default: ret = -EAGAIN; dev_err(dev->mt76.dev, "Failed to release patch semaphore\n"); - goto out; + break; } release_firmware(fw); diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index c1ac1d84790f0bb11c8ee1c1366c9b901d922fe4..6be5ac8ba518d84e70e6c66a63fc13769846097e 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -129,8 +129,7 @@ static void cfg_scan_result(enum scan_event scan_event, info->frame_len, (s32)info->rssi * 100, GFP_KERNEL); - if (!bss) - cfg80211_put_bss(wiphy, bss); + cfg80211_put_bss(wiphy, bss); } else if (scan_event == SCAN_EVENT_DONE) { mutex_lock(&priv->scan_req_lock); diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index e4473a5512415241d012f973862e12d122b3845e..74c3d8cb31002d0ec583fbbda66df4d9efcde6db 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -25,6 +25,9 @@ static bool rt2x00usb_check_usb_error(struct rt2x00_dev *rt2x00dev, int status) if (status == -ENODEV || status == -ENOENT) return true; + if (!test_bit(DEVICE_STATE_STARTED, &rt2x00dev->flags)) + return false; + if (status == -EPROTO || status == -ETIMEDOUT) rt2x00dev->num_proto_errs++; else diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c index 585784258c6654ef62569a7c365c962a6a55dcaa..4efab907a3ac6d0f23059e310ca5226782f520e2 100644 --- a/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c +++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c @@ -28,7 +28,7 @@ u8 rtl818x_ioread8_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_rcvctrlpipe(priv->udev, 0), RTL8187_REQ_GET_REG, RTL8187_REQT_READ, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits8, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits8, sizeof(val), 500); val = priv->io_dmabuf->bits8; mutex_unlock(&priv->io_mutex); @@ -45,7 +45,7 @@ u16 rtl818x_ioread16_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_rcvctrlpipe(priv->udev, 0), RTL8187_REQ_GET_REG, RTL8187_REQT_READ, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits16, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits16, sizeof(val), 500); val = priv->io_dmabuf->bits16; mutex_unlock(&priv->io_mutex); @@ -62,7 +62,7 @@ u32 rtl818x_ioread32_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_rcvctrlpipe(priv->udev, 0), RTL8187_REQ_GET_REG, RTL8187_REQT_READ, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits32, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits32, sizeof(val), 500); val = priv->io_dmabuf->bits32; mutex_unlock(&priv->io_mutex); @@ -79,7 +79,7 @@ void rtl818x_iowrite8_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0), RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits8, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits8, sizeof(val), 500); mutex_unlock(&priv->io_mutex); } @@ -93,7 +93,7 @@ void rtl818x_iowrite16_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0), RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits16, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits16, sizeof(val), 500); mutex_unlock(&priv->io_mutex); } @@ -107,7 +107,7 @@ void rtl818x_iowrite32_idx(struct rtl8187_priv *priv, usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0), RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE, (unsigned long)addr, idx & 0x03, - &priv->io_dmabuf->bits32, sizeof(val), HZ / 2); + &priv->io_dmabuf->bits32, sizeof(val), 500); mutex_unlock(&priv->io_mutex); } @@ -183,7 +183,7 @@ static void rtl8225_write_8051(struct ieee80211_hw *dev, u8 addr, __le16 data) usb_control_msg(priv->udev, usb_sndctrlpipe(priv->udev, 0), RTL8187_REQ_SET_REG, RTL8187_REQT_WRITE, addr, 0x8225, &priv->io_dmabuf->bits16, sizeof(data), - HZ / 2); + 500); mutex_unlock(&priv->io_mutex); diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c index 6312fddd9c00a2b37dee0951810098472e72c053..eaba661133280272cb370fb647b04fefcc7d1c96 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c @@ -1000,6 +1000,7 @@ int rtl92cu_hw_init(struct ieee80211_hw *hw) _initpabias(hw); rtl92c_dm_init(hw); exit: + local_irq_disable(); local_irq_restore(flags); return err; } diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c index 0452630bcfaccd19f3b1ddb50f1fca66067d426b..40bcfabd2d2147114be9f2b96dc82bfee1e7f90e 100644 --- a/drivers/net/wireless/realtek/rtw88/fw.c +++ b/drivers/net/wireless/realtek/rtw88/fw.c @@ -1421,12 +1421,10 @@ static void rtw_fw_read_fifo_page(struct rtw_dev *rtwdev, u32 offset, u32 size, u32 i; u16 idx = 0; u16 ctl; - u8 rcr; - rcr = rtw_read8(rtwdev, REG_RCR + 2); ctl = rtw_read16(rtwdev, REG_PKTBUF_DBG_CTRL) & 0xf000; /* disable rx clock gate */ - rtw_write8(rtwdev, REG_RCR, rcr | BIT(3)); + rtw_write32_set(rtwdev, REG_RCR, BIT_DISGCLK); do { rtw_write16(rtwdev, REG_PKTBUF_DBG_CTRL, start_pg | ctl); @@ -1445,7 +1443,8 @@ static void rtw_fw_read_fifo_page(struct rtw_dev *rtwdev, u32 offset, u32 size, out: rtw_write16(rtwdev, REG_PKTBUF_DBG_CTRL, ctl); - rtw_write8(rtwdev, REG_RCR + 2, rcr); + /* restore rx clock gate */ + rtw_write32_clr(rtwdev, REG_RCR, BIT_DISGCLK); } static void rtw_fw_read_fifo(struct rtw_dev *rtwdev, enum rtw_fw_fifo_sel sel, diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index 565efd8806247edbdf94b1fed4f134afe6cd290a..2ef1416899f03858b38ea838052c999cb65352d2 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1652,7 +1652,7 @@ int rtw_core_init(struct rtw_dev *rtwdev) /* default rx filter setting */ rtwdev->hal.rcr = BIT_APP_FCS | BIT_APP_MIC | BIT_APP_ICV | - BIT_HTC_LOC_CTRL | BIT_APP_PHYSTS | + BIT_PKTCTL_DLEN | BIT_HTC_LOC_CTRL | BIT_APP_PHYSTS | BIT_AB | BIT_AM | BIT_APM; ret = rtw_load_firmware(rtwdev, RTW_NORMAL_FW); diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h index aca3dbdc2d5a51b9def7ca5b50c7debce694501e..9088bfb2a3157442afb7f61aa6b42b420b002f70 100644 --- a/drivers/net/wireless/realtek/rtw88/reg.h +++ b/drivers/net/wireless/realtek/rtw88/reg.h @@ -400,6 +400,7 @@ #define BIT_MFBEN BIT(22) #define BIT_DISCHKPPDLLEN BIT(21) #define BIT_PKTCTL_DLEN BIT(20) +#define BIT_DISGCLK BIT(19) #define BIT_TIM_PARSER_EN BIT(18) #define BIT_BC_MD_EN BIT(17) #define BIT_UC_MD_EN BIT(16) diff --git a/drivers/net/wireless/realtek/rtw88/rtw8821c.h b/drivers/net/wireless/realtek/rtw88/rtw8821c.h index bd01e82b6bcd0bf6c2c3bd9d6c0b38f091dfbd5a..8d1e8ff71d7efc3d0fe13e6fb4346b26565e39fe 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8821c.h +++ b/drivers/net/wireless/realtek/rtw88/rtw8821c.h @@ -131,7 +131,7 @@ _rtw_write32s_mask(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 data) #define WLAN_TX_FUNC_CFG2 0x30 #define WLAN_MAC_OPT_NORM_FUNC1 0x98 #define WLAN_MAC_OPT_LB_FUNC1 0x80 -#define WLAN_MAC_OPT_FUNC2 0x30810041 +#define WLAN_MAC_OPT_FUNC2 0xb0810041 #define WLAN_SIFS_CFG (WLAN_SIFS_CCK_CONT_TX | \ (WLAN_SIFS_OFDM_CONT_TX << BIT_SHIFT_SIFS_OFDM_CTX) | \ diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c index 22d0dd640ac94fa8a06848ba23b33056748c5505..dbfd67c3f598cac92bbbcb5a2f49218e14a93621 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c @@ -204,7 +204,7 @@ static void rtw8822b_phy_set_param(struct rtw_dev *rtwdev) #define WLAN_TX_FUNC_CFG2 0x30 #define WLAN_MAC_OPT_NORM_FUNC1 0x98 #define WLAN_MAC_OPT_LB_FUNC1 0x80 -#define WLAN_MAC_OPT_FUNC2 0x30810041 +#define WLAN_MAC_OPT_FUNC2 0xb0810041 #define WLAN_SIFS_CFG (WLAN_SIFS_CCK_CONT_TX | \ (WLAN_SIFS_OFDM_CONT_TX << BIT_SHIFT_SIFS_OFDM_CTX) | \ diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c index 79ad6232dce8303353548ce52c5b4f2dde9018c8..cee586335552d82621c54fcb080cc9a2cc522b92 100644 --- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c +++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c @@ -1248,7 +1248,7 @@ static void rtw8822c_phy_set_param(struct rtw_dev *rtwdev) #define WLAN_TX_FUNC_CFG2 0x30 #define WLAN_MAC_OPT_NORM_FUNC1 0x98 #define WLAN_MAC_OPT_LB_FUNC1 0x80 -#define WLAN_MAC_OPT_FUNC2 0x30810041 +#define WLAN_MAC_OPT_FUNC2 0xb0810041 #define WLAN_MAC_INT_MIG_CFG 0x33330000 #define WLAN_SIFS_CFG (WLAN_SIFS_CCK_CONT_TX | \ diff --git a/drivers/net/wireless/rsi/rsi_91x_core.c b/drivers/net/wireless/rsi/rsi_91x_core.c index 2d49c5b5eefb4e7b5d818d9ab13d01f15ff3b7e4..9c4c585572488b0ff1558efe047d0e633fde88cc 100644 --- a/drivers/net/wireless/rsi/rsi_91x_core.c +++ b/drivers/net/wireless/rsi/rsi_91x_core.c @@ -400,6 +400,8 @@ void rsi_core_xmit(struct rsi_common *common, struct sk_buff *skb) info = IEEE80211_SKB_CB(skb); tx_params = (struct skb_info *)info->driver_data; + /* info->driver_data and info->control part of union so make copy */ + tx_params->have_key = !!info->control.hw_key; wh = (struct ieee80211_hdr *)&skb->data[0]; tx_params->sta_id = 0; diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c index f4a26f16f00f445912a81d417349d76d6888741d..dca81a4bbdd7f63a4ccf9a854e101683a22fafef 100644 --- a/drivers/net/wireless/rsi/rsi_91x_hal.c +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c @@ -203,7 +203,7 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) wh->frame_control |= cpu_to_le16(RSI_SET_PS_ENABLE); if ((!(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) && - info->control.hw_key) { + tx_params->have_key) { if (rsi_is_cipher_wep(common)) ieee80211_size += 4; else @@ -214,15 +214,17 @@ int rsi_prepare_data_desc(struct rsi_common *common, struct sk_buff *skb) RSI_WIFI_DATA_Q); data_desc->header_len = ieee80211_size; - if (common->min_rate != RSI_RATE_AUTO) { + if (common->rate_config[common->band].fixed_enabled) { /* Send fixed rate */ + u16 fixed_rate = common->rate_config[common->band].fixed_hw_rate; + data_desc->frame_info = cpu_to_le16(RATE_INFO_ENABLE); - data_desc->rate_info = cpu_to_le16(common->min_rate); + data_desc->rate_info = cpu_to_le16(fixed_rate); if (conf_is_ht40(&common->priv->hw->conf)) data_desc->bbp_info = cpu_to_le16(FULL40M_ENABLE); - if ((common->vif_info[0].sgi) && (common->min_rate & 0x100)) { + if (common->vif_info[0].sgi && (fixed_rate & 0x100)) { /* Only MCS rates */ data_desc->rate_info |= cpu_to_le16(ENABLE_SHORTGI_RATE); diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c index 57c9e3559dfd1916897c35e0a870c72a4076c4e9..8abf9f699db17904748333eb031afd1fb3fe9b81 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c +++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c @@ -510,7 +510,6 @@ static int rsi_mac80211_add_interface(struct ieee80211_hw *hw, if ((vif->type == NL80211_IFTYPE_AP) || (vif->type == NL80211_IFTYPE_P2P_GO)) { rsi_send_rx_filter_frame(common, DISALLOW_BEACONS); - common->min_rate = RSI_RATE_AUTO; for (i = 0; i < common->max_stations; i++) common->stations[i].sta = NULL; } @@ -1211,20 +1210,32 @@ static int rsi_mac80211_set_rate_mask(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const struct cfg80211_bitrate_mask *mask) { + const unsigned int mcs_offset = ARRAY_SIZE(rsi_rates); struct rsi_hw *adapter = hw->priv; struct rsi_common *common = adapter->priv; - enum nl80211_band band = hw->conf.chandef.chan->band; + int i; mutex_lock(&common->mutex); - common->fixedrate_mask[band] = 0; - if (mask->control[band].legacy == 0xfff) { - common->fixedrate_mask[band] = - (mask->control[band].ht_mcs[0] << 12); - } else { - common->fixedrate_mask[band] = - mask->control[band].legacy; + for (i = 0; i < ARRAY_SIZE(common->rate_config); i++) { + struct rsi_rate_config *cfg = &common->rate_config[i]; + u32 bm; + + bm = mask->control[i].legacy | (mask->control[i].ht_mcs[0] << mcs_offset); + if (hweight32(bm) == 1) { /* single rate */ + int rate_index = ffs(bm) - 1; + + if (rate_index < mcs_offset) + cfg->fixed_hw_rate = rsi_rates[rate_index].hw_value; + else + cfg->fixed_hw_rate = rsi_mcsrates[rate_index - mcs_offset]; + cfg->fixed_enabled = true; + } else { + cfg->configured_mask = bm; + cfg->fixed_enabled = false; + } } + mutex_unlock(&common->mutex); return 0; @@ -1361,46 +1372,6 @@ void rsi_indicate_pkt_to_os(struct rsi_common *common, ieee80211_rx_irqsafe(hw, skb); } -static void rsi_set_min_rate(struct ieee80211_hw *hw, - struct ieee80211_sta *sta, - struct rsi_common *common) -{ - u8 band = hw->conf.chandef.chan->band; - u8 ii; - u32 rate_bitmap; - bool matched = false; - - common->bitrate_mask[band] = sta->supp_rates[band]; - - rate_bitmap = (common->fixedrate_mask[band] & sta->supp_rates[band]); - - if (rate_bitmap & 0xfff) { - /* Find out the min rate */ - for (ii = 0; ii < ARRAY_SIZE(rsi_rates); ii++) { - if (rate_bitmap & BIT(ii)) { - common->min_rate = rsi_rates[ii].hw_value; - matched = true; - break; - } - } - } - - common->vif_info[0].is_ht = sta->ht_cap.ht_supported; - - if ((common->vif_info[0].is_ht) && (rate_bitmap >> 12)) { - for (ii = 0; ii < ARRAY_SIZE(rsi_mcsrates); ii++) { - if ((rate_bitmap >> 12) & BIT(ii)) { - common->min_rate = rsi_mcsrates[ii]; - matched = true; - break; - } - } - } - - if (!matched) - common->min_rate = 0xffff; -} - /** * rsi_mac80211_sta_add() - This function notifies driver about a peer getting * connected. @@ -1499,9 +1470,9 @@ static int rsi_mac80211_sta_add(struct ieee80211_hw *hw, if ((vif->type == NL80211_IFTYPE_STATION) || (vif->type == NL80211_IFTYPE_P2P_CLIENT)) { - rsi_set_min_rate(hw, sta, common); + common->bitrate_mask[common->band] = sta->supp_rates[common->band]; + common->vif_info[0].is_ht = sta->ht_cap.ht_supported; if (sta->ht_cap.ht_supported) { - common->vif_info[0].is_ht = true; common->bitrate_mask[NL80211_BAND_2GHZ] = sta->supp_rates[NL80211_BAND_2GHZ]; if ((sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20) || @@ -1575,7 +1546,6 @@ static int rsi_mac80211_sta_remove(struct ieee80211_hw *hw, bss->qos = sta->wme; common->bitrate_mask[NL80211_BAND_2GHZ] = 0; common->bitrate_mask[NL80211_BAND_5GHZ] = 0; - common->min_rate = 0xffff; common->vif_info[0].is_ht = false; common->vif_info[0].sgi = false; common->vif_info[0].seq_start = 0; diff --git a/drivers/net/wireless/rsi/rsi_91x_main.c b/drivers/net/wireless/rsi/rsi_91x_main.c index 9a3d2439a8e7aecfd903d825d8d4c96c2f774025..fe8aed58ac0888d088820da525347dfcd6648194 100644 --- a/drivers/net/wireless/rsi/rsi_91x_main.c +++ b/drivers/net/wireless/rsi/rsi_91x_main.c @@ -23,6 +23,7 @@ #include "rsi_common.h" #include "rsi_coex.h" #include "rsi_hal.h" +#include "rsi_usb.h" u32 rsi_zone_enabled = /* INFO_ZONE | INIT_ZONE | @@ -168,6 +169,9 @@ int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len) frame_desc = &rx_pkt[index]; actual_length = *(u16 *)&frame_desc[0]; offset = *(u16 *)&frame_desc[2]; + if (!rcv_pkt_len && offset > + RSI_MAX_RX_USB_PKT_SIZE - FRAME_DESC_SZ) + goto fail; queueno = rsi_get_queueno(frame_desc, offset); length = rsi_get_length(frame_desc, offset); @@ -211,9 +215,10 @@ int rsi_read_pkt(struct rsi_common *common, u8 *rx_pkt, s32 rcv_pkt_len) bt_pkt_type = frame_desc[offset + BT_RX_PKT_TYPE_OFST]; if (bt_pkt_type == BT_CARD_READY_IND) { rsi_dbg(INFO_ZONE, "BT Card ready recvd\n"); - if (rsi_bt_ops.attach(common, &g_proto_ops)) - rsi_dbg(ERR_ZONE, - "Failed to attach BT module\n"); + if (common->fsm_state == FSM_MAC_INIT_DONE) + rsi_attach_bt(common); + else + common->bt_defer_attach = true; } else { if (common->bt_adapter) rsi_bt_ops.recv_pkt(common->bt_adapter, @@ -278,6 +283,15 @@ void rsi_set_bt_context(void *priv, void *bt_context) } #endif +void rsi_attach_bt(struct rsi_common *common) +{ +#ifdef CONFIG_RSI_COEX + if (rsi_bt_ops.attach(common, &g_proto_ops)) + rsi_dbg(ERR_ZONE, + "Failed to attach BT module\n"); +#endif +} + /** * rsi_91x_init() - This function initializes os interface operations. * @oper_mode: One of DEV_OPMODE_*. @@ -359,6 +373,7 @@ struct rsi_hw *rsi_91x_init(u16 oper_mode) if (common->coex_mode > 1) { if (rsi_coex_attach(common)) { rsi_dbg(ERR_ZONE, "Failed to init coex module\n"); + rsi_kill_thread(&common->tx_thread); goto err; } } diff --git a/drivers/net/wireless/rsi/rsi_91x_mgmt.c b/drivers/net/wireless/rsi/rsi_91x_mgmt.c index b6d050a2fbe7e84ed1fee1419572783a0bb18c32..9000a5d51003482322bc0e3c1dff08ef82a1f2af 100644 --- a/drivers/net/wireless/rsi/rsi_91x_mgmt.c +++ b/drivers/net/wireless/rsi/rsi_91x_mgmt.c @@ -276,7 +276,7 @@ static void rsi_set_default_parameters(struct rsi_common *common) common->channel_width = BW_20MHZ; common->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; common->channel = 1; - common->min_rate = 0xffff; + memset(&common->rate_config, 0, sizeof(common->rate_config)); common->fsm_state = FSM_CARD_NOT_READY; common->iface_down = true; common->endpoint = EP_2GHZ_20MHZ; @@ -1314,7 +1314,7 @@ static int rsi_send_auto_rate_request(struct rsi_common *common, u8 band = hw->conf.chandef.chan->band; u8 num_supported_rates = 0; u8 rate_table_offset, rate_offset = 0; - u32 rate_bitmap; + u32 rate_bitmap, configured_rates; u16 *selected_rates, min_rate; bool is_ht = false, is_sgi = false; u16 frame_len = sizeof(struct rsi_auto_rate); @@ -1364,6 +1364,10 @@ static int rsi_send_auto_rate_request(struct rsi_common *common, is_sgi = true; } + /* Limit to any rates administratively configured by cfg80211 */ + configured_rates = common->rate_config[band].configured_mask ?: 0xffffffff; + rate_bitmap &= configured_rates; + if (band == NL80211_BAND_2GHZ) { if ((rate_bitmap == 0) && (is_ht)) min_rate = RSI_RATE_MCS0; @@ -1389,10 +1393,13 @@ static int rsi_send_auto_rate_request(struct rsi_common *common, num_supported_rates = jj; if (is_ht) { - for (ii = 0; ii < ARRAY_SIZE(mcs); ii++) - selected_rates[jj++] = mcs[ii]; - num_supported_rates += ARRAY_SIZE(mcs); - rate_offset += ARRAY_SIZE(mcs); + for (ii = 0; ii < ARRAY_SIZE(mcs); ii++) { + if (configured_rates & BIT(ii + ARRAY_SIZE(rsi_rates))) { + selected_rates[jj++] = mcs[ii]; + num_supported_rates++; + rate_offset++; + } + } } sort(selected_rates, jj, sizeof(u16), &rsi_compare, NULL); @@ -1482,7 +1489,7 @@ void rsi_inform_bss_status(struct rsi_common *common, qos_enable, aid, sta_id, vif); - if (common->min_rate == 0xffff) + if (!common->rate_config[common->band].fixed_enabled) rsi_send_auto_rate_request(common, sta, sta_id, vif); if (opmode == RSI_OPMODE_STA && !(assoc_cap & WLAN_CAPABILITY_PRIVACY) && @@ -2071,6 +2078,9 @@ static int rsi_handle_ta_confirm_type(struct rsi_common *common, if (common->reinit_hw) { complete(&common->wlan_init_completion); } else { + if (common->bt_defer_attach) + rsi_attach_bt(common); + return rsi_mac80211_attach(common); } } diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c index 3a243c532647173157e50fd9e114bb2c6d87dbad..8108f941ccd3f72b65a33e31f5d2260e3260ac9b 100644 --- a/drivers/net/wireless/rsi/rsi_91x_sdio.c +++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c @@ -24,10 +24,7 @@ /* Default operating mode is wlan STA + BT */ static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL; module_param(dev_oper_mode, ushort, 0444); -MODULE_PARM_DESC(dev_oper_mode, - "1[Wi-Fi], 4[BT], 8[BT LE], 5[Wi-Fi STA + BT classic]\n" - "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n" - "6[AP + BT classic], 14[AP + BT classic + BT LE]"); +MODULE_PARM_DESC(dev_oper_mode, DEV_OPMODE_PARAM_DESC); /** * rsi_sdio_set_cmd52_arg() - This function prepares cmd 52 read/write arg. diff --git a/drivers/net/wireless/rsi/rsi_91x_usb.c b/drivers/net/wireless/rsi/rsi_91x_usb.c index 3b13de59605e1e7258d52fd6abc99ac1cae129c7..11388a14696211b3bcc112eb0af0b7ffa70ac6e7 100644 --- a/drivers/net/wireless/rsi/rsi_91x_usb.c +++ b/drivers/net/wireless/rsi/rsi_91x_usb.c @@ -25,10 +25,7 @@ /* Default operating mode is wlan STA + BT */ static u16 dev_oper_mode = DEV_OPMODE_STA_BT_DUAL; module_param(dev_oper_mode, ushort, 0444); -MODULE_PARM_DESC(dev_oper_mode, - "1[Wi-Fi], 4[BT], 8[BT LE], 5[Wi-Fi STA + BT classic]\n" - "9[Wi-Fi STA + BT LE], 13[Wi-Fi STA + BT classic + BT LE]\n" - "6[AP + BT classic], 14[AP + BT classic + BT LE]"); +MODULE_PARM_DESC(dev_oper_mode, DEV_OPMODE_PARAM_DESC); static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num, gfp_t flags); @@ -61,7 +58,7 @@ static int rsi_usb_card_write(struct rsi_hw *adapter, (void *)seg, (int)len, &transfer, - HZ * 5); + USB_CTRL_SET_TIMEOUT); if (status < 0) { rsi_dbg(ERR_ZONE, @@ -272,8 +269,12 @@ static void rsi_rx_done_handler(struct urb *urb) struct rsi_91x_usbdev *dev = (struct rsi_91x_usbdev *)rx_cb->data; int status = -EINVAL; + if (!rx_cb->rx_skb) + return; + if (urb->status) { dev_kfree_skb(rx_cb->rx_skb); + rx_cb->rx_skb = NULL; return; } @@ -297,8 +298,10 @@ out: if (rsi_rx_urb_submit(dev->priv, rx_cb->ep_num, GFP_ATOMIC)) rsi_dbg(ERR_ZONE, "%s: Failed in urb submission", __func__); - if (status) + if (status) { dev_kfree_skb(rx_cb->rx_skb); + rx_cb->rx_skb = NULL; + } } static void rsi_rx_urb_kill(struct rsi_hw *adapter, u8 ep_num) @@ -325,7 +328,6 @@ static int rsi_rx_urb_submit(struct rsi_hw *adapter, u8 ep_num, gfp_t mem_flags) struct sk_buff *skb; u8 dword_align_bytes = 0; -#define RSI_MAX_RX_USB_PKT_SIZE 3000 skb = dev_alloc_skb(RSI_MAX_RX_USB_PKT_SIZE); if (!skb) return -ENOMEM; diff --git a/drivers/net/wireless/rsi/rsi_hal.h b/drivers/net/wireless/rsi/rsi_hal.h index 46e36df9e8e3ccb4ff14aab41f7d098592a2f308..a2fbec1cec4c30dc39bae3950704b423db1a61fc 100644 --- a/drivers/net/wireless/rsi/rsi_hal.h +++ b/drivers/net/wireless/rsi/rsi_hal.h @@ -28,6 +28,17 @@ #define DEV_OPMODE_AP_BT 6 #define DEV_OPMODE_AP_BT_DUAL 14 +#define DEV_OPMODE_PARAM_DESC \ + __stringify(DEV_OPMODE_WIFI_ALONE) "[Wi-Fi alone], " \ + __stringify(DEV_OPMODE_BT_ALONE) "[BT classic alone], " \ + __stringify(DEV_OPMODE_BT_LE_ALONE) "[BT LE alone], " \ + __stringify(DEV_OPMODE_BT_DUAL) "[BT classic + BT LE alone], " \ + __stringify(DEV_OPMODE_STA_BT) "[Wi-Fi STA + BT classic], " \ + __stringify(DEV_OPMODE_STA_BT_LE) "[Wi-Fi STA + BT LE], " \ + __stringify(DEV_OPMODE_STA_BT_DUAL) "[Wi-Fi STA + BT classic + BT LE], " \ + __stringify(DEV_OPMODE_AP_BT) "[Wi-Fi AP + BT classic], " \ + __stringify(DEV_OPMODE_AP_BT_DUAL) "[Wi-Fi AP + BT classic + BT LE]" + #define FLASH_WRITE_CHUNK_SIZE (4 * 1024) #define FLASH_SECTOR_SIZE (4 * 1024) diff --git a/drivers/net/wireless/rsi/rsi_main.h b/drivers/net/wireless/rsi/rsi_main.h index b3e25bc28682c751b507af49b67547bbb5573558..de595025c0197edbae80aa80def76df4c6f1aafc 100644 --- a/drivers/net/wireless/rsi/rsi_main.h +++ b/drivers/net/wireless/rsi/rsi_main.h @@ -61,6 +61,7 @@ enum RSI_FSM_STATES { extern u32 rsi_zone_enabled; extern __printf(2, 3) void rsi_dbg(u32 zone, const char *fmt, ...); +#define RSI_MAX_BANDS 2 #define RSI_MAX_VIFS 3 #define NUM_EDCA_QUEUES 4 #define IEEE80211_ADDR_LEN 6 @@ -139,6 +140,7 @@ struct skb_info { u8 internal_hdr_size; struct ieee80211_vif *vif; u8 vap_id; + bool have_key; }; enum edca_queue { @@ -229,6 +231,12 @@ struct rsi_9116_features { u32 ps_options; }; +struct rsi_rate_config { + u32 configured_mask; /* configured by mac80211 bits 0-11=legacy 12+ mcs */ + u16 fixed_hw_rate; + bool fixed_enabled; +}; + struct rsi_common { struct rsi_hw *priv; struct vif_priv vif_info[RSI_MAX_VIFS]; @@ -254,8 +262,8 @@ struct rsi_common { u8 channel_width; u16 rts_threshold; - u16 bitrate_mask[2]; - u32 fixedrate_mask[2]; + u32 bitrate_mask[RSI_MAX_BANDS]; + struct rsi_rate_config rate_config[RSI_MAX_BANDS]; u8 rf_reset; struct transmit_q_stats tx_stats; @@ -276,7 +284,6 @@ struct rsi_common { u8 mac_id; u8 radio_id; u16 rate_pwr[20]; - u16 min_rate; /* WMM algo related */ u8 selected_qnum; @@ -320,6 +327,7 @@ struct rsi_common { struct ieee80211_vif *roc_vif; bool eapol4_confirm; + bool bt_defer_attach; void *bt_adapter; struct cfg80211_scan_request *hwscan; @@ -401,5 +409,6 @@ struct rsi_host_intf_ops { enum rsi_host_intf rsi_get_host_intf(void *priv); void rsi_set_bt_context(void *priv, void *bt_context); +void rsi_attach_bt(struct rsi_common *common); #endif diff --git a/drivers/net/wireless/rsi/rsi_usb.h b/drivers/net/wireless/rsi/rsi_usb.h index 8702f434b5699978549144b03203dacf5e016790..ad88f8c70a35174ea969f37464df307e0794508d 100644 --- a/drivers/net/wireless/rsi/rsi_usb.h +++ b/drivers/net/wireless/rsi/rsi_usb.h @@ -44,6 +44,8 @@ #define RSI_USB_BUF_SIZE 4096 #define RSI_USB_CTRL_BUF_SIZE 0x04 +#define RSI_MAX_RX_USB_PKT_SIZE 3000 + struct rx_usb_ctrl_block { u8 *data; struct urb *rx_urb; diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 8ee24e351bdc2b92581e4adfed6d72884aff81c2..6a9178896c909428795421bb5d53cff6047541c7 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -203,6 +203,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */ unsigned int rx_queue_max; unsigned int rx_queue_len; unsigned long last_rx_time; + unsigned int rx_slots_needed; bool stalled; struct xenvif_copy_state rx_copy; diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 986b569709616a2094f7087edcc40d80c1beecae..b0cbc7fead7455defdc01b83f4bbc8cc4d86c94d 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -499,7 +499,7 @@ check_frags: * the header's copy failed, and they are * sharing a slot, send an error */ - if (i == 0 && sharedslot) + if (i == 0 && !first_shinfo && sharedslot) xenvif_idx_release(queue, pending_idx, XEN_NETIF_RSP_ERROR); else diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c index accc991d153f7c787c1b83bedb14183d5019c1c4..dbac4c03d21a14d12a2eee9e7b3418c9cc184d88 100644 --- a/drivers/net/xen-netback/rx.c +++ b/drivers/net/xen-netback/rx.c @@ -33,28 +33,36 @@ #include #include -static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) +/* + * Update the needed ring page slots for the first SKB queued. + * Note that any call sequence outside the RX thread calling this function + * needs to wake up the RX thread via a call of xenvif_kick_thread() + * afterwards in order to avoid a race with putting the thread to sleep. + */ +static void xenvif_update_needed_slots(struct xenvif_queue *queue, + const struct sk_buff *skb) { - RING_IDX prod, cons; - struct sk_buff *skb; - int needed; - unsigned long flags; - - spin_lock_irqsave(&queue->rx_queue.lock, flags); + unsigned int needed = 0; - skb = skb_peek(&queue->rx_queue); - if (!skb) { - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); - return false; + if (skb) { + needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); + if (skb_is_gso(skb)) + needed++; + if (skb->sw_hash) + needed++; } - needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE); - if (skb_is_gso(skb)) - needed++; - if (skb->sw_hash) - needed++; + WRITE_ONCE(queue->rx_slots_needed, needed); +} - spin_unlock_irqrestore(&queue->rx_queue.lock, flags); +static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue) +{ + RING_IDX prod, cons; + unsigned int needed; + + needed = READ_ONCE(queue->rx_slots_needed); + if (!needed) + return false; do { prod = queue->rx.sring->req_prod; @@ -80,13 +88,19 @@ void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb) spin_lock_irqsave(&queue->rx_queue.lock, flags); - __skb_queue_tail(&queue->rx_queue, skb); - - queue->rx_queue_len += skb->len; - if (queue->rx_queue_len > queue->rx_queue_max) { + if (queue->rx_queue_len >= queue->rx_queue_max) { struct net_device *dev = queue->vif->dev; netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id)); + kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; + } else { + if (skb_queue_empty(&queue->rx_queue)) + xenvif_update_needed_slots(queue, skb); + + __skb_queue_tail(&queue->rx_queue, skb); + + queue->rx_queue_len += skb->len; } spin_unlock_irqrestore(&queue->rx_queue.lock, flags); @@ -100,6 +114,8 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue) skb = __skb_dequeue(&queue->rx_queue); if (skb) { + xenvif_update_needed_slots(queue, skb_peek(&queue->rx_queue)); + queue->rx_queue_len -= skb->len; if (queue->rx_queue_len < queue->rx_queue_max) { struct netdev_queue *txq; @@ -134,6 +150,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue) break; xenvif_rx_dequeue(queue); kfree_skb(skb); + queue->vif->dev->stats.rx_dropped++; } } @@ -487,27 +504,31 @@ void xenvif_rx_action(struct xenvif_queue *queue) xenvif_rx_copy_flush(queue); } -static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue) +static RING_IDX xenvif_rx_queue_slots(const struct xenvif_queue *queue) { RING_IDX prod, cons; prod = queue->rx.sring->req_prod; cons = queue->rx.req_cons; + return prod - cons; +} + +static bool xenvif_rx_queue_stalled(const struct xenvif_queue *queue) +{ + unsigned int needed = READ_ONCE(queue->rx_slots_needed); + return !queue->stalled && - prod - cons < 1 && + xenvif_rx_queue_slots(queue) < needed && time_after(jiffies, queue->last_rx_time + queue->vif->stall_timeout); } static bool xenvif_rx_queue_ready(struct xenvif_queue *queue) { - RING_IDX prod, cons; - - prod = queue->rx.sring->req_prod; - cons = queue->rx.req_cons; + unsigned int needed = READ_ONCE(queue->rx_slots_needed); - return queue->stalled && prod - cons >= 1; + return queue->stalled && xenvif_rx_queue_slots(queue) >= needed; } bool xenvif_have_rx_work(struct xenvif_queue *queue, bool test_kthread) diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c index 94d19158efc188acc0d963b491343f52319f873b..ca261e0fc9c9b4b1ecd082db124b45a9452d7e73 100644 --- a/drivers/net/xen-netback/xenbus.c +++ b/drivers/net/xen-netback/xenbus.c @@ -256,6 +256,7 @@ static void backend_disconnect(struct backend_info *be) unsigned int queue_index; xen_unregister_watchers(vif); + xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status"); #ifdef CONFIG_DEBUG_FS xenvif_debugfs_delif(vif); #endif /* CONFIG_DEBUG_FS */ @@ -675,7 +676,6 @@ static void hotplug_status_changed(struct xenbus_watch *watch, /* Not interested in this watch anymore. */ unregister_hotplug_status_watch(be); - xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status"); } kfree(str); } @@ -824,15 +824,11 @@ static void connect(struct backend_info *be) xenvif_carrier_on(be->vif); unregister_hotplug_status_watch(be); - if (xenbus_exists(XBT_NIL, dev->nodename, "hotplug-status")) { - err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, - NULL, hotplug_status_changed, - "%s/%s", dev->nodename, - "hotplug-status"); - if (err) - goto err; + err = xenbus_watch_pathfmt(dev, &be->hotplug_status_watch, NULL, + hotplug_status_changed, + "%s/%s", dev->nodename, "hotplug-status"); + if (!err) be->have_hotplug_status_watch = 1; - } netif_tx_wake_all_queues(be->vif->dev); diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 3e9895bec15f08c2239aad442f7e5659c4cb23a8..1a69b5246133b1310e06f670d15be400f22b540e 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -126,21 +126,17 @@ struct netfront_queue { /* * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries - * are linked from tx_skb_freelist through skb_entry.link. - * - * NB. Freelist index entries are always going to be less than - * PAGE_OFFSET, whereas pointers to skbs will always be equal or - * greater than PAGE_OFFSET: we use this property to distinguish - * them. + * are linked from tx_skb_freelist through tx_link. */ - union skb_entry { - struct sk_buff *skb; - unsigned long link; - } tx_skbs[NET_TX_RING_SIZE]; + struct sk_buff *tx_skbs[NET_TX_RING_SIZE]; + unsigned short tx_link[NET_TX_RING_SIZE]; +#define TX_LINK_NONE 0xffff +#define TX_PENDING 0xfffe grant_ref_t gref_tx_head; grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; struct page *grant_tx_page[NET_TX_RING_SIZE]; unsigned tx_skb_freelist; + unsigned int tx_pend_queue; spinlock_t rx_lock ____cacheline_aligned_in_smp; struct xen_netif_rx_front_ring rx; @@ -152,6 +148,9 @@ struct netfront_queue { grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + unsigned int rx_rsp_unconsumed; + spinlock_t rx_cons_lock; + struct page_pool *page_pool; struct xdp_rxq_info xdp_rxq; }; @@ -173,6 +172,9 @@ struct netfront_info { bool netback_has_xdp_headroom; bool netfront_xdp_enabled; + /* Is device behaving sane? */ + bool broken; + atomic_t rx_gso_checksum_fixup; }; @@ -181,33 +183,25 @@ struct netfront_rx_info { struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; }; -static void skb_entry_set_link(union skb_entry *list, unsigned short id) -{ - list->link = id; -} - -static int skb_entry_is_link(const union skb_entry *list) -{ - BUILD_BUG_ON(sizeof(list->skb) != sizeof(list->link)); - return (unsigned long)list->skb < PAGE_OFFSET; -} - /* * Access macros for acquiring freeing slots in tx_skbs[]. */ -static void add_id_to_freelist(unsigned *head, union skb_entry *list, - unsigned short id) +static void add_id_to_list(unsigned *head, unsigned short *list, + unsigned short id) { - skb_entry_set_link(&list[id], *head); + list[id] = *head; *head = id; } -static unsigned short get_id_from_freelist(unsigned *head, - union skb_entry *list) +static unsigned short get_id_from_list(unsigned *head, unsigned short *list) { unsigned int id = *head; - *head = list[id].link; + + if (id != TX_LINK_NONE) { + *head = list[id]; + list[id] = TX_LINK_NONE; + } return id; } @@ -363,7 +357,7 @@ static int xennet_open(struct net_device *dev) unsigned int i = 0; struct netfront_queue *queue = NULL; - if (!np->queues) + if (!np->queues || np->broken) return -ENODEV; for (i = 0; i < num_queues; ++i) { @@ -385,41 +379,62 @@ static int xennet_open(struct net_device *dev) return 0; } -static void xennet_tx_buf_gc(struct netfront_queue *queue) +static bool xennet_tx_buf_gc(struct netfront_queue *queue) { RING_IDX cons, prod; unsigned short id; struct sk_buff *skb; bool more_to_do; + bool work_done = false; + const struct device *dev = &queue->info->netdev->dev; BUG_ON(!netif_carrier_ok(queue->info->netdev)); do { prod = queue->tx.sring->rsp_prod; + if (RING_RESPONSE_PROD_OVERFLOW(&queue->tx, prod)) { + dev_alert(dev, "Illegal number of responses %u\n", + prod - queue->tx.rsp_cons); + goto err; + } rmb(); /* Ensure we see responses up to 'rp'. */ for (cons = queue->tx.rsp_cons; cons != prod; cons++) { - struct xen_netif_tx_response *txrsp; + struct xen_netif_tx_response txrsp; + + work_done = true; - txrsp = RING_GET_RESPONSE(&queue->tx, cons); - if (txrsp->status == XEN_NETIF_RSP_NULL) + RING_COPY_RESPONSE(&queue->tx, cons, &txrsp); + if (txrsp.status == XEN_NETIF_RSP_NULL) continue; - id = txrsp->id; - skb = queue->tx_skbs[id].skb; - if (unlikely(gnttab_query_foreign_access( - queue->grant_tx_ref[id]) != 0)) { - pr_alert("%s: warning -- grant still in use by backend domain\n", - __func__); - BUG(); + id = txrsp.id; + if (id >= RING_SIZE(&queue->tx)) { + dev_alert(dev, + "Response has incorrect id (%u)\n", + id); + goto err; + } + if (queue->tx_link[id] != TX_PENDING) { + dev_alert(dev, + "Response for inactive request\n"); + goto err; + } + + queue->tx_link[id] = TX_LINK_NONE; + skb = queue->tx_skbs[id]; + queue->tx_skbs[id] = NULL; + if (unlikely(!gnttab_end_foreign_access_ref( + queue->grant_tx_ref[id], GNTMAP_readonly))) { + dev_alert(dev, + "Grant still in use by backend domain\n"); + goto err; } - gnttab_end_foreign_access_ref( - queue->grant_tx_ref[id], GNTMAP_readonly); gnttab_release_grant_reference( &queue->gref_tx_head, queue->grant_tx_ref[id]); queue->grant_tx_ref[id] = GRANT_INVALID_REF; queue->grant_tx_page[id] = NULL; - add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, id); + add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, id); dev_kfree_skb_irq(skb); } @@ -429,13 +444,22 @@ static void xennet_tx_buf_gc(struct netfront_queue *queue) } while (more_to_do); xennet_maybe_wake_tx(queue); + + return work_done; + + err: + queue->info->broken = true; + dev_alert(dev, "Disabled for further use\n"); + + return work_done; } struct xennet_gnttab_make_txreq { struct netfront_queue *queue; struct sk_buff *skb; struct page *page; - struct xen_netif_tx_request *tx; /* Last request */ + struct xen_netif_tx_request *tx; /* Last request on ring page */ + struct xen_netif_tx_request tx_local; /* Last request local copy*/ unsigned int size; }; @@ -451,7 +475,7 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, struct netfront_queue *queue = info->queue; struct sk_buff *skb = info->skb; - id = get_id_from_freelist(&queue->tx_skb_freelist, queue->tx_skbs); + id = get_id_from_list(&queue->tx_skb_freelist, queue->tx_link); tx = RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); ref = gnttab_claim_grant_reference(&queue->gref_tx_head); WARN_ON_ONCE(IS_ERR_VALUE((unsigned long)(int)ref)); @@ -459,34 +483,37 @@ static void xennet_tx_setup_grant(unsigned long gfn, unsigned int offset, gnttab_grant_foreign_access_ref(ref, queue->info->xbdev->otherend_id, gfn, GNTMAP_readonly); - queue->tx_skbs[id].skb = skb; + queue->tx_skbs[id] = skb; queue->grant_tx_page[id] = page; queue->grant_tx_ref[id] = ref; - tx->id = id; - tx->gref = ref; - tx->offset = offset; - tx->size = len; - tx->flags = 0; + info->tx_local.id = id; + info->tx_local.gref = ref; + info->tx_local.offset = offset; + info->tx_local.size = len; + info->tx_local.flags = 0; + + *tx = info->tx_local; + + /* + * Put the request in the pending queue, it will be set to be pending + * when the producer index is about to be raised. + */ + add_id_to_list(&queue->tx_pend_queue, queue->tx_link, id); info->tx = tx; - info->size += tx->size; + info->size += info->tx_local.size; } static struct xen_netif_tx_request *xennet_make_first_txreq( - struct netfront_queue *queue, struct sk_buff *skb, - struct page *page, unsigned int offset, unsigned int len) + struct xennet_gnttab_make_txreq *info, + unsigned int offset, unsigned int len) { - struct xennet_gnttab_make_txreq info = { - .queue = queue, - .skb = skb, - .page = page, - .size = 0, - }; + info->size = 0; - gnttab_for_one_grant(page, offset, len, xennet_tx_setup_grant, &info); + gnttab_for_one_grant(info->page, offset, len, xennet_tx_setup_grant, info); - return info.tx; + return info->tx; } static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset, @@ -499,35 +526,27 @@ static void xennet_make_one_txreq(unsigned long gfn, unsigned int offset, xennet_tx_setup_grant(gfn, offset, len, data); } -static struct xen_netif_tx_request *xennet_make_txreqs( - struct netfront_queue *queue, struct xen_netif_tx_request *tx, - struct sk_buff *skb, struct page *page, +static void xennet_make_txreqs( + struct xennet_gnttab_make_txreq *info, + struct page *page, unsigned int offset, unsigned int len) { - struct xennet_gnttab_make_txreq info = { - .queue = queue, - .skb = skb, - .tx = tx, - }; - /* Skip unused frames from start of page */ page += offset >> PAGE_SHIFT; offset &= ~PAGE_MASK; while (len) { - info.page = page; - info.size = 0; + info->page = page; + info->size = 0; gnttab_foreach_grant_in_range(page, offset, len, xennet_make_one_txreq, - &info); + info); page++; offset = 0; - len -= info.size; + len -= info->size; } - - return info.tx; } /* @@ -574,19 +593,34 @@ static u16 xennet_select_queue(struct net_device *dev, struct sk_buff *skb, return queue_idx; } +static void xennet_mark_tx_pending(struct netfront_queue *queue) +{ + unsigned int i; + + while ((i = get_id_from_list(&queue->tx_pend_queue, queue->tx_link)) != + TX_LINK_NONE) + queue->tx_link[i] = TX_PENDING; +} + static int xennet_xdp_xmit_one(struct net_device *dev, struct netfront_queue *queue, struct xdp_frame *xdpf) { struct netfront_info *np = netdev_priv(dev); struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats); + struct xennet_gnttab_make_txreq info = { + .queue = queue, + .skb = NULL, + .page = virt_to_page(xdpf->data), + }; int notify; - xennet_make_first_txreq(queue, NULL, - virt_to_page(xdpf->data), + xennet_make_first_txreq(&info, offset_in_page(xdpf->data), xdpf->len); + xennet_mark_tx_pending(queue); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify); if (notify) notify_remote_via_irq(queue->tx_irq); @@ -611,6 +645,8 @@ static int xennet_xdp_xmit(struct net_device *dev, int n, int drops = 0; int i, err; + if (unlikely(np->broken)) + return -ENODEV; if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; @@ -640,7 +676,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev { struct netfront_info *np = netdev_priv(dev); struct netfront_stats *tx_stats = this_cpu_ptr(np->tx_stats); - struct xen_netif_tx_request *tx, *first_tx; + struct xen_netif_tx_request *first_tx; unsigned int i; int notify; int slots; @@ -649,6 +685,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev unsigned int len; unsigned long flags; struct netfront_queue *queue = NULL; + struct xennet_gnttab_make_txreq info = { }; unsigned int num_queues = dev->real_num_tx_queues; u16 queue_index; struct sk_buff *nskb; @@ -656,6 +693,8 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev /* Drop the packet if no queues are set up */ if (num_queues < 1) goto drop; + if (unlikely(np->broken)) + goto drop; /* Determine which queue to transmit this SKB on */ queue_index = skb_get_queue_mapping(skb); queue = &np->queues[queue_index]; @@ -706,21 +745,24 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev } /* First request for the linear area. */ - first_tx = tx = xennet_make_first_txreq(queue, skb, - page, offset, len); - offset += tx->size; + info.queue = queue; + info.skb = skb; + info.page = page; + first_tx = xennet_make_first_txreq(&info, offset, len); + offset += info.tx_local.size; if (offset == PAGE_SIZE) { page++; offset = 0; } - len -= tx->size; + len -= info.tx_local.size; if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */ - tx->flags |= XEN_NETTXF_csum_blank | XEN_NETTXF_data_validated; + first_tx->flags |= XEN_NETTXF_csum_blank | + XEN_NETTXF_data_validated; else if (skb->ip_summed == CHECKSUM_UNNECESSARY) /* remote but checksummed. */ - tx->flags |= XEN_NETTXF_data_validated; + first_tx->flags |= XEN_NETTXF_data_validated; /* Optional extra info after the first request. */ if (skb_shinfo(skb)->gso_size) { @@ -729,7 +771,7 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev gso = (struct xen_netif_extra_info *) RING_GET_REQUEST(&queue->tx, queue->tx.req_prod_pvt++); - tx->flags |= XEN_NETTXF_extra_info; + first_tx->flags |= XEN_NETTXF_extra_info; gso->u.gso.size = skb_shinfo(skb)->gso_size; gso->u.gso.type = (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) ? @@ -743,12 +785,12 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev } /* Requests for the rest of the linear area. */ - tx = xennet_make_txreqs(queue, tx, skb, page, offset, len); + xennet_make_txreqs(&info, page, offset, len); /* Requests for all the frags. */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - tx = xennet_make_txreqs(queue, tx, skb, skb_frag_page(frag), + xennet_make_txreqs(&info, skb_frag_page(frag), skb_frag_off(frag), skb_frag_size(frag)); } @@ -759,6 +801,8 @@ static netdev_tx_t xennet_start_xmit(struct sk_buff *skb, struct net_device *dev /* timestamp packet in software */ skb_tx_timestamp(skb); + xennet_mark_tx_pending(queue); + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&queue->tx, notify); if (notify) notify_remote_via_irq(queue->tx_irq); @@ -798,6 +842,38 @@ static int xennet_close(struct net_device *dev) return 0; } +static void xennet_destroy_queues(struct netfront_info *info) +{ + unsigned int i; + + for (i = 0; i < info->netdev->real_num_tx_queues; i++) { + struct netfront_queue *queue = &info->queues[i]; + + if (netif_running(info->netdev)) + napi_disable(&queue->napi); + netif_napi_del(&queue->napi); + } + + kfree(info->queues); + info->queues = NULL; +} + +static void xennet_uninit(struct net_device *dev) +{ + struct netfront_info *np = netdev_priv(dev); + xennet_destroy_queues(np); +} + +static void xennet_set_rx_rsp_cons(struct netfront_queue *queue, RING_IDX val) +{ + unsigned long flags; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + queue->rx.rsp_cons = val; + queue->rx_rsp_unconsumed = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); +} + static void xennet_move_rx_slot(struct netfront_queue *queue, struct sk_buff *skb, grant_ref_t ref) { @@ -816,7 +892,7 @@ static int xennet_get_extras(struct netfront_queue *queue, RING_IDX rp) { - struct xen_netif_extra_info *extra; + struct xen_netif_extra_info extra; struct device *dev = &queue->info->netdev->dev; RING_IDX cons = queue->rx.rsp_cons; int err = 0; @@ -832,26 +908,24 @@ static int xennet_get_extras(struct netfront_queue *queue, break; } - extra = (struct xen_netif_extra_info *) - RING_GET_RESPONSE(&queue->rx, ++cons); + RING_COPY_RESPONSE(&queue->rx, ++cons, &extra); - if (unlikely(!extra->type || - extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { + if (unlikely(!extra.type || + extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { if (net_ratelimit()) dev_warn(dev, "Invalid extra type: %d\n", - extra->type); + extra.type); err = -EINVAL; } else { - memcpy(&extras[extra->type - 1], extra, - sizeof(*extra)); + extras[extra.type - 1] = extra; } skb = xennet_get_rx_skb(queue, cons); ref = xennet_get_rx_ref(queue, cons); xennet_move_rx_slot(queue, skb, ref); - } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + } while (extra.flags & XEN_NETIF_EXTRA_FLAG_MORE); - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return err; } @@ -907,7 +981,7 @@ static int xennet_get_responses(struct netfront_queue *queue, struct sk_buff_head *list, bool *need_xdp_flush) { - struct xen_netif_rx_response *rx = &rinfo->rx; + struct xen_netif_rx_response *rx = &rinfo->rx, rx_local; int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD); RING_IDX cons = queue->rx.rsp_cons; struct sk_buff *skb = xennet_get_rx_skb(queue, cons); @@ -916,7 +990,6 @@ static int xennet_get_responses(struct netfront_queue *queue, struct device *dev = &queue->info->netdev->dev; struct bpf_prog *xdp_prog; struct xdp_buff xdp; - unsigned long ret; int slots = 1; int err = 0; u32 verdict; @@ -958,8 +1031,13 @@ static int xennet_get_responses(struct netfront_queue *queue, goto next; } - ret = gnttab_end_foreign_access_ref(ref, 0); - BUG_ON(!ret); + if (!gnttab_end_foreign_access_ref(ref, 0)) { + dev_alert(dev, + "Grant still in use by backend domain\n"); + queue->info->broken = true; + dev_alert(dev, "Disabled for further use\n"); + return -EINVAL; + } gnttab_release_grant_reference(&queue->gref_rx_head, ref); @@ -991,7 +1069,8 @@ next: break; } - rx = RING_GET_RESPONSE(&queue->rx, cons + slots); + RING_COPY_RESPONSE(&queue->rx, cons + slots, &rx_local); + rx = &rx_local; skb = xennet_get_rx_skb(queue, cons + slots); ref = xennet_get_rx_ref(queue, cons + slots); slots++; @@ -1004,7 +1083,7 @@ next: } if (unlikely(err)) - queue->rx.rsp_cons = cons + slots; + xennet_set_rx_rsp_cons(queue, cons + slots); return err; } @@ -1046,10 +1125,11 @@ static int xennet_fill_frags(struct netfront_queue *queue, struct sk_buff *nskb; while ((nskb = __skb_dequeue(list))) { - struct xen_netif_rx_response *rx = - RING_GET_RESPONSE(&queue->rx, ++cons); + struct xen_netif_rx_response rx; skb_frag_t *nfrag = &skb_shinfo(nskb)->frags[0]; + RING_COPY_RESPONSE(&queue->rx, ++cons, &rx); + if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS) { unsigned int pull_to = NETFRONT_SKB_CB(skb)->pull_to; @@ -1057,20 +1137,21 @@ static int xennet_fill_frags(struct netfront_queue *queue, __pskb_pull_tail(skb, pull_to - skb_headlen(skb)); } if (unlikely(skb_shinfo(skb)->nr_frags >= MAX_SKB_FRAGS)) { - queue->rx.rsp_cons = ++cons + skb_queue_len(list); + xennet_set_rx_rsp_cons(queue, + ++cons + skb_queue_len(list)); kfree_skb(nskb); return -ENOENT; } skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, skb_frag_page(nfrag), - rx->offset, rx->status, PAGE_SIZE); + rx.offset, rx.status, PAGE_SIZE); skb_shinfo(nskb)->nr_frags = 0; kfree_skb(nskb); } - queue->rx.rsp_cons = cons; + xennet_set_rx_rsp_cons(queue, cons); return 0; } @@ -1158,18 +1239,29 @@ static int xennet_poll(struct napi_struct *napi, int budget) skb_queue_head_init(&tmpq); rp = queue->rx.sring->rsp_prod; + if (RING_RESPONSE_PROD_OVERFLOW(&queue->rx, rp)) { + dev_alert(&dev->dev, "Illegal number of responses %u\n", + rp - queue->rx.rsp_cons); + queue->info->broken = true; + spin_unlock(&queue->rx_lock); + return 0; + } rmb(); /* Ensure we see queued responses up to 'rp'. */ i = queue->rx.rsp_cons; work_done = 0; while ((i != rp) && (work_done < budget)) { - memcpy(rx, RING_GET_RESPONSE(&queue->rx, i), sizeof(*rx)); + RING_COPY_RESPONSE(&queue->rx, i, rx); memset(extras, 0, sizeof(rinfo.extras)); err = xennet_get_responses(queue, &rinfo, rp, &tmpq, &need_xdp_flush); if (unlikely(err)) { + if (queue->info->broken) { + spin_unlock(&queue->rx_lock); + return 0; + } err: while ((skb = __skb_dequeue(&tmpq))) __skb_queue_tail(&errq, skb); @@ -1186,7 +1278,9 @@ err: if (unlikely(xennet_set_skb_gso(skb, gso))) { __skb_queue_head(&tmpq, skb); - queue->rx.rsp_cons += skb_queue_len(&tmpq); + xennet_set_rx_rsp_cons(queue, + queue->rx.rsp_cons + + skb_queue_len(&tmpq)); goto err; } } @@ -1210,7 +1304,8 @@ err: __skb_queue_tail(&rxq, skb); - i = ++queue->rx.rsp_cons; + i = queue->rx.rsp_cons + 1; + xennet_set_rx_rsp_cons(queue, i); work_done++; } if (need_xdp_flush) @@ -1288,17 +1383,18 @@ static void xennet_release_tx_bufs(struct netfront_queue *queue) for (i = 0; i < NET_TX_RING_SIZE; i++) { /* Skip over entries which are actually freelist references */ - if (skb_entry_is_link(&queue->tx_skbs[i])) + if (!queue->tx_skbs[i]) continue; - skb = queue->tx_skbs[i].skb; + skb = queue->tx_skbs[i]; + queue->tx_skbs[i] = NULL; get_page(queue->grant_tx_page[i]); gnttab_end_foreign_access(queue->grant_tx_ref[i], GNTMAP_readonly, (unsigned long)page_address(queue->grant_tx_page[i])); queue->grant_tx_page[i] = NULL; queue->grant_tx_ref[i] = GRANT_INVALID_REF; - add_id_to_freelist(&queue->tx_skb_freelist, queue->tx_skbs, i); + add_id_to_list(&queue->tx_skb_freelist, queue->tx_link, i); dev_kfree_skb_irq(skb); } } @@ -1373,34 +1469,79 @@ static int xennet_set_features(struct net_device *dev, return 0; } -static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +static bool xennet_handle_tx(struct netfront_queue *queue, unsigned int *eoi) { - struct netfront_queue *queue = dev_id; unsigned long flags; + if (unlikely(queue->info->broken)) + return false; + spin_lock_irqsave(&queue->tx_lock, flags); - xennet_tx_buf_gc(queue); + if (xennet_tx_buf_gc(queue)) + *eoi = 0; spin_unlock_irqrestore(&queue->tx_lock, flags); + return true; +} + +static irqreturn_t xennet_tx_interrupt(int irq, void *dev_id) +{ + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (likely(xennet_handle_tx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } -static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) +static bool xennet_handle_rx(struct netfront_queue *queue, unsigned int *eoi) { - struct netfront_queue *queue = dev_id; - struct net_device *dev = queue->info->netdev; + unsigned int work_queued; + unsigned long flags; + + if (unlikely(queue->info->broken)) + return false; + + spin_lock_irqsave(&queue->rx_cons_lock, flags); + work_queued = RING_HAS_UNCONSUMED_RESPONSES(&queue->rx); + if (work_queued > queue->rx_rsp_unconsumed) { + queue->rx_rsp_unconsumed = work_queued; + *eoi = 0; + } else if (unlikely(work_queued < queue->rx_rsp_unconsumed)) { + const struct device *dev = &queue->info->netdev->dev; - if (likely(netif_carrier_ok(dev) && - RING_HAS_UNCONSUMED_RESPONSES(&queue->rx))) + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + dev_alert(dev, "RX producer index going backwards\n"); + dev_alert(dev, "Disabled for further use\n"); + queue->info->broken = true; + return false; + } + spin_unlock_irqrestore(&queue->rx_cons_lock, flags); + + if (likely(netif_carrier_ok(queue->info->netdev) && work_queued)) napi_schedule(&queue->napi); + return true; +} + +static irqreturn_t xennet_rx_interrupt(int irq, void *dev_id) +{ + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (likely(xennet_handle_rx(dev_id, &eoiflag))) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } static irqreturn_t xennet_interrupt(int irq, void *dev_id) { - xennet_tx_interrupt(irq, dev_id); - xennet_rx_interrupt(irq, dev_id); + unsigned int eoiflag = XEN_EOI_FLAG_SPURIOUS; + + if (xennet_handle_tx(dev_id, &eoiflag) && + xennet_handle_rx(dev_id, &eoiflag)) + xen_irq_lateeoi(irq, eoiflag); + return IRQ_HANDLED; } @@ -1411,6 +1552,10 @@ static void xennet_poll_controller(struct net_device *dev) struct netfront_info *info = netdev_priv(dev); unsigned int num_queues = dev->real_num_tx_queues; unsigned int i; + + if (info->broken) + return; + for (i = 0; i < num_queues; ++i) xennet_interrupt(0, &info->queues[i]); } @@ -1482,6 +1627,11 @@ static int xennet_xdp_set(struct net_device *dev, struct bpf_prog *prog, static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp) { + struct netfront_info *np = netdev_priv(dev); + + if (np->broken) + return -ENODEV; + switch (xdp->command) { case XDP_SETUP_PROG: return xennet_xdp_set(dev, xdp->prog, xdp->extack); @@ -1491,6 +1641,7 @@ static int xennet_xdp(struct net_device *dev, struct netdev_bpf *xdp) } static const struct net_device_ops xennet_netdev_ops = { + .ndo_uninit = xennet_uninit, .ndo_open = xennet_open, .ndo_stop = xennet_close, .ndo_start_xmit = xennet_start_xmit, @@ -1671,6 +1822,10 @@ static int netfront_resume(struct xenbus_device *dev) dev_dbg(&dev->dev, "%s\n", dev->nodename); + netif_tx_lock_bh(info->netdev); + netif_device_detach(info->netdev); + netif_tx_unlock_bh(info->netdev); + xennet_disconnect_backend(info); return 0; } @@ -1705,9 +1860,10 @@ static int setup_netfront_single(struct netfront_queue *queue) if (err < 0) goto fail; - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_interrupt, - 0, queue->info->netdev->name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_interrupt, 0, + queue->info->netdev->name, + queue); if (err < 0) goto bind_fail; queue->rx_evtchn = queue->tx_evtchn; @@ -1735,18 +1891,18 @@ static int setup_netfront_split(struct netfront_queue *queue) snprintf(queue->tx_irq_name, sizeof(queue->tx_irq_name), "%s-tx", queue->name); - err = bind_evtchn_to_irqhandler(queue->tx_evtchn, - xennet_tx_interrupt, - 0, queue->tx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->tx_evtchn, + xennet_tx_interrupt, 0, + queue->tx_irq_name, queue); if (err < 0) goto bind_tx_fail; queue->tx_irq = err; snprintf(queue->rx_irq_name, sizeof(queue->rx_irq_name), "%s-rx", queue->name); - err = bind_evtchn_to_irqhandler(queue->rx_evtchn, - xennet_rx_interrupt, - 0, queue->rx_irq_name, queue); + err = bind_evtchn_to_irqhandler_lateeoi(queue->rx_evtchn, + xennet_rx_interrupt, 0, + queue->rx_irq_name, queue); if (err < 0) goto bind_rx_fail; queue->rx_irq = err; @@ -1770,7 +1926,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_queue *queue, unsigned int feature_split_evtchn) { struct xen_netif_tx_sring *txs; - struct xen_netif_rx_sring *rxs; + struct xen_netif_rx_sring *rxs = NULL; grant_ref_t gref; int err; @@ -1790,21 +1946,21 @@ static int setup_netfront(struct xenbus_device *dev, err = xenbus_grant_ring(dev, txs, 1, &gref); if (err < 0) - goto grant_tx_ring_fail; + goto fail; queue->tx_ring_ref = gref; rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); if (!rxs) { err = -ENOMEM; xenbus_dev_fatal(dev, err, "allocating rx ring page"); - goto alloc_rx_ring_fail; + goto fail; } SHARED_RING_INIT(rxs); FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE); err = xenbus_grant_ring(dev, rxs, 1, &gref); if (err < 0) - goto grant_rx_ring_fail; + goto fail; queue->rx_ring_ref = gref; if (feature_split_evtchn) @@ -1817,22 +1973,28 @@ static int setup_netfront(struct xenbus_device *dev, err = setup_netfront_single(queue); if (err) - goto alloc_evtchn_fail; + goto fail; return 0; /* If we fail to setup netfront, it is safe to just revoke access to * granted pages because backend is not accessing it at this point. */ -alloc_evtchn_fail: - gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0); -grant_rx_ring_fail: - free_page((unsigned long)rxs); -alloc_rx_ring_fail: - gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0); -grant_tx_ring_fail: - free_page((unsigned long)txs); -fail: + fail: + if (queue->rx_ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(queue->rx_ring_ref, 0, + (unsigned long)rxs); + queue->rx_ring_ref = GRANT_INVALID_REF; + } else { + free_page((unsigned long)rxs); + } + if (queue->tx_ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(queue->tx_ring_ref, 0, + (unsigned long)txs); + queue->tx_ring_ref = GRANT_INVALID_REF; + } else { + free_page((unsigned long)txs); + } return err; } @@ -1848,6 +2010,7 @@ static int xennet_init_queue(struct netfront_queue *queue) spin_lock_init(&queue->tx_lock); spin_lock_init(&queue->rx_lock); + spin_lock_init(&queue->rx_cons_lock); timer_setup(&queue->rx_refill_timer, rx_refill_timeout, 0); @@ -1855,13 +2018,15 @@ static int xennet_init_queue(struct netfront_queue *queue) snprintf(queue->name, sizeof(queue->name), "vif%s-q%u", devid, queue->id); - /* Initialise tx_skbs as a free chain containing every entry. */ + /* Initialise tx_skb_freelist as a free chain containing every entry. */ queue->tx_skb_freelist = 0; + queue->tx_pend_queue = TX_LINK_NONE; for (i = 0; i < NET_TX_RING_SIZE; i++) { - skb_entry_set_link(&queue->tx_skbs[i], i+1); + queue->tx_link[i] = i + 1; queue->grant_tx_ref[i] = GRANT_INVALID_REF; queue->grant_tx_page[i] = NULL; } + queue->tx_link[NET_TX_RING_SIZE - 1] = TX_LINK_NONE; /* Clear out rx_skbs */ for (i = 0; i < NET_RX_RING_SIZE; i++) { @@ -1975,22 +2140,6 @@ error: return err; } -static void xennet_destroy_queues(struct netfront_info *info) -{ - unsigned int i; - - for (i = 0; i < info->netdev->real_num_tx_queues; i++) { - struct netfront_queue *queue = &info->queues[i]; - - if (netif_running(info->netdev)) - napi_disable(&queue->napi); - netif_napi_del(&queue->napi); - } - - kfree(info->queues); - info->queues = NULL; -} - static int xennet_create_page_pool(struct netfront_queue *queue) @@ -2130,6 +2279,9 @@ static int talk_to_netback(struct xenbus_device *dev, if (info->queues) xennet_destroy_queues(info); + /* For the case of a reconnect reset the "broken" indicator. */ + info->broken = false; + err = xennet_create_queues(info, &num_queues); if (err < 0) { xenbus_dev_fatal(dev, err, "creating queues"); @@ -2285,6 +2437,10 @@ static int xennet_connect(struct net_device *dev) * domain a kick because we've probably just requeued some * packets. */ + netif_tx_lock_bh(np->netdev); + netif_device_attach(np->netdev); + netif_tx_unlock_bh(np->netdev); + netif_carrier_on(np->netdev); for (j = 0; j < num_queues; ++j) { queue = &np->queues[j]; diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c index 18e3435ab8f331323b7b66924e61801348ae6059..d2c0116157759c01c643d06bae462a0c55b9591a 100644 --- a/drivers/nfc/pn533/pn533.c +++ b/drivers/nfc/pn533/pn533.c @@ -2258,7 +2258,7 @@ static int pn533_fill_fragment_skbs(struct pn533 *dev, struct sk_buff *skb) frag = pn533_alloc_skb(dev, frag_size); if (!frag) { skb_queue_purge(&dev->fragment_skb); - break; + return -ENOMEM; } if (!dev->tgt_mode) { @@ -2329,7 +2329,7 @@ static int pn533_transceive(struct nfc_dev *nfc_dev, /* jumbo frame ? */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); - if (rc <= 0) + if (rc < 0) goto error; skb = skb_dequeue(&dev->fragment_skb); @@ -2401,7 +2401,7 @@ static int pn533_tm_send(struct nfc_dev *nfc_dev, struct sk_buff *skb) /* let's split in multiple chunks if size's too big */ if (skb->len > PN533_CMD_DATAEXCH_DATA_MAXLEN) { rc = pn533_fill_fragment_skbs(dev, skb); - if (rc <= 0) + if (rc < 0) goto error; /* get the first skb */ diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c index 8e4d355dc3aec4e6c0e75afe01f2e9b190009f68..2ae1474faede99aea3e9fea55d80da245f7e164b 100644 --- a/drivers/nfc/port100.c +++ b/drivers/nfc/port100.c @@ -1003,11 +1003,11 @@ static u64 port100_get_command_type_mask(struct port100 *dev) skb = port100_alloc_skb(dev, 0); if (!skb) - return -ENOMEM; + return 0; resp = port100_send_cmd_sync(dev, PORT100_CMD_GET_COMMAND_TYPE, skb); if (IS_ERR(resp)) - return PTR_ERR(resp); + return 0; if (resp->len < 8) mask = 0; @@ -1609,7 +1609,9 @@ free_nfc_dev: nfc_digital_free_device(dev->nfc_digital_dev); error: + usb_kill_urb(dev->in_urb); usb_free_urb(dev->in_urb); + usb_kill_urb(dev->out_urb); usb_free_urb(dev->out_urb); usb_put_dev(dev->udev); diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index 23ed11f91213d250321be31105080503954dbdc1..6ea59426ab0bfda7f6ae8eb1f7b5ab5ff7dd754d 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -533,7 +533,8 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, phy->gpiod_ena = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW); if (IS_ERR(phy->gpiod_ena)) { nfc_err(dev, "Unable to get ENABLE GPIO\n"); - return PTR_ERR(phy->gpiod_ena); + r = PTR_ERR(phy->gpiod_ena); + goto out_free; } phy->se_status.is_ese_present = @@ -544,7 +545,7 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, r = st21nfca_hci_platform_init(phy); if (r < 0) { nfc_err(&client->dev, "Unable to reboot st21nfca\n"); - return r; + goto out_free; } r = devm_request_threaded_irq(&client->dev, client->irq, NULL, @@ -553,15 +554,23 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, ST21NFCA_HCI_DRIVER_NAME, phy); if (r < 0) { nfc_err(&client->dev, "Unable to register IRQ handler\n"); - return r; + goto out_free; } - return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, - ST21NFCA_FRAME_HEADROOM, - ST21NFCA_FRAME_TAILROOM, - ST21NFCA_HCI_LLC_MAX_PAYLOAD, - &phy->hdev, - &phy->se_status); + r = st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, + ST21NFCA_FRAME_HEADROOM, + ST21NFCA_FRAME_TAILROOM, + ST21NFCA_HCI_LLC_MAX_PAYLOAD, + &phy->hdev, + &phy->se_status); + if (r) + goto out_free; + + return 0; + +out_free: + kfree_skb(phy->pending_skb); + return r; } static int st21nfca_hci_i2c_remove(struct i2c_client *client) @@ -574,6 +583,8 @@ static int st21nfca_hci_i2c_remove(struct i2c_client *client) if (phy->powered) st21nfca_hci_i2c_disable(phy); + if (phy->pending_skb) + kfree_skb(phy->pending_skb); return 0; } diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.c b/drivers/ntb/hw/intel/ntb_hw_gen4.c index bc4541cbf8c6e174011851c09627971613a17057..99a5fc1ab0aafb2f0d1f0f969ea8a229cf70a10b 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen4.c +++ b/drivers/ntb/hw/intel/ntb_hw_gen4.c @@ -168,6 +168,18 @@ static enum ntb_topo gen4_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) return NTB_TOPO_NONE; } +static enum ntb_topo spr_ppd_topo(struct intel_ntb_dev *ndev, u32 ppd) +{ + switch (ppd & SPR_PPD_TOPO_MASK) { + case SPR_PPD_TOPO_B2B_USD: + return NTB_TOPO_B2B_USD; + case SPR_PPD_TOPO_B2B_DSD: + return NTB_TOPO_B2B_DSD; + } + + return NTB_TOPO_NONE; +} + int gen4_init_dev(struct intel_ntb_dev *ndev) { struct pci_dev *pdev = ndev->ntb.pdev; @@ -181,7 +193,10 @@ int gen4_init_dev(struct intel_ntb_dev *ndev) ndev->hwerr_flags |= NTB_HWERR_BAR_ALIGN; ppd1 = ioread32(ndev->self_mmio + GEN4_PPD1_OFFSET); - ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1); + if (pdev_is_ICX(pdev)) + ndev->ntb.topo = gen4_ppd_topo(ndev, ppd1); + else if (pdev_is_SPR(pdev)) + ndev->ntb.topo = spr_ppd_topo(ndev, ppd1); dev_dbg(&pdev->dev, "ppd %#x topo %s\n", ppd1, ntb_topo_string(ndev->ntb.topo)); if (ndev->ntb.topo == NTB_TOPO_NONE) diff --git a/drivers/ntb/hw/intel/ntb_hw_gen4.h b/drivers/ntb/hw/intel/ntb_hw_gen4.h index a868c788de02f3abf37a1ac862814344e1d22f52..ec293953d665f7f7d98ca8134e69387f7fa8a9a3 100644 --- a/drivers/ntb/hw/intel/ntb_hw_gen4.h +++ b/drivers/ntb/hw/intel/ntb_hw_gen4.h @@ -46,10 +46,14 @@ #define GEN4_PPD_CLEAR_TRN 0x0001 #define GEN4_PPD_LINKTRN 0x0008 #define GEN4_PPD_CONN_MASK 0x0300 +#define SPR_PPD_CONN_MASK 0x0700 #define GEN4_PPD_CONN_B2B 0x0200 #define GEN4_PPD_DEV_MASK 0x1000 #define GEN4_PPD_DEV_DSD 0x1000 #define GEN4_PPD_DEV_USD 0x0000 +#define SPR_PPD_DEV_MASK 0x4000 +#define SPR_PPD_DEV_DSD 0x4000 +#define SPR_PPD_DEV_USD 0x0000 #define GEN4_LINK_CTRL_LINK_DISABLE 0x0010 #define GEN4_SLOTSTS 0xb05a @@ -59,6 +63,10 @@ #define GEN4_PPD_TOPO_B2B_USD (GEN4_PPD_CONN_B2B | GEN4_PPD_DEV_USD) #define GEN4_PPD_TOPO_B2B_DSD (GEN4_PPD_CONN_B2B | GEN4_PPD_DEV_DSD) +#define SPR_PPD_TOPO_MASK (SPR_PPD_CONN_MASK | SPR_PPD_DEV_MASK) +#define SPR_PPD_TOPO_B2B_USD (GEN4_PPD_CONN_B2B | SPR_PPD_DEV_USD) +#define SPR_PPD_TOPO_B2B_DSD (GEN4_PPD_CONN_B2B | SPR_PPD_DEV_DSD) + #define GEN4_DB_COUNT 32 #define GEN4_DB_LINK 32 #define GEN4_DB_LINK_BIT BIT_ULL(GEN4_DB_LINK) @@ -97,4 +105,12 @@ static inline int pdev_is_ICX(struct pci_dev *pdev) return 0; } +static inline int pdev_is_SPR(struct pci_dev *pdev) +{ + if (pdev_is_gen4(pdev) && + pdev->revision > PCI_DEVICE_REVISION_ICX_MAX) + return 1; + return 0; +} + #endif diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bbc3efef5027892c7712370883581a1f7a23e7df..71c85c99e86c6c13dd0c1f79d872b32433a51d9b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -831,6 +831,7 @@ EXPORT_SYMBOL_GPL(nvme_cleanup_cmd); blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, struct nvme_command *cmd) { + struct nvme_ctrl *ctrl = nvme_req(req)->ctrl; blk_status_t ret = BLK_STS_OK; nvme_clear_nvme_request(req); @@ -877,7 +878,8 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, return BLK_STS_IOERR; } - nvme_req(req)->genctr++; + if (!(ctrl->quirks & NVME_QUIRK_SKIP_CID_GEN)) + nvme_req(req)->genctr++; cmd->common.command_id = nvme_cid(req); trace_nvme_setup_cmd(req, cmd); return ret; @@ -4257,7 +4259,14 @@ static void nvme_async_event_work(struct work_struct *work) container_of(work, struct nvme_ctrl, async_event_work); nvme_aen_uevent(ctrl); - ctrl->ops->submit_async_event(ctrl); + + /* + * The transport drivers must guarantee AER submission here is safe by + * flushing ctrl async_event_work after changing the controller state + * from LIVE and before freeing the admin queue. + */ + if (ctrl->state == NVME_CTRL_LIVE) + ctrl->ops->submit_async_event(ctrl); } static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index a9c1e3b4585ec3f9721c7e76fd031e8250fbd265..78467cb3f343e7dad0321e059264ca6e1bfb3cfe 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -153,6 +153,7 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl, struct nvmf_ctrl_options *opts) { if (ctrl->state == NVME_CTRL_DELETING || + ctrl->state == NVME_CTRL_DELETING_NOIO || ctrl->state == NVME_CTRL_DEAD || strcmp(opts->subsysnqn, ctrl->opts->subsysnqn) || strcmp(opts->host->nqn, ctrl->opts->host->nqn) || diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a0bcec33b0208873ba8711f9561d2df70a98d244..906cab35afe7adc3ace9a500f77ab1cfd3bcadc0 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2486,6 +2486,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) */ if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->tag_set); @@ -2509,6 +2510,7 @@ __nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) * clean up the admin queue. Same thing as above. */ blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); @@ -2952,14 +2954,6 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) if (ctrl->ctrl.queue_count == 1) return 0; - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); - if (ret) - goto out_free_io_queues; - - ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); - if (ret) - goto out_delete_hw_queues; - if (prior_ioq_cnt != nr_io_queues) { dev_info(ctrl->ctrl.device, "reconnect: revising io queue count from %d to %d\n", @@ -2969,6 +2963,14 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) nvme_unfreeze(&ctrl->ctrl); } + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); + if (ret) + goto out_free_io_queues; + + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); + if (ret) + goto out_delete_hw_queues; + return 0; out_delete_hw_queues: diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 46a1e24ba6f47b603cf5579481927a8b7b5be9e3..18a756444d5a91dd79ddc50148e9cd144965ed3f 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -135,13 +135,12 @@ void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; - mutex_lock(&ctrl->scan_lock); down_read(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) - if (nvme_mpath_clear_current_path(ns)) - kblockd_schedule_work(&ns->head->requeue_work); + list_for_each_entry(ns, &ctrl->namespaces, list) { + nvme_mpath_clear_current_path(ns); + kblockd_schedule_work(&ns->head->requeue_work); + } up_read(&ctrl->namespaces_rwsem); - mutex_unlock(&ctrl->scan_lock); } static bool nvme_path_is_disabled(struct nvme_ns *ns) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 8c735c55c15bf20d8f947418f5597c72884b7b42..5dd1dd8021ba1df7dac38ff690bd9a591ee4beea 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -144,6 +144,12 @@ enum nvme_quirks { * NVMe 1.3 compliance. */ NVME_QUIRK_NO_NS_DESC_LIST = (1 << 15), + + /* + * The controller requires the command_id value be be limited, so skip + * encoding the generation sequence number. + */ + NVME_QUIRK_SKIP_CID_GEN = (1 << 17), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 09767a805492c3edd54c3725cff9dad4f8537fbd..97afeb898b2531bf627f460c4acbf4a612fcc37e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1342,7 +1342,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) memset(&cmd, 0, sizeof(cmd)); cmd.abort.opcode = nvme_admin_abort_cmd; - cmd.abort.cid = req->tag; + cmd.abort.cid = nvme_cid(req); cmd.abort.sqid = cpu_to_le16(nvmeq->qid); dev_warn(nvmeq->dev->ctrl.device, @@ -3198,7 +3198,8 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | - NVME_QUIRK_DEALLOCATE_ZEROES, }, + NVME_QUIRK_DEALLOCATE_ZEROES | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, @@ -3259,7 +3260,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005), .driver_data = NVME_QUIRK_SINGLE_VECTOR | NVME_QUIRK_128_BYTES_SQES | - NVME_QUIRK_SHARED_TAGS }, + NVME_QUIRK_SHARED_TAGS | + NVME_QUIRK_SKIP_CID_GEN }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 51f4647ea21428819e1d659d440b89c496afaf1e..8eacc9bd58f5acfe6ce034e4869d75ef545fd71b 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1103,11 +1103,13 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) return ret; if (ctrl->ctrl.icdoff) { + ret = -EOPNOTSUPP; dev_err(ctrl->ctrl.device, "icdoff is not supported!\n"); goto destroy_admin; } if (!(ctrl->ctrl.sgls & (1 << 2))) { + ret = -EOPNOTSUPP; dev_err(ctrl->ctrl.device, "Mandatory keyed sgls are not supported!\n"); goto destroy_admin; @@ -1198,6 +1200,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) struct nvme_rdma_ctrl, err_work); nvme_stop_keep_alive(&ctrl->ctrl); + flush_work(&ctrl->ctrl.async_event_work); nvme_rdma_teardown_io_queues(ctrl, false); nvme_start_queues(&ctrl->ctrl); nvme_rdma_teardown_admin_queue(ctrl, false); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 05ad6bee085c1c891a5a6ef1aa1b80efee9ba327..6105894a218a56575b7325c2fef80226fbde32bf 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -904,18 +904,28 @@ static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) static void nvme_tcp_fail_request(struct nvme_tcp_request *req) { - nvme_tcp_end_request(blk_mq_rq_from_pdu(req), NVME_SC_HOST_PATH_ERROR); + if (nvme_tcp_async_req(req)) { + union nvme_result res = {}; + + nvme_complete_async_event(&req->queue->ctrl->ctrl, + cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res); + } else { + nvme_tcp_end_request(blk_mq_rq_from_pdu(req), + NVME_SC_HOST_PATH_ERROR); + } } static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; + int req_data_len = req->data_len; while (true) { struct page *page = nvme_tcp_req_cur_page(req); size_t offset = nvme_tcp_req_cur_offset(req); size_t len = nvme_tcp_req_cur_length(req); bool last = nvme_tcp_pdu_last_send(req, len); + int req_data_sent = req->data_sent; int ret, flags = MSG_DONTWAIT; if (last && !queue->data_digest && !nvme_tcp_queue_more(queue)) @@ -942,7 +952,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) * in the request where we don't want to modify it as we may * compete with the RX path completing the request. */ - if (req->data_sent + ret < req->data_len) + if (req_data_sent + ret < req_data_len) nvme_tcp_advance_req(req, ret); /* fully successful last send in current PDU */ @@ -1035,10 +1045,11 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) { struct nvme_tcp_queue *queue = req->queue; + size_t offset = req->offset; int ret; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &req->ddgst + req->offset, + .iov_base = (u8 *)&req->ddgst + req->offset, .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset }; @@ -1051,7 +1062,7 @@ static int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) if (unlikely(ret <= 0)) return ret; - if (req->offset + ret == NVME_TCP_DIGEST_LENGTH) { + if (offset + ret == NVME_TCP_DIGEST_LENGTH) { nvme_tcp_done_send_req(queue); return 1; } @@ -2066,6 +2077,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; nvme_stop_keep_alive(ctrl); + flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); /* unquiesce to fail fast pending requests */ nvme_start_queues(ctrl); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 37e1d7784e175c1f8713d76728089a9747ec74a1..9aed5cc7109605f4c9529f08c21a9d79d8745d25 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1462,6 +1462,8 @@ static void nvmet_port_release(struct config_item *item) { struct nvmet_port *port = to_nvmet_port(item); + /* Let inflight controllers teardown complete */ + flush_scheduled_work(); list_del(&port->global_entry); kfree(port->ana_state); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 9a8fa2e582d5be9dd080337bed03c5840a6d653e..f0f9d9007d49c8f4d3a3a6ccbdfd8a5453ecd31c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1602,3 +1602,4 @@ module_init(nvmet_init); module_exit(nvmet_exit); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 6a9626ff07135743d61f8e045a5d944706088b5d..d5beb38b2f52f6bfe8b2fa2dd45386abd53bfb4a 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -185,7 +185,7 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, } bip = bio_integrity_alloc(bio, GFP_NOIO, - min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES)); + bio_max_segs(req->metadata_sg_cnt)); if (IS_ERR(bip)) { pr_err("Unable to allocate bio_integrity_payload\n"); return PTR_ERR(bip); @@ -225,7 +225,7 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, static void nvmet_bdev_execute_rw(struct nvmet_req *req) { - int sg_cnt = req->sg_cnt; + unsigned int sg_cnt = req->sg_cnt; struct bio *bio; struct scatterlist *sg; struct blk_plug plug; @@ -262,7 +262,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) bio = &req->b.inline_bio; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); } else { - bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt)); } bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; @@ -289,7 +289,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) } } - bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); + bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt)); bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; bio->bi_opf = op; diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index b5759972444820a1710f44666e8cb73a680f5eb3..c81690b2a681b09b4d8c12a352f4563df78b7233 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "nvmet.h" #define NVMET_MAX_MPOOL_BVEC 16 @@ -266,7 +267,8 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) if (req->ns->buffered_io) { if (likely(!req->f.mpool_alloc) && - nvmet_file_execute_io(req, IOCB_NOWAIT)) + (req->ns->file->f_mode & FMODE_NOWAIT) && + nvmet_file_execute_io(req, IOCB_NOWAIT)) return; nvmet_file_submit_buffered_io(req); } else diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 8ee94f056898324cca7495adda2cc9469d97cd6d..6c747dc458df50912088be91dfde46d986d194c5 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl; u16 status = NVME_SC_SUCCESS; struct nvme_id_ctrl *id; - int max_hw_sectors; + unsigned int max_hw_sectors; int page_shift; id = kzalloc(sizeof(*id), GFP_KERNEL); @@ -200,7 +200,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq) else if (nvme_is_write(req->cmd)) op_flags = REQ_SYNC | REQ_IDLE; - bio = bio_alloc(GFP_KERNEL, req->sg_cnt); + bio = bio_alloc(GFP_KERNEL, bio_max_segs(req->sg_cnt)); bio->bi_end_io = bio_put; bio->bi_opf = req_op(rq) | op_flags; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 7d607f435e36664f7bc36233ae8ab3af96575e63..6d5552f2f184a09184243ffce036ff03438758a4 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1819,12 +1819,36 @@ restart: mutex_unlock(&nvmet_rdma_queue_mutex); } +static void nvmet_rdma_destroy_port_queues(struct nvmet_rdma_port *port) +{ + struct nvmet_rdma_queue *queue, *tmp; + struct nvmet_port *nport = port->nport; + + mutex_lock(&nvmet_rdma_queue_mutex); + list_for_each_entry_safe(queue, tmp, &nvmet_rdma_queue_list, + queue_list) { + if (queue->port != nport) + continue; + + list_del_init(&queue->queue_list); + __nvmet_rdma_queue_disconnect(queue); + } + mutex_unlock(&nvmet_rdma_queue_mutex); +} + static void nvmet_rdma_disable_port(struct nvmet_rdma_port *port) { struct rdma_cm_id *cm_id = xchg(&port->cm_id, NULL); if (cm_id) rdma_destroy_id(cm_id); + + /* + * Destroy the remaining queues, which are not belong to any + * controller yet. Do it here after the RDMA-CM was destroyed + * guarantees that no new queue will be created. + */ + nvmet_rdma_destroy_port_queues(port); } static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index dedcb7aaf0d822318ddcd703672a6518bf490e20..96b67a70cbbbdbaa84bc2b70cf114b4004b7d11f 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -688,10 +688,11 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct nvmet_tcp_queue *queue = cmd->queue; + int left = NVME_TCP_DIGEST_LENGTH - cmd->offset; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { - .iov_base = &cmd->exp_ddgst + cmd->offset, - .iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset + .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset, + .iov_len = left }; int ret; @@ -705,6 +706,10 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) return ret; cmd->offset += ret; + left -= ret; + + if (left) + return -EAGAIN; if (queue->nvme_sq.sqhd_disabled) { cmd->queue->snd_cmd = NULL; @@ -1084,7 +1089,7 @@ recv: } if (queue->hdr_digest && - nvmet_tcp_verify_hdgst(queue, &queue->pdu, queue->offset)) { + nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) { nvmet_tcp_fatal_error(queue); /* fatal */ return -EPROTO; } @@ -1398,6 +1403,7 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) static void nvmet_tcp_release_queue_work(struct work_struct *w) { + struct page *page; struct nvmet_tcp_queue *queue = container_of(w, struct nvmet_tcp_queue, release_work); @@ -1417,6 +1423,8 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_free_crypto(queue); ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx); + page = virt_to_head_page(queue->pf_cache.va); + __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); kfree(queue); } @@ -1705,6 +1713,17 @@ err_port: return ret; } +static void nvmet_tcp_destroy_port_queues(struct nvmet_tcp_port *port) +{ + struct nvmet_tcp_queue *queue; + + mutex_lock(&nvmet_tcp_queue_mutex); + list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) + if (queue->port == port) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + mutex_unlock(&nvmet_tcp_queue_mutex); +} + static void nvmet_tcp_remove_port(struct nvmet_port *nport) { struct nvmet_tcp_port *port = nport->priv; @@ -1714,6 +1733,11 @@ static void nvmet_tcp_remove_port(struct nvmet_port *nport) port->sock->sk->sk_user_data = NULL; write_unlock_bh(&port->sock->sk->sk_callback_lock); cancel_work_sync(&port->accept_work); + /* + * Destroy the remaining queues, which are not belong to any + * controller yet. + */ + nvmet_tcp_destroy_port_queues(port); sock_release(port->sock); kfree(port); diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index bb8fb2b3711d43cea971bb33824de40efd886951..21d89d80d083834ad0b6c7d0189fc68f236118cc 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -222,6 +222,8 @@ static umode_t nvmem_bin_attr_is_visible(struct kobject *kobj, struct device *dev = kobj_to_dev(kobj); struct nvmem_device *nvmem = to_nvmem_device(dev); + attr->size = nvmem->size; + return nvmem_bin_attr_get_umode(nvmem); } @@ -1229,7 +1231,8 @@ static void nvmem_shift_read_buffer_in_place(struct nvmem_cell *cell, void *buf) *p-- = 0; /* clear msb bits if any leftover in the last byte */ - *p &= GENMASK((cell->nbits%BITS_PER_BYTE) - 1, 0); + if (cell->nbits % BITS_PER_BYTE) + *p &= GENMASK((cell->nbits % BITS_PER_BYTE) - 1, 0); } static int __nvmem_cell_read(struct nvmem_device *nvmem, diff --git a/drivers/of/base.c b/drivers/of/base.c index 5947d9218289dec5043841a3d6e5ca435fa7a8ee..bcc88cbbbead2be1c92ce5b8055ca3c72a164de0 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1328,9 +1328,14 @@ int of_phandle_iterator_next(struct of_phandle_iterator *it) * property data length */ if (it->cur + count > it->list_end) { - pr_err("%pOF: %s = %d found %d\n", - it->parent, it->cells_name, - count, it->cell_count); + if (it->cells_name) + pr_err("%pOF: %s = %d found %td\n", + it->parent, it->cells_name, + count, it->list_end - it->cur); + else + pr_err("%pOF: phandle %s needs %d, found %td\n", + it->parent, of_node_full_name(it->node), + count, it->list_end - it->cur); goto err; } } diff --git a/drivers/of/device.c b/drivers/of/device.c index f0594e91565a7dc267aaa257c996a1c6a1eaca33..7a662cd62abffcbc2693666a2367be37c48c4432 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -106,7 +106,11 @@ of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) break; } - if (i != count && of_reserved_mem_device_init_by_idx(dev, of_node, i)) + /* + * Attempt to initialize a restricted-dma-pool region if one was found. + * Note that count can hold a negative error code. + */ + if (i < count && of_reserved_mem_device_init_by_idx(dev, of_node, i)) dev_warn(dev, "failed to initialise \"restricted-dma-pool\" memory node\n"); } diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 79bd5f5a1bf171758cd2a802a16a63c12c86c074..eb26896ee4c7af29ed461d57fbe26702bf1cba75 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -511,6 +511,7 @@ static const struct of_device_id reserved_mem_matches[] = { { .compatible = "qcom,rmtfs-mem" }, { .compatible = "qcom,cmd-db" }, { .compatible = "ramoops" }, + { .compatible = "google,open-dice" }, {} }; diff --git a/drivers/of/property.c b/drivers/of/property.c index 78427c85bef38737d850905a1adb143dbfbb930b..797ac714f386234c007cab0e6daaba55a97c91dc 100644 --- a/drivers/of/property.c +++ b/drivers/of/property.c @@ -1260,6 +1260,8 @@ DEFINE_SIMPLE_PROP(pinctrl5, "pinctrl-5", NULL) DEFINE_SIMPLE_PROP(pinctrl6, "pinctrl-6", NULL) DEFINE_SIMPLE_PROP(pinctrl7, "pinctrl-7", NULL) DEFINE_SIMPLE_PROP(pinctrl8, "pinctrl-8", NULL) +DEFINE_SIMPLE_PROP(leds, "leds", NULL) +DEFINE_SIMPLE_PROP(backlight, "backlight", NULL) DEFINE_SUFFIX_PROP(regulators, "-supply", NULL) DEFINE_SUFFIX_PROP(gpio, "-gpio", "#gpio-cells") @@ -1343,6 +1345,8 @@ static const struct supplier_bindings of_supplier_bindings[] = { { .parse_prop = parse_pinctrl6, }, { .parse_prop = parse_pinctrl7, }, { .parse_prop = parse_pinctrl8, }, + { .parse_prop = parse_leds, }, + { .parse_prop = parse_backlight, }, { .parse_prop = parse_gpio_compat, }, { .parse_prop = parse_interrupts, }, { .parse_prop = parse_regulators, }, diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index eb51bc147440172ceb00ba4239e72f0c1bc358a8..5407bbdb6439519e9a2521a7aff31c2789fa2df4 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -910,11 +910,18 @@ static void __init of_unittest_dma_ranges_one(const char *path, if (!rc) { phys_addr_t paddr; dma_addr_t dma_addr; - struct device dev_bogus; + struct device *dev_bogus; - dev_bogus.dma_range_map = map; - paddr = dma_to_phys(&dev_bogus, expect_dma_addr); - dma_addr = phys_to_dma(&dev_bogus, expect_paddr); + dev_bogus = kzalloc(sizeof(struct device), GFP_KERNEL); + if (!dev_bogus) { + unittest(0, "kzalloc() failed\n"); + kfree(map); + return; + } + + dev_bogus->dma_range_map = map; + paddr = dma_to_phys(dev_bogus, expect_dma_addr); + dma_addr = phys_to_dma(dev_bogus, expect_paddr); unittest(paddr == expect_paddr, "of_dma_get_range: wrong phys addr %pap (expecting %llx) on node %pOF\n", @@ -924,6 +931,7 @@ static void __init of_unittest_dma_ranges_one(const char *path, &dma_addr, expect_dma_addr, np); kfree(map); + kfree(dev_bogus); } of_node_put(np); #endif @@ -933,8 +941,9 @@ static void __init of_unittest_parse_dma_ranges(void) { of_unittest_dma_ranges_one("/testcase-data/address-tests/device@70000000", 0x0, 0x20000000); - of_unittest_dma_ranges_one("/testcase-data/address-tests/bus@80000000/device@1000", - 0x100000000, 0x20000000); + if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT)) + of_unittest_dma_ranges_one("/testcase-data/address-tests/bus@80000000/device@1000", + 0x100000000, 0x20000000); of_unittest_dma_ranges_one("/testcase-data/address-tests/pci@90000000", 0x80000000, 0x20000000); } @@ -1682,19 +1691,19 @@ static void __init of_unittest_overlay_gpio(void) */ EXPECT_BEGIN(KERN_INFO, - "GPIO line <> (line-B-input) hogged as input\n"); + "gpio-<> (line-B-input): hogged as input\n"); EXPECT_BEGIN(KERN_INFO, - "GPIO line <> (line-A-input) hogged as input\n"); + "gpio-<> (line-A-input): hogged as input\n"); ret = platform_driver_register(&unittest_gpio_driver); if (unittest(ret == 0, "could not register unittest gpio driver\n")) return; EXPECT_END(KERN_INFO, - "GPIO line <> (line-A-input) hogged as input\n"); + "gpio-<> (line-A-input): hogged as input\n"); EXPECT_END(KERN_INFO, - "GPIO line <> (line-B-input) hogged as input\n"); + "gpio-<> (line-B-input): hogged as input\n"); unittest(probe_pass_count + 2 == unittest_gpio_probe_pass_count, "unittest_gpio_probe() failed or not called\n"); @@ -1721,7 +1730,7 @@ static void __init of_unittest_overlay_gpio(void) chip_request_count = unittest_gpio_chip_request_count; EXPECT_BEGIN(KERN_INFO, - "GPIO line <> (line-D-input) hogged as input\n"); + "gpio-<> (line-D-input): hogged as input\n"); /* overlay_gpio_03 contains gpio node and child gpio hog node */ @@ -1729,7 +1738,7 @@ static void __init of_unittest_overlay_gpio(void) "Adding overlay 'overlay_gpio_03' failed\n"); EXPECT_END(KERN_INFO, - "GPIO line <> (line-D-input) hogged as input\n"); + "gpio-<> (line-D-input): hogged as input\n"); unittest(probe_pass_count + 1 == unittest_gpio_probe_pass_count, "unittest_gpio_probe() failed or not called\n"); @@ -1768,7 +1777,7 @@ static void __init of_unittest_overlay_gpio(void) */ EXPECT_BEGIN(KERN_INFO, - "GPIO line <> (line-C-input) hogged as input\n"); + "gpio-<> (line-C-input): hogged as input\n"); /* overlay_gpio_04b contains child gpio hog node */ @@ -1776,7 +1785,7 @@ static void __init of_unittest_overlay_gpio(void) "Adding overlay 'overlay_gpio_04b' failed\n"); EXPECT_END(KERN_INFO, - "GPIO line <> (line-C-input) hogged as input\n"); + "gpio-<> (line-C-input): hogged as input\n"); unittest(chip_request_count + 1 == unittest_gpio_chip_request_count, "unittest_gpio_chip_request() called %d times (expected 1 time)\n", diff --git a/drivers/opp/of.c b/drivers/opp/of.c index b90e69b881c88ea3d0b27d3c712f7533ffae8c05..4468623689493118f078e7bcc4c70d42c2dab718 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -827,7 +827,7 @@ free_required_opps: free_opp: _opp_free(new_opp); - return ERR_PTR(ret); + return ret ? ERR_PTR(ret) : NULL; } /* Initializes OPP tables based on new bindings */ diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index b5f9ee81a46c1e2753d5f9150bf696279e882fff..b916fab9b161816bf032ba5b47ab58dc07cc4692 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1003,7 +1003,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ioc->usg_calls++; #endif - while(sg_dma_len(sglist) && nents--) { + while (nents && sg_dma_len(sglist)) { #ifdef CCIO_COLLECT_STATS ioc->usg_pages += sg_dma_len(sglist) >> PAGE_SHIFT; @@ -1011,6 +1011,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ccio_unmap_page(dev, sg_dma_address(sglist), sg_dma_len(sglist), direction, 0); ++sglist; + nents--; } DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); diff --git a/drivers/parisc/pdc_stable.c b/drivers/parisc/pdc_stable.c index e090978518f1a651195cad3e654fc6bde4900094..4760f82def6ec5738f1aada43b4323a200065098 100644 --- a/drivers/parisc/pdc_stable.c +++ b/drivers/parisc/pdc_stable.c @@ -979,8 +979,10 @@ pdcs_register_pathentries(void) entry->kobj.kset = paths_kset; err = kobject_init_and_add(&entry->kobj, &ktype_pdcspath, NULL, "%s", entry->name); - if (err) + if (err) { + kobject_put(&entry->kobj); return err; + } /* kobject is now registered */ write_lock(&entry->rw_lock); diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index dce4cdf786cdb19a607a79dc35a804067577d41d..228c58060e9b3810f7ceb9dc1939628ec6eb804e 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1047,7 +1047,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, spin_unlock_irqrestore(&ioc->res_lock, flags); #endif - while (sg_dma_len(sglist) && nents--) { + while (nents && sg_dma_len(sglist)) { sba_unmap_page(dev, sg_dma_address(sglist), sg_dma_len(sglist), direction, 0); @@ -1056,6 +1056,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, ioc->usingle_calls--; /* kluge since call is unmap_sg() */ #endif ++sglist; + nents--; } DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); diff --git a/drivers/pci/controller/cadence/pcie-cadence-plat.c b/drivers/pci/controller/cadence/pcie-cadence-plat.c index 5fee0f89ab594888e55a154c5e133e11262b97d4..a224afadbcc005099efe20f3a429cec96183b2d0 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-plat.c +++ b/drivers/pci/controller/cadence/pcie-cadence-plat.c @@ -127,6 +127,8 @@ static int cdns_plat_pcie_probe(struct platform_device *pdev) goto err_init; } + return 0; + err_init: err_get_sync: pm_runtime_put_sync(dev); diff --git a/drivers/pci/controller/dwc/pcie-uniphier.c b/drivers/pci/controller/dwc/pcie-uniphier.c index 48176265c867e417e8bb0a3ab8e9d8ca514625ea..527ec8aeb602f62cf1aea141cafc3c9dd5d691d0 100644 --- a/drivers/pci/controller/dwc/pcie-uniphier.c +++ b/drivers/pci/controller/dwc/pcie-uniphier.c @@ -171,30 +171,21 @@ static void uniphier_pcie_irq_enable(struct uniphier_pcie_priv *priv) writel(PCL_RCV_INTX_ALL_ENABLE, priv->base + PCL_RCV_INTX); } -static void uniphier_pcie_irq_ack(struct irq_data *d) -{ - struct pcie_port *pp = irq_data_get_irq_chip_data(d); - struct dw_pcie *pci = to_dw_pcie_from_pp(pp); - struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci); - u32 val; - - val = readl(priv->base + PCL_RCV_INTX); - val &= ~PCL_RCV_INTX_ALL_STATUS; - val |= BIT(irqd_to_hwirq(d) + PCL_RCV_INTX_STATUS_SHIFT); - writel(val, priv->base + PCL_RCV_INTX); -} - static void uniphier_pcie_irq_mask(struct irq_data *d) { struct pcie_port *pp = irq_data_get_irq_chip_data(d); struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci); + unsigned long flags; u32 val; + raw_spin_lock_irqsave(&pp->lock, flags); + val = readl(priv->base + PCL_RCV_INTX); - val &= ~PCL_RCV_INTX_ALL_MASK; val |= BIT(irqd_to_hwirq(d) + PCL_RCV_INTX_MASK_SHIFT); writel(val, priv->base + PCL_RCV_INTX); + + raw_spin_unlock_irqrestore(&pp->lock, flags); } static void uniphier_pcie_irq_unmask(struct irq_data *d) @@ -202,17 +193,20 @@ static void uniphier_pcie_irq_unmask(struct irq_data *d) struct pcie_port *pp = irq_data_get_irq_chip_data(d); struct dw_pcie *pci = to_dw_pcie_from_pp(pp); struct uniphier_pcie_priv *priv = to_uniphier_pcie(pci); + unsigned long flags; u32 val; + raw_spin_lock_irqsave(&pp->lock, flags); + val = readl(priv->base + PCL_RCV_INTX); - val &= ~PCL_RCV_INTX_ALL_MASK; val &= ~BIT(irqd_to_hwirq(d) + PCL_RCV_INTX_MASK_SHIFT); writel(val, priv->base + PCL_RCV_INTX); + + raw_spin_unlock_irqrestore(&pp->lock, flags); } static struct irq_chip uniphier_pcie_irq_chip = { .name = "PCI", - .irq_ack = uniphier_pcie_irq_ack, .irq_mask = uniphier_pcie_irq_mask, .irq_unmask = uniphier_pcie_irq_unmask, }; diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index 4f1a29ede576ac3dd5183f7f7182b029b5de1cf1..f30144c8c0bd2c6ae5adf93a7837c10e80d08e97 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -30,9 +30,6 @@ /* PCIe core registers */ #define PCIE_CORE_DEV_ID_REG 0x0 #define PCIE_CORE_CMD_STATUS_REG 0x4 -#define PCIE_CORE_CMD_IO_ACCESS_EN BIT(0) -#define PCIE_CORE_CMD_MEM_ACCESS_EN BIT(1) -#define PCIE_CORE_CMD_MEM_IO_REQ_EN BIT(2) #define PCIE_CORE_DEV_REV_REG 0x8 #define PCIE_CORE_PCIEXP_CAP 0xc0 #define PCIE_CORE_ERR_CAPCTL_REG 0x118 @@ -98,6 +95,7 @@ #define PCIE_CORE_CTRL2_MSI_ENABLE BIT(10) #define PCIE_CORE_REF_CLK_REG (CONTROL_BASE_ADDR + 0x14) #define PCIE_CORE_REF_CLK_TX_ENABLE BIT(1) +#define PCIE_CORE_REF_CLK_RX_ENABLE BIT(2) #define PCIE_MSG_LOG_REG (CONTROL_BASE_ADDR + 0x30) #define PCIE_ISR0_REG (CONTROL_BASE_ADDR + 0x40) #define PCIE_MSG_PM_PME_MASK BIT(7) @@ -105,18 +103,19 @@ #define PCIE_ISR0_MSI_INT_PENDING BIT(24) #define PCIE_ISR0_INTX_ASSERT(val) BIT(16 + (val)) #define PCIE_ISR0_INTX_DEASSERT(val) BIT(20 + (val)) -#define PCIE_ISR0_ALL_MASK GENMASK(26, 0) +#define PCIE_ISR0_ALL_MASK GENMASK(31, 0) #define PCIE_ISR1_REG (CONTROL_BASE_ADDR + 0x48) #define PCIE_ISR1_MASK_REG (CONTROL_BASE_ADDR + 0x4C) #define PCIE_ISR1_POWER_STATE_CHANGE BIT(4) #define PCIE_ISR1_FLUSH BIT(5) #define PCIE_ISR1_INTX_ASSERT(val) BIT(8 + (val)) -#define PCIE_ISR1_ALL_MASK GENMASK(11, 4) +#define PCIE_ISR1_ALL_MASK GENMASK(31, 0) #define PCIE_MSI_ADDR_LOW_REG (CONTROL_BASE_ADDR + 0x50) #define PCIE_MSI_ADDR_HIGH_REG (CONTROL_BASE_ADDR + 0x54) #define PCIE_MSI_STATUS_REG (CONTROL_BASE_ADDR + 0x58) #define PCIE_MSI_MASK_REG (CONTROL_BASE_ADDR + 0x5C) #define PCIE_MSI_PAYLOAD_REG (CONTROL_BASE_ADDR + 0x9C) +#define PCIE_MSI_DATA_MASK GENMASK(15, 0) /* PCIe window configuration */ #define OB_WIN_BASE_ADDR 0x4c00 @@ -163,8 +162,50 @@ #define CFG_REG (LMI_BASE_ADDR + 0x0) #define LTSSM_SHIFT 24 #define LTSSM_MASK 0x3f -#define LTSSM_L0 0x10 #define RC_BAR_CONFIG 0x300 + +/* LTSSM values in CFG_REG */ +enum { + LTSSM_DETECT_QUIET = 0x0, + LTSSM_DETECT_ACTIVE = 0x1, + LTSSM_POLLING_ACTIVE = 0x2, + LTSSM_POLLING_COMPLIANCE = 0x3, + LTSSM_POLLING_CONFIGURATION = 0x4, + LTSSM_CONFIG_LINKWIDTH_START = 0x5, + LTSSM_CONFIG_LINKWIDTH_ACCEPT = 0x6, + LTSSM_CONFIG_LANENUM_ACCEPT = 0x7, + LTSSM_CONFIG_LANENUM_WAIT = 0x8, + LTSSM_CONFIG_COMPLETE = 0x9, + LTSSM_CONFIG_IDLE = 0xa, + LTSSM_RECOVERY_RCVR_LOCK = 0xb, + LTSSM_RECOVERY_SPEED = 0xc, + LTSSM_RECOVERY_RCVR_CFG = 0xd, + LTSSM_RECOVERY_IDLE = 0xe, + LTSSM_L0 = 0x10, + LTSSM_RX_L0S_ENTRY = 0x11, + LTSSM_RX_L0S_IDLE = 0x12, + LTSSM_RX_L0S_FTS = 0x13, + LTSSM_TX_L0S_ENTRY = 0x14, + LTSSM_TX_L0S_IDLE = 0x15, + LTSSM_TX_L0S_FTS = 0x16, + LTSSM_L1_ENTRY = 0x17, + LTSSM_L1_IDLE = 0x18, + LTSSM_L2_IDLE = 0x19, + LTSSM_L2_TRANSMIT_WAKE = 0x1a, + LTSSM_DISABLED = 0x20, + LTSSM_LOOPBACK_ENTRY_MASTER = 0x21, + LTSSM_LOOPBACK_ACTIVE_MASTER = 0x22, + LTSSM_LOOPBACK_EXIT_MASTER = 0x23, + LTSSM_LOOPBACK_ENTRY_SLAVE = 0x24, + LTSSM_LOOPBACK_ACTIVE_SLAVE = 0x25, + LTSSM_LOOPBACK_EXIT_SLAVE = 0x26, + LTSSM_HOT_RESET = 0x27, + LTSSM_RECOVERY_EQUALIZATION_PHASE0 = 0x28, + LTSSM_RECOVERY_EQUALIZATION_PHASE1 = 0x29, + LTSSM_RECOVERY_EQUALIZATION_PHASE2 = 0x2a, + LTSSM_RECOVERY_EQUALIZATION_PHASE3 = 0x2b, +}; + #define VENDOR_ID_REG (LMI_BASE_ADDR + 0x44) /* PCIe core controller registers */ @@ -197,7 +238,7 @@ #define PCIE_IRQ_MSI_INT2_DET BIT(21) #define PCIE_IRQ_RC_DBELL_DET BIT(22) #define PCIE_IRQ_EP_STATUS BIT(23) -#define PCIE_IRQ_ALL_MASK 0xfff0fb +#define PCIE_IRQ_ALL_MASK GENMASK(31, 0) #define PCIE_IRQ_ENABLE_INTS_MASK PCIE_IRQ_CORE_INT /* Transaction types */ @@ -264,18 +305,49 @@ static inline u32 advk_readl(struct advk_pcie *pcie, u64 reg) return readl(pcie->base + reg); } -static inline u16 advk_read16(struct advk_pcie *pcie, u64 reg) +static u8 advk_pcie_ltssm_state(struct advk_pcie *pcie) { - return advk_readl(pcie, (reg & ~0x3)) >> ((reg & 0x3) * 8); + u32 val; + u8 ltssm_state; + + val = advk_readl(pcie, CFG_REG); + ltssm_state = (val >> LTSSM_SHIFT) & LTSSM_MASK; + return ltssm_state; } -static int advk_pcie_link_up(struct advk_pcie *pcie) +static inline bool advk_pcie_link_up(struct advk_pcie *pcie) { - u32 val, ltssm_state; + /* check if LTSSM is in normal operation - some L* state */ + u8 ltssm_state = advk_pcie_ltssm_state(pcie); + return ltssm_state >= LTSSM_L0 && ltssm_state < LTSSM_DISABLED; +} - val = advk_readl(pcie, CFG_REG); - ltssm_state = (val >> LTSSM_SHIFT) & LTSSM_MASK; - return ltssm_state >= LTSSM_L0; +static inline bool advk_pcie_link_active(struct advk_pcie *pcie) +{ + /* + * According to PCIe Base specification 3.0, Table 4-14: Link + * Status Mapped to the LTSSM, and 4.2.6.3.6 Configuration.Idle + * is Link Up mapped to LTSSM Configuration.Idle, Recovery, L0, + * L0s, L1 and L2 states. And according to 3.2.1. Data Link + * Control and Management State Machine Rules is DL Up status + * reported in DL Active state. + */ + u8 ltssm_state = advk_pcie_ltssm_state(pcie); + return ltssm_state >= LTSSM_CONFIG_IDLE && ltssm_state < LTSSM_DISABLED; +} + +static inline bool advk_pcie_link_training(struct advk_pcie *pcie) +{ + /* + * According to PCIe Base specification 3.0, Table 4-14: Link + * Status Mapped to the LTSSM is Link Training mapped to LTSSM + * Configuration and Recovery states. + */ + u8 ltssm_state = advk_pcie_ltssm_state(pcie); + return ((ltssm_state >= LTSSM_CONFIG_LINKWIDTH_START && + ltssm_state < LTSSM_L0) || + (ltssm_state >= LTSSM_RECOVERY_EQUALIZATION_PHASE0 && + ltssm_state <= LTSSM_RECOVERY_EQUALIZATION_PHASE3)); } static int advk_pcie_wait_for_link(struct advk_pcie *pcie) @@ -298,7 +370,7 @@ static void advk_pcie_wait_for_retrain(struct advk_pcie *pcie) size_t retries; for (retries = 0; retries < RETRAIN_WAIT_MAX_RETRIES; ++retries) { - if (!advk_pcie_link_up(pcie)) + if (advk_pcie_link_training(pcie)) break; udelay(RETRAIN_WAIT_USLEEP_US); } @@ -306,16 +378,9 @@ static void advk_pcie_wait_for_retrain(struct advk_pcie *pcie) static void advk_pcie_issue_perst(struct advk_pcie *pcie) { - u32 reg; - if (!pcie->reset_gpio) return; - /* PERST does not work for some cards when link training is enabled */ - reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); - reg &= ~LINK_TRAINING_EN; - advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); - /* 10ms delay is needed for some cards */ dev_info(&pcie->pdev->dev, "issuing PERST via reset GPIO for 10ms\n"); gpiod_set_value_cansleep(pcie->reset_gpio, 1); @@ -323,53 +388,46 @@ static void advk_pcie_issue_perst(struct advk_pcie *pcie) gpiod_set_value_cansleep(pcie->reset_gpio, 0); } -static int advk_pcie_train_at_gen(struct advk_pcie *pcie, int gen) +static void advk_pcie_train_link(struct advk_pcie *pcie) { - int ret, neg_gen; + struct device *dev = &pcie->pdev->dev; u32 reg; + int ret; - /* Setup link speed */ + /* + * Setup PCIe rev / gen compliance based on device tree property + * 'max-link-speed' which also forces maximal link speed. + */ reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); reg &= ~PCIE_GEN_SEL_MSK; - if (gen == 3) + if (pcie->link_gen == 3) reg |= SPEED_GEN_3; - else if (gen == 2) + else if (pcie->link_gen == 2) reg |= SPEED_GEN_2; else reg |= SPEED_GEN_1; advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); /* - * Enable link training. This is not needed in every call to this - * function, just once suffices, but it does not break anything either. + * Set maximal link speed value also into PCIe Link Control 2 register. + * Armada 3700 Functional Specification says that default value is based + * on SPEED_GEN but tests showed that default value is always 8.0 GT/s. */ + reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL2); + reg &= ~PCI_EXP_LNKCTL2_TLS; + if (pcie->link_gen == 3) + reg |= PCI_EXP_LNKCTL2_TLS_8_0GT; + else if (pcie->link_gen == 2) + reg |= PCI_EXP_LNKCTL2_TLS_5_0GT; + else + reg |= PCI_EXP_LNKCTL2_TLS_2_5GT; + advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL2); + + /* Enable link training after selecting PCIe generation */ reg = advk_readl(pcie, PCIE_CORE_CTRL0_REG); reg |= LINK_TRAINING_EN; advk_writel(pcie, reg, PCIE_CORE_CTRL0_REG); - /* - * Start link training immediately after enabling it. - * This solves problems for some buggy cards. - */ - reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL); - reg |= PCI_EXP_LNKCTL_RL; - advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKCTL); - - ret = advk_pcie_wait_for_link(pcie); - if (ret) - return ret; - - reg = advk_read16(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_LNKSTA); - neg_gen = reg & PCI_EXP_LNKSTA_CLS; - - return neg_gen; -} - -static void advk_pcie_train_link(struct advk_pcie *pcie) -{ - struct device *dev = &pcie->pdev->dev; - int neg_gen = -1, gen; - /* * Reset PCIe card via PERST# signal. Some cards are not detected * during link training when they are in some non-initial state. @@ -380,41 +438,18 @@ static void advk_pcie_train_link(struct advk_pcie *pcie) * PERST# signal could have been asserted by pinctrl subsystem before * probe() callback has been called or issued explicitly by reset gpio * function advk_pcie_issue_perst(), making the endpoint going into - * fundamental reset. As required by PCI Express spec a delay for at - * least 100ms after such a reset before link training is needed. + * fundamental reset. As required by PCI Express spec (PCI Express + * Base Specification, REV. 4.0 PCI Express, February 19 2014, 6.6.1 + * Conventional Reset) a delay for at least 100ms after such a reset + * before sending a Configuration Request to the device is needed. + * So wait until PCIe link is up. Function advk_pcie_wait_for_link() + * waits for link at least 900ms. */ - msleep(PCI_PM_D3COLD_WAIT); - - /* - * Try link training at link gen specified by device tree property - * 'max-link-speed'. If this fails, iteratively train at lower gen. - */ - for (gen = pcie->link_gen; gen > 0; --gen) { - neg_gen = advk_pcie_train_at_gen(pcie, gen); - if (neg_gen > 0) - break; - } - - if (neg_gen < 0) - goto err; - - /* - * After successful training if negotiated gen is lower than requested, - * train again on negotiated gen. This solves some stability issues for - * some buggy gen1 cards. - */ - if (neg_gen < gen) { - gen = neg_gen; - neg_gen = advk_pcie_train_at_gen(pcie, gen); - } - - if (neg_gen == gen) { - dev_info(dev, "link up at gen %i\n", gen); - return; - } - -err: - dev_err(dev, "link never came up\n"); + ret = advk_pcie_wait_for_link(pcie); + if (ret < 0) + dev_err(dev, "link never came up\n"); + else + dev_info(dev, "link up\n"); } /* @@ -451,9 +486,15 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) u32 reg; int i; - /* Enable TX */ + /* + * Configure PCIe Reference clock. Direction is from the PCIe + * controller to the endpoint card, so enable transmitting of + * Reference clock differential signal off-chip and disable + * receiving off-chip differential signal. + */ reg = advk_readl(pcie, PCIE_CORE_REF_CLK_REG); reg |= PCIE_CORE_REF_CLK_TX_ENABLE; + reg &= ~PCIE_CORE_REF_CLK_RX_ENABLE; advk_writel(pcie, reg, PCIE_CORE_REF_CLK_REG); /* Set to Direct mode */ @@ -477,6 +518,31 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) reg = (PCI_VENDOR_ID_MARVELL << 16) | PCI_VENDOR_ID_MARVELL; advk_writel(pcie, reg, VENDOR_ID_REG); + /* + * Change Class Code of PCI Bridge device to PCI Bridge (0x600400), + * because the default value is Mass storage controller (0x010400). + * + * Note that this Aardvark PCI Bridge does not have compliant Type 1 + * Configuration Space and it even cannot be accessed via Aardvark's + * PCI config space access method. Something like config space is + * available in internal Aardvark registers starting at offset 0x0 + * and is reported as Type 0. In range 0x10 - 0x34 it has totally + * different registers. + * + * Therefore driver uses emulation of PCI Bridge which emulates + * access to configuration space via internal Aardvark registers or + * emulated configuration buffer. + */ + reg = advk_readl(pcie, PCIE_CORE_DEV_REV_REG); + reg &= ~0xffffff00; + reg |= (PCI_CLASS_BRIDGE_PCI << 8) << 8; + advk_writel(pcie, reg, PCIE_CORE_DEV_REV_REG); + + /* Disable Root Bridge I/O space, memory space and bus mastering */ + reg = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG); + reg &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER); + advk_writel(pcie, reg, PCIE_CORE_CMD_STATUS_REG); + /* Set Advanced Error Capabilities and Control PF0 register */ reg = PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX | PCIE_CORE_ERR_CAPCTL_ECRC_CHK_TX_EN | @@ -488,8 +554,9 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) reg = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + PCI_EXP_DEVCTL); reg &= ~PCI_EXP_DEVCTL_RELAX_EN; reg &= ~PCI_EXP_DEVCTL_NOSNOOP_EN; + reg &= ~PCI_EXP_DEVCTL_PAYLOAD; reg &= ~PCI_EXP_DEVCTL_READRQ; - reg |= PCI_EXP_DEVCTL_PAYLOAD; /* Set max payload size */ + reg |= PCI_EXP_DEVCTL_PAYLOAD_512B; reg |= PCI_EXP_DEVCTL_READRQ_512B; advk_writel(pcie, reg, PCIE_CORE_PCIEXP_CAP + PCI_EXP_DEVCTL); @@ -574,19 +641,6 @@ static void advk_pcie_setup_hw(struct advk_pcie *pcie) advk_pcie_disable_ob_win(pcie, i); advk_pcie_train_link(pcie); - - /* - * FIXME: The following register update is suspicious. This register is - * applicable only when the PCI controller is configured for Endpoint - * mode, not as a Root Complex. But apparently when this code is - * removed, some cards stop working. This should be investigated and - * a comment explaining this should be put here. - */ - reg = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG); - reg |= PCIE_CORE_CMD_MEM_ACCESS_EN | - PCIE_CORE_CMD_IO_ACCESS_EN | - PCIE_CORE_CMD_MEM_IO_REQ_EN; - advk_writel(pcie, reg, PCIE_CORE_CMD_STATUS_REG); } static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u32 *val) @@ -595,6 +649,7 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3 u32 reg; unsigned int status; char *strcomp_status, *str_posted; + int ret; reg = advk_readl(pcie, PIO_STAT); status = (reg & PIO_COMPLETION_STATUS_MASK) >> @@ -619,6 +674,7 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3 case PIO_COMPLETION_STATUS_OK: if (reg & PIO_ERR_STATUS) { strcomp_status = "COMP_ERR"; + ret = -EFAULT; break; } /* Get the read result */ @@ -626,9 +682,11 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3 *val = advk_readl(pcie, PIO_RD_DATA); /* No error */ strcomp_status = NULL; + ret = 0; break; case PIO_COMPLETION_STATUS_UR: strcomp_status = "UR"; + ret = -EOPNOTSUPP; break; case PIO_COMPLETION_STATUS_CRS: if (allow_crs && val) { @@ -646,6 +704,7 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3 */ *val = CFG_RD_CRS_VAL; strcomp_status = NULL; + ret = 0; break; } /* PCIe r4.0, sec 2.3.2, says: @@ -661,31 +720,34 @@ static int advk_pcie_check_pio_status(struct advk_pcie *pcie, bool allow_crs, u3 * Request and taking appropriate action, e.g., complete the * Request to the host as a failed transaction. * - * To simplify implementation do not re-issue the Configuration - * Request and complete the Request as a failed transaction. + * So return -EAGAIN and caller (pci-aardvark.c driver) will + * re-issue request again up to the PIO_RETRY_CNT retries. */ strcomp_status = "CRS"; + ret = -EAGAIN; break; case PIO_COMPLETION_STATUS_CA: strcomp_status = "CA"; + ret = -ECANCELED; break; default: strcomp_status = "Unknown"; + ret = -EINVAL; break; } if (!strcomp_status) - return 0; + return ret; if (reg & PIO_NON_POSTED_REQ) str_posted = "Non-posted"; else str_posted = "Posted"; - dev_err(dev, "%s PIO Response Status: %s, %#x @ %#x\n", + dev_dbg(dev, "%s PIO Response Status: %s, %#x @ %#x\n", str_posted, strcomp_status, reg, advk_readl(pcie, PIO_ADDR_LS)); - return -EFAULT; + return ret; } static int advk_pcie_wait_pio(struct advk_pcie *pcie) @@ -693,13 +755,13 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie) struct device *dev = &pcie->pdev->dev; int i; - for (i = 0; i < PIO_RETRY_CNT; i++) { + for (i = 1; i <= PIO_RETRY_CNT; i++) { u32 start, isr; start = advk_readl(pcie, PIO_START); isr = advk_readl(pcie, PIO_ISR); if (!start && isr) - return 0; + return i; udelay(PIO_RETRY_DELAY); } @@ -707,6 +769,64 @@ static int advk_pcie_wait_pio(struct advk_pcie *pcie) return -ETIMEDOUT; } +static pci_bridge_emul_read_status_t +advk_pci_bridge_emul_base_conf_read(struct pci_bridge_emul *bridge, + int reg, u32 *value) +{ + struct advk_pcie *pcie = bridge->data; + + switch (reg) { + case PCI_COMMAND: + *value = advk_readl(pcie, PCIE_CORE_CMD_STATUS_REG); + return PCI_BRIDGE_EMUL_HANDLED; + + case PCI_INTERRUPT_LINE: { + /* + * From the whole 32bit register we support reading from HW only + * one bit: PCI_BRIDGE_CTL_BUS_RESET. + * Other bits are retrieved only from emulated config buffer. + */ + __le32 *cfgspace = (__le32 *)&bridge->conf; + u32 val = le32_to_cpu(cfgspace[PCI_INTERRUPT_LINE / 4]); + if (advk_readl(pcie, PCIE_CORE_CTRL1_REG) & HOT_RESET_GEN) + val |= PCI_BRIDGE_CTL_BUS_RESET << 16; + else + val &= ~(PCI_BRIDGE_CTL_BUS_RESET << 16); + *value = val; + return PCI_BRIDGE_EMUL_HANDLED; + } + + default: + return PCI_BRIDGE_EMUL_NOT_HANDLED; + } +} + +static void +advk_pci_bridge_emul_base_conf_write(struct pci_bridge_emul *bridge, + int reg, u32 old, u32 new, u32 mask) +{ + struct advk_pcie *pcie = bridge->data; + + switch (reg) { + case PCI_COMMAND: + advk_writel(pcie, new, PCIE_CORE_CMD_STATUS_REG); + break; + + case PCI_INTERRUPT_LINE: + if (mask & (PCI_BRIDGE_CTL_BUS_RESET << 16)) { + u32 val = advk_readl(pcie, PCIE_CORE_CTRL1_REG); + if (new & (PCI_BRIDGE_CTL_BUS_RESET << 16)) + val |= HOT_RESET_GEN; + else + val &= ~HOT_RESET_GEN; + advk_writel(pcie, val, PCIE_CORE_CTRL1_REG); + } + break; + + default: + break; + } +} static pci_bridge_emul_read_status_t advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge, @@ -723,6 +843,7 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge, case PCI_EXP_RTCTL: { u32 val = advk_readl(pcie, PCIE_ISR0_MASK_REG); *value = (val & PCIE_MSG_PM_PME_MASK) ? 0 : PCI_EXP_RTCTL_PMEIE; + *value |= le16_to_cpu(bridge->pcie_conf.rootctl) & PCI_EXP_RTCTL_CRSSVE; *value |= PCI_EXP_RTCAP_CRSVIS << 16; return PCI_BRIDGE_EMUL_HANDLED; } @@ -734,20 +855,32 @@ advk_pci_bridge_emul_pcie_conf_read(struct pci_bridge_emul *bridge, return PCI_BRIDGE_EMUL_HANDLED; } + case PCI_EXP_LNKCAP: { + u32 val = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg); + /* + * PCI_EXP_LNKCAP_DLLLARC bit is hardwired in aardvark HW to 0. + * But support for PCI_EXP_LNKSTA_DLLLA is emulated via ltssm + * state so explicitly enable PCI_EXP_LNKCAP_DLLLARC flag. + */ + val |= PCI_EXP_LNKCAP_DLLLARC; + *value = val; + return PCI_BRIDGE_EMUL_HANDLED; + } + case PCI_EXP_LNKCTL: { /* u32 contains both PCI_EXP_LNKCTL and PCI_EXP_LNKSTA */ u32 val = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg) & ~(PCI_EXP_LNKSTA_LT << 16); - if (!advk_pcie_link_up(pcie)) + if (advk_pcie_link_training(pcie)) val |= (PCI_EXP_LNKSTA_LT << 16); + if (advk_pcie_link_active(pcie)) + val |= (PCI_EXP_LNKSTA_DLLLA << 16); *value = val; return PCI_BRIDGE_EMUL_HANDLED; } - case PCI_CAP_LIST_ID: case PCI_EXP_DEVCAP: case PCI_EXP_DEVCTL: - case PCI_EXP_LNKCAP: *value = advk_readl(pcie, PCIE_CORE_PCIEXP_CAP + reg); return PCI_BRIDGE_EMUL_HANDLED; default: @@ -794,6 +927,8 @@ advk_pci_bridge_emul_pcie_conf_write(struct pci_bridge_emul *bridge, } static struct pci_bridge_emul_ops advk_pci_bridge_emul_ops = { + .read_base = advk_pci_bridge_emul_base_conf_read, + .write_base = advk_pci_bridge_emul_base_conf_write, .read_pcie = advk_pci_bridge_emul_pcie_conf_read, .write_pcie = advk_pci_bridge_emul_pcie_conf_write, }; @@ -805,7 +940,6 @@ static struct pci_bridge_emul_ops advk_pci_bridge_emul_ops = { static int advk_sw_pci_bridge_init(struct advk_pcie *pcie) { struct pci_bridge_emul *bridge = &pcie->bridge; - int ret; bridge->conf.vendor = cpu_to_le16(advk_readl(pcie, PCIE_CORE_DEV_ID_REG) & 0xffff); @@ -825,19 +959,17 @@ static int advk_sw_pci_bridge_init(struct advk_pcie *pcie) /* Support interrupt A for MSI feature */ bridge->conf.intpin = PCIE_CORE_INT_A_ASSERT_ENABLE; - bridge->has_pcie = true; - bridge->data = pcie; - bridge->ops = &advk_pci_bridge_emul_ops; - - /* PCIe config space can be initialized after pci_bridge_emul_init() */ - ret = pci_bridge_emul_init(bridge, 0); - if (ret < 0) - return ret; + /* Aardvark HW provides PCIe Capability structure in version 2 */ + bridge->pcie_conf.cap = cpu_to_le16(2); /* Indicates supports for Completion Retry Status */ bridge->pcie_conf.rootcap = cpu_to_le16(PCI_EXP_RTCAP_CRSVIS); - return 0; + bridge->has_pcie = true; + bridge->data = pcie; + bridge->ops = &advk_pci_bridge_emul_ops; + + return pci_bridge_emul_init(bridge, 0); } static bool advk_pcie_valid_device(struct advk_pcie *pcie, struct pci_bus *bus, @@ -889,6 +1021,7 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where, int size, u32 *val) { struct advk_pcie *pcie = bus->sysdata; + int retry_count; bool allow_crs; u32 reg; int ret; @@ -911,18 +1044,8 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, (le16_to_cpu(pcie->bridge.pcie_conf.rootctl) & PCI_EXP_RTCTL_CRSSVE); - if (advk_pcie_pio_is_running(pcie)) { - /* - * If it is possible return Completion Retry Status so caller - * tries to issue the request again instead of failing. - */ - if (allow_crs) { - *val = CFG_RD_CRS_VAL; - return PCIBIOS_SUCCESSFUL; - } - *val = 0xffffffff; - return PCIBIOS_SET_FAILED; - } + if (advk_pcie_pio_is_running(pcie)) + goto try_crs; /* Program the control register */ reg = advk_readl(pcie, PIO_CTRL); @@ -941,30 +1064,24 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, /* Program the data strobe */ advk_writel(pcie, 0xf, PIO_WR_DATA_STRB); - /* Clear PIO DONE ISR and start the transfer */ - advk_writel(pcie, 1, PIO_ISR); - advk_writel(pcie, 1, PIO_START); + retry_count = 0; + do { + /* Clear PIO DONE ISR and start the transfer */ + advk_writel(pcie, 1, PIO_ISR); + advk_writel(pcie, 1, PIO_START); - ret = advk_pcie_wait_pio(pcie); - if (ret < 0) { - /* - * If it is possible return Completion Retry Status so caller - * tries to issue the request again instead of failing. - */ - if (allow_crs) { - *val = CFG_RD_CRS_VAL; - return PCIBIOS_SUCCESSFUL; - } - *val = 0xffffffff; - return PCIBIOS_SET_FAILED; - } + ret = advk_pcie_wait_pio(pcie); + if (ret < 0) + goto try_crs; - /* Check PIO status and get the read result */ - ret = advk_pcie_check_pio_status(pcie, allow_crs, val); - if (ret < 0) { - *val = 0xffffffff; - return PCIBIOS_SET_FAILED; - } + retry_count += ret; + + /* Check PIO status and get the read result */ + ret = advk_pcie_check_pio_status(pcie, allow_crs, val); + } while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT); + + if (ret < 0) + goto fail; if (size == 1) *val = (*val >> (8 * (where & 3))) & 0xff; @@ -972,6 +1089,20 @@ static int advk_pcie_rd_conf(struct pci_bus *bus, u32 devfn, *val = (*val >> (8 * (where & 3))) & 0xffff; return PCIBIOS_SUCCESSFUL; + +try_crs: + /* + * If it is possible, return Completion Retry Status so that caller + * tries to issue the request again instead of failing. + */ + if (allow_crs) { + *val = CFG_RD_CRS_VAL; + return PCIBIOS_SUCCESSFUL; + } + +fail: + *val = 0xffffffff; + return PCIBIOS_SET_FAILED; } static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn, @@ -980,6 +1111,7 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn, struct advk_pcie *pcie = bus->sysdata; u32 reg; u32 data_strobe = 0x0; + int retry_count; int offset; int ret; @@ -1021,19 +1153,22 @@ static int advk_pcie_wr_conf(struct pci_bus *bus, u32 devfn, /* Program the data strobe */ advk_writel(pcie, data_strobe, PIO_WR_DATA_STRB); - /* Clear PIO DONE ISR and start the transfer */ - advk_writel(pcie, 1, PIO_ISR); - advk_writel(pcie, 1, PIO_START); + retry_count = 0; + do { + /* Clear PIO DONE ISR and start the transfer */ + advk_writel(pcie, 1, PIO_ISR); + advk_writel(pcie, 1, PIO_START); - ret = advk_pcie_wait_pio(pcie); - if (ret < 0) - return PCIBIOS_SET_FAILED; + ret = advk_pcie_wait_pio(pcie); + if (ret < 0) + return PCIBIOS_SET_FAILED; - ret = advk_pcie_check_pio_status(pcie, false, NULL); - if (ret < 0) - return PCIBIOS_SET_FAILED; + retry_count += ret; - return PCIBIOS_SUCCESSFUL; + ret = advk_pcie_check_pio_status(pcie, false, NULL); + } while (ret == -EAGAIN && retry_count < PIO_RETRY_CNT); + + return ret < 0 ? PCIBIOS_SET_FAILED : PCIBIOS_SUCCESSFUL; } static struct pci_ops advk_pcie_ops = { @@ -1082,7 +1217,7 @@ static int advk_msi_irq_domain_alloc(struct irq_domain *domain, domain->host_data, handle_simple_irq, NULL, NULL); - return hwirq; + return 0; } static void advk_msi_irq_domain_free(struct irq_domain *domain, @@ -1263,8 +1398,12 @@ static void advk_pcie_handle_msi(struct advk_pcie *pcie) if (!(BIT(msi_idx) & msi_status)) continue; + /* + * msi_idx contains bits [4:0] of the msi_data and msi_data + * contains 16bit MSI interrupt number + */ advk_writel(pcie, BIT(msi_idx), PCIE_MSI_STATUS_REG); - msi_data = advk_readl(pcie, PCIE_MSI_PAYLOAD_REG) & 0xFF; + msi_data = advk_readl(pcie, PCIE_MSI_PAYLOAD_REG) & PCIE_MSI_DATA_MASK; generic_handle_irq(msi_data); } @@ -1286,12 +1425,6 @@ static void advk_pcie_handle_int(struct advk_pcie *pcie) isr1_mask = advk_readl(pcie, PCIE_ISR1_MASK_REG); isr1_status = isr1_val & ((~isr1_mask) & PCIE_ISR1_ALL_MASK); - if (!isr0_status && !isr1_status) { - advk_writel(pcie, isr0_val, PCIE_ISR0_REG); - advk_writel(pcie, isr1_val, PCIE_ISR1_REG); - return; - } - /* Process MSI interrupts */ if (isr0_status & PCIE_ISR0_MSI_INT_PENDING) advk_pcie_handle_msi(pcie); diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index b599b31ecab3deb6324cf9094ef39944969dbdd5..308f5c981e4e9c58e1feb70354f0ec8a7678c273 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -1840,8 +1840,17 @@ static void hv_pci_assign_numa_node(struct hv_pcibus_device *hbus) if (!hv_dev) continue; - if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY) - set_dev_node(&dev->dev, hv_dev->desc.virtual_numa_node); + if (hv_dev->desc.flags & HV_PCI_DEVICE_FLAG_NUMA_AFFINITY && + hv_dev->desc.virtual_numa_node < num_possible_nodes()) + /* + * The kernel may boot with some NUMA nodes offline + * (e.g. in a KDUMP kernel) or with NUMA disabled via + * "numa=off". In those cases, adjust the host provided + * NUMA node to a valid NUMA node used by the kernel. + */ + set_dev_node(&dev->dev, + numa_map_to_online_node( + hv_dev->desc.virtual_numa_node)); put_pcichild(hv_dev); } @@ -3255,9 +3264,17 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) return 0; if (!keep_devs) { - /* Delete any children which might still exist. */ + struct list_head removed; + + /* Move all present children to the list on stack */ + INIT_LIST_HEAD(&removed); spin_lock_irqsave(&hbus->device_list_lock, flags); - list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) { + list_for_each_entry_safe(hpdev, tmp, &hbus->children, list_entry) + list_move_tail(&hpdev->list_entry, &removed); + spin_unlock_irqrestore(&hbus->device_list_lock, flags); + + /* Remove all children in the list */ + list_for_each_entry_safe(hpdev, tmp, &removed, list_entry) { list_del(&hpdev->list_entry); if (hpdev->pci_slot) pci_destroy_slot(hpdev->pci_slot); @@ -3265,7 +3282,6 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) put_pcichild(hpdev); put_pcichild(hpdev); } - spin_unlock_irqrestore(&hbus->device_list_lock, flags); } ret = hv_send_resources_released(hdev); diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index ed13e81cd691d461708f2d7b798037fee15f8449..2dc6890dbcaa263471714d7989b4185b90e6b738 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -573,6 +573,8 @@ static struct pci_bridge_emul_ops mvebu_pci_bridge_emul_ops = { static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port) { struct pci_bridge_emul *bridge = &port->bridge; + u32 pcie_cap = mvebu_readl(port, PCIE_CAP_PCIEXP); + u8 pcie_cap_ver = ((pcie_cap >> 16) & PCI_EXP_FLAGS_VERS); bridge->conf.vendor = PCI_VENDOR_ID_MARVELL; bridge->conf.device = mvebu_readl(port, PCIE_DEV_ID_OFF) >> 16; @@ -585,6 +587,12 @@ static void mvebu_pci_bridge_emul_init(struct mvebu_pcie_port *port) bridge->conf.iolimit = PCI_IO_RANGE_TYPE_32; } + /* + * Older mvebu hardware provides PCIe Capability structure only in + * version 1. New hardware provides it in version 2. + */ + bridge->pcie_conf.cap = cpu_to_le16(pcie_cap_ver); + bridge->has_pcie = true; bridge->data = port; bridge->ops = &mvebu_pci_bridge_emul_ops; diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index c33b385ac918e4eeb7805aaf9007b42be4f58398..b651b6f4446917855d92875df4c6f93ce55aed10 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -467,7 +467,7 @@ static int xgene_pcie_select_ib_reg(u8 *ib_reg_mask, u64 size) return 1; } - if ((size > SZ_1K) && (size < SZ_1T) && !(*ib_reg_mask & (1 << 0))) { + if ((size > SZ_1K) && (size < SZ_4G) && !(*ib_reg_mask & (1 << 0))) { *ib_reg_mask |= (1 << 0); return 0; } diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 4fd200d8b0a9d921f49a3290a3eb2fcaf4c3dbd6..f1f789fe0637a6345db2ca1d9a437223f9c96a90 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -72,6 +72,8 @@ extern int pciehp_poll_time; * @reset_lock: prevents access to the Data Link Layer Link Active bit in the * Link Status register and to the Presence Detect State bit in the Slot * Status register during a slot reset which may cause them to flap + * @depth: Number of additional hotplug ports in the path to the root bus, + * used as lock subclass for @reset_lock * @ist_running: flag to keep user request waiting while IRQ thread is running * @request_result: result of last user request submitted to the IRQ thread * @requester: wait queue to wake up on completion of user request, @@ -103,6 +105,7 @@ struct controller { struct hotplug_slot hotplug_slot; /* hotplug core interface */ struct rw_semaphore reset_lock; + unsigned int depth; unsigned int ist_running; int request_result; wait_queue_head_t requester; diff --git a/drivers/pci/hotplug/pciehp_core.c b/drivers/pci/hotplug/pciehp_core.c index ad3393930ecb4d91dc21ef7a5abf575662c76e7c..e7fe4b42f0394e360b20dbefd886a1509541bc29 100644 --- a/drivers/pci/hotplug/pciehp_core.c +++ b/drivers/pci/hotplug/pciehp_core.c @@ -166,7 +166,7 @@ static void pciehp_check_presence(struct controller *ctrl) { int occupied; - down_read(&ctrl->reset_lock); + down_read_nested(&ctrl->reset_lock, ctrl->depth); mutex_lock(&ctrl->state_lock); occupied = pciehp_card_present_or_link_active(ctrl); diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 9d06939736c0f0634670bf346df2138905854d71..30708af975adc6383fc496088c09bccd475eebce 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -583,7 +583,7 @@ static void pciehp_ignore_dpc_link_change(struct controller *ctrl, * the corresponding link change may have been ignored above. * Synthesize it to ensure that it is acted on. */ - down_read(&ctrl->reset_lock); + down_read_nested(&ctrl->reset_lock, ctrl->depth); if (!pciehp_check_link_active(ctrl)) pciehp_request(ctrl, PCI_EXP_SLTSTA_DLLSC); up_read(&ctrl->reset_lock); @@ -642,6 +642,8 @@ read_status: */ if (ctrl->power_fault_detected) status &= ~PCI_EXP_SLTSTA_PFD; + else if (status & PCI_EXP_SLTSTA_PFD) + ctrl->power_fault_detected = true; events |= status; if (!events) { @@ -651,7 +653,7 @@ read_status: } if (status) { - pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events); + pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, status); /* * In MSI mode, all event bits must be zero before the port @@ -725,8 +727,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) } /* Check Power Fault Detected */ - if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { - ctrl->power_fault_detected = 1; + if (events & PCI_EXP_SLTSTA_PFD) { ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(ctrl)); pciehp_set_indicators(ctrl, PCI_EXP_SLTCTL_PWR_IND_OFF, PCI_EXP_SLTCTL_ATTN_IND_ON); @@ -746,7 +747,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) * Disable requests have higher priority than Presence Detect Changed * or Data Link Layer State Changed events. */ - down_read(&ctrl->reset_lock); + down_read_nested(&ctrl->reset_lock, ctrl->depth); if (events & DISABLE_SLOT) pciehp_handle_disable_request(ctrl); else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) @@ -880,7 +881,7 @@ int pciehp_reset_slot(struct hotplug_slot *hotplug_slot, int probe) if (probe) return 0; - down_write(&ctrl->reset_lock); + down_write_nested(&ctrl->reset_lock, ctrl->depth); if (!ATTN_BUTTN(ctrl)) { ctrl_mask |= PCI_EXP_SLTCTL_PDCE; @@ -936,6 +937,20 @@ static inline void dbg_ctrl(struct controller *ctrl) #define FLAG(x, y) (((x) & (y)) ? '+' : '-') +static inline int pcie_hotplug_depth(struct pci_dev *dev) +{ + struct pci_bus *bus = dev->bus; + int depth = 0; + + while (bus->parent) { + bus = bus->parent; + if (bus->self && bus->self->is_hotplug_bridge) + depth++; + } + + return depth; +} + struct controller *pcie_init(struct pcie_device *dev) { struct controller *ctrl; @@ -949,6 +964,7 @@ struct controller *pcie_init(struct pcie_device *dev) return NULL; ctrl->pcie = dev; + ctrl->depth = pcie_hotplug_depth(dev->port); pcie_capability_read_dword(pdev, PCI_EXP_SLTCAP, &slot_cap); if (pdev->hotplug_user_indicators) diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index a047c421debe2e10606f8649f015436953daa7d1..93174f503464ebcd5edc44b478fb211bd9d93cd0 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -109,14 +109,7 @@ static int get_power_status(struct hotplug_slot *hotplug_slot, u8 *value) struct zpci_dev *zdev = container_of(hotplug_slot, struct zpci_dev, hotplug_slot); - switch (zdev->state) { - case ZPCI_FN_STATE_STANDBY: - *value = 0; - break; - default: - *value = 1; - break; - } + *value = zpci_is_device_configured(zdev) ? 1 : 0; return 0; } diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 9c6a839b453ff90e785ee994a9e98eed99316aaf..10634c9b8c497d894a9b56c34fa96d475bdfff10 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -366,18 +366,6 @@ static void free_msi_irqs(struct pci_dev *dev) for (i = 0; i < entry->nvec_used; i++) BUG_ON(irq_has_action(entry->irq + i)); - pci_msi_teardown_msi_irqs(dev); - - list_for_each_entry_safe(entry, tmp, msi_list, list) { - if (entry->msi_attrib.is_msix) { - if (list_is_last(&entry->list, msi_list)) - iounmap(entry->mask_base); - } - - list_del(&entry->list); - free_msi_entry(entry); - } - if (dev->msi_irq_groups) { sysfs_remove_groups(&dev->dev.kobj, dev->msi_irq_groups); msi_attrs = dev->msi_irq_groups[0]->attrs; @@ -393,6 +381,18 @@ static void free_msi_irqs(struct pci_dev *dev) kfree(dev->msi_irq_groups); dev->msi_irq_groups = NULL; } + + pci_msi_teardown_msi_irqs(dev); + + list_for_each_entry_safe(entry, tmp, msi_list, list) { + if (entry->msi_attrib.is_msix) { + if (list_is_last(&entry->list, msi_list)) + iounmap(entry->mask_base); + } + + list_del(&entry->list); + free_msi_entry(entry); + } } static void pci_intx_for_msi(struct pci_dev *dev, int enable) @@ -562,6 +562,9 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd) goto out; pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); + /* Lies, damned lies, and MSIs */ + if (dev->dev_flags & PCI_DEV_FLAGS_HAS_MSI_MASKING) + control |= PCI_MSI_FLAGS_MASKBIT; entry->msi_attrib.is_msix = 0; entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT); @@ -795,9 +798,6 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, goto out_disable; } - /* Ensure that all table entries are masked. */ - msix_mask_all(base, tsize); - ret = msix_setup_entries(dev, base, entries, nvec, affd); if (ret) goto out_disable; @@ -820,6 +820,16 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, /* Set MSI-X enabled bits and unmask the function */ pci_intx_for_msi(dev, 0); dev->msix_enabled = 1; + + /* + * Ensure that all table entries are masked to prevent + * stale entries from firing in a crash kernel. + * + * Done late to deal with a broken Marvell NVME device + * which takes the MSI-X mask bits into account even + * when MSI-X is disabled, which prevents MSI delivery. + */ + msix_mask_all(base, tsize); pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); pcibios_free_irq(dev); @@ -846,7 +856,7 @@ out_free: free_msi_irqs(dev); out_disable: - pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); + pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE, 0); return ret; } @@ -1258,19 +1268,24 @@ EXPORT_SYMBOL(pci_free_irq_vectors); /** * pci_irq_vector - return Linux IRQ number of a device vector - * @dev: PCI device to operate on - * @nr: device-relative interrupt vector index (0-based). + * @dev: PCI device to operate on + * @nr: Interrupt vector index (0-based) + * + * @nr has the following meanings depending on the interrupt mode: + * MSI-X: The index in the MSI-X vector table + * MSI: The index of the enabled MSI vectors + * INTx: Must be 0 + * + * Return: The Linux interrupt number or -EINVAl if @nr is out of range. */ int pci_irq_vector(struct pci_dev *dev, unsigned int nr) { if (dev->msix_enabled) { struct msi_desc *entry; - int i = 0; for_each_pci_msi_entry(entry, dev) { - if (i == nr) + if (entry->msi_attrib.entry_nr == nr) return entry->irq; - i++; } WARN_ON_ONCE(1); return -EINVAL; @@ -1294,17 +1309,22 @@ EXPORT_SYMBOL(pci_irq_vector); * pci_irq_get_affinity - return the affinity of a particular MSI vector * @dev: PCI device to operate on * @nr: device-relative interrupt vector index (0-based). + * + * @nr has the following meanings depending on the interrupt mode: + * MSI-X: The index in the MSI-X vector table + * MSI: The index of the enabled MSI vectors + * INTx: Must be 0 + * + * Return: A cpumask pointer or NULL if @nr is out of range */ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) { if (dev->msix_enabled) { struct msi_desc *entry; - int i = 0; for_each_pci_msi_entry(entry, dev) { - if (i == nr) + if (entry->msi_attrib.entry_nr == nr) return &entry->affinity->mask; - i++; } WARN_ON_ONCE(1); return NULL; diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c index fdaf86a888b73771eddd1704323a8fa9eb4a0399..37504c2cce9b89165da60b384b895e19ab89aa56 100644 --- a/drivers/pci/pci-bridge-emul.c +++ b/drivers/pci/pci-bridge-emul.c @@ -139,8 +139,13 @@ struct pci_bridge_reg_behavior pci_regs_behavior[PCI_STD_HEADER_SIZEOF / 4] = { .ro = GENMASK(7, 0), }, + /* + * If expansion ROM is unsupported then ROM Base Address register must + * be implemented as read-only register that return 0 when read, same + * as for unused Base Address registers. + */ [PCI_ROM_ADDRESS1 / 4] = { - .rw = GENMASK(31, 11) | BIT(0), + .ro = ~0, }, /* @@ -171,41 +176,55 @@ struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] = [PCI_CAP_LIST_ID / 4] = { /* * Capability ID, Next Capability Pointer and - * Capabilities register are all read-only. + * bits [14:0] of Capabilities register are all read-only. + * Bit 15 of Capabilities register is reserved. */ - .ro = ~0, + .ro = GENMASK(30, 0), }, [PCI_EXP_DEVCAP / 4] = { - .ro = ~0, + /* + * Bits [31:29] and [17:16] are reserved. + * Bits [27:18] are reserved for non-upstream ports. + * Bits 28 and [14:6] are reserved for non-endpoint devices. + * Other bits are read-only. + */ + .ro = BIT(15) | GENMASK(5, 0), }, [PCI_EXP_DEVCTL / 4] = { - /* Device control register is RW */ - .rw = GENMASK(15, 0), + /* + * Device control register is RW, except bit 15 which is + * reserved for non-endpoints or non-PCIe-to-PCI/X bridges. + */ + .rw = GENMASK(14, 0), /* * Device status register has bits 6 and [3:0] W1C, [5:4] RO, - * the rest is reserved + * the rest is reserved. Also bit 6 is reserved for non-upstream + * ports. */ - .w1c = (BIT(6) | GENMASK(3, 0)) << 16, + .w1c = GENMASK(3, 0) << 16, .ro = GENMASK(5, 4) << 16, }, [PCI_EXP_LNKCAP / 4] = { - /* All bits are RO, except bit 23 which is reserved */ - .ro = lower_32_bits(~BIT(23)), + /* + * All bits are RO, except bit 23 which is reserved and + * bit 18 which is reserved for non-upstream ports. + */ + .ro = lower_32_bits(~(BIT(23) | PCI_EXP_LNKCAP_CLKPM)), }, [PCI_EXP_LNKCTL / 4] = { /* * Link control has bits [15:14], [11:3] and [1:0] RW, the - * rest is reserved. + * rest is reserved. Bit 8 is reserved for non-upstream ports. * * Link status has bits [13:0] RO, and bits [15:14] * W1C. */ - .rw = GENMASK(15, 14) | GENMASK(11, 3) | GENMASK(1, 0), + .rw = GENMASK(15, 14) | GENMASK(11, 9) | GENMASK(7, 3) | GENMASK(1, 0), .ro = GENMASK(13, 0) << 16, .w1c = GENMASK(15, 14) << 16, }, @@ -277,11 +296,9 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge, if (bridge->has_pcie) { bridge->conf.capabilities_pointer = PCI_CAP_PCIE_START; + bridge->conf.status |= cpu_to_le16(PCI_STATUS_CAP_LIST); bridge->pcie_conf.cap_id = PCI_CAP_ID_EXP; - /* Set PCIe v2, root port, slot support */ - bridge->pcie_conf.cap = - cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4 | 2 | - PCI_EXP_FLAGS_SLOT); + bridge->pcie_conf.cap |= cpu_to_le16(PCI_EXP_TYPE_ROOT_PORT << 4); bridge->pcie_cap_regs_behavior = kmemdup(pcie_cap_regs_behavior, sizeof(pcie_cap_regs_behavior), @@ -290,6 +307,27 @@ int pci_bridge_emul_init(struct pci_bridge_emul *bridge, kfree(bridge->pci_regs_behavior); return -ENOMEM; } + /* These bits are applicable only for PCI and reserved on PCIe */ + bridge->pci_regs_behavior[PCI_CACHE_LINE_SIZE / 4].ro &= + ~GENMASK(15, 8); + bridge->pci_regs_behavior[PCI_COMMAND / 4].ro &= + ~((PCI_COMMAND_SPECIAL | PCI_COMMAND_INVALIDATE | + PCI_COMMAND_VGA_PALETTE | PCI_COMMAND_WAIT | + PCI_COMMAND_FAST_BACK) | + (PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK | + PCI_STATUS_DEVSEL_MASK) << 16); + bridge->pci_regs_behavior[PCI_PRIMARY_BUS / 4].ro &= + ~GENMASK(31, 24); + bridge->pci_regs_behavior[PCI_IO_BASE / 4].ro &= + ~((PCI_STATUS_66MHZ | PCI_STATUS_FAST_BACK | + PCI_STATUS_DEVSEL_MASK) << 16); + bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].rw &= + ~((PCI_BRIDGE_CTL_MASTER_ABORT | + BIT(8) | BIT(9) | BIT(11)) << 16); + bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].ro &= + ~((PCI_BRIDGE_CTL_FAST_BACK) << 16); + bridge->pci_regs_behavior[PCI_INTERRUPT_LINE / 4].w1c &= + ~(BIT(10) << 16); } if (flags & PCI_BRIDGE_EMUL_NO_PREFETCHABLE_BAR) { @@ -431,8 +469,21 @@ int pci_bridge_emul_conf_write(struct pci_bridge_emul *bridge, int where, /* Clear the W1C bits */ new &= ~((value << shift) & (behavior[reg / 4].w1c & mask)); + /* Save the new value with the cleared W1C bits into the cfgspace */ cfgspace[reg / 4] = cpu_to_le32(new); + /* + * Clear the W1C bits not specified by the write mask, so that the + * write_op() does not clear them. + */ + new &= ~(behavior[reg / 4].w1c & ~mask); + + /* + * Set the W1C bits specified by the write mask, so that write_op() + * knows about that they are to be cleared. + */ + new |= (value << shift) & (behavior[reg / 4].w1c & mask); + if (write_op) write_op(bridge, reg, old, new, mask); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0d7109018a91f50da0f041d1b840a4a8551fd702..fbb4d765b43c8349a5fb0c0befd30216bc7d48db 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "pci.h" DEFINE_MUTEX(pci_slot_mutex); @@ -68,8 +69,11 @@ static void pci_dev_d3_sleep(struct pci_dev *dev) if (delay < pci_pm_d3hot_delay) delay = pci_pm_d3hot_delay; - if (delay) - msleep(delay); + if (delay) { + trace_android_rvh_pci_d3_sleep(dev, &delay); + if (delay) + msleep(delay); + } } #ifdef CONFIG_PCI_DOMAINS diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 438159d45e3e9d116ee4daeac1961450b392c22a..d6974502d9622399e1172f45b956a80c9277e74c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -3577,6 +3577,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0032, quirk_no_bus_reset); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003c, quirk_no_bus_reset); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0033, quirk_no_bus_reset); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x0034, quirk_no_bus_reset); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATHEROS, 0x003e, quirk_no_bus_reset); /* * Root port on some Cavium CN8xxx chips do not successfully complete a bus @@ -4069,6 +4070,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9120, quirk_dma_func1_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9123, quirk_dma_func1_alias); +/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c136 */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9125, + quirk_dma_func1_alias); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9128, quirk_dma_func1_alias); /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c14 */ @@ -5748,3 +5752,9 @@ static void apex_pci_fixup_class(struct pci_dev *pdev) } DECLARE_PCI_FIXUP_CLASS_HEADER(0x1ac1, 0x089a, PCI_CLASS_NOT_DEFINED, 8, apex_pci_fixup_class); + +static void nvidia_ion_ahci_fixup(struct pci_dev *pdev) +{ + pdev->dev_flags |= PCI_DEV_FLAGS_HAS_MSI_MASKING; +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, 0x0ab8, nvidia_ion_ahci_fixup); diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c index e211e2619680ce7a1e841878503c532d3b63e7ea..f70197154a36256ce59dcaf31d22faa09cb5fcaf 100644 --- a/drivers/pcmcia/cs.c +++ b/drivers/pcmcia/cs.c @@ -666,18 +666,16 @@ static int pccardd(void *__skt) if (events || sysfs_events) continue; + set_current_state(TASK_INTERRUPTIBLE); if (kthread_should_stop()) break; - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - /* make sure we are running */ - __set_current_state(TASK_RUNNING); - try_to_freeze(); } + /* make sure we are running before we exit */ + __set_current_state(TASK_RUNNING); /* shut down socket, if a device is still present */ if (skt->state & SOCKET_PRESENT) { diff --git a/drivers/pcmcia/rsrc_nonstatic.c b/drivers/pcmcia/rsrc_nonstatic.c index 3b05760e69d6213ecb48ae659d27ce6ebdc5a00a..69a6e9a5d6d269be87c84ea1611443c8e3ea657f 100644 --- a/drivers/pcmcia/rsrc_nonstatic.c +++ b/drivers/pcmcia/rsrc_nonstatic.c @@ -690,6 +690,9 @@ static struct resource *__nonstatic_find_io_region(struct pcmcia_socket *s, unsigned long min = base; int ret; + if (!res) + return NULL; + data.mask = align - 1; data.offset = base & data.mask; data.map = &s_data->io_db; @@ -809,6 +812,9 @@ static struct resource *nonstatic_find_mem_region(u_long base, u_long num, unsigned long min, max; int ret, i, j; + if (!res) + return NULL; + low = low || !(s->features & SS_CAP_PAGE_REGS); data.mask = align - 1; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index cb2f55f450e4adf05a7b3d81e8b9a01230665584..18bc15a5f418974ac9916542f2ed9f5710112df7 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -986,6 +986,8 @@ int armpmu_register(struct arm_pmu *pmu) pmu->name, pmu->num_events, has_nmi ? ", using NMIs" : ""); + kvm_host_pmu_init(pmu); + return 0; out_destroy: diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c index 99fbc7e4138be7e1262b6e413fbaeda208365623..b901a0d4e2a80af9e3a4b2aef4cadb072066b74e 100644 --- a/drivers/phy/broadcom/phy-brcm-usb.c +++ b/drivers/phy/broadcom/phy-brcm-usb.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "phy-brcm-usb-init.h" @@ -69,12 +70,35 @@ struct brcm_usb_phy_data { int init_count; int wake_irq; struct brcm_usb_phy phys[BRCM_USB_PHY_ID_MAX]; + struct notifier_block pm_notifier; + bool pm_active; }; static s8 *node_reg_names[BRCM_REGS_MAX] = { "crtl", "xhci_ec", "xhci_gbl", "usb_phy", "usb_mdio", "bdc_ec" }; +static int brcm_pm_notifier(struct notifier_block *notifier, + unsigned long pm_event, + void *unused) +{ + struct brcm_usb_phy_data *priv = + container_of(notifier, struct brcm_usb_phy_data, pm_notifier); + + switch (pm_event) { + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + priv->pm_active = true; + break; + case PM_POST_RESTORE: + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + priv->pm_active = false; + break; + } + return NOTIFY_DONE; +} + static irqreturn_t brcm_usb_phy_wake_isr(int irq, void *dev_id) { struct phy *gphy = dev_id; @@ -90,6 +114,9 @@ static int brcm_usb_phy_init(struct phy *gphy) struct brcm_usb_phy_data *priv = container_of(phy, struct brcm_usb_phy_data, phys[phy->id]); + if (priv->pm_active) + return 0; + /* * Use a lock to make sure a second caller waits until * the base phy is inited before using it. @@ -119,6 +146,9 @@ static int brcm_usb_phy_exit(struct phy *gphy) struct brcm_usb_phy_data *priv = container_of(phy, struct brcm_usb_phy_data, phys[phy->id]); + if (priv->pm_active) + return 0; + dev_dbg(&gphy->dev, "EXIT\n"); if (phy->id == BRCM_USB_PHY_2_0) brcm_usb_uninit_eohci(&priv->ini); @@ -484,6 +514,9 @@ static int brcm_usb_phy_probe(struct platform_device *pdev) if (err) return err; + priv->pm_notifier.notifier_call = brcm_pm_notifier; + register_pm_notifier(&priv->pm_notifier); + mutex_init(&priv->mutex); /* make sure invert settings are correct */ @@ -524,7 +557,10 @@ static int brcm_usb_phy_probe(struct platform_device *pdev) static int brcm_usb_phy_remove(struct platform_device *pdev) { + struct brcm_usb_phy_data *priv = dev_get_drvdata(&pdev->dev); + sysfs_remove_group(&pdev->dev.kobj, &brcm_usb_phy_group); + unregister_pm_notifier(&priv->pm_notifier); return 0; } @@ -535,6 +571,7 @@ static int brcm_usb_phy_suspend(struct device *dev) struct brcm_usb_phy_data *priv = dev_get_drvdata(dev); if (priv->init_count) { + dev_dbg(dev, "SUSPEND\n"); priv->ini.wake_enabled = device_may_wakeup(dev); if (priv->phys[BRCM_USB_PHY_3_0].inited) brcm_usb_uninit_xhci(&priv->ini); @@ -574,6 +611,7 @@ static int brcm_usb_phy_resume(struct device *dev) * Uninitialize anything that wasn't previously initialized. */ if (priv->init_count) { + dev_dbg(dev, "RESUME\n"); if (priv->wake_irq >= 0) disable_irq_wake(priv->wake_irq); brcm_usb_init_common(&priv->ini); diff --git a/drivers/phy/qualcomm/phy-qcom-qusb2.c b/drivers/phy/qualcomm/phy-qcom-qusb2.c index 557547dabfd50eb17086e528502b924213041e65..f531043ec3debe59189126e8b514a4272dd9b596 100644 --- a/drivers/phy/qualcomm/phy-qcom-qusb2.c +++ b/drivers/phy/qualcomm/phy-qcom-qusb2.c @@ -474,7 +474,7 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy) { struct device *dev = &qphy->phy->dev; const struct qusb2_phy_cfg *cfg = qphy->cfg; - u8 *val; + u8 *val, hstx_trim; /* efuse register is optional */ if (!qphy->cell) @@ -488,7 +488,13 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy) * set while configuring the phy. */ val = nvmem_cell_read(qphy->cell, NULL); - if (IS_ERR(val) || !val[0]) { + if (IS_ERR(val)) { + dev_dbg(dev, "failed to read a valid hs-tx trim value\n"); + return; + } + hstx_trim = val[0]; + kfree(val); + if (!hstx_trim) { dev_dbg(dev, "failed to read a valid hs-tx trim value\n"); return; } @@ -496,12 +502,10 @@ static void qusb2_phy_set_tune2_param(struct qusb2_phy *qphy) /* Fused TUNE1/2 value is the higher nibble only */ if (cfg->update_tune1_with_efuse) qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE1], - val[0] << HSTX_TRIM_SHIFT, - HSTX_TRIM_MASK); + hstx_trim << HSTX_TRIM_SHIFT, HSTX_TRIM_MASK); else qusb2_write_mask(qphy->base, cfg->regs[QUSB2PHY_PORT_TUNE2], - val[0] << HSTX_TRIM_SHIFT, - HSTX_TRIM_MASK); + hstx_trim << HSTX_TRIM_SHIFT, HSTX_TRIM_MASK); } static int qusb2_phy_set_mode(struct phy *phy, diff --git a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c index ae4bac024c7b1e69a622fec1af737dff0dbbd048..7e61202aa234ed8c95ff1bdf36fc36f9cadbae31 100644 --- a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c +++ b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c @@ -33,7 +33,7 @@ #define USB2_PHY_USB_PHY_HS_PHY_CTRL_COMMON0 (0x54) #define RETENABLEN BIT(3) -#define FSEL_MASK GENMASK(7, 5) +#define FSEL_MASK GENMASK(6, 4) #define FSEL_DEFAULT (0x3 << 4) #define USB2_PHY_USB_PHY_HS_PHY_CTRL_COMMON1 (0x58) diff --git a/drivers/phy/socionext/phy-uniphier-usb3ss.c b/drivers/phy/socionext/phy-uniphier-usb3ss.c index 6700645bcbe6bfd6abeeb4432618b5a6c2596055..3b5ffc16a6947c8a32e4a8e341f395ebe1e5fe5d 100644 --- a/drivers/phy/socionext/phy-uniphier-usb3ss.c +++ b/drivers/phy/socionext/phy-uniphier-usb3ss.c @@ -22,11 +22,13 @@ #include #define SSPHY_TESTI 0x0 -#define SSPHY_TESTO 0x4 #define TESTI_DAT_MASK GENMASK(13, 6) #define TESTI_ADR_MASK GENMASK(5, 1) #define TESTI_WR_EN BIT(0) +#define SSPHY_TESTO 0x4 +#define TESTO_DAT_MASK GENMASK(7, 0) + #define PHY_F(regno, msb, lsb) { (regno), (msb), (lsb) } #define CDR_CPD_TRIM PHY_F(7, 3, 0) /* RxPLL charge pump current */ @@ -84,12 +86,12 @@ static void uniphier_u3ssphy_set_param(struct uniphier_u3ssphy_priv *priv, val = FIELD_PREP(TESTI_DAT_MASK, 1); val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no); uniphier_u3ssphy_testio_write(priv, val); - val = readl(priv->base + SSPHY_TESTO); + val = readl(priv->base + SSPHY_TESTO) & TESTO_DAT_MASK; /* update value */ - val &= ~FIELD_PREP(TESTI_DAT_MASK, field_mask); + val &= ~field_mask; data = field_mask & (p->value << p->field.lsb); - val = FIELD_PREP(TESTI_DAT_MASK, data); + val = FIELD_PREP(TESTI_DAT_MASK, data | val); val |= FIELD_PREP(TESTI_ADR_MASK, p->field.reg_no); uniphier_u3ssphy_testio_write(priv, val); uniphier_u3ssphy_testio_write(priv, val | TESTI_WR_EN); diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c index 5fd2e8a08bfcf1103115f200bc80ba81b63be197..d0ab69750c6b44ab36c5148af2216c6f235951ac 100644 --- a/drivers/phy/ti/phy-gmii-sel.c +++ b/drivers/phy/ti/phy-gmii-sel.c @@ -320,6 +320,8 @@ static int phy_gmii_sel_init_ports(struct phy_gmii_sel_priv *priv) u64 size; offset = of_get_address(dev->of_node, 0, &size, NULL); + if (!offset) + return -EINVAL; priv->num_ports = size / sizeof(u32); if (!priv->num_ports) return -EINVAL; diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c index dceac7714872195e73f73cd04eb7d3dda117c42f..5536b8f4bfd13daaf109bc7138da32d19bac11fb 100644 --- a/drivers/phy/ti/phy-j721e-wiz.c +++ b/drivers/phy/ti/phy-j721e-wiz.c @@ -177,6 +177,7 @@ static const struct clk_div_table clk_div_table[] = { { .val = 1, .div = 2, }, { .val = 2, .div = 4, }, { .val = 3, .div = 8, }, + { /* sentinel */ }, }; static struct wiz_clk_div_sel clk_div_sel[] = { diff --git a/drivers/phy/xilinx/phy-zynqmp.c b/drivers/phy/xilinx/phy-zynqmp.c index 2b0f921b6ee3de1c93fb1e96a7c3b13909668283..b8ccac6f314672857b1128b05434778f3aa75631 100644 --- a/drivers/phy/xilinx/phy-zynqmp.c +++ b/drivers/phy/xilinx/phy-zynqmp.c @@ -134,7 +134,8 @@ #define PROT_BUS_WIDTH_10 0x0 #define PROT_BUS_WIDTH_20 0x1 #define PROT_BUS_WIDTH_40 0x2 -#define PROT_BUS_WIDTH_SHIFT 2 +#define PROT_BUS_WIDTH_SHIFT(n) ((n) * 2) +#define PROT_BUS_WIDTH_MASK(n) GENMASK((n) * 2 + 1, (n) * 2) /* Number of GT lanes */ #define NUM_LANES 4 @@ -443,12 +444,12 @@ static void xpsgtr_phy_init_sata(struct xpsgtr_phy *gtr_phy) static void xpsgtr_phy_init_sgmii(struct xpsgtr_phy *gtr_phy) { struct xpsgtr_dev *gtr_dev = gtr_phy->dev; + u32 mask = PROT_BUS_WIDTH_MASK(gtr_phy->lane); + u32 val = PROT_BUS_WIDTH_10 << PROT_BUS_WIDTH_SHIFT(gtr_phy->lane); /* Set SGMII protocol TX and RX bus width to 10 bits. */ - xpsgtr_write(gtr_dev, TX_PROT_BUS_WIDTH, - PROT_BUS_WIDTH_10 << (gtr_phy->lane * PROT_BUS_WIDTH_SHIFT)); - xpsgtr_write(gtr_dev, RX_PROT_BUS_WIDTH, - PROT_BUS_WIDTH_10 << (gtr_phy->lane * PROT_BUS_WIDTH_SHIFT)); + xpsgtr_clr_set(gtr_dev, TX_PROT_BUS_WIDTH, mask, val); + xpsgtr_clr_set(gtr_dev, RX_PROT_BUS_WIDTH, mask, val); xpsgtr_bypass_scrambler_8b10b(gtr_phy); } diff --git a/drivers/pinctrl/bcm/pinctrl-bcm2835.c b/drivers/pinctrl/bcm/pinctrl-bcm2835.c index 1d21129f7751c4c2f45e06f71a6af40ac8fd2d5a..6768b2f03d6859601a6cb16d85deb2bada87e117 100644 --- a/drivers/pinctrl/bcm/pinctrl-bcm2835.c +++ b/drivers/pinctrl/bcm/pinctrl-bcm2835.c @@ -1244,6 +1244,18 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) raw_spin_lock_init(&pc->irq_lock[i]); } + pc->pctl_desc = *pdata->pctl_desc; + pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc); + if (IS_ERR(pc->pctl_dev)) { + gpiochip_remove(&pc->gpio_chip); + return PTR_ERR(pc->pctl_dev); + } + + pc->gpio_range = *pdata->gpio_range; + pc->gpio_range.base = pc->gpio_chip.base; + pc->gpio_range.gc = &pc->gpio_chip; + pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range); + girq = &pc->gpio_chip.irq; girq->chip = &bcm2835_gpio_irq_chip; girq->parent_handler = bcm2835_gpio_irq_handler; @@ -1251,15 +1263,19 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) girq->parents = devm_kcalloc(dev, BCM2835_NUM_IRQS, sizeof(*girq->parents), GFP_KERNEL); - if (!girq->parents) - return -ENOMEM; + if (!girq->parents) { + err = -ENOMEM; + goto out_remove; + } if (is_7211) { pc->wake_irq = devm_kcalloc(dev, BCM2835_NUM_IRQS, sizeof(*pc->wake_irq), GFP_KERNEL); - if (!pc->wake_irq) - return -ENOMEM; + if (!pc->wake_irq) { + err = -ENOMEM; + goto out_remove; + } } /* @@ -1283,8 +1299,10 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) len = strlen(dev_name(pc->dev)) + 16; name = devm_kzalloc(pc->dev, len, GFP_KERNEL); - if (!name) - return -ENOMEM; + if (!name) { + err = -ENOMEM; + goto out_remove; + } snprintf(name, len, "%s:bank%d", dev_name(pc->dev), i); @@ -1303,22 +1321,14 @@ static int bcm2835_pinctrl_probe(struct platform_device *pdev) err = gpiochip_add_data(&pc->gpio_chip, pc); if (err) { dev_err(dev, "could not add GPIO chip\n"); - return err; - } - - pc->pctl_desc = *pdata->pctl_desc; - pc->pctl_dev = devm_pinctrl_register(dev, &pc->pctl_desc, pc); - if (IS_ERR(pc->pctl_dev)) { - gpiochip_remove(&pc->gpio_chip); - return PTR_ERR(pc->pctl_dev); + goto out_remove; } - pc->gpio_range = *pdata->gpio_range; - pc->gpio_range.base = pc->gpio_chip.base; - pc->gpio_range.gc = &pc->gpio_chip; - pinctrl_add_gpio_range(pc->pctl_dev, &pc->gpio_range); - return 0; + +out_remove: + pinctrl_remove_gpio_range(pc->pctl_dev, &pc->gpio_range); + return err; } static struct platform_driver bcm2835_pinctrl_driver = { diff --git a/drivers/pinctrl/bcm/pinctrl-ns.c b/drivers/pinctrl/bcm/pinctrl-ns.c index e79690bd8b85f25a9e8a9665dd4a1cc08bbb8ca8..d7f8175d2c1c86b2d4799b3faafd0a27da402ca4 100644 --- a/drivers/pinctrl/bcm/pinctrl-ns.c +++ b/drivers/pinctrl/bcm/pinctrl-ns.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -13,7 +12,6 @@ #include #include #include -#include #include #define FLAG_BCM4708 BIT(1) @@ -24,8 +22,7 @@ struct ns_pinctrl { struct device *dev; unsigned int chipset_flag; struct pinctrl_dev *pctldev; - struct regmap *regmap; - u32 offset; + void __iomem *base; struct pinctrl_desc pctldesc; struct ns_pinctrl_group *groups; @@ -232,9 +229,9 @@ static int ns_pinctrl_set_mux(struct pinctrl_dev *pctrl_dev, unset |= BIT(pin_number); } - regmap_read(ns_pinctrl->regmap, ns_pinctrl->offset, &tmp); + tmp = readl(ns_pinctrl->base); tmp &= ~unset; - regmap_write(ns_pinctrl->regmap, ns_pinctrl->offset, tmp); + writel(tmp, ns_pinctrl->base); return 0; } @@ -266,13 +263,13 @@ static const struct of_device_id ns_pinctrl_of_match_table[] = { static int ns_pinctrl_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct device_node *np = dev->of_node; const struct of_device_id *of_id; struct ns_pinctrl *ns_pinctrl; struct pinctrl_desc *pctldesc; struct pinctrl_pin_desc *pin; struct ns_pinctrl_group *group; struct ns_pinctrl_function *function; + struct resource *res; int i; ns_pinctrl = devm_kzalloc(dev, sizeof(*ns_pinctrl), GFP_KERNEL); @@ -290,18 +287,12 @@ static int ns_pinctrl_probe(struct platform_device *pdev) return -EINVAL; ns_pinctrl->chipset_flag = (uintptr_t)of_id->data; - ns_pinctrl->regmap = syscon_node_to_regmap(of_get_parent(np)); - if (IS_ERR(ns_pinctrl->regmap)) { - int err = PTR_ERR(ns_pinctrl->regmap); - - dev_err(dev, "Failed to map pinctrl regs: %d\n", err); - - return err; - } - - if (of_property_read_u32(np, "offset", &ns_pinctrl->offset)) { - dev_err(dev, "Failed to get register offset\n"); - return -ENOENT; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + "cru_gpio_control"); + ns_pinctrl->base = devm_ioremap_resource(dev, res); + if (IS_ERR(ns_pinctrl->base)) { + dev_err(dev, "Failed to map pinctrl regs\n"); + return PTR_ERR(ns_pinctrl->base); } memcpy(pctldesc, &ns_pinctrl_desc, sizeof(*pctldesc)); diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 6e6825d17a1d149eef539b2e0c53f4df1a9125ca..840000870d5a026e6596056991990020d7328df3 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -2077,6 +2077,8 @@ int pinctrl_enable(struct pinctrl_dev *pctldev) if (error) { dev_err(pctldev->dev, "could not claim hogs: %i\n", error); + pinctrl_free_pindescs(pctldev, pctldev->desc->pins, + pctldev->desc->npins); mutex_destroy(&pctldev->mutex); kfree(pctldev); diff --git a/drivers/pinctrl/intel/pinctrl-intel.c b/drivers/pinctrl/intel/pinctrl-intel.c index b6ef1911c1dd16a3060a1e95d08e21f41d3fe8d3..348c670a7b07d331acdcfd3ca58f5167379bdf28 100644 --- a/drivers/pinctrl/intel/pinctrl-intel.c +++ b/drivers/pinctrl/intel/pinctrl-intel.c @@ -441,8 +441,8 @@ static void intel_gpio_set_gpio_mode(void __iomem *padcfg0) value &= ~PADCFG0_PMODE_MASK; value |= PADCFG0_PMODE_GPIO; - /* Disable input and output buffers */ - value |= PADCFG0_GPIORXDIS; + /* Disable TX buffer and enable RX (this will be input) */ + value &= ~PADCFG0_GPIORXDIS; value |= PADCFG0_GPIOTXDIS; /* Disable SCI/SMI/NMI generation */ @@ -487,9 +487,6 @@ static int intel_gpio_request_enable(struct pinctrl_dev *pctldev, intel_gpio_set_gpio_mode(padcfg0); - /* Disable TX buffer and enable RX (this will be input) */ - __intel_gpio_set_direction(padcfg0, true); - raw_spin_unlock_irqrestore(&pctrl->lock, flags); return 0; @@ -1105,9 +1102,6 @@ static int intel_gpio_irq_type(struct irq_data *d, unsigned int type) intel_gpio_set_gpio_mode(reg); - /* Disable TX buffer and enable RX (this will be input) */ - __intel_gpio_set_direction(reg, true); - value = readl(reg); value &= ~(PADCFG0_RXEVCFG_MASK | PADCFG0_RXINV); @@ -1207,6 +1201,39 @@ static irqreturn_t intel_gpio_irq(int irq, void *data) return IRQ_RETVAL(ret); } +static void intel_gpio_irq_init(struct intel_pinctrl *pctrl) +{ + int i; + + for (i = 0; i < pctrl->ncommunities; i++) { + const struct intel_community *community; + void __iomem *base; + unsigned int gpp; + + community = &pctrl->communities[i]; + base = community->regs; + + for (gpp = 0; gpp < community->ngpps; gpp++) { + /* Mask and clear all interrupts */ + writel(0, base + community->ie_offset + gpp * 4); + writel(0xffff, base + community->is_offset + gpp * 4); + } + } +} + +static int intel_gpio_irq_init_hw(struct gpio_chip *gc) +{ + struct intel_pinctrl *pctrl = gpiochip_get_data(gc); + + /* + * Make sure the interrupt lines are in a proper state before + * further configuration. + */ + intel_gpio_irq_init(pctrl); + + return 0; +} + static int intel_gpio_add_community_ranges(struct intel_pinctrl *pctrl, const struct intel_community *community) { @@ -1311,6 +1338,7 @@ static int intel_gpio_probe(struct intel_pinctrl *pctrl, int irq) girq->num_parents = 0; girq->default_type = IRQ_TYPE_NONE; girq->handler = handle_bad_irq; + girq->init_hw = intel_gpio_irq_init_hw; ret = devm_gpiochip_add_data(pctrl->dev, &pctrl->chip, pctrl); if (ret) { @@ -1640,26 +1668,6 @@ int intel_pinctrl_suspend_noirq(struct device *dev) } EXPORT_SYMBOL_GPL(intel_pinctrl_suspend_noirq); -static void intel_gpio_irq_init(struct intel_pinctrl *pctrl) -{ - size_t i; - - for (i = 0; i < pctrl->ncommunities; i++) { - const struct intel_community *community; - void __iomem *base; - unsigned int gpp; - - community = &pctrl->communities[i]; - base = community->regs; - - for (gpp = 0; gpp < community->ngpps; gpp++) { - /* Mask and clear all interrupts */ - writel(0, base + community->ie_offset + gpp * 4); - writel(0xffff, base + community->is_offset + gpp * 4); - } - } -} - static bool intel_gpio_update_reg(void __iomem *reg, u32 mask, u32 value) { u32 curr, updated; diff --git a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c index 10002b8497feae41a3ac23872f6441600268f67d..fbb7807e0da29ca83f26be693a267eb28377546d 100644 --- a/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c +++ b/drivers/pinctrl/mediatek/pinctrl-mtk-common-v2.c @@ -280,8 +280,12 @@ static int mtk_xt_get_gpio_n(void *data, unsigned long eint_n, desc = (const struct mtk_pin_desc *)hw->soc->pins; *gpio_chip = &hw->chip; - /* Be greedy to guess first gpio_n is equal to eint_n */ - if (desc[eint_n].eint.eint_n == eint_n) + /* + * Be greedy to guess first gpio_n is equal to eint_n. + * Only eint virtual eint number is greater than gpio number. + */ + if (hw->soc->npins > eint_n && + desc[eint_n].eint.eint_n == eint_n) *gpio_n = eint_n; else *gpio_n = mtk_xt_find_eint_num(hw, eint_n); diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index ef49402c0623de8fcfb37e4b53badd1ec0ecd5e9..e20bcc835d6a84f627084f3e61410910f0509b8c 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -764,6 +764,34 @@ static const struct pinconf_ops amd_pinconf_ops = { .pin_config_group_set = amd_pinconf_group_set, }; +static void amd_gpio_irq_init(struct amd_gpio *gpio_dev) +{ + struct pinctrl_desc *desc = gpio_dev->pctrl->desc; + unsigned long flags; + u32 pin_reg, mask; + int i; + + mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) | + BIT(INTERRUPT_MASK_OFF) | BIT(INTERRUPT_ENABLE_OFF) | + BIT(WAKE_CNTRL_OFF_S4); + + for (i = 0; i < desc->npins; i++) { + int pin = desc->pins[i].number; + const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin); + + if (!pd) + continue; + + raw_spin_lock_irqsave(&gpio_dev->lock, flags); + + pin_reg = readl(gpio_dev->base + i * 4); + pin_reg &= ~mask; + writel(pin_reg, gpio_dev->base + i * 4); + + raw_spin_unlock_irqrestore(&gpio_dev->lock, flags); + } +} + #ifdef CONFIG_PM_SLEEP static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin) { @@ -901,6 +929,9 @@ static int amd_gpio_probe(struct platform_device *pdev) return PTR_ERR(gpio_dev->pctrl); } + /* Disable and mask interrupts */ + amd_gpio_irq_init(gpio_dev); + girq = &gpio_dev->gc.irq; girq->chip = &amd_gpio_irqchip; /* This will let us handle the parent IRQ in the driver */ diff --git a/drivers/pinctrl/pinctrl-equilibrium.c b/drivers/pinctrl/pinctrl-equilibrium.c index ac1c47f542c11fc055ac3fa5f5864086b6853817..3b6dcaa80e0002456d1e5f5dbbf3e7ad108e8f4a 100644 --- a/drivers/pinctrl/pinctrl-equilibrium.c +++ b/drivers/pinctrl/pinctrl-equilibrium.c @@ -674,6 +674,11 @@ static int eqbr_build_functions(struct eqbr_pinctrl_drv_data *drvdata) return ret; for (i = 0; i < nr_funcs; i++) { + + /* Ignore the same function with multiple groups */ + if (funcs[i].name == NULL) + continue; + ret = pinmux_generic_add_function(drvdata->pctl_dev, funcs[i].name, funcs[i].groups, @@ -805,7 +810,7 @@ static int pinctrl_reg(struct eqbr_pinctrl_drv_data *drvdata) ret = eqbr_build_functions(drvdata); if (ret) { - dev_err(dev, "Failed to build groups\n"); + dev_err(dev, "Failed to build functions\n"); return ret; } diff --git a/drivers/pinctrl/qcom/pinctrl-sdm845.c b/drivers/pinctrl/qcom/pinctrl-sdm845.c index c51793f6546f1c20c06aa946ffd52da4a9937f69..fdfd7b8f3a76d7d66659291bfdb497c1f21a8810 100644 --- a/drivers/pinctrl/qcom/pinctrl-sdm845.c +++ b/drivers/pinctrl/qcom/pinctrl-sdm845.c @@ -1310,6 +1310,7 @@ static const struct msm_pinctrl_soc_data sdm845_pinctrl = { .ngpios = 151, .wakeirq_map = sdm845_pdc_map, .nwakeirq_map = ARRAY_SIZE(sdm845_pdc_map), + .wakeirq_dual_edge_errata = true, }; static const struct msm_pinctrl_soc_data sdm845_acpi_pinctrl = { diff --git a/drivers/pinctrl/renesas/core.c b/drivers/pinctrl/renesas/core.c index c528c124fb0e92592f8060ef3c0f67a8e5f7f3c3..9d168b90cd2810e7ae5eb3813ae0dfe5e58e88b7 100644 --- a/drivers/pinctrl/renesas/core.c +++ b/drivers/pinctrl/renesas/core.c @@ -890,7 +890,7 @@ static void __init sh_pfc_check_drive_reg(const struct sh_pfc_soc_info *info, if (!field->pin && !field->offset && !field->size) continue; - mask = GENMASK(field->offset + field->size, field->offset); + mask = GENMASK(field->offset + field->size - 1, field->offset); if (mask & seen) sh_pfc_err("drive_reg 0x%x: field %u overlap\n", drive->reg, i); diff --git a/drivers/pinctrl/stm32/pinctrl-stm32.c b/drivers/pinctrl/stm32/pinctrl-stm32.c index 3af4430543dcae165fdf32ddf72598135f046d2a..e13723bb2be4115d26b306190f84d0071dc9cc29 100644 --- a/drivers/pinctrl/stm32/pinctrl-stm32.c +++ b/drivers/pinctrl/stm32/pinctrl-stm32.c @@ -1255,10 +1255,10 @@ static int stm32_gpiolib_register_bank(struct stm32_pinctrl *pctl, bank_nr = args.args[1] / STM32_GPIO_PINS_PER_BANK; bank->gpio_chip.base = args.args[1]; - npins = args.args[2]; - while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, - ++i, &args)) - npins += args.args[2]; + /* get the last defined gpio line (offset + nb of pins) */ + npins = args.args[0] + args.args[2]; + while (!of_parse_phandle_with_fixed_args(np, "gpio-ranges", 3, ++i, &args)) + npins = max(npins, (int)(args.args[0] + args.args[2])); } else { bank_nr = pctl->nbanks; bank->gpio_chip.base = bank_nr * STM32_GPIO_PINS_PER_BANK; @@ -1645,8 +1645,8 @@ int __maybe_unused stm32_pinctrl_resume(struct device *dev) struct stm32_pinctrl_group *g = pctl->groups; int i; - for (i = g->pin; i < g->pin + pctl->ngroups; i++) - stm32_pinctrl_restore_gpio_regs(pctl, i); + for (i = 0; i < pctl->ngroups; i++, g++) + stm32_pinctrl_restore_gpio_regs(pctl, g->pin); return 0; } diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index e42a3a0005a72c562346e484b7b3ee307310ed06..be7f4f95f455daeb819746a5b64d4da1a8f7544b 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -36,6 +36,13 @@ #include "../core.h" #include "pinctrl-sunxi.h" +/* + * These lock classes tell lockdep that GPIO IRQs are in a different + * category than their parents, so it won't report false recursion. + */ +static struct lock_class_key sunxi_pinctrl_irq_lock_class; +static struct lock_class_key sunxi_pinctrl_irq_request_class; + static struct irq_chip sunxi_pinctrl_edge_irq_chip; static struct irq_chip sunxi_pinctrl_level_irq_chip; @@ -1552,6 +1559,8 @@ int sunxi_pinctrl_init_with_variant(struct platform_device *pdev, for (i = 0; i < (pctl->desc->irq_banks * IRQ_PER_BANK); i++) { int irqno = irq_create_mapping(pctl->domain, i); + irq_set_lockdep_class(irqno, &sunxi_pinctrl_irq_lock_class, + &sunxi_pinctrl_irq_request_class); irq_set_chip_and_handler(irqno, &sunxi_pinctrl_edge_irq_chip, handle_edge_irq); irq_set_chip_data(irqno, pctl); diff --git a/drivers/platform/mellanox/mlxreg-io.c b/drivers/platform/mellanox/mlxreg-io.c index 7646708d57e42bdc0ae57d2f0cdbf41bcc1a2fc2..a916cd89cbbeddf99272f26012a790d43ce08f7b 100644 --- a/drivers/platform/mellanox/mlxreg-io.c +++ b/drivers/platform/mellanox/mlxreg-io.c @@ -98,7 +98,7 @@ mlxreg_io_get_reg(void *regmap, struct mlxreg_core_data *data, u32 in_val, if (ret) goto access_error; - *regval |= rol32(val, regsize * i); + *regval |= rol32(val, regsize * i * 8); } } @@ -141,7 +141,7 @@ mlxreg_io_attr_store(struct device *dev, struct device_attribute *attr, return -EINVAL; /* Convert buffer to input value. */ - ret = kstrtou32(buf, len, &input_val); + ret = kstrtou32(buf, 0, &input_val); if (ret) return ret; diff --git a/drivers/platform/x86/apple-gmux.c b/drivers/platform/x86/apple-gmux.c index 9aae45a452002cf842d9c5fb100010a2f843a6df..57553f9b4d1dcdc2f5514dc5382573a8d1b081bb 100644 --- a/drivers/platform/x86/apple-gmux.c +++ b/drivers/platform/x86/apple-gmux.c @@ -625,7 +625,7 @@ static int gmux_probe(struct pnp_dev *pnp, const struct pnp_device_id *id) } gmux_data->iostart = res->start; - gmux_data->iolen = res->end - res->start; + gmux_data->iolen = resource_size(res); if (gmux_data->iolen < GMUX_MIN_IO_LEN) { pr_err("gmux I/O region too small (%lu < %u)\n", diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c index 8c0867bda8280e3340f28f1ff821db03db0f3937..0dfaa1a43b6747643555b02fa9fdb88b5607d334 100644 --- a/drivers/platform/x86/hp_accel.c +++ b/drivers/platform/x86/hp_accel.c @@ -372,9 +372,11 @@ static int lis3lv02d_add(struct acpi_device *device) INIT_WORK(&hpled_led.work, delayed_set_status_worker); ret = led_classdev_register(NULL, &hpled_led.led_classdev); if (ret) { + i8042_remove_filter(hp_accel_i8042_filter); lis3lv02d_joystick_disable(&lis3_dev); lis3lv02d_poweroff(&lis3_dev); flush_work(&hpled_led.work); + lis3lv02d_remove_fs(&lis3_dev); return ret; } diff --git a/drivers/platform/x86/intel_pmc_core_pltdrv.c b/drivers/platform/x86/intel_pmc_core_pltdrv.c index 73797680b895c370e7b1cf043010dc1f1ab53d4f..15ca8afdd973de2a2b0c7060e8b26b4beb2951bf 100644 --- a/drivers/platform/x86/intel_pmc_core_pltdrv.c +++ b/drivers/platform/x86/intel_pmc_core_pltdrv.c @@ -65,7 +65,7 @@ static int __init pmc_core_platform_init(void) retval = platform_device_register(pmc_core_device); if (retval) - kfree(pmc_core_device); + platform_device_put(pmc_core_device); return retval; } diff --git a/drivers/platform/x86/intel_scu_ipc.c b/drivers/platform/x86/intel_scu_ipc.c index d9cf7f7602b0b4e186b6ebf193e88ea416366941..69d706039cb205bad26ff6d140dbc65b23894c36 100644 --- a/drivers/platform/x86/intel_scu_ipc.c +++ b/drivers/platform/x86/intel_scu_ipc.c @@ -232,7 +232,7 @@ static inline u32 ipc_data_readl(struct intel_scu_ipc_dev *scu, u32 offset) /* Wait till scu status is busy */ static inline int busy_loop(struct intel_scu_ipc_dev *scu) { - unsigned long end = jiffies + msecs_to_jiffies(IPC_TIMEOUT); + unsigned long end = jiffies + IPC_TIMEOUT; do { u32 status; @@ -247,7 +247,7 @@ static inline int busy_loop(struct intel_scu_ipc_dev *scu) return -ETIMEDOUT; } -/* Wait till ipc ioc interrupt is received or timeout in 3 HZ */ +/* Wait till ipc ioc interrupt is received or timeout in 10 HZ */ static inline int ipc_wait_for_interrupt(struct intel_scu_ipc_dev *scu) { int status; diff --git a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c index 0c2aa22c7a12eaf36d9670dc450868fbc64b4543..407afafc7e83f0592e7e96a992974c462e43af42 100644 --- a/drivers/platform/x86/intel_speed_select_if/isst_if_common.c +++ b/drivers/platform/x86/intel_speed_select_if/isst_if_common.c @@ -532,7 +532,10 @@ static long isst_if_def_ioctl(struct file *file, unsigned int cmd, return ret; } -static DEFINE_MUTEX(punit_misc_dev_lock); +/* Lock to prevent module registration when already opened by user space */ +static DEFINE_MUTEX(punit_misc_dev_open_lock); +/* Lock to allow one share misc device for all ISST interace */ +static DEFINE_MUTEX(punit_misc_dev_reg_lock); static int misc_usage_count; static int misc_device_ret; static int misc_device_open; @@ -542,7 +545,7 @@ static int isst_if_open(struct inode *inode, struct file *file) int i, ret = 0; /* Fail open, if a module is going away */ - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -564,7 +567,7 @@ static int isst_if_open(struct inode *inode, struct file *file) } else { misc_device_open++; } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return ret; } @@ -573,7 +576,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) { int i; - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); misc_device_open--; for (i = 0; i < ISST_IF_DEV_MAX; ++i) { struct isst_if_cmd_cb *cb = &punit_callbacks[i]; @@ -581,7 +584,7 @@ static int isst_if_relase(struct inode *inode, struct file *f) if (cb->registered) module_put(cb->owner); } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return 0; } @@ -598,6 +601,43 @@ static struct miscdevice isst_if_char_driver = { .fops = &isst_if_char_driver_ops, }; +static int isst_misc_reg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_device_ret) + goto unlock_exit; + + if (!misc_usage_count) { + misc_device_ret = isst_if_cpu_info_init(); + if (misc_device_ret) + goto unlock_exit; + + misc_device_ret = misc_register(&isst_if_char_driver); + if (misc_device_ret) { + isst_if_cpu_info_exit(); + goto unlock_exit; + } + } + misc_usage_count++; + +unlock_exit: + mutex_unlock(&punit_misc_dev_reg_lock); + + return misc_device_ret; +} + +static void isst_misc_unreg(void) +{ + mutex_lock(&punit_misc_dev_reg_lock); + if (misc_usage_count) + misc_usage_count--; + if (!misc_usage_count && !misc_device_ret) { + misc_deregister(&isst_if_char_driver); + isst_if_cpu_info_exit(); + } + mutex_unlock(&punit_misc_dev_reg_lock); +} + /** * isst_if_cdev_register() - Register callback for IOCTL * @device_type: The device type this callback handling. @@ -615,38 +655,31 @@ static struct miscdevice isst_if_char_driver = { */ int isst_if_cdev_register(int device_type, struct isst_if_cmd_cb *cb) { - if (misc_device_ret) - return misc_device_ret; + int ret; if (device_type >= ISST_IF_DEV_MAX) return -EINVAL; - mutex_lock(&punit_misc_dev_lock); + mutex_lock(&punit_misc_dev_open_lock); + /* Device is already open, we don't want to add new callbacks */ if (misc_device_open) { - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); return -EAGAIN; } - if (!misc_usage_count) { - int ret; - - misc_device_ret = misc_register(&isst_if_char_driver); - if (misc_device_ret) - goto unlock_exit; - - ret = isst_if_cpu_info_init(); - if (ret) { - misc_deregister(&isst_if_char_driver); - misc_device_ret = ret; - goto unlock_exit; - } - } memcpy(&punit_callbacks[device_type], cb, sizeof(*cb)); punit_callbacks[device_type].registered = 1; - misc_usage_count++; -unlock_exit: - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); - return misc_device_ret; + ret = isst_misc_reg(); + if (ret) { + /* + * No need of mutex as the misc device register failed + * as no one can open device yet. Hence no contention. + */ + punit_callbacks[device_type].registered = 0; + return ret; + } + return 0; } EXPORT_SYMBOL_GPL(isst_if_cdev_register); @@ -661,16 +694,12 @@ EXPORT_SYMBOL_GPL(isst_if_cdev_register); */ void isst_if_cdev_unregister(int device_type) { - mutex_lock(&punit_misc_dev_lock); - misc_usage_count--; + isst_misc_unreg(); + mutex_lock(&punit_misc_dev_open_lock); punit_callbacks[device_type].registered = 0; if (device_type == ISST_IF_DEV_MBOX) isst_delete_hash(); - if (!misc_usage_count && !misc_device_ret) { - misc_deregister(&isst_if_char_driver); - isst_if_cpu_info_exit(); - } - mutex_unlock(&punit_misc_dev_lock); + mutex_unlock(&punit_misc_dev_open_lock); } EXPORT_SYMBOL_GPL(isst_if_cdev_unregister); diff --git a/drivers/platform/x86/surface3_power.c b/drivers/platform/x86/surface3_power.c index cc4f9cba68563c137c1c71785e8d6c26479ef2c6..01aacf1bee0749d63836c3553337871754c633a4 100644 --- a/drivers/platform/x86/surface3_power.c +++ b/drivers/platform/x86/surface3_power.c @@ -233,14 +233,21 @@ static int mshw0011_bix(struct mshw0011_data *cdata, struct bix *bix) } bix->last_full_charg_capacity = ret; - /* get serial number */ + /* + * Get serial number, on some devices (with unofficial replacement + * battery?) reading any of the serial number range addresses gets + * nacked in this case just leave the serial number empty. + */ ret = i2c_smbus_read_i2c_block_data(client, MSHW0011_BAT0_REG_SERIAL_NO, sizeof(buf), buf); - if (ret != sizeof(buf)) { + if (ret == -EREMOTEIO) { + /* no serial number available */ + } else if (ret != sizeof(buf)) { dev_err(&client->dev, "Error reading serial no: %d\n", ret); return ret; + } else { + snprintf(bix->serial, ARRAY_SIZE(bix->serial), "%3pE%6pE", buf + 7, buf); } - snprintf(bix->serial, ARRAY_SIZE(bix->serial), "%3pE%6pE", buf + 7, buf); /* get cycle count */ ret = i2c_smbus_read_word_data(client, MSHW0011_BAT0_REG_CYCLE_CNT); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 5c2f2e337b57b3fac7a45563f265770c260172bc..d8d241344d22dae092110898c1911fcdbedebf47 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1170,15 +1170,6 @@ static int tpacpi_rfk_update_swstate(const struct tpacpi_rfk *tp_rfk) return status; } -/* Query FW and update rfkill sw state for all rfkill switches */ -static void tpacpi_rfk_update_swstate_all(void) -{ - unsigned int i; - - for (i = 0; i < TPACPI_RFK_SW_MAX; i++) - tpacpi_rfk_update_swstate(tpacpi_rfkill_switches[i]); -} - /* * Sync the HW-blocking state of all rfkill switches, * do notice it causes the rfkill core to schedule uevents @@ -3121,9 +3112,6 @@ static void tpacpi_send_radiosw_update(void) if (wlsw == TPACPI_RFK_RADIO_OFF) tpacpi_rfk_update_hwblock_state(true); - /* Sync sw blocking state */ - tpacpi_rfk_update_swstate_all(); - /* Sync hw blocking state last if it is hw-unblocked */ if (wlsw == TPACPI_RFK_RADIO_ON) tpacpi_rfk_update_hwblock_state(false); @@ -8805,6 +8793,7 @@ static const struct tpacpi_quirk fan_quirk_table[] __initconst = { TPACPI_Q_LNV3('N', '2', 'E', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (1st gen) */ TPACPI_Q_LNV3('N', '2', 'O', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (2nd gen) */ TPACPI_Q_LNV3('N', '2', 'V', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (3nd gen) */ + TPACPI_Q_LNV3('N', '4', '0', TPACPI_FAN_2CTL), /* P1 / X1 Extreme (4nd gen) */ TPACPI_Q_LNV3('N', '3', '0', TPACPI_FAN_2CTL), /* P15 (1st gen) / P15v (1st gen) */ TPACPI_Q_LNV3('N', '3', '2', TPACPI_FAN_2CTL), /* X1 Carbon (9th gen) */ }; @@ -9097,7 +9086,7 @@ static int fan_write_cmd_level(const char *cmd, int *rc) if (strlencmp(cmd, "level auto") == 0) level = TP_EC_FAN_AUTO; - else if ((strlencmp(cmd, "level disengaged") == 0) | + else if ((strlencmp(cmd, "level disengaged") == 0) || (strlencmp(cmd, "level full-speed") == 0)) level = TP_EC_FAN_FULLSPEED; else if (sscanf(cmd, "level %d", &level) != 1) diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c index 99260915122c00e101eda9efbaaf1bf34ef1a4a8..ab6a9369649dba450c19fd14324f45b68838b3b3 100644 --- a/drivers/platform/x86/touchscreen_dmi.c +++ b/drivers/platform/x86/touchscreen_dmi.c @@ -100,10 +100,10 @@ static const struct ts_dmi_data chuwi_hi10_air_data = { }; static const struct property_entry chuwi_hi10_plus_props[] = { - PROPERTY_ENTRY_U32("touchscreen-min-x", 0), - PROPERTY_ENTRY_U32("touchscreen-min-y", 5), - PROPERTY_ENTRY_U32("touchscreen-size-x", 1914), - PROPERTY_ENTRY_U32("touchscreen-size-y", 1283), + PROPERTY_ENTRY_U32("touchscreen-min-x", 12), + PROPERTY_ENTRY_U32("touchscreen-min-y", 10), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1908), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1270), PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hi10plus.fw"), PROPERTY_ENTRY_U32("silead,max-fingers", 10), PROPERTY_ENTRY_BOOL("silead,home-button"), @@ -111,6 +111,15 @@ static const struct property_entry chuwi_hi10_plus_props[] = { }; static const struct ts_dmi_data chuwi_hi10_plus_data = { + .embedded_fw = { + .name = "silead/gsl1680-chuwi-hi10plus.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 34056, + .sha256 = { 0xfd, 0x0a, 0x08, 0x08, 0x3c, 0xa6, 0x34, 0x4e, + 0x2c, 0x49, 0x9c, 0xcd, 0x7d, 0x44, 0x9d, 0x38, + 0x10, 0x68, 0xb5, 0xbd, 0xb7, 0x2a, 0x63, 0xb5, + 0x67, 0x0b, 0x96, 0xbd, 0x89, 0x67, 0x85, 0x09 }, + }, .acpi_name = "MSSL0017:00", .properties = chuwi_hi10_plus_props, }; @@ -141,6 +150,33 @@ static const struct ts_dmi_data chuwi_hi10_pro_data = { .properties = chuwi_hi10_pro_props, }; +static const struct property_entry chuwi_hibook_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-x", 30), + PROPERTY_ENTRY_U32("touchscreen-min-y", 4), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1892), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1276), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-chuwi-hibook.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + PROPERTY_ENTRY_BOOL("silead,home-button"), + { } +}; + +static const struct ts_dmi_data chuwi_hibook_data = { + .embedded_fw = { + .name = "silead/gsl1680-chuwi-hibook.fw", + .prefix = { 0xf0, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00 }, + .length = 40392, + .sha256 = { 0xf7, 0xc0, 0xe8, 0x5a, 0x6c, 0xf2, 0xeb, 0x8d, + 0x12, 0xc4, 0x45, 0xbf, 0x55, 0x13, 0x4c, 0x1a, + 0x13, 0x04, 0x31, 0x08, 0x65, 0x73, 0xf7, 0xa8, + 0x1b, 0x7d, 0x59, 0xc9, 0xe6, 0x97, 0xf7, 0x38 }, + }, + .acpi_name = "MSSL0017:00", + .properties = chuwi_hibook_props, +}; + static const struct property_entry chuwi_vi8_props[] = { PROPERTY_ENTRY_U32("touchscreen-min-x", 4), PROPERTY_ENTRY_U32("touchscreen-min-y", 6), @@ -720,6 +756,21 @@ static const struct ts_dmi_data predia_basic_data = { .properties = predia_basic_props, }; +static const struct property_entry rwc_nanote_p8_props[] = { + PROPERTY_ENTRY_U32("touchscreen-min-y", 46), + PROPERTY_ENTRY_U32("touchscreen-size-x", 1728), + PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), + PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"), + PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-rwc-nanote-p8.fw"), + PROPERTY_ENTRY_U32("silead,max-fingers", 10), + { } +}; + +static const struct ts_dmi_data rwc_nanote_p8_data = { + .acpi_name = "MSSL1680:00", + .properties = rwc_nanote_p8_props, +}; + static const struct property_entry schneider_sct101ctm_props[] = { PROPERTY_ENTRY_U32("touchscreen-size-x", 1715), PROPERTY_ENTRY_U32("touchscreen-size-y", 1140), @@ -936,6 +987,16 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), }, }, + { + /* Chuwi HiBook (CWI514) */ + .driver_data = (void *)&chuwi_hibook_data, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), + DMI_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), + /* Above matches are too generic, add bios-date match */ + DMI_MATCH(DMI_BIOS_DATE, "05/07/2016"), + }, + }, { /* Chuwi Vi8 (CWI506) */ .driver_data = (void *)&chuwi_vi8_data, @@ -1280,6 +1341,15 @@ const struct dmi_system_id touchscreen_dmi_table[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "0E57"), }, }, + { + /* RWC NANOTE P8 */ + .driver_data = (void *)&rwc_nanote_p8_data, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "Default string"), + DMI_MATCH(DMI_PRODUCT_NAME, "AY07J"), + DMI_MATCH(DMI_PRODUCT_SKU, "0001") + }, + }, { /* Schneider SCT101CTM */ .driver_data = (void *)&schneider_sct101ctm_data, diff --git a/drivers/platform/x86/wmi.c b/drivers/platform/x86/wmi.c index d88f388a3450fa4bd61b5e4042ff3e30c71111d8..1f80b2628162822fb2df03ef8c4fd2d9dd00f2ef 100644 --- a/drivers/platform/x86/wmi.c +++ b/drivers/platform/x86/wmi.c @@ -354,7 +354,14 @@ static acpi_status __query_block(struct wmi_block *wblock, u8 instance, * the WQxx method failed - we should disable collection anyway. */ if ((block->flags & ACPI_WMI_EXPENSIVE) && ACPI_SUCCESS(wc_status)) { - status = acpi_execute_simple_method(handle, wc_method, 0); + /* + * Ignore whether this WCxx call succeeds or not since + * the previously executed WQxx method call might have + * succeeded, and returning the failing status code + * of this call would throw away the result of the WQxx + * call, potentially leaking memory. + */ + acpi_execute_simple_method(handle, wc_method, 0); } return status; diff --git a/drivers/power/reset/ltc2952-poweroff.c b/drivers/power/reset/ltc2952-poweroff.c index 318927938b05b47a1a45af8ef03158fdd348e31e..d3844ae9eca4ab4fc281730756a367349a8e7d3e 100644 --- a/drivers/power/reset/ltc2952-poweroff.c +++ b/drivers/power/reset/ltc2952-poweroff.c @@ -159,8 +159,8 @@ static void ltc2952_poweroff_kill(void) static void ltc2952_poweroff_default(struct ltc2952_poweroff *data) { - data->wde_interval = 300L * 1E6L; - data->trigger_delay = ktime_set(2, 500L*1E6L); + data->wde_interval = 300L * NSEC_PER_MSEC; + data->trigger_delay = ktime_set(2, 500L * NSEC_PER_MSEC); hrtimer_init(&data->timer_trigger, CLOCK_MONOTONIC, HRTIMER_MODE_REL); data->timer_trigger.function = ltc2952_poweroff_timer_trigger; diff --git a/drivers/power/reset/mt6323-poweroff.c b/drivers/power/reset/mt6323-poweroff.c index 0532803e6cbc40225f8151d1716279d899d166a5..d90e76fcb938384a6f3e85acc920bb5394799b42 100644 --- a/drivers/power/reset/mt6323-poweroff.c +++ b/drivers/power/reset/mt6323-poweroff.c @@ -57,6 +57,9 @@ static int mt6323_pwrc_probe(struct platform_device *pdev) return -ENOMEM; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -EINVAL; + pwrc->base = res->start; pwrc->regmap = mt6397_chip->regmap; pwrc->dev = &pdev->dev; diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c index 945c3257ca93178c70fdca29f40a7b5be623def2..fe814805c68b541d0921d9243e414356e6b3a31e 100644 --- a/drivers/power/supply/bq25890_charger.c +++ b/drivers/power/supply/bq25890_charger.c @@ -581,12 +581,12 @@ static irqreturn_t __bq25890_handle_irq(struct bq25890_device *bq) if (!new_state.online && bq->state.online) { /* power removed */ /* disable ADC */ - ret = bq25890_field_write(bq, F_CONV_START, 0); + ret = bq25890_field_write(bq, F_CONV_RATE, 0); if (ret < 0) goto error; } else if (new_state.online && !bq->state.online) { /* power inserted */ /* enable ADC, to have control of charge current/voltage */ - ret = bq25890_field_write(bq, F_CONV_START, 1); + ret = bq25890_field_write(bq, F_CONV_RATE, 1); if (ret < 0) goto error; } diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c index eb4f4284982fa9eeefac3184ad1b0bf5996be10f..3012eb13a08cb725d665fbccb43a6a5d481822e3 100644 --- a/drivers/power/supply/bq27xxx_battery_i2c.c +++ b/drivers/power/supply/bq27xxx_battery_i2c.c @@ -187,7 +187,8 @@ static int bq27xxx_battery_i2c_probe(struct i2c_client *client, dev_err(&client->dev, "Unable to register IRQ %d error %d\n", client->irq, ret); - return ret; + bq27xxx_battery_teardown(di); + goto err_failed; } } diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c index d956c67d515586eef043625f1e7dc96807da58f3..b6b29ec3d93ec938f571ff7c7a12576a26d2b531 100644 --- a/drivers/power/supply/max17040_battery.c +++ b/drivers/power/supply/max17040_battery.c @@ -482,6 +482,8 @@ static int max17040_probe(struct i2c_client *client, chip->client = client; chip->regmap = devm_regmap_init_i2c(client, &max17040_regmap); chip->pdata = client->dev.platform_data; + if (IS_ERR(chip->regmap)) + return PTR_ERR(chip->regmap); chip_id = (enum chip_id) id->driver_data; if (client->dev.of_node) { ret = max17040_get_of_data(chip); diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c index 69bb0f56e492a2750a460c29ef529901eb246d1a..76b0f45a20b409762b5e996e7194b0e5be6d157b 100644 --- a/drivers/power/supply/max17042_battery.c +++ b/drivers/power/supply/max17042_battery.c @@ -316,7 +316,10 @@ static int max17042_get_property(struct power_supply *psy, val->intval = data * 625 / 8; break; case POWER_SUPPLY_PROP_CAPACITY: - ret = regmap_read(map, MAX17042_RepSOC, &data); + if (chip->pdata->enable_current_sense) + ret = regmap_read(map, MAX17042_RepSOC, &data); + else + ret = regmap_read(map, MAX17042_VFSOC, &data); if (ret < 0) return ret; @@ -851,7 +854,8 @@ static void max17042_set_soc_threshold(struct max17042_chip *chip, u16 off) regmap_read(map, MAX17042_RepSOC, &soc); soc >>= 8; soc_tr = (soc + off) << 8; - soc_tr |= (soc - off); + if (off < soc) + soc_tr |= soc - off; regmap_write(map, MAX17042_SALRT_Th, soc_tr); } @@ -871,6 +875,10 @@ static irqreturn_t max17042_thread_handler(int id, void *dev) max17042_set_soc_threshold(chip, 1); } + /* we implicitly handle all alerts via power_supply_changed */ + regmap_clear_bits(chip->regmap, MAX17042_STATUS, + 0xFFFF & ~(STATUS_POR_BIT | STATUS_BST_BIT)); + power_supply_changed(chip->battery); return IRQ_HANDLED; } diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 668369b26f99a863dc3e0596be55af4b7f68869f..2fe31ea5ae5343dadf5d1e0dfedc7b9bdd6cd562 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -912,6 +912,10 @@ power_supply_find_ocv2cap_table(struct power_supply_battery_info *info, return NULL; for (i = 0; i < POWER_SUPPLY_OCV_TEMP_MAX; i++) { + /* Out of capacity tables */ + if (!info->ocv_table[i]) + break; + temp_diff = abs(info->ocv_temp[i] - temp); if (temp_diff < best_temp_diff) { diff --git a/drivers/power/supply/rt5033_battery.c b/drivers/power/supply/rt5033_battery.c index 9ad0afe83d1b785838b79a3eed293ecb613cb2be..7a23c70f48791b119048d3c76f358ce12f3edc4f 100644 --- a/drivers/power/supply/rt5033_battery.c +++ b/drivers/power/supply/rt5033_battery.c @@ -60,7 +60,7 @@ static int rt5033_battery_get_watt_prop(struct i2c_client *client, regmap_read(battery->regmap, regh, &msb); regmap_read(battery->regmap, regl, &lsb); - ret = ((msb << 4) + (lsb >> 4)) * 1250 / 1000; + ret = ((msb << 4) + (lsb >> 4)) * 1250; return ret; } diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c index ce10ecd41ba0ff497cdf137ac3a8b9686b559719..9492ed09518ff078a225d99a88b1c2d23acb29fc 100644 --- a/drivers/ptp/ptp_pch.c +++ b/drivers/ptp/ptp_pch.c @@ -651,6 +651,7 @@ static const struct pci_device_id pch_ieee1588_pcidev_id[] = { }, {0} }; +MODULE_DEVICE_TABLE(pci, pch_ieee1588_pcidev_id); static SIMPLE_DEV_PM_OPS(pch_pm_ops, pch_suspend, pch_resume); diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 043b5f63b94a167b8641dfbc92699b23cd89a1b0..2c48e55c4104e72485a5ef3e777056876418caf2 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -5862,9 +5862,8 @@ core_initcall(regulator_init); static int regulator_late_cleanup(struct device *dev, void *data) { struct regulator_dev *rdev = dev_to_rdev(dev); - const struct regulator_ops *ops = rdev->desc->ops; struct regulation_constraints *c = rdev->constraints; - int enabled, ret; + int ret; if (c && c->always_on) return 0; @@ -5877,14 +5876,8 @@ static int regulator_late_cleanup(struct device *dev, void *data) if (rdev->use_count) goto unlock; - /* If we can't read the status assume it's always on. */ - if (ops->is_enabled) - enabled = ops->is_enabled(rdev); - else - enabled = 1; - - /* But if reading the status failed, assume that it's off. */ - if (enabled <= 0) + /* If reading the status failed, assume that it's off. */ + if (_regulator_is_enabled(rdev) <= 0) goto unlock; if (have_full_constraints()) { diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index bb944ee5fe3b174bcdc1d3f12e444e1611ebf270..03e146e98abd5eadb82b213824fba5a252f700b8 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -9,6 +9,7 @@ #include #include #include +#include #include struct qcom_rpm_reg { @@ -1107,52 +1108,91 @@ static const struct of_device_id rpm_of_match[] = { }; MODULE_DEVICE_TABLE(of, rpm_of_match); -static int rpm_reg_probe(struct platform_device *pdev) +/** + * rpm_regulator_init_vreg() - initialize all attributes of a qcom_smd-regulator + * @vreg: Pointer to the individual qcom_smd-regulator resource + * @dev: Pointer to the top level qcom_smd-regulator PMIC device + * @node: Pointer to the individual qcom_smd-regulator resource + * device node + * @rpm: Pointer to the rpm bus node + * @pmic_rpm_data: Pointer to a null-terminated array of qcom_smd-regulator + * resources defined for the top level PMIC device + * + * Return: 0 on success, errno on failure + */ +static int rpm_regulator_init_vreg(struct qcom_rpm_reg *vreg, struct device *dev, + struct device_node *node, struct qcom_smd_rpm *rpm, + const struct rpm_regulator_data *pmic_rpm_data) { - const struct rpm_regulator_data *reg; - const struct of_device_id *match; - struct regulator_config config = { }; + struct regulator_config config = {}; + const struct rpm_regulator_data *rpm_data; struct regulator_dev *rdev; + int ret; + + for (rpm_data = pmic_rpm_data; rpm_data->name; rpm_data++) + if (of_node_name_eq(node, rpm_data->name)) + break; + + if (!rpm_data->name) { + dev_err(dev, "Unknown regulator %pOFn\n", node); + return -EINVAL; + } + + vreg->dev = dev; + vreg->rpm = rpm; + vreg->type = rpm_data->type; + vreg->id = rpm_data->id; + + memcpy(&vreg->desc, rpm_data->desc, sizeof(vreg->desc)); + vreg->desc.name = rpm_data->name; + vreg->desc.supply_name = rpm_data->supply; + vreg->desc.owner = THIS_MODULE; + vreg->desc.type = REGULATOR_VOLTAGE; + vreg->desc.of_match = rpm_data->name; + + config.dev = dev; + config.of_node = node; + config.driver_data = vreg; + + rdev = devm_regulator_register(dev, &vreg->desc, &config); + if (IS_ERR(rdev)) { + ret = PTR_ERR(rdev); + dev_err(dev, "%pOFn: devm_regulator_register() failed, ret=%d\n", node, ret); + return ret; + } + + return 0; +} + +static int rpm_reg_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + const struct rpm_regulator_data *vreg_data; + struct device_node *node; struct qcom_rpm_reg *vreg; struct qcom_smd_rpm *rpm; + int ret; rpm = dev_get_drvdata(pdev->dev.parent); if (!rpm) { - dev_err(&pdev->dev, "unable to retrieve handle to rpm\n"); + dev_err(&pdev->dev, "Unable to retrieve handle to rpm\n"); return -ENODEV; } - match = of_match_device(rpm_of_match, &pdev->dev); - if (!match) { - dev_err(&pdev->dev, "failed to match device\n"); + vreg_data = of_device_get_match_data(dev); + if (!vreg_data) return -ENODEV; - } - for (reg = match->data; reg->name; reg++) { + for_each_available_child_of_node(dev->of_node, node) { vreg = devm_kzalloc(&pdev->dev, sizeof(*vreg), GFP_KERNEL); if (!vreg) return -ENOMEM; - vreg->dev = &pdev->dev; - vreg->type = reg->type; - vreg->id = reg->id; - vreg->rpm = rpm; - - memcpy(&vreg->desc, reg->desc, sizeof(vreg->desc)); - - vreg->desc.id = -1; - vreg->desc.owner = THIS_MODULE; - vreg->desc.type = REGULATOR_VOLTAGE; - vreg->desc.name = reg->name; - vreg->desc.supply_name = reg->supply; - vreg->desc.of_match = reg->name; - - config.dev = &pdev->dev; - config.driver_data = vreg; - rdev = devm_regulator_register(&pdev->dev, &vreg->desc, &config); - if (IS_ERR(rdev)) { - dev_err(&pdev->dev, "failed to register %s\n", reg->name); - return PTR_ERR(rdev); + ret = rpm_regulator_init_vreg(vreg, dev, node, rpm, vreg_data); + + if (ret < 0) { + of_node_put(node); + return ret; } } diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c index 7c111bbdc2afa89abefb5a30145020e77cb0e37c..35269f998210534ecc77c9432eb035cc8d174bc7 100644 --- a/drivers/regulator/s5m8767.c +++ b/drivers/regulator/s5m8767.c @@ -850,18 +850,15 @@ static int s5m8767_pmic_probe(struct platform_device *pdev) /* DS4 GPIO */ gpio_direction_output(pdata->buck_ds[2], 0x0); - if (pdata->buck2_gpiodvs || pdata->buck3_gpiodvs || - pdata->buck4_gpiodvs) { - regmap_update_bits(s5m8767->iodev->regmap_pmic, - S5M8767_REG_BUCK2CTRL, 1 << 1, - (pdata->buck2_gpiodvs) ? (1 << 1) : (0 << 1)); - regmap_update_bits(s5m8767->iodev->regmap_pmic, - S5M8767_REG_BUCK3CTRL, 1 << 1, - (pdata->buck3_gpiodvs) ? (1 << 1) : (0 << 1)); - regmap_update_bits(s5m8767->iodev->regmap_pmic, - S5M8767_REG_BUCK4CTRL, 1 << 1, - (pdata->buck4_gpiodvs) ? (1 << 1) : (0 << 1)); - } + regmap_update_bits(s5m8767->iodev->regmap_pmic, + S5M8767_REG_BUCK2CTRL, 1 << 1, + (pdata->buck2_gpiodvs) ? (1 << 1) : (0 << 1)); + regmap_update_bits(s5m8767->iodev->regmap_pmic, + S5M8767_REG_BUCK3CTRL, 1 << 1, + (pdata->buck3_gpiodvs) ? (1 << 1) : (0 << 1)); + regmap_update_bits(s5m8767->iodev->regmap_pmic, + S5M8767_REG_BUCK4CTRL, 1 << 1, + (pdata->buck4_gpiodvs) ? (1 << 1) : (0 << 1)); /* Initialize GPIO DVS registers */ for (i = 0; i < 8; i++) { diff --git a/drivers/remoteproc/qcom_pil_info.c b/drivers/remoteproc/qcom_pil_info.c index 7c007dd7b2000d65d118c8b493ddbca593659e5a..aca21560e20b8ae07fc9c1048b311fad67ff4d7f 100644 --- a/drivers/remoteproc/qcom_pil_info.c +++ b/drivers/remoteproc/qcom_pil_info.c @@ -104,7 +104,7 @@ int qcom_pil_info_store(const char *image, phys_addr_t base, size_t size) return -ENOMEM; found_unused: - memcpy_toio(entry, image, PIL_RELOC_NAME_LEN); + memcpy_toio(entry, image, strnlen(image, PIL_RELOC_NAME_LEN)); found_existing: /* Use two writel() as base is only aligned to 4 bytes on odd entries */ writel(base, entry + PIL_RELOC_NAME_LEN); diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 3cbb67494c44ad521ad98a4e6c2d65ec806e1b13..bd9ccf9b6bd8b3ae9dd172b86c9abd6c29558db7 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -555,9 +555,6 @@ static int rproc_handle_vdev(struct rproc *rproc, void *ptr, /* Initialise vdev subdevice */ snprintf(name, sizeof(name), "vdev%dbuffer", rvdev->index); rvdev->dev.parent = &rproc->dev; - ret = copy_dma_range_map(&rvdev->dev, rproc->dev.parent); - if (ret) - return ret; rvdev->dev.release = rproc_rvdev_release; dev_set_name(&rvdev->dev, "%s#%s", dev_name(rvdev->dev.parent), name); dev_set_drvdata(&rvdev->dev, rvdev); @@ -567,6 +564,11 @@ static int rproc_handle_vdev(struct rproc *rproc, void *ptr, put_device(&rvdev->dev); return ret; } + + ret = copy_dma_range_map(&rvdev->dev, rproc->dev.parent); + if (ret) + goto free_rvdev; + /* Make device dma capable by inheriting from parent's capabilities */ set_dma_ops(&rvdev->dev, get_dma_ops(rproc->dev.parent)); diff --git a/drivers/remoteproc/remoteproc_coredump.c b/drivers/remoteproc/remoteproc_coredump.c index aa45b68c224b15a5f2ebe67b3f0dba0a69284ca0..8ed597a2eef373ab0240d1c523db2a38b5bc227f 100644 --- a/drivers/remoteproc/remoteproc_coredump.c +++ b/drivers/remoteproc/remoteproc_coredump.c @@ -153,8 +153,8 @@ static void rproc_copy_segment(struct rproc *rproc, void *dest, struct rproc_dump_segment *segment, size_t offset, size_t size) { + bool is_iomem = false; void *ptr; - bool is_iomem; if (segment->dump) { segment->dump(rproc, segment, dest, offset, size); diff --git a/drivers/remoteproc/remoteproc_elf_loader.c b/drivers/remoteproc/remoteproc_elf_loader.c index 11423588965abb40198c4d2a5a6d172d38605cc0..3a5d66b6c6e4614d45293fba7ee174de3ed9330e 100644 --- a/drivers/remoteproc/remoteproc_elf_loader.c +++ b/drivers/remoteproc/remoteproc_elf_loader.c @@ -174,8 +174,8 @@ int rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw) u64 filesz = elf_phdr_get_p_filesz(class, phdr); u64 offset = elf_phdr_get_p_offset(class, phdr); u32 type = elf_phdr_get_p_type(class, phdr); + bool is_iomem = false; void *ptr; - bool is_iomem; if (type != PT_LOAD) continue; @@ -216,7 +216,7 @@ int rproc_elf_load_segments(struct rproc *rproc, const struct firmware *fw) /* put the segment where the remote processor expects it */ if (filesz) { if (is_iomem) - memcpy_fromio(ptr, (void __iomem *)(elf_data + offset), filesz); + memcpy_toio((void __iomem *)ptr, elf_data + offset, filesz); else memcpy(ptr, elf_data + offset, filesz); } diff --git a/drivers/reset/reset-brcmstb-rescal.c b/drivers/reset/reset-brcmstb-rescal.c index b6f074d6a65f8c34686712c96a150a32dc9b3386..433fa0c40e477f8770229aeeb48c7c90a63c222e 100644 --- a/drivers/reset/reset-brcmstb-rescal.c +++ b/drivers/reset/reset-brcmstb-rescal.c @@ -38,7 +38,7 @@ static int brcm_rescal_reset_set(struct reset_controller_dev *rcdev, } ret = readl_poll_timeout(base + BRCM_RESCAL_STATUS, reg, - !(reg & BRCM_RESCAL_STATUS_BIT), 100, 1000); + (reg & BRCM_RESCAL_STATUS_BIT), 100, 1000); if (ret) { dev_err(data->dev, "time out on SATA/PCIe rescal\n"); return ret; diff --git a/drivers/reset/reset-socfpga.c b/drivers/reset/reset-socfpga.c index bdd98429619609afa438285469de8ac6b491dc35..f9fa7fde7afb1e12ef2b28b9ba80ac7b3a3ce978 100644 --- a/drivers/reset/reset-socfpga.c +++ b/drivers/reset/reset-socfpga.c @@ -85,3 +85,29 @@ void __init socfpga_reset_init(void) for_each_matching_node(np, socfpga_early_reset_dt_ids) a10_reset_init(np); } + +/* + * The early driver is problematic, because it doesn't register + * itself as a driver. This causes certain device links to prevent + * consumer devices from probing. The hacky solution is to register + * an empty driver, whose only job is to attach itself to the reset + * manager and call probe. + */ +static const struct of_device_id socfpga_reset_dt_ids[] = { + { .compatible = "altr,rst-mgr", }, + { /* sentinel */ }, +}; + +static int reset_simple_probe(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver reset_socfpga_driver = { + .probe = reset_simple_probe, + .driver = { + .name = "socfpga-reset", + .of_match_table = socfpga_reset_dt_ids, + }, +}; +builtin_platform_driver(reset_socfpga_driver); diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c index 4bbbacdbf3bb7702f185f007b638daf83378d489..be90d77c5168d2784a595e9836fdf1031a4f5326 100644 --- a/drivers/rpmsg/rpmsg_char.c +++ b/drivers/rpmsg/rpmsg_char.c @@ -92,7 +92,7 @@ static int rpmsg_eptdev_destroy(struct device *dev, void *data) /* wake up any blocked readers */ wake_up_interruptible(&eptdev->readq); - device_del(&eptdev->dev); + cdev_device_del(&eptdev->cdev, &eptdev->dev); put_device(&eptdev->dev); return 0; @@ -332,7 +332,6 @@ static void rpmsg_eptdev_release_device(struct device *dev) ida_simple_remove(&rpmsg_ept_ida, dev->id); ida_simple_remove(&rpmsg_minor_ida, MINOR(eptdev->dev.devt)); - cdev_del(&eptdev->cdev); kfree(eptdev); } @@ -377,19 +376,13 @@ static int rpmsg_eptdev_create(struct rpmsg_ctrldev *ctrldev, dev->id = ret; dev_set_name(dev, "rpmsg%d", ret); - ret = cdev_add(&eptdev->cdev, dev->devt, 1); + ret = cdev_device_add(&eptdev->cdev, &eptdev->dev); if (ret) goto free_ept_ida; /* We can now rely on the release function for cleanup */ dev->release = rpmsg_eptdev_release_device; - ret = device_add(dev); - if (ret) { - dev_err(dev, "device_add failed: %d\n", ret); - put_device(dev); - } - return ret; free_ept_ida: @@ -458,7 +451,6 @@ static void rpmsg_ctrldev_release_device(struct device *dev) ida_simple_remove(&rpmsg_ctrl_ida, dev->id); ida_simple_remove(&rpmsg_minor_ida, MINOR(dev->devt)); - cdev_del(&ctrldev->cdev); kfree(ctrldev); } @@ -493,19 +485,13 @@ static int rpmsg_chrdev_probe(struct rpmsg_device *rpdev) dev->id = ret; dev_set_name(&ctrldev->dev, "rpmsg_ctrl%d", ret); - ret = cdev_add(&ctrldev->cdev, dev->devt, 1); + ret = cdev_device_add(&ctrldev->cdev, &ctrldev->dev); if (ret) goto free_ctrl_ida; /* We can now rely on the release function for cleanup */ dev->release = rpmsg_ctrldev_release_device; - ret = device_add(dev); - if (ret) { - dev_err(&rpdev->dev, "device_add failed: %d\n", ret); - put_device(dev); - } - dev_set_drvdata(&rpdev->dev, ctrldev); return ret; @@ -531,7 +517,7 @@ static void rpmsg_chrdev_remove(struct rpmsg_device *rpdev) if (ret) dev_warn(&rpdev->dev, "failed to nuke endpoints: %d\n", ret); - device_del(&ctrldev->dev); + cdev_device_del(&ctrldev->cdev, &ctrldev->dev); put_device(&ctrldev->dev); } diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index e6eb5a1195c45de44c2b9509625de2d712440913..2037a4692b84a2eed7084808256a8bd39577eeff 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -513,13 +513,25 @@ static int rpmsg_dev_probe(struct device *dev) err = rpdrv->probe(rpdev); if (err) { dev_err(dev, "%s: failed: %d\n", __func__, err); - if (ept) - rpmsg_destroy_ept(ept); - goto out; + goto destroy_ept; } - if (ept && rpdev->ops->announce_create) + if (ept && rpdev->ops->announce_create) { err = rpdev->ops->announce_create(rpdev); + if (err) { + dev_err(dev, "failed to announce creation\n"); + goto remove_rpdev; + } + } + + return 0; + +remove_rpdev: + if (rpdrv->remove) + rpdrv->remove(rpdev); +destroy_ept: + if (ept) + rpmsg_destroy_ept(ept); out: return err; } diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c index c633319cdb91319d8af218ec4eda30503e59ec4e..58c6382a2807caa293ed30da83ab0d4809c15463 100644 --- a/drivers/rtc/rtc-cmos.c +++ b/drivers/rtc/rtc-cmos.c @@ -463,7 +463,10 @@ static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t) min = t->time.tm_min; sec = t->time.tm_sec; + spin_lock_irq(&rtc_lock); rtc_control = CMOS_READ(RTC_CONTROL); + spin_unlock_irq(&rtc_lock); + if (!(rtc_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { /* Writing 0xff means "don't care" or "match all". */ mon = (mon <= 12) ? bin2bcd(mon) : 0xff; diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c index 2ecd8752b088b6cfe1bdb97fd57511d72778c562..5add637c9ad23ea969379bfb14b71cd6c22c77a4 100644 --- a/drivers/rtc/rtc-mc146818-lib.c +++ b/drivers/rtc/rtc-mc146818-lib.c @@ -83,7 +83,7 @@ unsigned int mc146818_get_time(struct rtc_time *time) time->tm_year += real_year - 72; #endif - if (century > 20) + if (century > 19) time->tm_year += (century - 19) * 100; /* diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index d2f1d8f754bf3a3797b404ef0dc6cc9fca5d26c9..cf8119b6d32044b343d63e71f669190cc38ecfa1 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -330,6 +330,10 @@ static int __init pxa_rtc_probe(struct platform_device *pdev) if (sa1100_rtc->irq_alarm < 0) return -ENXIO; + sa1100_rtc->rtc = devm_rtc_allocate_device(&pdev->dev); + if (IS_ERR(sa1100_rtc->rtc)) + return PTR_ERR(sa1100_rtc->rtc); + pxa_rtc->base = devm_ioremap(dev, pxa_rtc->ress->start, resource_size(pxa_rtc->ress)); if (!pxa_rtc->base) { diff --git a/drivers/rtc/rtc-rv3032.c b/drivers/rtc/rtc-rv3032.c index 3e67f71f42614c30b82e76f9f39ab222ef12a219..9e6166864bd73015e3d43dd9675741314f08fb11 100644 --- a/drivers/rtc/rtc-rv3032.c +++ b/drivers/rtc/rtc-rv3032.c @@ -617,11 +617,11 @@ static int rv3032_clkout_set_rate(struct clk_hw *hw, unsigned long rate, ret = rv3032_enter_eerd(rv3032, &eerd); if (ret) - goto exit_eerd; + return ret; ret = regmap_write(rv3032->regmap, RV3032_CLKOUT1, hfd & 0xff); if (ret) - return ret; + goto exit_eerd; ret = regmap_write(rv3032->regmap, RV3032_CLKOUT2, RV3032_CLKOUT2_OS | FIELD_PREP(RV3032_CLKOUT2_HFD_MSK, hfd >> 8)); diff --git a/drivers/s390/char/tape_std.c b/drivers/s390/char/tape_std.c index 1f5fab617b67901d73527e02e51c3c819f5af8dc..f7e75d9fedf61d83885e4a8e53cfc1121c981354 100644 --- a/drivers/s390/char/tape_std.c +++ b/drivers/s390/char/tape_std.c @@ -53,7 +53,6 @@ int tape_std_assign(struct tape_device *device) { int rc; - struct timer_list timeout; struct tape_request *request; request = tape_alloc_request(2, 11); @@ -70,7 +69,7 @@ tape_std_assign(struct tape_device *device) * So we set up a timeout for this call. */ timer_setup(&request->timer, tape_std_assign_timeout, 0); - mod_timer(&timeout, jiffies + 2 * HZ); + mod_timer(&request->timer, jiffies + msecs_to_jiffies(2000)); rc = tape_do_io_interruptible(device, request); diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c index 305db4173dcf3d6fdb7997bc60d7c7545098ba0e..cf2c3c4c590f9b1225bdbcc86a0c001007e00e6d 100644 --- a/drivers/s390/cio/css.c +++ b/drivers/s390/cio/css.c @@ -433,8 +433,8 @@ static ssize_t dev_busid_show(struct device *dev, struct subchannel *sch = to_subchannel(dev); struct pmcw *pmcw = &sch->schib.pmcw; - if ((pmcw->st == SUBCHANNEL_TYPE_IO || - pmcw->st == SUBCHANNEL_TYPE_MSG) && pmcw->dnv) + if ((pmcw->st == SUBCHANNEL_TYPE_IO && pmcw->dnv) || + (pmcw->st == SUBCHANNEL_TYPE_MSG && pmcw->w)) return sysfs_emit(buf, "0.%x.%04x\n", sch->schid.ssid, pmcw->dev); else diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 0fe7b2f2e7f5239ee660c7779fe461ac8598fb6b..c533d1dadc6bbb0f3f388ac62b01049ac99a72c5 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -825,13 +825,23 @@ EXPORT_SYMBOL_GPL(ccw_device_get_chid); */ void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size) { - return cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size); + void *addr; + + if (!get_device(&cdev->dev)) + return NULL; + addr = cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size); + if (IS_ERR_OR_NULL(addr)) + put_device(&cdev->dev); + return addr; } EXPORT_SYMBOL(ccw_device_dma_zalloc); void ccw_device_dma_free(struct ccw_device *cdev, void *cpu_addr, size_t size) { + if (!cpu_addr) + return; cio_gp_dma_free(cdev->private->dma_pool, cpu_addr, size); + put_device(&cdev->dev); } EXPORT_SYMBOL(ccw_device_dma_free); diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index 639f8d25679c3310e1470405072e7e621cf0cd77..ff0018f5bbe5ef9ce244c3078534aa23d3d10bb7 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -142,6 +142,8 @@ static struct ap_queue_status ap_sm_recv(struct ap_queue *aq) switch (status.response_code) { case AP_RESPONSE_NORMAL: aq->queue_count = max_t(int, 0, aq->queue_count - 1); + if (!status.queue_empty && !aq->queue_count) + aq->queue_count++; if (aq->queue_count > 0) mod_timer(&aq->timeout, jiffies + aq->request_timeout); diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index d24cafe02708fa941346510c6fd8aac21f1110e3..511bf8e0a436c2eb40f2601c6a68edf1724ea887 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -521,6 +521,8 @@ static void zfcp_fc_adisc_handler(void *data) goto out; } + /* re-init to undo drop from zfcp_fc_adisc() */ + port->d_id = ntoh24(adisc_resp->adisc_port_id); /* port is good, unblock rport without going through erp */ zfcp_scsi_schedule_rport_register(port); out: @@ -534,6 +536,7 @@ static int zfcp_fc_adisc(struct zfcp_port *port) struct zfcp_fc_req *fc_req; struct zfcp_adapter *adapter = port->adapter; struct Scsi_Host *shost = adapter->scsi_host; + u32 d_id; int ret; fc_req = kmem_cache_zalloc(zfcp_fc_req_cache, GFP_ATOMIC); @@ -558,7 +561,15 @@ static int zfcp_fc_adisc(struct zfcp_port *port) fc_req->u.adisc.req.adisc_cmd = ELS_ADISC; hton24(fc_req->u.adisc.req.adisc_port_id, fc_host_port_id(shost)); - ret = zfcp_fsf_send_els(adapter, port->d_id, &fc_req->ct_els, + d_id = port->d_id; /* remember as destination for send els below */ + /* + * Force fresh GID_PN lookup on next port recovery. + * Must happen after request setup and before sending request, + * to prevent race with port->d_id re-init in zfcp_fc_adisc_handler(). + */ + port->d_id = 0; + + ret = zfcp_fsf_send_els(adapter, d_id, &fc_req->ct_els, ZFCP_FC_CTELS_TMO); if (ret) kmem_cache_free(zfcp_fc_req_cache, fc_req); diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c index c2c7850ff7b4239d789a954f14cad0b63db523a4..727d8f019eddd957cd04d8c6bacea10a83b3bb1f 100644 --- a/drivers/scsi/advansys.c +++ b/drivers/scsi/advansys.c @@ -3366,8 +3366,8 @@ static void asc_prt_adv_board_info(struct seq_file *m, struct Scsi_Host *shost) shost->host_no); seq_printf(m, - " iop_base 0x%lx, cable_detect: %X, err_code %u\n", - (unsigned long)v->iop_base, + " iop_base 0x%p, cable_detect: %X, err_code %u\n", + v->iop_base, AdvReadWordRegister(iop_base,IOPW_SCSI_CFG1) & CABLE_DETECT, v->err_code); diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 6890bbe04a8c14d431b0cfb4990067c43e01060f..8f47bf83694f6b033ba4031684f12d93fed167a7 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -80,7 +80,7 @@ static int bnx2fc_bind_pcidev(struct bnx2fc_hba *hba); static void bnx2fc_unbind_pcidev(struct bnx2fc_hba *hba); static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, struct device *parent, int npiv); -static void bnx2fc_destroy_work(struct work_struct *work); +static void bnx2fc_port_destroy(struct fcoe_port *port); static struct bnx2fc_hba *bnx2fc_hba_lookup(struct net_device *phys_dev); static struct bnx2fc_interface *bnx2fc_interface_lookup(struct net_device @@ -506,7 +506,8 @@ static int bnx2fc_l2_rcv_thread(void *arg) static void bnx2fc_recv_frame(struct sk_buff *skb) { - u32 fr_len; + u64 crc_err; + u32 fr_len, fr_crc; struct fc_lport *lport; struct fcoe_rcv_info *fr; struct fc_stats *stats; @@ -540,6 +541,11 @@ static void bnx2fc_recv_frame(struct sk_buff *skb) skb_pull(skb, sizeof(struct fcoe_hdr)); fr_len = skb->len - sizeof(struct fcoe_crc_eof); + stats = per_cpu_ptr(lport->stats, get_cpu()); + stats->RxFrames++; + stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; + put_cpu(); + fp = (struct fc_frame *)skb; fc_frame_init(fp); fr_dev(fp) = lport; @@ -622,16 +628,15 @@ static void bnx2fc_recv_frame(struct sk_buff *skb) return; } - stats = per_cpu_ptr(lport->stats, smp_processor_id()); - stats->RxFrames++; - stats->RxWords += fr_len / FCOE_WORD_TO_BYTE; + fr_crc = le32_to_cpu(fr_crc(fp)); - if (le32_to_cpu(fr_crc(fp)) != - ~crc32(~0, skb->data, fr_len)) { - if (stats->InvalidCRCCount < 5) + if (unlikely(fr_crc != ~crc32(~0, skb->data, fr_len))) { + stats = per_cpu_ptr(lport->stats, get_cpu()); + crc_err = (stats->InvalidCRCCount++); + put_cpu(); + if (crc_err < 5) printk(KERN_WARNING PFX "dropping frame with " "CRC error\n"); - stats->InvalidCRCCount++; kfree_skb(skb); return; } @@ -905,9 +910,6 @@ static void bnx2fc_indicate_netevent(void *context, unsigned long event, __bnx2fc_destroy(interface); } mutex_unlock(&bnx2fc_dev_lock); - - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); return; default: @@ -1213,8 +1215,8 @@ static int bnx2fc_vport_destroy(struct fc_vport *vport) mutex_unlock(&n_port->lp_mutex); bnx2fc_free_vport(interface->hba, port->lport); bnx2fc_port_shutdown(port->lport); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); return 0; } @@ -1523,7 +1525,6 @@ static struct fc_lport *bnx2fc_if_create(struct bnx2fc_interface *interface, port->lport = lport; port->priv = interface; port->get_netdev = bnx2fc_netdev; - INIT_WORK(&port->destroy_work, bnx2fc_destroy_work); /* Configure fcoe_port */ rc = bnx2fc_lport_config(lport); @@ -1651,8 +1652,8 @@ static void __bnx2fc_destroy(struct bnx2fc_interface *interface) bnx2fc_interface_cleanup(interface); bnx2fc_stop(interface); list_del(&interface->list); + bnx2fc_port_destroy(port); bnx2fc_interface_put(interface); - queue_work(bnx2fc_wq, &port->destroy_work); } /** @@ -1692,15 +1693,12 @@ netdev_err: return rc; } -static void bnx2fc_destroy_work(struct work_struct *work) +static void bnx2fc_port_destroy(struct fcoe_port *port) { - struct fcoe_port *port; struct fc_lport *lport; - port = container_of(work, struct fcoe_port, destroy_work); lport = port->lport; - - BNX2FC_HBA_DBG(lport, "Entered bnx2fc_destroy_work\n"); + BNX2FC_HBA_DBG(lport, "Entered %s, destroying lport %p\n", __func__, lport); bnx2fc_if_destroy(lport); } @@ -2554,9 +2552,6 @@ static void bnx2fc_ulp_exit(struct cnic_dev *dev) __bnx2fc_destroy(interface); mutex_unlock(&bnx2fc_dev_lock); - /* Ensure ALL destroy work has been completed before return */ - flush_workqueue(bnx2fc_wq); - bnx2fc_ulp_stop(hba); /* unregister cnic device */ if (test_and_clear_bit(BNX2FC_CNIC_REGISTERED, &hba->reg_with_cnic)) diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c index 390b07bf92b9794d0727de8e942c4d5e9eba3834..ccbded3353bd0ea0b8d239b73608fbb075508c2a 100644 --- a/drivers/scsi/csiostor/csio_init.c +++ b/drivers/scsi/csiostor/csio_init.c @@ -1254,3 +1254,4 @@ MODULE_DEVICE_TABLE(pci, csio_pci_tbl); MODULE_VERSION(CSIO_DRV_VERSION); MODULE_FIRMWARE(FW_FNAME_T5); MODULE_FIRMWARE(FW_FNAME_T6); +MODULE_SOFTDEP("pre: cxgb4"); diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c index dc98f51f466fb3e0d3bc51034beaad0e05d5f348..d5ac9389702327fdf6d6767324ec7f8bdaa642cb 100644 --- a/drivers/scsi/csiostor/csio_lnode.c +++ b/drivers/scsi/csiostor/csio_lnode.c @@ -619,7 +619,7 @@ csio_ln_vnp_read_cbfn(struct csio_hw *hw, struct csio_mb *mbp) struct fc_els_csp *csp; struct fc_els_cssp *clsp; enum fw_retval retval; - __be32 nport_id; + __be32 nport_id = 0; retval = FW_CMD_RETVAL_G(ntohl(rsp->alloc_to_len16)); if (retval != FW_SUCCESS) { diff --git a/drivers/scsi/dc395x.c b/drivers/scsi/dc395x.c index fa16894d8758c8a0cd1fdd941c3002922d03a7a4..6cb48ae8e1241ffe4bc9353295bece392a785510 100644 --- a/drivers/scsi/dc395x.c +++ b/drivers/scsi/dc395x.c @@ -4658,6 +4658,7 @@ static int dc395x_init_one(struct pci_dev *dev, const struct pci_device_id *id) /* initialise the adapter and everything we need */ if (adapter_init(acb, io_port_base, io_port_len, irq)) { dprintkl(KERN_INFO, "adapter init failed\n"); + acb = NULL; goto fail; } diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index da3920a19d53d036a4dc4c590ca8960d8413e144..1f3f5fd00179cdb563582484384d4b9be3e91989 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -220,7 +220,8 @@ int scsi_add_host_with_dma(struct Scsi_Host *shost, struct device *dev, goto fail; } - shost->cmd_per_lun = min_t(short, shost->cmd_per_lun, + /* Use min_t(int, ...) in case shost->can_queue exceeds SHRT_MAX */ + shost->cmd_per_lun = min_t(int, shost->cmd_per_lun, shost->can_queue); error = scsi_init_sense_cache(shost); @@ -480,6 +481,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) dev_set_name(&shost->shost_gendev, "host%d", shost->host_no); shost->shost_gendev.bus = &scsi_bus_type; shost->shost_gendev.type = &scsi_host_type; + scsi_enable_async_suspend(&shost->shost_gendev); device_initialize(&shost->shost_dev); shost->shost_dev.parent = &shost->shost_gendev; diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 30d27b6706746497ace6d23b6e8361e9e1f9ecbd..d4e66c595eb8764fc81830674ce298b1230b348c 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2950,6 +2950,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_session *session = conn->session; + char *tmp_persistent_address = conn->persistent_address; + char *tmp_local_ipaddr = conn->local_ipaddr; del_timer_sync(&conn->transport_timer); @@ -2971,8 +2973,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) spin_lock_bh(&session->frwd_lock); free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); - kfree(conn->persistent_address); - kfree(conn->local_ipaddr); /* regular RX path uses back_lock */ spin_lock_bh(&session->back_lock); kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task, @@ -2984,6 +2984,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) mutex_unlock(&session->eh_mutex); iscsi_destroy_conn(cls_conn); + kfree(tmp_persistent_address); + kfree(tmp_local_ipaddr); } EXPORT_SYMBOL_GPL(iscsi_conn_teardown); diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c index 161c9b387da76d61fcd26067d165fd85a5faf586..2661a3727e682616db1981bb9a917905ea774041 100644 --- a/drivers/scsi/libsas/sas_discover.c +++ b/drivers/scsi/libsas/sas_discover.c @@ -8,7 +8,6 @@ #include #include -#include #include #include #include "sas_internal.h" diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 93e507677bdcb35934d8a563cb0d5d36df224657..03bc472f302a290607cfde8ac7cddb3bd6389776 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -374,6 +374,7 @@ struct lpfc_vport { #define FC_VPORT_LOGO_RCVD 0x200 /* LOGO received on vport */ #define FC_RSCN_DISCOVERY 0x400 /* Auth all devices after RSCN */ #define FC_LOGO_RCVD_DID_CHNG 0x800 /* FDISC on phys port detect DID chng*/ +#define FC_PT2PT_NO_NVME 0x1000 /* Don't send NVME PRLI */ #define FC_SCSI_SCAN_TMO 0x4000 /* scsi scan timer running */ #define FC_ABORT_DISCOVERY 0x8000 /* we want to abort discovery */ #define FC_NDISC_ACTIVE 0x10000 /* NPort discovery active */ @@ -763,7 +764,6 @@ struct lpfc_hba { #define HBA_DEVLOSS_TMO 0x2000 /* HBA in devloss timeout */ #define HBA_RRQ_ACTIVE 0x4000 /* process the rrq active list */ #define HBA_IOQ_FLUSH 0x8000 /* FCP/NVME I/O queues being flushed */ -#define HBA_FW_DUMP_OP 0x10000 /* Skips fn reset before FW dump */ #define HBA_RECOVERABLE_UE 0x20000 /* Firmware supports recoverable UE */ #define HBA_FORCED_LINK_SPEED 0x40000 /* * Firmware supports Forced Link Speed @@ -772,6 +772,7 @@ struct lpfc_hba { #define HBA_FLOGI_ISSUED 0x100000 /* FLOGI was issued */ #define HBA_DEFER_FLOGI 0x800000 /* Defer FLOGI till read_sparm cmpl */ + struct completion *fw_dump_cmpl; /* cmpl event tracker for fw_dump */ uint32_t fcp_ring_in_use; /* When polling test if intr-hndlr active*/ struct lpfc_dmabuf slim2p; @@ -898,6 +899,16 @@ struct lpfc_hba { uint32_t cfg_hostmem_hgp; uint32_t cfg_log_verbose; uint32_t cfg_enable_fc4_type; +#define LPFC_ENABLE_FCP 1 +#define LPFC_ENABLE_NVME 2 +#define LPFC_ENABLE_BOTH 3 +#if (IS_ENABLED(CONFIG_NVME_FC)) +#define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_BOTH +#define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_BOTH +#else +#define LPFC_MAX_ENBL_FC4_TYPE LPFC_ENABLE_FCP +#define LPFC_DEF_ENBL_FC4_TYPE LPFC_ENABLE_FCP +#endif uint32_t cfg_aer_support; uint32_t cfg_sriov_nr_virtfn; uint32_t cfg_request_firmware_upgrade; @@ -918,9 +929,6 @@ struct lpfc_hba { uint32_t cfg_ras_fwlog_func; uint32_t cfg_enable_bbcr; /* Enable BB Credit Recovery */ uint32_t cfg_enable_dpp; /* Enable Direct Packet Push */ -#define LPFC_ENABLE_FCP 1 -#define LPFC_ENABLE_NVME 2 -#define LPFC_ENABLE_BOTH 3 uint32_t cfg_enable_pbde; struct nvmet_fc_target_port *targetport; lpfc_vpd_t vpd; /* vital product data */ diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 2c59a5bf353907237c648794608b5510c909e4d0..f0d1ced6301624f71aa274f56dc26cb77f1191c0 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -1142,6 +1142,9 @@ lpfc_issue_lip(struct Scsi_Host *shost) pmboxq->u.mb.mbxCommand = MBX_DOWN_LINK; pmboxq->u.mb.mbxOwner = OWN_HOST; + if ((vport->fc_flag & FC_PT2PT) && (vport->fc_flag & FC_PT2PT_NO_NVME)) + vport->fc_flag &= ~FC_PT2PT_NO_NVME; + mbxstatus = lpfc_sli_issue_mbox_wait(phba, pmboxq, LPFC_MBOX_TMO * 2); if ((mbxstatus == MBX_SUCCESS) && @@ -1536,25 +1539,25 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode) before_fc_flag = phba->pport->fc_flag; sriov_nr_virtfn = phba->cfg_sriov_nr_virtfn; - /* Disable SR-IOV virtual functions if enabled */ - if (phba->cfg_sriov_nr_virtfn) { - pci_disable_sriov(pdev); - phba->cfg_sriov_nr_virtfn = 0; - } + if (opcode == LPFC_FW_DUMP) { + init_completion(&online_compl); + phba->fw_dump_cmpl = &online_compl; + } else { + /* Disable SR-IOV virtual functions if enabled */ + if (phba->cfg_sriov_nr_virtfn) { + pci_disable_sriov(pdev); + phba->cfg_sriov_nr_virtfn = 0; + } - if (opcode == LPFC_FW_DUMP) - phba->hba_flag |= HBA_FW_DUMP_OP; + status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE); - status = lpfc_do_offline(phba, LPFC_EVT_OFFLINE); + if (status != 0) + return status; - if (status != 0) { - phba->hba_flag &= ~HBA_FW_DUMP_OP; - return status; + /* wait for the device to be quiesced before firmware reset */ + msleep(100); } - /* wait for the device to be quiesced before firmware reset */ - msleep(100); - reg_val = readl(phba->sli4_hba.conf_regs_memmap_p + LPFC_CTL_PDEV_CTL_OFFSET); @@ -1583,24 +1586,42 @@ lpfc_sli4_pdev_reg_request(struct lpfc_hba *phba, uint32_t opcode) lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "3153 Fail to perform the requested " "access: x%x\n", reg_val); + if (phba->fw_dump_cmpl) + phba->fw_dump_cmpl = NULL; return rc; } /* keep the original port state */ - if (before_fc_flag & FC_OFFLINE_MODE) - goto out; - - init_completion(&online_compl); - job_posted = lpfc_workq_post_event(phba, &status, &online_compl, - LPFC_EVT_ONLINE); - if (!job_posted) + if (before_fc_flag & FC_OFFLINE_MODE) { + if (phba->fw_dump_cmpl) + phba->fw_dump_cmpl = NULL; goto out; + } - wait_for_completion(&online_compl); + /* Firmware dump will trigger an HA_ERATT event, and + * lpfc_handle_eratt_s4 routine already handles bringing the port back + * online. + */ + if (opcode == LPFC_FW_DUMP) { + wait_for_completion(phba->fw_dump_cmpl); + } else { + init_completion(&online_compl); + job_posted = lpfc_workq_post_event(phba, &status, &online_compl, + LPFC_EVT_ONLINE); + if (!job_posted) + goto out; + wait_for_completion(&online_compl); + } out: /* in any case, restore the virtual functions enabled as before */ if (sriov_nr_virtfn) { + /* If fw_dump was performed, first disable to clean up */ + if (opcode == LPFC_FW_DUMP) { + pci_disable_sriov(pdev); + phba->cfg_sriov_nr_virtfn = 0; + } + sriov_err = lpfc_sli_probe_sriov_nr_virtfn(phba, sriov_nr_virtfn); if (!sriov_err) @@ -3779,8 +3800,8 @@ LPFC_ATTR_R(nvmet_mrq_post, * 3 - register both FCP and NVME * Supported values are [1,3]. Default value is 3 */ -LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_BOTH, - LPFC_ENABLE_FCP, LPFC_ENABLE_BOTH, +LPFC_ATTR_R(enable_fc4_type, LPFC_DEF_ENBL_FC4_TYPE, + LPFC_ENABLE_FCP, LPFC_MAX_ENBL_FC4_TYPE, "Enable FC4 Protocol support - FCP / NVME"); /* diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index b89c5513243e8e81dfcdfe0a7c1949b5eef154c6..beaf3a8d206f8096396a95600652262df7a65917 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2956,8 +2956,8 @@ lpfc_debugfs_nvmeio_trc_write(struct file *file, const char __user *buf, char mybuf[64]; char *pbuf; - if (nbytes > 64) - nbytes = 64; + if (nbytes > 63) + nbytes = 63; memset(mybuf, 0, sizeof(mybuf)); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 3d9889b3d5c8a6ce487d4137501f9bb2ccbcee0b..387b0cd1ea18f8624c49ddd521a4a1db64ece118 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1067,7 +1067,8 @@ stop_rr_fcf_flogi: /* FLOGI failed, so there is no fabric */ spin_lock_irq(shost->host_lock); - vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP); + vport->fc_flag &= ~(FC_FABRIC | FC_PUBLIC_LOOP | + FC_PT2PT_NO_NVME); spin_unlock_irq(shost->host_lock); /* If private loop, then allow max outstanding els to be @@ -3945,6 +3946,23 @@ lpfc_els_retry(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, /* Added for Vendor specifc support * Just keep retrying for these Rsn / Exp codes */ + if ((vport->fc_flag & FC_PT2PT) && + cmd == ELS_CMD_NVMEPRLI) { + switch (stat.un.b.lsRjtRsnCode) { + case LSRJT_UNABLE_TPC: + case LSRJT_INVALID_CMD: + case LSRJT_LOGICAL_ERR: + case LSRJT_CMD_UNSUPPORTED: + lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, + "0168 NVME PRLI LS_RJT " + "reason %x port doesn't " + "support NVME, disabling NVME\n", + stat.un.b.lsRjtRsnCode); + retry = 0; + vport->fc_flag |= FC_PT2PT_NO_NVME; + goto out_retry; + } + } switch (stat.un.b.lsRjtRsnCode) { case LSRJT_UNABLE_TPC: /* The driver has a VALID PLOGI but the rport has diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index f4a672e5497161e1cb4666244e0c4a05e2937ffb..68ff233f936e5bde73c1718e48a3c9ebbea09161 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -635,10 +635,16 @@ lpfc_work_done(struct lpfc_hba *phba) if (phba->pci_dev_grp == LPFC_PCI_DEV_OC) lpfc_sli4_post_async_mbox(phba); - if (ha_copy & HA_ERATT) + if (ha_copy & HA_ERATT) { /* Handle the error attention event */ lpfc_handle_eratt(phba); + if (phba->fw_dump_cmpl) { + complete(phba->fw_dump_cmpl); + phba->fw_dump_cmpl = NULL; + } + } + if (ha_copy & HA_MBATT) lpfc_sli_handle_mb_event(phba); diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 37612299a34a14ca4825cee107e19b3e04e9035c..1149bfc42fe6442e955912d28062c9bb312280b8 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -1998,7 +1998,7 @@ lpfc_handle_eratt_s4(struct lpfc_hba *phba) } if (reg_err1 == SLIPORT_ERR1_REG_ERR_CODE_2 && reg_err2 == SLIPORT_ERR2_REG_FW_RESTART) { - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, + lpfc_printf_log(phba, KERN_ERR, LOG_SLI, "3143 Port Down: Firmware Update " "Detected\n"); en_rn_msg = false; diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 6afcb1426e35724f48d61219e8776110204d2b70..e33f752318c198657b6d534c26f0c0fea883fe3f 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -2010,8 +2010,9 @@ lpfc_cmpl_reglogin_reglogin_issue(struct lpfc_vport *vport, * is configured try it. */ ndlp->nlp_fc4_type |= NLP_FC4_FCP; - if ((vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { + if ((!(vport->fc_flag & FC_PT2PT_NO_NVME)) && + (vport->cfg_enable_fc4_type == LPFC_ENABLE_BOTH || + vport->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) { ndlp->nlp_fc4_type |= NLP_FC4_NVME; /* We need to update the localport also */ lpfc_nvme_update_localport(vport); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 990b700de6892a8d5e260665393b29231037a7f8..a50f870c5f7250efc2106df130fc8cd509185f56 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -4629,12 +4629,6 @@ lpfc_sli4_brdreset(struct lpfc_hba *phba) phba->fcf.fcf_flag = 0; spin_unlock_irq(&phba->hbalock); - /* SLI4 INTF 2: if FW dump is being taken skip INIT_PORT */ - if (phba->hba_flag & HBA_FW_DUMP_OP) { - phba->hba_flag &= ~HBA_FW_DUMP_OP; - return rc; - } - /* Now physically reset the device */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "0389 Performing PCI function reset!\n"); @@ -7378,6 +7372,7 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) struct lpfc_vport *vport = phba->pport; struct lpfc_dmabuf *mp; struct lpfc_rqb *rqbp; + u32 flg; /* Perform a PCI function reset to start from clean */ rc = lpfc_pci_function_reset(phba); @@ -7391,7 +7386,17 @@ lpfc_sli4_hba_setup(struct lpfc_hba *phba) else { spin_lock_irq(&phba->hbalock); phba->sli.sli_flag |= LPFC_SLI_ACTIVE; + flg = phba->sli.sli_flag; spin_unlock_irq(&phba->hbalock); + /* Allow a little time after setting SLI_ACTIVE for any polled + * MBX commands to complete via BSG. + */ + for (i = 0; i < 50 && (flg & LPFC_SLI_MBOX_ACTIVE); i++) { + msleep(20); + spin_lock_irq(&phba->hbalock); + flg = phba->sli.sli_flag; + spin_unlock_irq(&phba->hbalock); + } } lpfc_sli4_dip(phba); @@ -8928,7 +8933,7 @@ lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq, "(%d):2541 Mailbox command x%x " "(x%x/x%x) failure: " "mqe_sta: x%x mcqe_sta: x%x/x%x " - "Data: x%x x%x\n,", + "Data: x%x x%x\n", mboxq->vport ? mboxq->vport->vpi : 0, mboxq->u.mb.mbxCommand, lpfc_sli_config_mbox_subsys_get(phba, @@ -8962,7 +8967,7 @@ lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq, "(%d):2597 Sync Mailbox command " "x%x (x%x/x%x) failure: " "mqe_sta: x%x mcqe_sta: x%x/x%x " - "Data: x%x x%x\n,", + "Data: x%x x%x\n", mboxq->vport ? mboxq->vport->vpi : 0, mboxq->u.mb.mbxCommand, lpfc_sli_config_mbox_subsys_get(phba, @@ -12408,6 +12413,7 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba) uint32_t uerr_sta_hi, uerr_sta_lo; uint32_t if_type, portsmphr; struct lpfc_register portstat_reg; + u32 logmask; /* * For now, use the SLI4 device internal unrecoverable error @@ -12458,7 +12464,12 @@ lpfc_sli4_eratt_read(struct lpfc_hba *phba) readl(phba->sli4_hba.u.if_type2.ERR1regaddr); phba->work_status[1] = readl(phba->sli4_hba.u.if_type2.ERR2regaddr); - lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT, + logmask = LOG_TRACE_EVENT; + if (phba->work_status[0] == + SLIPORT_ERR1_REG_ERR_CODE_2 && + phba->work_status[1] == SLIPORT_ERR2_REG_FW_RESTART) + logmask = LOG_SLI; + lpfc_printf_log(phba, KERN_ERR, logmask, "2885 Port Status Event: " "port status reg 0x%x, " "port smphr reg 0x%x, " @@ -20080,6 +20091,7 @@ lpfc_drain_txq(struct lpfc_hba *phba) fail_msg, piocbq->iotag, piocbq->sli4_xritag); list_add_tail(&piocbq->list, &completions); + fail_msg = NULL; } spin_unlock_irqrestore(&pring->ring_lock, iflags); } diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 31c384108bc9c209298c4b6181817941f3ee4ee2..8418b59b3743b9964bd472b3056448b4f91fcea4 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -3675,7 +3675,7 @@ _scsih_ublock_io_device(struct MPT3SAS_ADAPTER *ioc, u64 sas_address) shost_for_each_device(sdev, ioc->shost) { sas_device_priv_data = sdev->hostdata; - if (!sas_device_priv_data) + if (!sas_device_priv_data || !sas_device_priv_data->sas_target) continue; if (sas_device_priv_data->sas_target->sas_address != sas_address) diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 78c41bbf67562c03c514ff342568071041ed7ddd..e6a6678967e52e0b4a181d3510b8b56f5fa4ff1f 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -2272,7 +2272,8 @@ static void myrs_cleanup(struct myrs_hba *cs) myrs_unmap(cs); if (cs->mmio_base) { - cs->disable_intr(cs); + if (cs->disable_intr) + cs->disable_intr(cs); iounmap(cs->mmio_base); cs->mmio_base = NULL; } diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index 2114d2dd3501a7365bbab84911369011367dd557..9b318958d78cc393a2a33c5db0b73d401e9d6aae 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -1323,7 +1323,9 @@ int pm8001_mpi_build_cmd(struct pm8001_hba_info *pm8001_ha, int q_index = circularQ - pm8001_ha->inbnd_q_tbl; int rv = -1; - WARN_ON(q_index >= PM8001_MAX_INB_NUM); + if (WARN_ON(q_index >= pm8001_ha->max_q_num)) + return -EINVAL; + spin_lock_irqsave(&circularQ->iq_lock, flags); rv = pm8001_mpi_msg_free_get(circularQ, pm8001_ha->iomb_size, &pMessage); @@ -3107,7 +3109,7 @@ pm8001_mpi_get_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) * fw_control_context->usrAddr */ complete(pm8001_ha->nvmd_completion); - pm8001_dbg(pm8001_ha, MSG, "Set nvm data complete!\n"); + pm8001_dbg(pm8001_ha, MSG, "Get nvmd data complete!\n"); ccb->task = NULL; ccb->ccb_tag = 0xFFFFFFFF; pm8001_tag_free(pm8001_ha, tag); diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c index 13b8ddec61894f5d3aaeae3bf56fd9180cd81e2e..01eb2ade207092d365e5a9d7318bd345ada41ea1 100644 --- a/drivers/scsi/pm8001/pm8001_init.c +++ b/drivers/scsi/pm8001/pm8001_init.c @@ -280,12 +280,12 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha, if (rc) { pm8001_dbg(pm8001_ha, FAIL, "pm8001_setup_irq failed [ret: %d]\n", rc); - goto err_out_shost; + goto err_out; } /* Request Interrupt */ rc = pm8001_request_irq(pm8001_ha); if (rc) - goto err_out_shost; + goto err_out; count = pm8001_ha->max_q_num; /* Queues are chosen based on the number of cores/msix availability */ @@ -419,8 +419,6 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha, pm8001_tag_init(pm8001_ha); return 0; -err_out_shost: - scsi_remove_host(pm8001_ha->shost); err_out_nodev: for (i = 0; i < pm8001_ha->max_memcnt; i++) { if (pm8001_ha->memoryMap.region[i].virt_ptr != NULL) { diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index c3bb58885033b33889a40761bfffd0f80283b9c4..75ac4d86d9c4bc43995cab5410319bac0c0a6816 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -753,8 +753,13 @@ static int pm8001_exec_internal_tmf_task(struct domain_device *dev, res = -TMF_RESP_FUNC_FAILED; /* Even TMF timed out, return direct. */ if (task->task_state_flags & SAS_TASK_STATE_ABORTED) { + struct pm8001_ccb_info *ccb = task->lldd_task; + pm8001_dbg(pm8001_ha, FAIL, "TMF task[%x]timeout.\n", tmf->tmf); + + if (ccb) + ccb->task = NULL; goto ex_err; } diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index a203a4fc2674a0b6aac60be64d0fcf7a81580737..2a3ce4680734be48f141037bdccbc954337df186 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -2133,9 +2133,9 @@ mpi_ssp_completion(struct pm8001_hba_info *pm8001_ha , void *piomb) pm8001_dbg(pm8001_ha, FAIL, "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n", t, status, ts->resp, ts->stat); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); if (t->slow_task) complete(&t->slow_task->completion); - pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); } else { spin_unlock_irqrestore(&t->task_state_lock, flags); pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); @@ -2726,9 +2726,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_dbg(pm8001_ha, FAIL, "task 0x%p done with io_status 0x%x resp 0x%x stat 0x%x but aborted by upper layer!\n", t, status, ts->resp, ts->stat); + pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); if (t->slow_task) complete(&t->slow_task->completion); - pm8001_ccb_task_free(pm8001_ha, t, ccb, tag); } else { spin_unlock_irqrestore(&t->task_state_lock, flags); pm8001_ccb_task_free_done(pm8001_ha, t, ccb, tag); @@ -4057,10 +4057,22 @@ static int process_oq(struct pm8001_hba_info *pm8001_ha, u8 vec) unsigned long flags; u32 regval; + /* + * Fatal errors are programmed to be signalled in irq vector + * pm8001_ha->max_q_num - 1 through pm8001_ha->main_cfg_tbl.pm80xx_tbl. + * fatal_err_interrupt + */ if (vec == (pm8001_ha->max_q_num - 1)) { + u32 mipsall_ready; + + if (pm8001_ha->chip_id == chip_8008 || + pm8001_ha->chip_id == chip_8009) + mipsall_ready = SCRATCH_PAD_MIPSALL_READY_8PORT; + else + mipsall_ready = SCRATCH_PAD_MIPSALL_READY_16PORT; + regval = pm8001_cr32(pm8001_ha, 0, MSGU_SCRATCH_PAD_1); - if ((regval & SCRATCH_PAD_MIPSALL_READY) != - SCRATCH_PAD_MIPSALL_READY) { + if ((regval & mipsall_ready) != mipsall_ready) { pm8001_ha->controller_fatal_error = true; pm8001_dbg(pm8001_ha, FAIL, "Firmware Fatal error! Regval:0x%x\n", diff --git a/drivers/scsi/pm8001/pm80xx_hwi.h b/drivers/scsi/pm8001/pm80xx_hwi.h index 701951a0f715bb18f943df9b3771d875a330eb24..0dfe9034f7e7f69b58bee99d0c3294de3ca2a0f2 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.h +++ b/drivers/scsi/pm8001/pm80xx_hwi.h @@ -1391,8 +1391,12 @@ typedef struct SASProtocolTimerConfig SASProtocolTimerConfig_t; #define SCRATCH_PAD_BOOT_LOAD_SUCCESS 0x0 #define SCRATCH_PAD_IOP0_READY 0xC00 #define SCRATCH_PAD_IOP1_READY 0x3000 -#define SCRATCH_PAD_MIPSALL_READY (SCRATCH_PAD_IOP1_READY | \ +#define SCRATCH_PAD_MIPSALL_READY_16PORT (SCRATCH_PAD_IOP1_READY | \ SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_ILA_READY | \ + SCRATCH_PAD_RAAE_READY) +#define SCRATCH_PAD_MIPSALL_READY_8PORT (SCRATCH_PAD_IOP0_READY | \ + SCRATCH_PAD_ILA_READY | \ SCRATCH_PAD_RAAE_READY) /* boot loader state */ diff --git a/drivers/scsi/qedf/qedf_io.c b/drivers/scsi/qedf/qedf_io.c index 63f99f4eeed972bb8d149d599050bc876fde7db1..472374d83cede79f44e41c65d9511ec3c758820b 100644 --- a/drivers/scsi/qedf/qedf_io.c +++ b/drivers/scsi/qedf/qedf_io.c @@ -2268,6 +2268,7 @@ process_els: io_req->tm_flags == FCP_TMF_TGT_RESET) { clear_bit(QEDF_CMD_OUTSTANDING, &io_req->flags); io_req->sc_cmd = NULL; + kref_put(&io_req->refcount, qedf_release_cmd); complete(&io_req->tm_done); } diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c index c63dcc39f76c2ae5df87e67f7b847f490c81bce8..e64457f53da86da662c9b7ee4b3ccfa844b04c70 100644 --- a/drivers/scsi/qedf/qedf_main.c +++ b/drivers/scsi/qedf/qedf_main.c @@ -1859,6 +1859,7 @@ static int qedf_vport_create(struct fc_vport *vport, bool disabled) vport_qedf->cmd_mgr = base_qedf->cmd_mgr; init_completion(&vport_qedf->flogi_compl); INIT_LIST_HEAD(&vport_qedf->fcports); + INIT_DELAYED_WORK(&vport_qedf->stag_work, qedf_stag_change_work); rc = qedf_vport_libfc_config(vport, vn_port); if (rc) { diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 6a2c4a6fcded86fa4112099ea1d18288d25af7cd..e40a37236aa10a5d0c2728d957122f3b858d56da 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -1862,6 +1862,18 @@ qla2x00_port_speed_store(struct device *dev, struct device_attribute *attr, return strlen(buf); } +static const struct { + u16 rate; + char *str; +} port_speed_str[] = { + { PORT_SPEED_4GB, "4" }, + { PORT_SPEED_8GB, "8" }, + { PORT_SPEED_16GB, "16" }, + { PORT_SPEED_32GB, "32" }, + { PORT_SPEED_64GB, "64" }, + { PORT_SPEED_10GB, "10" }, +}; + static ssize_t qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr, char *buf) @@ -1869,7 +1881,8 @@ qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr, struct scsi_qla_host *vha = shost_priv(dev_to_shost(dev)); struct qla_hw_data *ha = vha->hw; ssize_t rval; - char *spd[7] = {"0", "0", "0", "4", "8", "16", "32"}; + u16 i; + char *speed = "Unknown"; rval = qla2x00_get_data_rate(vha); if (rval != QLA_SUCCESS) { @@ -1878,7 +1891,14 @@ qla2x00_port_speed_show(struct device *dev, struct device_attribute *attr, return -EINVAL; } - return scnprintf(buf, PAGE_SIZE, "%s\n", spd[ha->link_data_rate]); + for (i = 0; i < ARRAY_SIZE(port_speed_str); i++) { + if (port_speed_str[i].rate != ha->link_data_rate) + continue; + speed = port_speed_str[i].str; + break; + } + + return scnprintf(buf, PAGE_SIZE, "%s\n", speed); } /* ----- */ diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index 7fa085969a63a7bf27acb39b9d13833633309271..1fd292a6ac881a9f882867a193847cba3b4cc5ff 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -414,7 +414,7 @@ done_unmap_sg: goto done_free_fcport; done_free_fcport: - if (bsg_request->msgcode == FC_BSG_RPT_ELS) + if (bsg_request->msgcode != FC_BSG_RPT_ELS) qla2x00_free_fcport(fcport); done: return rval; diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index 144a893e7335b2782fb163437af3a8d3ca8d05fc..00b4d033b07a945ecaf980f190883585169d0793 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -12,8 +12,7 @@ * ---------------------------------------------------------------------- * | Module Init and Probe | 0x0199 | | * | Mailbox commands | 0x1206 | 0x11a5-0x11ff | - * | Device Discovery | 0x2134 | 0x210e-0x2116 | - * | | | 0x211a | + * | Device Discovery | 0x2134 | 0x210e-0x2115 | * | | | 0x211c-0x2128 | * | | | 0x212c-0x2134 | * | Queue Command and IO tracing | 0x3074 | 0x300b | @@ -2478,6 +2477,9 @@ ql_dbg(uint level, scsi_qla_host_t *vha, uint id, const char *fmt, ...) struct va_format vaf; char pbuf[64]; + if (!ql_mask_match(level) && !trace_ql_dbg_log_enabled()) + return; + va_start(va, fmt); vaf.fmt = fmt; diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index 4f0486fe30dd7f7386ff29914be89a03c0b25928..e1fd91a58120240b281842dff166e3f10f3d94b5 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -3913,7 +3913,6 @@ struct qla_hw_data { uint32_t scm_supported_f:1; /* Enabled in Driver */ uint32_t scm_enabled:1; - uint32_t max_req_queue_warned:1; uint32_t plogi_template_valid:1; } flags; diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index e39b4f2da73a00f495f4b423cf1d1c13372cd655..3bc1850273421f49f61de8955572a6dd8c5e1904 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -158,7 +158,6 @@ extern int ql2xasynctmfenable; extern int ql2xgffidenable; extern int ql2xenabledif; extern int ql2xenablehba_err_chk; -extern int ql2xtargetreset; extern int ql2xdontresethba; extern uint64_t ql2xmaxlun; extern int ql2xmdcapmask; @@ -791,7 +790,6 @@ extern void qlafx00_abort_iocb(srb_t *, struct abort_iocb_entry_fx00 *); extern void qlafx00_fxdisc_iocb(srb_t *, struct fxdisc_entry_fx00 *); extern void qlafx00_timer_routine(scsi_qla_host_t *); extern int qlafx00_rescan_isp(scsi_qla_host_t *); -extern int qlafx00_loop_reset(scsi_qla_host_t *vha); /* qla82xx related functions */ diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index b7aac3116f2db6d12f739edaf81e0efe4319e75e..fdae25ec554d9f1ba2ee6b35ba52dea859f76e5f 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -976,8 +976,6 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) sp->name, res, sp->u.iocb_cmd.u.mbx.in_mb[1], sp->u.iocb_cmd.u.mbx.in_mb[2]); - if (res == QLA_FUNCTION_TIMEOUT) - return; sp->fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE); memset(&ea, 0, sizeof(ea)); @@ -1015,8 +1013,8 @@ static void qla24xx_async_gnl_sp_done(srb_t *sp, int res) spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); list_for_each_entry_safe(fcport, tf, &h, gnl_entry) { - list_del_init(&fcport->gnl_entry); spin_lock_irqsave(&vha->hw->tgt.sess_lock, flags); + list_del_init(&fcport->gnl_entry); fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags); ea.fcport = fcport; @@ -1708,10 +1706,52 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea) fc_port_t *fcport; unsigned long flags; - fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1); - if (fcport) { - fcport->scan_needed = 1; - fcport->rscn_gen++; + switch (ea->id.b.rsvd_1) { + case RSCN_PORT_ADDR: + fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1); + if (fcport) { + if (fcport->flags & FCF_FCP2_DEVICE) { + ql_dbg(ql_dbg_disc, vha, 0x2115, + "Delaying session delete for FCP2 portid=%06x %8phC ", + fcport->d_id.b24, fcport->port_name); + return; + } + fcport->scan_needed = 1; + fcport->rscn_gen++; + } + break; + case RSCN_AREA_ADDR: + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->flags & FCF_FCP2_DEVICE) + continue; + + if ((ea->id.b24 & 0xffff00) == (fcport->d_id.b24 & 0xffff00)) { + fcport->scan_needed = 1; + fcport->rscn_gen++; + } + } + break; + case RSCN_DOM_ADDR: + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->flags & FCF_FCP2_DEVICE) + continue; + + if ((ea->id.b24 & 0xff0000) == (fcport->d_id.b24 & 0xff0000)) { + fcport->scan_needed = 1; + fcport->rscn_gen++; + } + } + break; + case RSCN_FAB_ADDR: + default: + list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->flags & FCF_FCP2_DEVICE) + continue; + + fcport->scan_needed = 1; + fcport->rscn_gen++; + } + break; } spin_lock_irqsave(&vha->work_lock, flags); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index a24b82de4aab7f7ee808ecd1a686dc73d0f74ed7..5e040b6debc84ef224dbf7e05ffac1dc931eea16 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -4158,6 +4158,8 @@ skip_msi: ql_dbg(ql_dbg_init, vha, 0x0125, "INTa mode: Enabled.\n"); ha->flags.mr_intr_valid = 1; + /* Set max_qpair to 0, as MSI-X and MSI in not enabled */ + ha->max_qpairs = 0; } clear_risc_ints: diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 4ebd8851a0c9fa90f4dda2512a561a1822409718..734745f450211a921beda952e2371011a6f663d5 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -1650,10 +1650,8 @@ qla2x00_get_adapter_id(scsi_qla_host_t *vha, uint16_t *id, uint8_t *al_pa, mcp->in_mb |= MBX_13|MBX_12|MBX_11|MBX_10; if (IS_FWI2_CAPABLE(vha->hw)) mcp->in_mb |= MBX_19|MBX_18|MBX_17|MBX_16; - if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw)) { - mcp->in_mb |= MBX_15; - mcp->out_mb |= MBX_7|MBX_21|MBX_22|MBX_23; - } + if (IS_QLA27XX(vha->hw) || IS_QLA28XX(vha->hw)) + mcp->in_mb |= MBX_15|MBX_21|MBX_22|MBX_23; mcp->tov = MBX_TOV_SECONDS; mcp->flags = 0; diff --git a/drivers/scsi/qla2xxx/qla_mr.c b/drivers/scsi/qla2xxx/qla_mr.c index ca73066853255658aa85b022c78c3089c2973dd2..7178646ee0f06a87f2801b75a281378bd1974e9a 100644 --- a/drivers/scsi/qla2xxx/qla_mr.c +++ b/drivers/scsi/qla2xxx/qla_mr.c @@ -738,29 +738,6 @@ qlafx00_lun_reset(fc_port_t *fcport, uint64_t l, int tag) return qla2x00_async_tm_cmd(fcport, TCF_LUN_RESET, l, tag); } -int -qlafx00_loop_reset(scsi_qla_host_t *vha) -{ - int ret; - struct fc_port *fcport; - struct qla_hw_data *ha = vha->hw; - - if (ql2xtargetreset) { - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (fcport->port_type != FCT_TARGET) - continue; - - ret = ha->isp_ops->target_reset(fcport, 0, 0); - if (ret != QLA_SUCCESS) { - ql_dbg(ql_dbg_taskm, vha, 0x803d, - "Bus Reset failed: Reset=%d " - "d_id=%x.\n", ret, fcport->d_id.b24); - } - } - } - return QLA_SUCCESS; -} - int qlafx00_iospace_config(struct qla_hw_data *ha) { diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index f6c76a063294b217a1832f84d53507718f351c85..5acee3c798d42771e739201f043e9c44ce42dcb6 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -109,19 +109,24 @@ static int qla_nvme_alloc_queue(struct nvme_fc_local_port *lport, return -EINVAL; } - if (ha->queue_pair_map[qidx]) { - *handle = ha->queue_pair_map[qidx]; - ql_log(ql_log_info, vha, 0x2121, - "Returning existing qpair of %p for idx=%x\n", - *handle, qidx); - return 0; - } + /* Use base qpair if max_qpairs is 0 */ + if (!ha->max_qpairs) { + qpair = ha->base_qpair; + } else { + if (ha->queue_pair_map[qidx]) { + *handle = ha->queue_pair_map[qidx]; + ql_log(ql_log_info, vha, 0x2121, + "Returning existing qpair of %p for idx=%x\n", + *handle, qidx); + return 0; + } - qpair = qla2xxx_create_qpair(vha, 5, vha->vp_idx, true); - if (qpair == NULL) { - ql_log(ql_log_warn, vha, 0x2122, - "Failed to allocate qpair\n"); - return -EINVAL; + qpair = qla2xxx_create_qpair(vha, 5, vha->vp_idx, true); + if (!qpair) { + ql_log(ql_log_warn, vha, 0x2122, + "Failed to allocate qpair\n"); + return -EINVAL; + } } *handle = qpair; @@ -715,18 +720,9 @@ int qla_nvme_register_hba(struct scsi_qla_host *vha) WARN_ON(vha->nvme_local_port); - if (ha->max_req_queues < 3) { - if (!ha->flags.max_req_queue_warned) - ql_log(ql_log_info, vha, 0x2120, - "%s: Disabling FC-NVME due to lack of free queue pairs (%d).\n", - __func__, ha->max_req_queues); - ha->flags.max_req_queue_warned = 1; - return ret; - } - qla_nvme_fc_transport.max_hw_queues = min((uint8_t)(qla_nvme_fc_transport.max_hw_queues), - (uint8_t)(ha->max_req_queues - 2)); + (uint8_t)(ha->max_qpairs ? ha->max_qpairs : 1)); pinfo.node_name = wwn_to_u64(vha->node_name); pinfo.port_name = wwn_to_u64(vha->port_name); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 4af794c46d17584536ff5c2755841b97a2ba619d..e7f73a167fbd64136e64ad305d4eb0ee399724da 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -197,12 +197,6 @@ MODULE_PARM_DESC(ql2xdbwr, " 0 -- Regular doorbell.\n" " 1 -- CAMRAM doorbell (faster).\n"); -int ql2xtargetreset = 1; -module_param(ql2xtargetreset, int, S_IRUGO); -MODULE_PARM_DESC(ql2xtargetreset, - "Enable target reset." - "Default is 1 - use hw defaults."); - int ql2xgffidenable; module_param(ql2xgffidenable, int, S_IRUGO); MODULE_PARM_DESC(ql2xgffidenable, @@ -1254,6 +1248,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) uint32_t ratov_j; struct qla_qpair *qpair; unsigned long flags; + int fast_fail_status = SUCCESS; if (qla2x00_isp_reg_stat(ha)) { ql_log(ql_log_info, vha, 0x8042, @@ -1261,15 +1256,16 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) return FAILED; } + /* Save any FAST_IO_FAIL value to return later if abort succeeds */ ret = fc_block_scsi_eh(cmd); if (ret != 0) - return ret; + fast_fail_status = ret; sp = scsi_cmd_priv(cmd); qpair = sp->qpair; if ((sp->fcport && sp->fcport->deleted) || !qpair) - return SUCCESS; + return fast_fail_status != SUCCESS ? fast_fail_status : FAILED; spin_lock_irqsave(qpair->qp_lock_ptr, flags); sp->comp = ∁ @@ -1304,7 +1300,7 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) __func__, ha->r_a_tov/10); ret = FAILED; } else { - ret = SUCCESS; + ret = fast_fail_status; } break; default: @@ -1650,27 +1646,10 @@ int qla2x00_loop_reset(scsi_qla_host_t *vha) { int ret; - struct fc_port *fcport; struct qla_hw_data *ha = vha->hw; - if (IS_QLAFX00(ha)) { - return qlafx00_loop_reset(vha); - } - - if (ql2xtargetreset == 1 && ha->flags.enable_target_reset) { - list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (fcport->port_type != FCT_TARGET) - continue; - - ret = ha->isp_ops->target_reset(fcport, 0, 0); - if (ret != QLA_SUCCESS) { - ql_dbg(ql_dbg_taskm, vha, 0x802c, - "Bus Reset failed: Reset=%d " - "d_id=%x.\n", ret, fcport->d_id.b24); - } - } - } - + if (IS_QLAFX00(ha)) + return QLA_SUCCESS; if (ha->flags.enable_lip_full_login && !IS_CNA_CAPABLE(ha)) { atomic_set(&vha->loop_state, LOOP_DOWN); @@ -3953,6 +3932,16 @@ qla2x00_mark_all_devices_lost(scsi_qla_host_t *vha) "Mark all dev lost\n"); list_for_each_entry(fcport, &vha->vp_fcports, list) { + if (fcport->loop_id != FC_NO_LOOP_ID && + (fcport->flags & FCF_FCP2_DEVICE) && + fcport->port_type == FCT_TARGET && + !qla2x00_reset_active(vha)) { + ql_dbg(ql_dbg_disc, vha, 0x211a, + "Delaying session delete for FCP2 flags 0x%x port_type = 0x%x port_id=%06x %phC", + fcport->flags, fcport->port_type, + fcport->d_id.b24, fcport->port_name); + continue; + } fcport->scan_state = 0; qlt_schedule_sess_for_deletion(fcport); } @@ -4077,7 +4066,7 @@ qla2x00_mem_alloc(struct qla_hw_data *ha, uint16_t req_len, uint16_t rsp_len, ql_dbg_pci(ql_dbg_init, ha->pdev, 0xe0ee, "%s: failed alloc dsd\n", __func__); - return 1; + return -ENOMEM; } ha->dif_bundle_kallocs++; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 8d4976725a75aa1fad0337c979de23bdc18084ab..ebed14bed7835cc46a1512c6afc05198bd67f08a 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3256,8 +3256,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, "RESET-RSP online/active/old-count/new-count = %d/%d/%d/%d.\n", vha->flags.online, qla2x00_reset_active(vha), cmd->reset_count, qpair->chip_reset); - spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - return 0; + goto out_unmap_unlock; } /* Does F/W have an IOCBs for this request */ @@ -3380,10 +3379,6 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) prm.sg = NULL; prm.req_cnt = 1; - /* Calculate number of entries and segments required */ - if (qlt_pci_map_calc_cnt(&prm) != 0) - return -EAGAIN; - if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) || (cmd->sess && cmd->sess->deleted)) { /* @@ -3401,6 +3396,10 @@ int qlt_rdy_to_xfer(struct qla_tgt_cmd *cmd) return 0; } + /* Calculate number of entries and segments required */ + if (qlt_pci_map_calc_cnt(&prm) != 0) + return -EAGAIN; + spin_lock_irqsave(qpair->qp_lock_ptr, flags); /* Does F/W have an IOCBs for this request */ res = qlt_check_reserve_free_req(qpair, prm.req_cnt); @@ -3805,9 +3804,6 @@ void qlt_free_cmd(struct qla_tgt_cmd *cmd) BUG_ON(cmd->cmd_in_wq); - if (cmd->sg_mapped) - qlt_unmap_sg(cmd->vha, cmd); - if (!cmd->q_full) qlt_decr_num_pend_cmds(cmd->vha); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 24619c3bebd521fc4b21f2eddd73ba2d95bcb1a2..49702b0dcee3a31bba0daa4cd0152337c58ed930 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -55,7 +55,6 @@ #include #include #include -#include #include #include @@ -86,14 +85,6 @@ unsigned int scsi_logging_level; EXPORT_SYMBOL(scsi_logging_level); #endif -/* - * Domain for asynchronous system resume operations. It is marked 'exclusive' - * to avoid being included in the async_synchronize_full() that is invoked by - * dpm_resume(). - */ -ASYNC_DOMAIN_EXCLUSIVE(scsi_sd_pm_domain); -EXPORT_SYMBOL(scsi_sd_pm_domain); - #ifdef CONFIG_SCSI_LOGGING void scsi_log_send(struct scsi_cmnd *cmd) { @@ -545,8 +536,10 @@ EXPORT_SYMBOL(scsi_device_get); */ void scsi_device_put(struct scsi_device *sdev) { - module_put(sdev->host->hostt->module); + struct module *mod = sdev->host->hostt->module; + put_device(&sdev->sdev_gendev); + module_put(mod); } EXPORT_SYMBOL(scsi_device_put); diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index b6540b92f56610829e3cda18199755a70aaf3830..6b00de6b6f0ef297f09df260c3d843c39508cadc 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -1188,7 +1188,7 @@ static int p_fill_from_dev_buffer(struct scsi_cmnd *scp, const void *arr, __func__, off_dst, scsi_bufflen(scp), act_len, scsi_get_resid(scp)); n = scsi_bufflen(scp) - (off_dst + act_len); - scsi_set_resid(scp, min_t(int, scsi_get_resid(scp), n)); + scsi_set_resid(scp, min_t(u32, scsi_get_resid(scp), n)); return 0; } @@ -1561,7 +1561,8 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) unsigned char pq_pdt; unsigned char *arr; unsigned char *cmd = scp->cmnd; - int alloc_len, n, ret; + u32 alloc_len, n; + int ret; bool have_wlun, is_disk, is_zbc, is_disk_zbc; alloc_len = get_unaligned_be16(cmd + 3); @@ -1584,7 +1585,8 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) kfree(arr); return check_condition_result; } else if (0x1 & cmd[1]) { /* EVPD bit set */ - int lu_id_num, port_group_id, target_dev_id, len; + int lu_id_num, port_group_id, target_dev_id; + u32 len; char lu_id_str[6]; int host_no = devip->sdbg_host->shost->host_no; @@ -1675,9 +1677,9 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) kfree(arr); return check_condition_result; } - len = min(get_unaligned_be16(arr + 2) + 4, alloc_len); + len = min_t(u32, get_unaligned_be16(arr + 2) + 4, alloc_len); ret = fill_from_dev_buffer(scp, arr, - min(len, SDEBUG_MAX_INQ_ARR_SZ)); + min_t(u32, len, SDEBUG_MAX_INQ_ARR_SZ)); kfree(arr); return ret; } @@ -1713,7 +1715,7 @@ static int resp_inquiry(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) } put_unaligned_be16(0x2100, arr + n); /* SPL-4 no version claimed */ ret = fill_from_dev_buffer(scp, arr, - min_t(int, alloc_len, SDEBUG_LONG_INQ_SZ)); + min_t(u32, alloc_len, SDEBUG_LONG_INQ_SZ)); kfree(arr); return ret; } @@ -1728,8 +1730,8 @@ static int resp_requests(struct scsi_cmnd *scp, unsigned char *cmd = scp->cmnd; unsigned char arr[SCSI_SENSE_BUFFERSIZE]; /* assume >= 18 bytes */ bool dsense = !!(cmd[1] & 1); - int alloc_len = cmd[4]; - int len = 18; + u32 alloc_len = cmd[4]; + u32 len = 18; int stopped_state = atomic_read(&devip->stopped); memset(arr, 0, sizeof(arr)); @@ -1773,7 +1775,7 @@ static int resp_requests(struct scsi_cmnd *scp, arr[7] = 0xa; } } - return fill_from_dev_buffer(scp, arr, min_t(int, len, alloc_len)); + return fill_from_dev_buffer(scp, arr, min_t(u32, len, alloc_len)); } static int resp_start_stop(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) @@ -1855,7 +1857,7 @@ static int resp_readcap16(struct scsi_cmnd *scp, { unsigned char *cmd = scp->cmnd; unsigned char arr[SDEBUG_READCAP16_ARR_SZ]; - int alloc_len; + u32 alloc_len; alloc_len = get_unaligned_be32(cmd + 10); /* following just in case virtual_gb changed */ @@ -1884,7 +1886,7 @@ static int resp_readcap16(struct scsi_cmnd *scp, } return fill_from_dev_buffer(scp, arr, - min_t(int, alloc_len, SDEBUG_READCAP16_ARR_SZ)); + min_t(u32, alloc_len, SDEBUG_READCAP16_ARR_SZ)); } #define SDEBUG_MAX_TGTPGS_ARR_SZ 1412 @@ -1895,8 +1897,9 @@ static int resp_report_tgtpgs(struct scsi_cmnd *scp, unsigned char *cmd = scp->cmnd; unsigned char *arr; int host_no = devip->sdbg_host->shost->host_no; - int n, ret, alen, rlen; int port_group_a, port_group_b, port_a, port_b; + u32 alen, n, rlen; + int ret; alen = get_unaligned_be32(cmd + 6); arr = kzalloc(SDEBUG_MAX_TGTPGS_ARR_SZ, GFP_ATOMIC); @@ -1958,9 +1961,9 @@ static int resp_report_tgtpgs(struct scsi_cmnd *scp, * - The constructed command length * - The maximum array size */ - rlen = min_t(int, alen, n); + rlen = min(alen, n); ret = fill_from_dev_buffer(scp, arr, - min_t(int, rlen, SDEBUG_MAX_TGTPGS_ARR_SZ)); + min_t(u32, rlen, SDEBUG_MAX_TGTPGS_ARR_SZ)); kfree(arr); return ret; } @@ -2310,7 +2313,8 @@ static int resp_mode_sense(struct scsi_cmnd *scp, { int pcontrol, pcode, subpcode, bd_len; unsigned char dev_spec; - int alloc_len, offset, len, target_dev_id; + u32 alloc_len, offset, len; + int target_dev_id; int target = scp->device->id; unsigned char *ap; unsigned char arr[SDEBUG_MAX_MSENSE_SZ]; @@ -2466,7 +2470,7 @@ static int resp_mode_sense(struct scsi_cmnd *scp, arr[0] = offset - 1; else put_unaligned_be16((offset - 2), arr + 0); - return fill_from_dev_buffer(scp, arr, min_t(int, alloc_len, offset)); + return fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, offset)); } #define SDEBUG_MAX_MSELECT_SZ 512 @@ -2497,11 +2501,11 @@ static int resp_mode_select(struct scsi_cmnd *scp, __func__, param_len, res); md_len = mselect6 ? (arr[0] + 1) : (get_unaligned_be16(arr + 0) + 2); bd_len = mselect6 ? arr[3] : get_unaligned_be16(arr + 6); - if (md_len > 2) { + off = bd_len + (mselect6 ? 4 : 8); + if (md_len > 2 || off >= res) { mk_sense_invalid_fld(scp, SDEB_IN_DATA, 0, -1); return check_condition_result; } - off = bd_len + (mselect6 ? 4 : 8); mpage = arr[off] & 0x3f; ps = !!(arr[off] & 0x80); if (ps) { @@ -2581,7 +2585,8 @@ static int resp_ie_l_pg(unsigned char *arr) static int resp_log_sense(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) { - int ppc, sp, pcode, subpcode, alloc_len, len, n; + int ppc, sp, pcode, subpcode; + u32 alloc_len, len, n; unsigned char arr[SDEBUG_MAX_LSENSE_SZ]; unsigned char *cmd = scp->cmnd; @@ -2651,9 +2656,9 @@ static int resp_log_sense(struct scsi_cmnd *scp, mk_sense_invalid_fld(scp, SDEB_IN_CDB, 3, -1); return check_condition_result; } - len = min_t(int, get_unaligned_be16(arr + 2) + 4, alloc_len); + len = min_t(u32, get_unaligned_be16(arr + 2) + 4, alloc_len); return fill_from_dev_buffer(scp, arr, - min_t(int, len, SDEBUG_MAX_INQ_ARR_SZ)); + min_t(u32, len, SDEBUG_MAX_INQ_ARR_SZ)); } static inline bool sdebug_dev_is_zoned(struct sdebug_dev_info *devip) @@ -4237,6 +4242,8 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip) mk_sense_invalid_opcode(scp); return check_condition_result; } + if (vnum == 0) + return 0; /* not an error */ a_num = is_bytchk3 ? 1 : vnum; /* Treat following check like one for read (i.e. no write) access */ ret = check_device_access_params(scp, lba, a_num, false); @@ -4300,6 +4307,8 @@ static int resp_report_zones(struct scsi_cmnd *scp, } zs_lba = get_unaligned_be64(cmd + 2); alloc_len = get_unaligned_be32(cmd + 10); + if (alloc_len == 0) + return 0; /* not an error */ rep_opts = cmd[14] & 0x3f; partial = cmd[14] & 0x80; @@ -4312,7 +4321,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, rep_max_zones = min((alloc_len - 64) >> ilog2(RZONES_DESC_HD), max_zones); - arr = kcalloc(RZONES_DESC_HD, alloc_len, GFP_ATOMIC); + arr = kzalloc(alloc_len, GFP_ATOMIC); if (!arr) { mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC, INSUFF_RES_ASCQ); @@ -4404,7 +4413,7 @@ static int resp_report_zones(struct scsi_cmnd *scp, put_unaligned_be64(sdebug_capacity - 1, arr + 8); rep_len = (unsigned long)desc - (unsigned long)arr; - ret = fill_from_dev_buffer(scp, arr, min_t(int, alloc_len, rep_len)); + ret = fill_from_dev_buffer(scp, arr, min_t(u32, alloc_len, rep_len)); fini: read_unlock(macc_lckp); @@ -4627,6 +4636,7 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip, struct sdeb_zone_state *zsp) { enum sdebug_z_cond zc; + struct sdeb_store_info *sip = devip2sip(devip, false); if (zbc_zone_is_conv(zsp)) return; @@ -4638,6 +4648,10 @@ static void zbc_rwp_zone(struct sdebug_dev_info *devip, if (zsp->z_cond == ZC4_CLOSED) devip->nr_closed--; + if (zsp->z_wp > zsp->z_start) + memset(sip->storep + zsp->z_start * sdebug_sector_size, 0, + (zsp->z_wp - zsp->z_start) * sdebug_sector_size); + zsp->z_non_seq_resource = false; zsp->z_wp = zsp->z_start; zsp->z_cond = ZC1_EMPTY; diff --git a/drivers/scsi/scsi_debugfs.c b/drivers/scsi/scsi_debugfs.c index c19ea7ab54cbd2b7daf96e19550454fd75ffaf34..d9a18124cfc9d4604930d431a9f2cca118f9ffc1 100644 --- a/drivers/scsi/scsi_debugfs.c +++ b/drivers/scsi/scsi_debugfs.c @@ -10,6 +10,7 @@ static const char *const scsi_cmd_flags[] = { SCSI_CMD_FLAG_NAME(TAGGED), SCSI_CMD_FLAG_NAME(UNCHECKED_ISA_DMA), SCSI_CMD_FLAG_NAME(INITIALIZED), + SCSI_CMD_FLAG_NAME(LAST), }; #undef SCSI_CMD_FLAG_NAME diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index f11f51e2465f5dea81fef1810d7368e11ff005cb..cfa588e915619a5b3d434d1c079158ba1d071d63 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -448,8 +448,13 @@ static void scsi_report_sense(struct scsi_device *sdev, if (sshdr->asc == 0x29) { evt_type = SDEV_EVT_POWER_ON_RESET_OCCURRED; - sdev_printk(KERN_WARNING, sdev, - "Power-on or device reset occurred\n"); + /* + * Do not print message if it is an expected side-effect + * of runtime PM. + */ + if (!sdev->silence_suspend) + sdev_printk(KERN_WARNING, sdev, + "Power-on or device reset occurred\n"); } if (sshdr->asc == 0x2a && sshdr->ascq == 0x01) { diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d89db29fa829c066b501f0dadc64e1ca8870193e..06242aea243ab23583f72815032856b2babd7e18 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1193,8 +1193,6 @@ static blk_status_t scsi_setup_scsi_cmnd(struct scsi_device *sdev, } cmd->cmd_len = scsi_req(req)->cmd_len; - if (cmd->cmd_len == 0) - cmd->cmd_len = scsi_command_size(cmd->cmnd); cmd->cmnd = scsi_req(req)->cmd; cmd->transfersize = blk_rq_bytes(req); cmd->allowed = scsi_req(req)->retries; @@ -1781,6 +1779,19 @@ static void scsi_mq_exit_request(struct blk_mq_tag_set *set, struct request *rq, cmd->sense_buffer); } + +static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx) +{ + struct request_queue *q = hctx->queue; + struct scsi_device *sdev = q->queuedata; + struct Scsi_Host *shost = sdev->host; + + if (shost->hostt->mq_poll) + return shost->hostt->mq_poll(shost, hctx->queue_num); + + return 0; +} + static int scsi_map_queues(struct blk_mq_tag_set *set) { struct Scsi_Host *shost = container_of(set, struct Scsi_Host, tag_set); @@ -1848,6 +1859,7 @@ static const struct blk_mq_ops scsi_mq_ops_no_commit = { .cleanup_rq = scsi_cleanup_rq, .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, + .poll = scsi_mq_poll, }; @@ -1876,6 +1888,7 @@ static const struct blk_mq_ops scsi_mq_ops = { .cleanup_rq = scsi_cleanup_rq, .busy = scsi_mq_lld_busy, .map_queues = scsi_map_queues, + .poll = scsi_mq_poll, }; struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev) @@ -1894,6 +1907,7 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) { unsigned int cmd_size, sgl_size; struct blk_mq_tag_set *tag_set = &shost->tag_set; + unsigned int reserved_tags = shost->hostt->reserved_tags; sgl_size = max_t(unsigned int, sizeof(struct scatterlist), scsi_mq_inline_sgl_size(shost)); @@ -1908,7 +1922,9 @@ int scsi_mq_setup_tags(struct Scsi_Host *shost) else tag_set->ops = &scsi_mq_ops_no_commit; tag_set->nr_hw_queues = shost->nr_hw_queues ? : 1; - tag_set->queue_depth = shost->can_queue; + tag_set->nr_maps = shost->nr_maps ? : 1; + tag_set->queue_depth = shost->can_queue + reserved_tags; + tag_set->reserved_tags = reserved_tags; tag_set->cmd_size = cmd_size; tag_set->numa_node = NUMA_NO_NODE; tag_set->flags = BLK_MQ_F_SHOULD_MERGE; diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c index 3717eea37ecb386fd772e6dd90182b3674d41ceb..d581613d87c7dfff1b7c1ac6800a88ff16c1fa70 100644 --- a/drivers/scsi/scsi_pm.c +++ b/drivers/scsi/scsi_pm.c @@ -8,7 +8,6 @@ #include #include -#include #include #include @@ -56,9 +55,6 @@ static int scsi_dev_type_suspend(struct device *dev, const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; int err; - /* flush pending in-flight resume operations, suspend is synchronous */ - async_synchronize_full_domain(&scsi_sd_pm_domain); - err = scsi_device_quiesce(to_scsi_device(dev)); if (err == 0) { err = cb(dev, pm); @@ -69,108 +65,30 @@ static int scsi_dev_type_suspend(struct device *dev, return err; } -static int scsi_dev_type_resume(struct device *dev, - int (*cb)(struct device *, const struct dev_pm_ops *)) -{ - const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; - int err = 0; - - err = cb(dev, pm); - scsi_device_resume(to_scsi_device(dev)); - dev_dbg(dev, "scsi resume: %d\n", err); - - if (err == 0) { - pm_runtime_disable(dev); - err = pm_runtime_set_active(dev); - pm_runtime_enable(dev); - - /* - * Forcibly set runtime PM status of request queue to "active" - * to make sure we can again get requests from the queue - * (see also blk_pm_peek_request()). - * - * The resume hook will correct runtime PM status of the disk. - */ - if (!err && scsi_is_sdev_device(dev)) { - struct scsi_device *sdev = to_scsi_device(dev); - - blk_set_runtime_active(sdev->request_queue); - } - } - - return err; -} - static int scsi_bus_suspend_common(struct device *dev, int (*cb)(struct device *, const struct dev_pm_ops *)) { - int err = 0; - - if (scsi_is_sdev_device(dev)) { - /* - * All the high-level SCSI drivers that implement runtime - * PM treat runtime suspend, system suspend, and system - * hibernate nearly identically. In all cases the requirements - * for runtime suspension are stricter. - */ - if (pm_runtime_suspended(dev)) - return 0; - - err = scsi_dev_type_suspend(dev, cb); - } - - return err; -} - -static void async_sdev_resume(void *dev, async_cookie_t cookie) -{ - scsi_dev_type_resume(dev, do_scsi_resume); -} - -static void async_sdev_thaw(void *dev, async_cookie_t cookie) -{ - scsi_dev_type_resume(dev, do_scsi_thaw); -} + if (!scsi_is_sdev_device(dev)) + return 0; -static void async_sdev_restore(void *dev, async_cookie_t cookie) -{ - scsi_dev_type_resume(dev, do_scsi_restore); + return scsi_dev_type_suspend(dev, cb); } static int scsi_bus_resume_common(struct device *dev, int (*cb)(struct device *, const struct dev_pm_ops *)) { - async_func_t fn; + const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; + int err; if (!scsi_is_sdev_device(dev)) - fn = NULL; - else if (cb == do_scsi_resume) - fn = async_sdev_resume; - else if (cb == do_scsi_thaw) - fn = async_sdev_thaw; - else if (cb == do_scsi_restore) - fn = async_sdev_restore; - else - fn = NULL; - - if (fn) { - async_schedule_domain(fn, dev, &scsi_sd_pm_domain); - - /* - * If a user has disabled async probing a likely reason - * is due to a storage enclosure that does not inject - * staggered spin-ups. For safety, make resume - * synchronous as well in that case. - */ - if (strncmp(scsi_scan_type, "async", 5) != 0) - async_synchronize_full_domain(&scsi_sd_pm_domain); - } else { - pm_runtime_disable(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - } - return 0; + return 0; + + err = cb(dev, pm); + scsi_device_resume(to_scsi_device(dev)); + dev_dbg(dev, "scsi resume: %d\n", err); + + return err; } static int scsi_bus_prepare(struct device *dev) @@ -262,7 +180,7 @@ static int sdev_runtime_resume(struct device *dev) blk_pre_runtime_resume(sdev->request_queue); if (pm && pm->runtime_resume) err = pm->runtime_resume(dev); - blk_post_runtime_resume(sdev->request_queue, err); + blk_post_runtime_resume(sdev->request_queue); return err; } diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index 180636d54982d0db670ecaf1b3b158f1eca08956..7695dfe1a15cfb198e7cebac54ddf200440f3a94 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -3,7 +3,6 @@ #define _SCSI_PRIV_H #include -#include #include struct request_queue; @@ -117,7 +116,7 @@ extern void scsi_exit_procfs(void); #endif /* CONFIG_PROC_FS */ /* scsi_scan.c */ -extern char scsi_scan_type[]; +void scsi_enable_async_suspend(struct device *dev); extern int scsi_complete_async_scans(void); extern int scsi_scan_host_selected(struct Scsi_Host *, unsigned int, unsigned int, u64, enum scsi_scan_mode); @@ -171,8 +170,6 @@ static inline int scsi_autopm_get_host(struct Scsi_Host *h) { return 0; } static inline void scsi_autopm_put_host(struct Scsi_Host *h) {} #endif /* CONFIG_PM */ -extern struct async_domain scsi_sd_pm_domain; - /* scsi_dh.c */ #ifdef CONFIG_SCSI_DH void scsi_dh_add_device(struct scsi_device *sdev); diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 8e474b1452495819224e5928d86c875933dcdf0d..6f3a1f1f7ec1fa1d05ff349e31aa477a8ca6f633 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -122,6 +122,22 @@ struct async_scan_data { struct completion prev_finished; }; +/** + * scsi_enable_async_suspend - Enable async suspend and resume + */ +void scsi_enable_async_suspend(struct device *dev) +{ + /* + * If a user has disabled async probing a likely reason is due to a + * storage enclosure that does not inject staggered spin-ups. For + * safety, make resume synchronous as well in that case. + */ + if (strncmp(scsi_scan_type, "async", 5) != 0) + return; + /* Enable asynchronous suspend and resume. */ + device_enable_async_suspend(dev); +} + /** * scsi_complete_async_scans - Wait for asynchronous scans to complete * @@ -430,6 +446,7 @@ static struct scsi_target *scsi_alloc_target(struct device *parent, dev_set_name(dev, "target%d:%d:%d", shost->host_no, channel, id); dev->bus = &scsi_bus_type; dev->type = &scsi_target_type; + scsi_enable_async_suspend(dev); starget->id = id; starget->channel = channel; starget->can_queue = 0; @@ -952,6 +969,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, if (*bflags & BLIST_UNMAP_LIMIT_WS) sdev->unmap_limit_for_ws = 1; + if (*bflags & BLIST_IGN_MEDIA_CHANGE) + sdev->ignore_media_change = 1; + sdev->eh_timeout = SCSI_DEFAULT_EH_TIMEOUT; if (*bflags & BLIST_TRY_VPD_PAGES) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 8173b67ec7b0fc4413e332307c8d462bab6e333a..29db4db76b39370059013dce013e59a8588f33a7 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -450,9 +450,12 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL; struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL; unsigned long flags; + struct module *mod; sdev = container_of(work, struct scsi_device, ew.work); + mod = sdev->host->hostt->module; + scsi_dh_release_device(sdev); parent = sdev->sdev_gendev.parent; @@ -501,11 +504,17 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work) if (parent) put_device(parent); + module_put(mod); } static void scsi_device_dev_release(struct device *dev) { struct scsi_device *sdp = to_scsi_device(dev); + + /* Set module pointer as NULL in case of module unloading */ + if (!try_module_get(sdp->host->hostt->module)) + sdp->host->hostt->module = NULL; + execute_in_process_context(scsi_device_dev_release_usercontext, &sdp->ew); } @@ -787,6 +796,7 @@ store_state_field(struct device *dev, struct device_attribute *attr, int i, ret; struct scsi_device *sdev = to_scsi_device(dev); enum scsi_device_state state = 0; + bool rescan_dev = false; for (i = 0; i < ARRAY_SIZE(sdev_states); i++) { const int len = strlen(sdev_states[i].name); @@ -805,20 +815,27 @@ store_state_field(struct device *dev, struct device_attribute *attr, } mutex_lock(&sdev->state_mutex); - ret = scsi_device_set_state(sdev, state); - /* - * If the device state changes to SDEV_RUNNING, we need to - * run the queue to avoid I/O hang, and rescan the device - * to revalidate it. Running the queue first is necessary - * because another thread may be waiting inside - * blk_mq_freeze_queue_wait() and because that call may be - * waiting for pending I/O to finish. - */ - if (ret == 0 && state == SDEV_RUNNING) { + if (sdev->sdev_state == SDEV_RUNNING && state == SDEV_RUNNING) { + ret = 0; + } else { + ret = scsi_device_set_state(sdev, state); + if (ret == 0 && state == SDEV_RUNNING) + rescan_dev = true; + } + mutex_unlock(&sdev->state_mutex); + + if (rescan_dev) { + /* + * If the device state changes to SDEV_RUNNING, we need to + * run the queue to avoid I/O hang, and rescan the device + * to revalidate it. Running the queue first is necessary + * because another thread may be waiting inside + * blk_mq_freeze_queue_wait() and because that call may be + * waiting for pending I/O to finish. + */ blk_mq_run_hw_queues(sdev->request_queue, true); scsi_rescan_device(dev); } - mutex_unlock(&sdev->state_mutex); return ret == 0 ? count : -EINVAL; } @@ -1608,6 +1625,7 @@ void scsi_sysfs_device_initialize(struct scsi_device *sdev) device_initialize(&sdev->sdev_gendev); sdev->sdev_gendev.bus = &scsi_bus_type; sdev->sdev_gendev.type = &scsi_dev_type; + scsi_enable_async_suspend(&sdev->sdev_gendev); dev_set_name(&sdev->sdev_gendev, "%d:%d:%d:%llu", sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 41772b88610ae047aca5e5a9f9bc7de7528f05b8..a5759d0e388a851397ea2eb9fc9e8d72ea02d2f0 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -1909,12 +1909,12 @@ static void session_recovery_timedout(struct work_struct *work) } spin_unlock_irqrestore(&session->lock, flags); - if (session->transport->session_recovery_timedout) - session->transport->session_recovery_timedout(session); - ISCSI_DBG_TRANS_SESSION(session, "Unblocking SCSI target\n"); scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE); ISCSI_DBG_TRANS_SESSION(session, "Completed unblocking SCSI target\n"); + + if (session->transport->session_recovery_timedout) + session->transport->session_recovery_timedout(session); } static void __iscsi_unblock_session(struct work_struct *work) @@ -2907,8 +2907,6 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) session->recovery_tmo = value; break; default: - err = transport->set_param(conn, ev->u.set_param.param, - data, ev->u.set_param.len); if ((conn->state == ISCSI_CONN_BOUND) || (conn->state == ISCSI_CONN_UP)) { err = transport->set_param(conn, ev->u.set_param.param, diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index f0c0935d79099ec4ab47437eb138b32eead8d216..edb1e26c734b1a28542d63f58d575c36c8ae17ac 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -113,7 +112,8 @@ static int sd_remove(struct device *); static void sd_shutdown(struct device *); static int sd_suspend_system(struct device *); static int sd_suspend_runtime(struct device *); -static int sd_resume(struct device *); +static int sd_resume_system(struct device *); +static int sd_resume_runtime(struct device *); static void sd_rescan(struct device *); static blk_status_t sd_init_command(struct scsi_cmnd *SCpnt); static void sd_uninit_command(struct scsi_cmnd *SCpnt); @@ -604,11 +604,11 @@ static struct class sd_disk_class = { static const struct dev_pm_ops sd_pm_ops = { .suspend = sd_suspend_system, - .resume = sd_resume, + .resume = sd_resume_system, .poweroff = sd_suspend_system, - .restore = sd_resume, + .restore = sd_resume_system, .runtime_suspend = sd_suspend_runtime, - .runtime_resume = sd_resume, + .runtime_resume = sd_resume_runtime, }; static struct scsi_driver sd_template = { @@ -3443,15 +3443,16 @@ static int sd_probe(struct device *dev) } device_initialize(&sdkp->dev); - sdkp->dev.parent = dev; + sdkp->dev.parent = get_device(dev); sdkp->dev.class = &sd_disk_class; dev_set_name(&sdkp->dev, "%s", dev_name(dev)); error = device_add(&sdkp->dev); - if (error) - goto out_free_index; + if (error) { + put_device(&sdkp->dev); + goto out; + } - get_device(dev); dev_set_drvdata(dev, sdkp); gd->major = sd_major((index & 0xf0) >> 4); @@ -3537,7 +3538,6 @@ static int sd_remove(struct device *dev) devt = disk_devt(sdkp->disk); scsi_autopm_get_device(sdkp->device); - async_synchronize_full_domain(&scsi_sd_pm_domain); device_del(&sdkp->dev); del_gendisk(sdkp->disk); sd_shutdown(dev); @@ -3663,7 +3663,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) return 0; if (sdkp->WCE && sdkp->media_present) { - sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); + if (!sdkp->device->silence_suspend) + sd_printk(KERN_NOTICE, sdkp, "Synchronizing SCSI cache\n"); ret = sd_sync_cache(sdkp, &sshdr); if (ret) { @@ -3685,7 +3686,8 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) } if (sdkp->device->manage_start_stop) { - sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); + if (!sdkp->device->silence_suspend) + sd_printk(KERN_NOTICE, sdkp, "Stopping disk\n"); /* an error is not worth aborting a system sleep */ ret = sd_start_stop_device(sdkp, 0); if (ignore_stop_errors) @@ -3697,6 +3699,9 @@ static int sd_suspend_common(struct device *dev, bool ignore_stop_errors) static int sd_suspend_system(struct device *dev) { + if (pm_runtime_suspended(dev)) + return 0; + return sd_suspend_common(dev, true); } @@ -3723,6 +3728,38 @@ static int sd_resume(struct device *dev) return ret; } +static int sd_resume_system(struct device *dev) +{ + if (pm_runtime_suspended(dev)) + return 0; + + return sd_resume(dev); +} + +static int sd_resume_runtime(struct device *dev) +{ + struct scsi_disk *sdkp = dev_get_drvdata(dev); + struct scsi_device *sdp; + + if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ + return 0; + + sdp = sdkp->device; + + if (sdp->ignore_media_change) { + /* clear the device's sense data */ + static const u8 cmd[10] = { REQUEST_SENSE }; + + if (scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, + NULL, sdp->request_queue->rq_timeout, 1, 0, + RQF_PM, NULL)) + sd_printk(KERN_NOTICE, sdkp, + "Failed to clear sense data\n"); + } + + return sd_resume(dev); +} + /** * init_sd - entry point for this driver (both when built in or when * a module). diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index c2afba2a5414df7115ed16a289b33fde9002487a..0a1734f34587dd4dc9e8b78468b05d37336e86b5 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -87,9 +87,16 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code, 0 }; unsigned char recv_page_code; + unsigned int retries = SES_RETRIES; + struct scsi_sense_hdr sshdr; + + do { + ret = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen, + &sshdr, SES_TIMEOUT, 1, NULL); + } while (ret > 0 && --retries && scsi_sense_valid(&sshdr) && + (sshdr.sense_key == NOT_READY || + (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29))); - ret = scsi_execute_req(sdev, cmd, DMA_FROM_DEVICE, buf, bufflen, - NULL, SES_TIMEOUT, SES_RETRIES, NULL); if (unlikely(ret)) return ret; @@ -111,7 +118,7 @@ static int ses_recv_diag(struct scsi_device *sdev, int page_code, static int ses_send_diag(struct scsi_device *sdev, int page_code, void *buf, int bufflen) { - u32 result; + int result; unsigned char cmd[] = { SEND_DIAGNOSTIC, @@ -121,9 +128,16 @@ static int ses_send_diag(struct scsi_device *sdev, int page_code, bufflen & 0xff, 0 }; + struct scsi_sense_hdr sshdr; + unsigned int retries = SES_RETRIES; + + do { + result = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, buf, bufflen, + &sshdr, SES_TIMEOUT, 1, NULL); + } while (result > 0 && --retries && scsi_sense_valid(&sshdr) && + (sshdr.sense_key == NOT_READY || + (sshdr.sense_key == UNIT_ATTENTION && sshdr.asc == 0x29))); - result = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, buf, bufflen, - NULL, SES_TIMEOUT, SES_RETRIES, NULL); if (result) sdev_printk(KERN_ERR, sdev, "SEND DIAGNOSTIC result: %8x\n", result); diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 4cb4ab9c6137ee7d4c5ba509b37438c76c644aef..464418413ced0ec7b6ecabcbd4f0a8448017a0cb 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -917,7 +917,7 @@ static void get_capabilities(struct scsi_cd *cd) /* allocate transfer buffer */ - buffer = kmalloc(512, GFP_KERNEL | GFP_DMA); + buffer = kmalloc(512, GFP_KERNEL); if (!buffer) { sr_printk(KERN_ERR, cd, "out of memory.\n"); return; diff --git a/drivers/scsi/sr_vendor.c b/drivers/scsi/sr_vendor.c index 1f988a1b9166f49f83384e7525b35f37861580a4..a61635326ae0ac07e7a042b18ceb708ebd243de6 100644 --- a/drivers/scsi/sr_vendor.c +++ b/drivers/scsi/sr_vendor.c @@ -131,7 +131,7 @@ int sr_set_blocklength(Scsi_CD *cd, int blocklength) if (cd->vendor == VENDOR_TOSHIBA) density = (blocklength > 2048) ? 0x81 : 0x83; - buffer = kmalloc(512, GFP_KERNEL | GFP_DMA); + buffer = kmalloc(512, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -179,7 +179,7 @@ int sr_cd_check(struct cdrom_device_info *cdi) if (cd->cdi.mask & CDC_MULTI_SESSION) return 0; - buffer = kmalloc(512, GFP_KERNEL | GFP_DMA); + buffer = kmalloc(512, GFP_KERNEL); if (!buffer) return -ENOMEM; diff --git a/drivers/scsi/ufs/Kconfig b/drivers/scsi/ufs/Kconfig index 87900016448fcc9ca10e34f83d2a8a73a1e5397f..9fe27b01904e7477bafb3fd777c9baaafb29e14c 100644 --- a/drivers/scsi/ufs/Kconfig +++ b/drivers/scsi/ufs/Kconfig @@ -39,7 +39,7 @@ config SCSI_UFSHCD select DEVFREQ_GOV_SIMPLE_ONDEMAND select NLS help - This selects the support for UFS devices in Linux, say Y and make + This selects the support for UFS devices in Linux, say Y and make sure that you know the name of your UFS host adapter (the card inside your computer that "speaks" the UFS protocol, also called UFS Host Controller), because you will be asked for it. @@ -50,12 +50,14 @@ config SCSI_UFSHCD However, do not compile this as a module if your root file system (the one containing the directory /) is located on a UFS device. +if SCSI_UFSHCD + config SCSI_UFSHCD_PCI tristate "PCI bus based UFS Controller support" - depends on SCSI_UFSHCD && PCI + depends on PCI help - This selects the PCI UFS Host Controller Interface. Select this if - you have UFS Host Controller with PCI Interface. + This selects the PCI UFS Host Controller Interface. Select this if + you have UFS Host Controller with PCI Interface. If you have a controller with this interface, say Y or M here. @@ -71,13 +73,12 @@ config SCSI_UFS_DWC_TC_PCI config SCSI_UFSHCD_PLATFORM tristate "Platform bus based UFS Controller support" - depends on SCSI_UFSHCD depends on HAS_IOMEM help - This selects the UFS host controller support. Select this if - you have an UFS controller on Platform bus. + This selects the UFS host controller support. Select this if + you have an UFS controller on Platform bus. - If you have a controller with this interface, say Y or M here. + If you have a controller with this interface, say Y or M here. If unsure, say N. @@ -85,7 +86,7 @@ config SCSI_UFS_CDNS_PLATFORM tristate "Cadence UFS Controller platform driver" depends on SCSI_UFSHCD_PLATFORM help - This selects the Cadence-specific additions to UFSHCD platform driver. + This selects the Cadence-specific additions to UFSHCD platform driver. If unsure, say N. @@ -100,7 +101,7 @@ config SCSI_UFS_DWC_TC_PLATFORM config SCSI_UFS_QCOM tristate "QCOM specific hooks to UFS controller platform driver" depends on SCSI_UFSHCD_PLATFORM && ARCH_QCOM - select QCOM_SCM + select QCOM_SCM if SCSI_UFS_CRYPTO select RESET_CONTROLLER help This selects the QCOM specific additions to UFSHCD platform driver. @@ -147,7 +148,6 @@ config SCSI_UFS_TI_J721E config SCSI_UFS_BSG bool "Universal Flash Storage BSG device node" - depends on SCSI_UFSHCD select BLK_DEV_BSGLIB help Universal Flash Storage (UFS) is SCSI transport specification for @@ -165,19 +165,19 @@ config SCSI_UFS_BSG If unsure, say N. config SCSI_UFS_EXYNOS - tristate "EXYNOS specific hooks to UFS controller platform driver" + tristate "Exynos specific hooks to UFS controller platform driver" depends on SCSI_UFSHCD_PLATFORM && (ARCH_EXYNOS || COMPILE_TEST) help - This selects the EXYNOS specific additions to UFSHCD platform driver. - UFS host on EXYNOS includes HCI and UNIPRO layer, and associates with - UFS-PHY driver. + This selects the Samsung Exynos SoC specific additions to UFSHCD + platform driver. UFS host on Samsung Exynos SoC includes HCI and + UNIPRO layer, and associates with UFS-PHY driver. - Select this if you have UFS host controller on EXYNOS chipset. + Select this if you have UFS host controller on Samsung Exynos SoC. If unsure, say N. config SCSI_UFS_CRYPTO bool "UFS Crypto Engine Support" - depends on SCSI_UFSHCD && BLK_INLINE_ENCRYPTION + depends on BLK_INLINE_ENCRYPTION help Enable Crypto Engine Support in UFS. Enabling this makes it possible for the kernel to use the crypto @@ -186,9 +186,26 @@ config SCSI_UFS_CRYPTO config SCSI_UFS_HPB bool "Support UFS Host Performance Booster" - depends on SCSI_UFSHCD help The UFS HPB feature improves random read performance. It caches L2P (logical to physical) map of UFS to host DRAM. The driver uses HPB read command by piggybacking physical page number for bypassing FTL (flash translation layer)'s L2P address translation. + +config SCSI_UFS_FAULT_INJECTION + bool "UFS Fault Injection Support" + depends on FAULT_INJECTION + help + Enable fault injection support in the UFS driver. This makes it easier + to test the UFS error handler and abort handler. + +config SCSI_UFS_HWMON + bool "UFS Temperature Notification" + depends on SCSI_UFSHCD=HWMON || HWMON=y + help + This provides support for UFS hardware monitoring. If enabled, + a hardware monitoring device will be created for the UFS device. + + If unsure, say N. + +endif diff --git a/drivers/scsi/ufs/Makefile b/drivers/scsi/ufs/Makefile index cce9b3916f5b37063db8073c870e7da6f1f9d592..966048875b50386118cd6baf44d51aa18f43d76f 100644 --- a/drivers/scsi/ufs/Makefile +++ b/drivers/scsi/ufs/Makefile @@ -9,6 +9,8 @@ ufshcd-core-$(CONFIG_DEBUG_FS) += ufs-debugfs.o ufshcd-core-$(CONFIG_SCSI_UFS_BSG) += ufs_bsg.o ufshcd-core-$(CONFIG_SCSI_UFS_CRYPTO) += ufshcd-crypto.o ufshcd-core-$(CONFIG_SCSI_UFS_HPB) += ufshpb.o +ufshcd-core-$(CONFIG_SCSI_UFS_FAULT_INJECTION) += ufs-fault-injection.o +ufshcd-core-$(CONFIG_SCSI_UFS_HWMON) += ufs-hwmon.o obj-$(CONFIG_SCSI_UFS_DWC_TC_PCI) += tc-dwc-g210-pci.o ufshcd-dwc.o tc-dwc-g210.o obj-$(CONFIG_SCSI_UFS_DWC_TC_PLATFORM) += tc-dwc-g210-pltfrm.o ufshcd-dwc.o tc-dwc-g210.o diff --git a/drivers/scsi/ufs/cdns-pltfrm.c b/drivers/scsi/ufs/cdns-pltfrm.c index da065a259f6e4ea9b09e92597ba4046919516922..7da8be2f35c42d64778c89a71c2358daf82b28e3 100644 --- a/drivers/scsi/ufs/cdns-pltfrm.c +++ b/drivers/scsi/ufs/cdns-pltfrm.c @@ -100,6 +100,7 @@ static void cdns_ufs_set_l4_attr(struct ufs_hba *hba) } /** + * cdns_ufs_set_hclkdiv() * Sets HCLKDIV register value based on the core_clk * @hba: host controller instance * @@ -141,6 +142,7 @@ static int cdns_ufs_set_hclkdiv(struct ufs_hba *hba) } /** + * cdns_ufs_hce_enable_notify() * Called before and after HCE enable bit is set. * @hba: host controller instance * @status: notify stage (pre, post change) @@ -157,6 +159,7 @@ static int cdns_ufs_hce_enable_notify(struct ufs_hba *hba, } /** + * cdns_ufs_hibern8_notify() * Called around hibern8 enter/exit. * @hba: host controller instance * @cmd: UIC Command @@ -173,6 +176,7 @@ static void cdns_ufs_hibern8_notify(struct ufs_hba *hba, enum uic_cmd_dme cmd, } /** + * cdns_ufs_link_startup_notify() * Called before and after Link startup is carried out. * @hba: host controller instance * @status: notify stage (pre, post change) @@ -221,8 +225,7 @@ static int cdns_ufs_init(struct ufs_hba *hba) return -ENOMEM; ufshcd_set_variant(hba, host); - if (hba->vops && hba->vops->phy_initialization) - status = hba->vops->phy_initialization(hba); + status = ufshcd_vops_phy_initialization(hba); return status; } @@ -315,11 +318,10 @@ static int cdns_ufs_pltfrm_remove(struct platform_device *pdev) } static const struct dev_pm_ops cdns_ufs_dev_pm_ops = { - .suspend = ufshcd_pltfrm_suspend, - .resume = ufshcd_pltfrm_resume, - .runtime_suspend = ufshcd_pltfrm_runtime_suspend, - .runtime_resume = ufshcd_pltfrm_runtime_resume, - .runtime_idle = ufshcd_pltfrm_runtime_idle, + SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume) + SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL) + .prepare = ufshcd_suspend_prepare, + .complete = ufshcd_resume_complete, }; static struct platform_driver cdns_ufs_pltfrm_driver = { diff --git a/drivers/scsi/ufs/tc-dwc-g210-pci.c b/drivers/scsi/ufs/tc-dwc-g210-pci.c index 67a6a61154b7184771bbe2447f19a84f69ac4b18..7b08e2e07cc5fd009815a6bf1377ca96e8a9fc77 100644 --- a/drivers/scsi/ufs/tc-dwc-g210-pci.c +++ b/drivers/scsi/ufs/tc-dwc-g210-pci.c @@ -23,31 +23,6 @@ static int tc_type = TC_G210_INV; module_param(tc_type, int, 0); MODULE_PARM_DESC(tc_type, "Test Chip Type (20 = 20-bit, 40 = 40-bit)"); -static int tc_dwc_g210_pci_suspend(struct device *dev) -{ - return ufshcd_system_suspend(dev_get_drvdata(dev)); -} - -static int tc_dwc_g210_pci_resume(struct device *dev) -{ - return ufshcd_system_resume(dev_get_drvdata(dev)); -} - -static int tc_dwc_g210_pci_runtime_suspend(struct device *dev) -{ - return ufshcd_runtime_suspend(dev_get_drvdata(dev)); -} - -static int tc_dwc_g210_pci_runtime_resume(struct device *dev) -{ - return ufshcd_runtime_resume(dev_get_drvdata(dev)); -} - -static int tc_dwc_g210_pci_runtime_idle(struct device *dev) -{ - return ufshcd_runtime_idle(dev_get_drvdata(dev)); -} - /* * struct ufs_hba_dwc_vops - UFS DWC specific variant operations */ @@ -135,7 +110,6 @@ tc_dwc_g210_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - pci_set_drvdata(pdev, hba); pm_runtime_put_noidle(&pdev->dev); pm_runtime_allow(&pdev->dev); @@ -143,11 +117,10 @@ tc_dwc_g210_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } static const struct dev_pm_ops tc_dwc_g210_pci_pm_ops = { - .suspend = tc_dwc_g210_pci_suspend, - .resume = tc_dwc_g210_pci_resume, - .runtime_suspend = tc_dwc_g210_pci_runtime_suspend, - .runtime_resume = tc_dwc_g210_pci_runtime_resume, - .runtime_idle = tc_dwc_g210_pci_runtime_idle, + SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume) + SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL) + .prepare = ufshcd_suspend_prepare, + .complete = ufshcd_resume_complete, }; static const struct pci_device_id tc_dwc_g210_pci_tbl[] = { diff --git a/drivers/scsi/ufs/tc-dwc-g210-pltfrm.c b/drivers/scsi/ufs/tc-dwc-g210-pltfrm.c index a1268e4f44d6f10a7479bae244fdfe640c103052..783ec43efa78949266112b9bc368935a78027ec2 100644 --- a/drivers/scsi/ufs/tc-dwc-g210-pltfrm.c +++ b/drivers/scsi/ufs/tc-dwc-g210-pltfrm.c @@ -84,11 +84,8 @@ static int tc_dwc_g210_pltfm_remove(struct platform_device *pdev) } static const struct dev_pm_ops tc_dwc_g210_pltfm_pm_ops = { - .suspend = ufshcd_pltfrm_suspend, - .resume = ufshcd_pltfrm_resume, - .runtime_suspend = ufshcd_pltfrm_runtime_suspend, - .runtime_resume = ufshcd_pltfrm_runtime_resume, - .runtime_idle = ufshcd_pltfrm_runtime_idle, + SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume) + SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL) }; static struct platform_driver tc_dwc_g210_pltfm_driver = { diff --git a/drivers/scsi/ufs/ufs-debugfs.c b/drivers/scsi/ufs/ufs-debugfs.c index dee98dc72d29eac1568f30692ce3d0815f9fb176..4a0bbcf1757aba823bfd15757b058386014d84dd 100644 --- a/drivers/scsi/ufs/ufs-debugfs.c +++ b/drivers/scsi/ufs/ufs-debugfs.c @@ -8,19 +8,31 @@ static struct dentry *ufs_debugfs_root; +struct ufs_debugfs_attr { + const char *name; + mode_t mode; + const struct file_operations *fops; +}; + +/* @file corresponds to a debugfs attribute in directory hba->debugfs_root. */ +static inline struct ufs_hba *hba_from_file(const struct file *file) +{ + return d_inode(file->f_path.dentry->d_parent)->i_private; +} + void __init ufs_debugfs_init(void) { ufs_debugfs_root = debugfs_create_dir("ufshcd", NULL); } -void __exit ufs_debugfs_exit(void) +void ufs_debugfs_exit(void) { debugfs_remove_recursive(ufs_debugfs_root); } static int ufs_debugfs_stats_show(struct seq_file *s, void *data) { - struct ufs_hba *hba = s->private; + struct ufs_hba *hba = hba_from_file(s->file); struct ufs_event_hist *e = hba->ufs_stats.event; #define PRT(fmt, typ) \ @@ -44,13 +56,183 @@ static int ufs_debugfs_stats_show(struct seq_file *s, void *data) } DEFINE_SHOW_ATTRIBUTE(ufs_debugfs_stats); +static int ee_usr_mask_get(void *data, u64 *val) +{ + struct ufs_hba *hba = data; + + *val = hba->ee_usr_mask; + return 0; +} + +static int ufs_debugfs_get_user_access(struct ufs_hba *hba) +__acquires(&hba->host_sem) +{ + down(&hba->host_sem); + if (!ufshcd_is_user_access_allowed(hba)) { + up(&hba->host_sem); + return -EBUSY; + } + ufshcd_rpm_get_sync(hba); + return 0; +} + +static void ufs_debugfs_put_user_access(struct ufs_hba *hba) +__releases(&hba->host_sem) +{ + ufshcd_rpm_put_sync(hba); + up(&hba->host_sem); +} + +static int ee_usr_mask_set(void *data, u64 val) +{ + struct ufs_hba *hba = data; + int err; + + if (val & ~(u64)MASK_EE_STATUS) + return -EINVAL; + err = ufs_debugfs_get_user_access(hba); + if (err) + return err; + err = ufshcd_update_ee_usr_mask(hba, val, MASK_EE_STATUS); + ufs_debugfs_put_user_access(hba); + return err; +} + +DEFINE_DEBUGFS_ATTRIBUTE(ee_usr_mask_fops, ee_usr_mask_get, ee_usr_mask_set, "%#llx\n"); + +void ufs_debugfs_exception_event(struct ufs_hba *hba, u16 status) +{ + bool chgd = false; + u16 ee_ctrl_mask; + int err = 0; + + if (!hba->debugfs_ee_rate_limit_ms || !status) + return; + + mutex_lock(&hba->ee_ctrl_mutex); + ee_ctrl_mask = hba->ee_drv_mask | (hba->ee_usr_mask & ~status); + chgd = ee_ctrl_mask != hba->ee_ctrl_mask; + if (chgd) { + err = __ufshcd_write_ee_control(hba, ee_ctrl_mask); + if (err) + dev_err(hba->dev, "%s: failed to write ee control %d\n", + __func__, err); + } + mutex_unlock(&hba->ee_ctrl_mutex); + + if (chgd && !err) { + unsigned long delay = msecs_to_jiffies(hba->debugfs_ee_rate_limit_ms); + + queue_delayed_work(system_freezable_wq, &hba->debugfs_ee_work, delay); + } +} + +static void ufs_debugfs_restart_ee(struct work_struct *work) +{ + struct ufs_hba *hba = container_of(work, struct ufs_hba, debugfs_ee_work.work); + + if (!hba->ee_usr_mask || pm_runtime_suspended(hba->dev) || + ufs_debugfs_get_user_access(hba)) + return; + ufshcd_write_ee_control(hba); + ufs_debugfs_put_user_access(hba); +} + +static int ufs_saved_err_show(struct seq_file *s, void *data) +{ + struct ufs_debugfs_attr *attr = s->private; + struct ufs_hba *hba = hba_from_file(s->file); + const int *p; + + if (strcmp(attr->name, "saved_err") == 0) { + p = &hba->saved_err; + } else if (strcmp(attr->name, "saved_uic_err") == 0) { + p = &hba->saved_uic_err; + } else { + return -ENOENT; + } + + seq_printf(s, "%d\n", *p); + return 0; +} + +static ssize_t ufs_saved_err_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct ufs_debugfs_attr *attr = file->f_inode->i_private; + struct ufs_hba *hba = hba_from_file(file); + char val_str[16] = { }; + int val, ret; + + if (count > sizeof(val_str)) + return -EINVAL; + if (copy_from_user(val_str, buf, count)) + return -EFAULT; + ret = kstrtoint(val_str, 0, &val); + if (ret < 0) + return ret; + + spin_lock_irq(hba->host->host_lock); + if (strcmp(attr->name, "saved_err") == 0) { + hba->saved_err = val; + } else if (strcmp(attr->name, "saved_uic_err") == 0) { + hba->saved_uic_err = val; + } else { + ret = -ENOENT; + } + if (ret == 0) + ufshcd_schedule_eh_work(hba); + spin_unlock_irq(hba->host->host_lock); + + return ret < 0 ? ret : count; +} + +static int ufs_saved_err_open(struct inode *inode, struct file *file) +{ + return single_open(file, ufs_saved_err_show, inode->i_private); +} + +static const struct file_operations ufs_saved_err_fops = { + .owner = THIS_MODULE, + .open = ufs_saved_err_open, + .read = seq_read, + .write = ufs_saved_err_write, + .llseek = seq_lseek, + .release = single_release, +}; + +static const struct ufs_debugfs_attr ufs_attrs[] = { + { "stats", 0400, &ufs_debugfs_stats_fops }, + { "saved_err", 0600, &ufs_saved_err_fops }, + { "saved_uic_err", 0600, &ufs_saved_err_fops }, + { } +}; + void ufs_debugfs_hba_init(struct ufs_hba *hba) { - hba->debugfs_root = debugfs_create_dir(dev_name(hba->dev), ufs_debugfs_root); - debugfs_create_file("stats", 0400, hba->debugfs_root, hba, &ufs_debugfs_stats_fops); + const struct ufs_debugfs_attr *attr; + struct dentry *root; + + /* Set default exception event rate limit period to 20ms */ + hba->debugfs_ee_rate_limit_ms = 20; + INIT_DELAYED_WORK(&hba->debugfs_ee_work, ufs_debugfs_restart_ee); + + root = debugfs_create_dir(dev_name(hba->dev), ufs_debugfs_root); + if (IS_ERR_OR_NULL(root)) + return; + hba->debugfs_root = root; + d_inode(root)->i_private = hba; + for (attr = ufs_attrs; attr->name; attr++) + debugfs_create_file(attr->name, attr->mode, root, (void *)attr, + attr->fops); + debugfs_create_file("exception_event_mask", 0600, hba->debugfs_root, + hba, &ee_usr_mask_fops); + debugfs_create_u32("exception_event_rate_limit_ms", 0600, hba->debugfs_root, + &hba->debugfs_ee_rate_limit_ms); } void ufs_debugfs_hba_exit(struct ufs_hba *hba) { debugfs_remove_recursive(hba->debugfs_root); + cancel_delayed_work_sync(&hba->debugfs_ee_work); } diff --git a/drivers/scsi/ufs/ufs-debugfs.h b/drivers/scsi/ufs/ufs-debugfs.h index f35b39c4b4f50d3451872dae3fa78ca0a698865c..97548a3f90eb8f5d72f53d4e63aa56c543560682 100644 --- a/drivers/scsi/ufs/ufs-debugfs.h +++ b/drivers/scsi/ufs/ufs-debugfs.h @@ -9,14 +9,16 @@ struct ufs_hba; #ifdef CONFIG_DEBUG_FS void __init ufs_debugfs_init(void); -void __exit ufs_debugfs_exit(void); +void ufs_debugfs_exit(void); void ufs_debugfs_hba_init(struct ufs_hba *hba); void ufs_debugfs_hba_exit(struct ufs_hba *hba); +void ufs_debugfs_exception_event(struct ufs_hba *hba, u16 status); #else static inline void ufs_debugfs_init(void) {} static inline void ufs_debugfs_exit(void) {} static inline void ufs_debugfs_hba_init(struct ufs_hba *hba) {} static inline void ufs_debugfs_hba_exit(struct ufs_hba *hba) {} +static inline void ufs_debugfs_exception_event(struct ufs_hba *hba, u16 status) {} #endif #endif diff --git a/drivers/scsi/ufs/ufs-exynos.c b/drivers/scsi/ufs/ufs-exynos.c index f0c74398ccca322998f159570025145a079bff7e..474a4a064a68da3097b2ea4a414c989abe596881 100644 --- a/drivers/scsi/ufs/ufs-exynos.c +++ b/drivers/scsi/ufs/ufs-exynos.c @@ -12,8 +12,10 @@ #include #include #include +#include #include #include +#include #include "ufshcd.h" #include "ufshcd-pltfrm.h" @@ -48,10 +50,11 @@ #define HCI_ERR_EN_T_LAYER 0x84 #define HCI_ERR_EN_DME_LAYER 0x88 #define HCI_CLKSTOP_CTRL 0xB0 +#define REFCLKOUT_STOP BIT(4) #define REFCLK_STOP BIT(2) #define UNIPRO_MCLK_STOP BIT(1) #define UNIPRO_PCLK_STOP BIT(0) -#define CLK_STOP_MASK (REFCLK_STOP |\ +#define CLK_STOP_MASK (REFCLKOUT_STOP | REFCLK_STOP |\ UNIPRO_MCLK_STOP |\ UNIPRO_PCLK_STOP) #define HCI_MISC 0xB4 @@ -74,6 +77,52 @@ UIC_TRANSPORT_NO_CONNECTION_RX |\ UIC_TRANSPORT_BAD_TC) +/* FSYS UFS Shareability */ +#define UFS_WR_SHARABLE BIT(2) +#define UFS_RD_SHARABLE BIT(1) +#define UFS_SHARABLE (UFS_WR_SHARABLE | UFS_RD_SHARABLE) +#define UFS_SHAREABILITY_OFFSET 0x710 + +/* Multi-host registers */ +#define MHCTRL 0xC4 +#define MHCTRL_EN_VH_MASK (0xE) +#define MHCTRL_EN_VH(vh) (vh << 1) +#define PH2VH_MBOX 0xD8 + +#define MH_MSG_MASK (0xFF) + +#define MH_MSG(id, msg) ((id << 8) | (msg & 0xFF)) +#define MH_MSG_PH_READY 0x1 +#define MH_MSG_VH_READY 0x2 + +#define ALLOW_INQUIRY BIT(25) +#define ALLOW_MODE_SELECT BIT(24) +#define ALLOW_MODE_SENSE BIT(23) +#define ALLOW_PRE_FETCH GENMASK(22, 21) +#define ALLOW_READ_CMD_ALL GENMASK(20, 18) /* read_6/10/16 */ +#define ALLOW_READ_BUFFER BIT(17) +#define ALLOW_READ_CAPACITY GENMASK(16, 15) +#define ALLOW_REPORT_LUNS BIT(14) +#define ALLOW_REQUEST_SENSE BIT(13) +#define ALLOW_SYNCHRONIZE_CACHE GENMASK(8, 7) +#define ALLOW_TEST_UNIT_READY BIT(6) +#define ALLOW_UNMAP BIT(5) +#define ALLOW_VERIFY BIT(4) +#define ALLOW_WRITE_CMD_ALL GENMASK(3, 1) /* write_6/10/16 */ + +#define ALLOW_TRANS_VH_DEFAULT (ALLOW_INQUIRY | ALLOW_MODE_SELECT | \ + ALLOW_MODE_SENSE | ALLOW_PRE_FETCH | \ + ALLOW_READ_CMD_ALL | ALLOW_READ_BUFFER | \ + ALLOW_READ_CAPACITY | ALLOW_REPORT_LUNS | \ + ALLOW_REQUEST_SENSE | ALLOW_SYNCHRONIZE_CACHE | \ + ALLOW_TEST_UNIT_READY | ALLOW_UNMAP | \ + ALLOW_VERIFY | ALLOW_WRITE_CMD_ALL) + +#define HCI_MH_ALLOWABLE_TRAN_OF_VH 0x30C +#define HCI_MH_IID_IN_TASK_TAG 0X308 + +#define PH_READY_TIMEOUT_MS (5 * MSEC_PER_SEC) + enum { UNIPRO_L1_5 = 0,/* PHY Adapter */ UNIPRO_L2, /* Data Link */ @@ -107,6 +156,7 @@ enum { #define CNTR_DIV_VAL 40 +static struct exynos_ufs_drv_data exynos_ufs_drvs; static void exynos_ufs_auto_ctrl_hcc(struct exynos_ufs *ufs, bool en); static void exynos_ufs_ctrl_clkstop(struct exynos_ufs *ufs, bool en); @@ -148,6 +198,117 @@ static int exynos7_ufs_drv_init(struct device *dev, struct exynos_ufs *ufs) return 0; } +static int exynosauto_ufs_drv_init(struct device *dev, struct exynos_ufs *ufs) +{ + struct exynos_ufs_uic_attr *attr = ufs->drv_data->uic_attr; + + /* IO Coherency setting */ + if (ufs->sysreg) { + return regmap_update_bits(ufs->sysreg, + ufs->shareability_reg_offset, + UFS_SHARABLE, UFS_SHARABLE); + } + + attr->tx_dif_p_nsec = 3200000; + + return 0; +} + +static int exynosauto_ufs_post_hce_enable(struct exynos_ufs *ufs) +{ + struct ufs_hba *hba = ufs->hba; + + /* Enable Virtual Host #1 */ + ufshcd_rmwl(hba, MHCTRL_EN_VH_MASK, MHCTRL_EN_VH(1), MHCTRL); + /* Default VH Transfer permissions */ + hci_writel(ufs, ALLOW_TRANS_VH_DEFAULT, HCI_MH_ALLOWABLE_TRAN_OF_VH); + /* IID information is replaced in TASKTAG[7:5] instead of IID in UCD */ + hci_writel(ufs, 0x1, HCI_MH_IID_IN_TASK_TAG); + + return 0; +} + +static int exynosauto_ufs_pre_link(struct exynos_ufs *ufs) +{ + struct ufs_hba *hba = ufs->hba; + int i; + u32 tx_line_reset_period, rx_line_reset_period; + + rx_line_reset_period = (RX_LINE_RESET_TIME * ufs->mclk_rate) / NSEC_PER_MSEC; + tx_line_reset_period = (TX_LINE_RESET_TIME * ufs->mclk_rate) / NSEC_PER_MSEC; + + ufshcd_dme_set(hba, UIC_ARG_MIB(0x200), 0x40); + for_each_ufs_rx_lane(ufs, i) { + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_CLK_PRD, i), + DIV_ROUND_UP(NSEC_PER_SEC, ufs->mclk_rate)); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_CLK_PRD_EN, i), 0x0); + + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_LINERESET_VALUE2, i), + (rx_line_reset_period >> 16) & 0xFF); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_LINERESET_VALUE1, i), + (rx_line_reset_period >> 8) & 0xFF); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_RX_LINERESET_VALUE0, i), + (rx_line_reset_period) & 0xFF); + + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(0x2f, i), 0x79); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(0x84, i), 0x1); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(0x25, i), 0xf6); + } + + for_each_ufs_tx_lane(ufs, i) { + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_CLK_PRD, i), + DIV_ROUND_UP(NSEC_PER_SEC, ufs->mclk_rate)); + /* Not to affect VND_TX_LINERESET_PVALUE to VND_TX_CLK_PRD */ + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_CLK_PRD_EN, i), + 0x02); + + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_LINERESET_PVALUE2, i), + (tx_line_reset_period >> 16) & 0xFF); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_LINERESET_PVALUE1, i), + (tx_line_reset_period >> 8) & 0xFF); + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VND_TX_LINERESET_PVALUE0, i), + (tx_line_reset_period) & 0xFF); + + /* TX PWM Gear Capability / PWM_G1_ONLY */ + ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(0x04, i), 0x1); + } + + ufshcd_dme_set(hba, UIC_ARG_MIB(0x200), 0x0); + + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_LOCAL_TX_LCC_ENABLE), 0x0); + + ufshcd_dme_set(hba, UIC_ARG_MIB(0xa011), 0x8000); + + return 0; +} + +static int exynosauto_ufs_pre_pwr_change(struct exynos_ufs *ufs, + struct ufs_pa_layer_attr *pwr) +{ + struct ufs_hba *hba = ufs->hba; + + /* PACP_PWR_req and delivered to the remote DME */ + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA0), 12000); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA1), 32000); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA2), 16000); + + return 0; +} + +static int exynosauto_ufs_post_pwr_change(struct exynos_ufs *ufs, + struct ufs_pa_layer_attr *pwr) +{ + struct ufs_hba *hba = ufs->hba; + u32 enabled_vh; + + enabled_vh = ufshcd_readl(hba, MHCTRL) & MHCTRL_EN_VH_MASK; + + /* Send physical host ready message to virtual hosts */ + ufshcd_writel(hba, MH_MSG(enabled_vh, MH_MSG_PH_READY), PH2VH_MBOX); + + return 0; +} + static int exynos7_ufs_pre_link(struct exynos_ufs *ufs) { struct ufs_hba *hba = ufs->hba; @@ -617,20 +778,7 @@ static int exynos_ufs_pre_pwr_mode(struct ufs_hba *hba, goto out; } - - ufs_exynos_cap.tx_lanes = UFS_EXYNOS_LIMIT_NUM_LANES_TX; - ufs_exynos_cap.rx_lanes = UFS_EXYNOS_LIMIT_NUM_LANES_RX; - ufs_exynos_cap.hs_rx_gear = UFS_EXYNOS_LIMIT_HSGEAR_RX; - ufs_exynos_cap.hs_tx_gear = UFS_EXYNOS_LIMIT_HSGEAR_TX; - ufs_exynos_cap.pwm_rx_gear = UFS_EXYNOS_LIMIT_PWMGEAR_RX; - ufs_exynos_cap.pwm_tx_gear = UFS_EXYNOS_LIMIT_PWMGEAR_TX; - ufs_exynos_cap.rx_pwr_pwm = UFS_EXYNOS_LIMIT_RX_PWR_PWM; - ufs_exynos_cap.tx_pwr_pwm = UFS_EXYNOS_LIMIT_TX_PWR_PWM; - ufs_exynos_cap.rx_pwr_hs = UFS_EXYNOS_LIMIT_RX_PWR_HS; - ufs_exynos_cap.tx_pwr_hs = UFS_EXYNOS_LIMIT_TX_PWR_HS; - ufs_exynos_cap.hs_rate = UFS_EXYNOS_LIMIT_HS_RATE; - ufs_exynos_cap.desired_working_mode = - UFS_EXYNOS_LIMIT_DESIRED_MODE; + ufshcd_init_pwr_dev_param(&ufs_exynos_cap); ret = ufshcd_get_pwr_dev_param(&ufs_exynos_cap, dev_max_params, dev_req_params); @@ -654,9 +802,9 @@ static int exynos_ufs_pre_pwr_mode(struct ufs_hba *hba, } /* setting for three timeout values for traffic class #0 */ - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA0), 8064); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA1), 28224); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA2), 20160); + ufshcd_dme_set(hba, UIC_ARG_MIB(DL_FC0PROTTIMEOUTVAL), 8064); + ufshcd_dme_set(hba, UIC_ARG_MIB(DL_TC0REPLAYTIMEOUTVAL), 28224); + ufshcd_dme_set(hba, UIC_ARG_MIB(DL_AFC0REQTIMEOUTVAL), 20160); return 0; out: @@ -665,7 +813,6 @@ out: #define PWR_MODE_STR_LEN 64 static int exynos_ufs_post_pwr_mode(struct ufs_hba *hba, - struct ufs_pa_layer_attr *pwr_max, struct ufs_pa_layer_attr *pwr_req) { struct exynos_ufs *ufs = ufshcd_get_variant(hba); @@ -706,14 +853,14 @@ static int exynos_ufs_post_pwr_mode(struct ufs_hba *hba, } static void exynos_ufs_specify_nexus_t_xfer_req(struct ufs_hba *hba, - int tag, bool op) + int tag, bool is_scsi_cmd) { struct exynos_ufs *ufs = ufshcd_get_variant(hba); u32 type; type = hci_readl(ufs, HCI_UTRL_NEXUS_TYPE); - if (op) + if (is_scsi_cmd) hci_writel(ufs, type | (1 << tag), HCI_UTRL_NEXUS_TYPE); else hci_writel(ufs, type & ~(1 << tag), HCI_UTRL_NEXUS_TYPE); @@ -806,6 +953,27 @@ static void exynos_ufs_config_intr(struct exynos_ufs *ufs, u32 errs, u8 index) } } +static int exynos_ufs_setup_clocks(struct ufs_hba *hba, bool on, + enum ufs_notify_change_status status) +{ + struct exynos_ufs *ufs = ufshcd_get_variant(hba); + + if (!ufs) + return 0; + + if (on && status == PRE_CHANGE) { + if (ufs->opts & EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL) + exynos_ufs_disable_auto_ctrl_hcc(ufs); + exynos_ufs_ungate_clks(ufs); + } else if (!on && status == POST_CHANGE) { + exynos_ufs_gate_clks(ufs); + if (ufs->opts & EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL) + exynos_ufs_enable_auto_ctrl_hcc(ufs); + } + + return 0; +} + static int exynos_ufs_pre_link(struct ufs_hba *hba) { struct exynos_ufs *ufs = ufshcd_get_variant(hba); @@ -821,8 +989,12 @@ static int exynos_ufs_pre_link(struct ufs_hba *hba) /* m-phy */ exynos_ufs_phy_init(ufs); - exynos_ufs_config_phy_time_attr(ufs); - exynos_ufs_config_phy_cap_attr(ufs); + if (!(ufs->opts & EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR)) { + exynos_ufs_config_phy_time_attr(ufs); + exynos_ufs_config_phy_cap_attr(ufs); + } + + exynos_ufs_setup_clocks(hba, true, PRE_CHANGE); if (ufs->drv_data->pre_link) ufs->drv_data->pre_link(ufs); @@ -906,17 +1078,10 @@ static int exynos_ufs_post_link(struct ufs_hba *hba) static int exynos_ufs_parse_dt(struct device *dev, struct exynos_ufs *ufs) { struct device_node *np = dev->of_node; - struct exynos_ufs_drv_data *drv_data = &exynos_ufs_drvs; struct exynos_ufs_uic_attr *attr; int ret = 0; - while (drv_data->compatible) { - if (of_device_is_compatible(np, drv_data->compatible)) { - ufs->drv_data = drv_data; - break; - } - drv_data++; - } + ufs->drv_data = device_get_match_data(dev); if (ufs->drv_data && ufs->drv_data->uic_attr) { attr = ufs->drv_data->uic_attr; @@ -926,6 +1091,17 @@ static int exynos_ufs_parse_dt(struct device *dev, struct exynos_ufs *ufs) goto out; } + ufs->sysreg = syscon_regmap_lookup_by_phandle(np, "samsung,sysreg"); + if (IS_ERR(ufs->sysreg)) + ufs->sysreg = NULL; + else { + if (of_property_read_u32_index(np, "samsung,sysreg", 1, + &ufs->shareability_reg_offset)) { + dev_warn(dev, "can't get an offset from sysreg. Set to default value\n"); + ufs->shareability_reg_offset = UFS_SHAREABILITY_OFFSET; + } + } + ufs->pclk_avail_min = PCLK_AVAIL_MIN; ufs->pclk_avail_max = PCLK_AVAIL_MAX; @@ -940,6 +1116,18 @@ out: return ret; } +static inline void exynos_ufs_priv_init(struct ufs_hba *hba, + struct exynos_ufs *ufs) +{ + ufs->hba = hba; + ufs->opts = ufs->drv_data->opts; + ufs->rx_sel_idx = PA_MAXDATALANES; + if (ufs->opts & EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX) + ufs->rx_sel_idx = 0; + hba->priv = (void *)ufs; + hba->quirks = ufs->drv_data->quirks; +} + static int exynos_ufs_init(struct ufs_hba *hba) { struct device *dev = hba->dev; @@ -989,13 +1177,8 @@ static int exynos_ufs_init(struct ufs_hba *hba) if (ret) goto phy_off; - ufs->hba = hba; - ufs->opts = ufs->drv_data->opts; - ufs->rx_sel_idx = PA_MAXDATALANES; - if (ufs->opts & EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX) - ufs->rx_sel_idx = 0; - hba->priv = (void *)ufs; - hba->quirks = ufs->drv_data->quirks; + exynos_ufs_priv_init(hba, ufs); + if (ufs->drv_data->drv_init) { ret = ufs->drv_data->drv_init(dev, ufs); if (ret) { @@ -1062,7 +1245,7 @@ static void exynos_ufs_pre_hibern8(struct ufs_hba *hba, u8 enter) exynos_ufs_ungate_clks(ufs); if (ufs->opts & EXYNOS_UFS_OPT_USE_SW_HIBERN8_TIMER) { - const unsigned int granularity_tbl[] = { + static const unsigned int granularity_tbl[] = { 1, 4, 8, 16, 32, 100 }; int h8_time = attr->pa_hibern8time * @@ -1123,6 +1306,12 @@ static int exynos_ufs_hce_enable_notify(struct ufs_hba *hba, switch (status) { case PRE_CHANGE: + if (ufs->drv_data->pre_hce_enable) { + ret = ufs->drv_data->pre_hce_enable(ufs); + if (ret) + return ret; + } + ret = exynos_ufs_host_reset(hba); if (ret) return ret; @@ -1132,6 +1321,10 @@ static int exynos_ufs_hce_enable_notify(struct ufs_hba *hba, exynos_ufs_calc_pwm_clk_div(ufs); if (!(ufs->opts & EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL)) exynos_ufs_enable_auto_ctrl_hcc(ufs); + + if (ufs->drv_data->post_hce_enable) + ret = ufs->drv_data->post_hce_enable(ufs); + break; } @@ -1168,7 +1361,7 @@ static int exynos_ufs_pwr_change_notify(struct ufs_hba *hba, dev_req_params); break; case POST_CHANGE: - ret = exynos_ufs_post_pwr_mode(hba, NULL, dev_req_params); + ret = exynos_ufs_post_pwr_mode(hba, dev_req_params); break; } @@ -1189,10 +1382,14 @@ static void exynos_ufs_hibern8_notify(struct ufs_hba *hba, } } -static int exynos_ufs_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) +static int exynos_ufs_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, + enum ufs_notify_change_status status) { struct exynos_ufs *ufs = ufshcd_get_variant(hba); + if (status == PRE_CHANGE) + return 0; + if (!ufshcd_is_link_active(hba)) phy_power_off(ufs->phy); @@ -1211,12 +1408,77 @@ static int exynos_ufs_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) return 0; } +static int exynosauto_ufs_vh_link_startup_notify(struct ufs_hba *hba, + enum ufs_notify_change_status status) +{ + if (status == POST_CHANGE) { + ufshcd_set_link_active(hba); + ufshcd_set_ufs_dev_active(hba); + } + + return 0; +} + +static int exynosauto_ufs_vh_wait_ph_ready(struct ufs_hba *hba) +{ + u32 mbox; + ktime_t start, stop; + + start = ktime_get(); + stop = ktime_add(start, ms_to_ktime(PH_READY_TIMEOUT_MS)); + + do { + mbox = ufshcd_readl(hba, PH2VH_MBOX); + /* TODO: Mailbox message protocols between the PH and VHs are + * not implemented yet. This will be supported later + */ + if ((mbox & MH_MSG_MASK) == MH_MSG_PH_READY) + return 0; + + usleep_range(40, 50); + } while (ktime_before(ktime_get(), stop)); + + return -ETIME; +} + +static int exynosauto_ufs_vh_init(struct ufs_hba *hba) +{ + struct device *dev = hba->dev; + struct platform_device *pdev = to_platform_device(dev); + struct exynos_ufs *ufs; + int ret; + + ufs = devm_kzalloc(dev, sizeof(*ufs), GFP_KERNEL); + if (!ufs) + return -ENOMEM; + + /* exynos-specific hci */ + ufs->reg_hci = devm_platform_ioremap_resource_byname(pdev, "vs_hci"); + if (IS_ERR(ufs->reg_hci)) { + dev_err(dev, "cannot ioremap for hci vendor register\n"); + return PTR_ERR(ufs->reg_hci); + } + + ret = exynosauto_ufs_vh_wait_ph_ready(hba); + if (ret) + return ret; + + ufs->drv_data = device_get_match_data(dev); + if (!ufs->drv_data) + return -ENODEV; + + exynos_ufs_priv_init(hba, ufs); + + return 0; +} + static struct ufs_hba_variant_ops ufs_hba_exynos_ops = { .name = "exynos_ufs", .init = exynos_ufs_init, .hce_enable_notify = exynos_ufs_hce_enable_notify, .link_startup_notify = exynos_ufs_link_startup_notify, .pwr_change_notify = exynos_ufs_pwr_change_notify, + .setup_clocks = exynos_ufs_setup_clocks, .setup_xfer_req = exynos_ufs_specify_nexus_t_xfer_req, .setup_task_mgmt = exynos_ufs_specify_nexus_t_tm_req, .hibern8_notify = exynos_ufs_hibern8_notify, @@ -1224,12 +1486,24 @@ static struct ufs_hba_variant_ops ufs_hba_exynos_ops = { .resume = exynos_ufs_resume, }; +static struct ufs_hba_variant_ops ufs_hba_exynosauto_vh_ops = { + .name = "exynosauto_ufs_vh", + .init = exynosauto_ufs_vh_init, + .link_startup_notify = exynosauto_ufs_vh_link_startup_notify, +}; + static int exynos_ufs_probe(struct platform_device *pdev) { int err; struct device *dev = &pdev->dev; + const struct ufs_hba_variant_ops *vops = &ufs_hba_exynos_ops; + const struct exynos_ufs_drv_data *drv_data = + device_get_match_data(dev); + + if (drv_data && drv_data->vops) + vops = drv_data->vops; - err = ufshcd_pltfrm_init(pdev, &ufs_hba_exynos_ops); + err = ufshcd_pltfrm_init(pdev, vops); if (err) dev_err(dev, "ufshcd_pltfrm_init() failed %d\n", err); @@ -1245,9 +1519,60 @@ static int exynos_ufs_remove(struct platform_device *pdev) return 0; } -struct exynos_ufs_drv_data exynos_ufs_drvs = { +static struct exynos_ufs_uic_attr exynos7_uic_attr = { + .tx_trailingclks = 0x10, + .tx_dif_p_nsec = 3000000, /* unit: ns */ + .tx_dif_n_nsec = 1000000, /* unit: ns */ + .tx_high_z_cnt_nsec = 20000, /* unit: ns */ + .tx_base_unit_nsec = 100000, /* unit: ns */ + .tx_gran_unit_nsec = 4000, /* unit: ns */ + .tx_sleep_cnt = 1000, /* unit: ns */ + .tx_min_activatetime = 0xa, + .rx_filler_enable = 0x2, + .rx_dif_p_nsec = 1000000, /* unit: ns */ + .rx_hibern8_wait_nsec = 4000000, /* unit: ns */ + .rx_base_unit_nsec = 100000, /* unit: ns */ + .rx_gran_unit_nsec = 4000, /* unit: ns */ + .rx_sleep_cnt = 1280, /* unit: ns */ + .rx_stall_cnt = 320, /* unit: ns */ + .rx_hs_g1_sync_len_cap = SYNC_LEN_COARSE(0xf), + .rx_hs_g2_sync_len_cap = SYNC_LEN_COARSE(0xf), + .rx_hs_g3_sync_len_cap = SYNC_LEN_COARSE(0xf), + .rx_hs_g1_prep_sync_len_cap = PREP_LEN(0xf), + .rx_hs_g2_prep_sync_len_cap = PREP_LEN(0xf), + .rx_hs_g3_prep_sync_len_cap = PREP_LEN(0xf), + .pa_dbg_option_suite = 0x30103, +}; - .compatible = "samsung,exynos7-ufs", +static struct exynos_ufs_drv_data exynosauto_ufs_drvs = { + .uic_attr = &exynos7_uic_attr, + .quirks = UFSHCD_QUIRK_PRDT_BYTE_GRAN | + UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR | + UFSHCD_QUIRK_BROKEN_OCS_FATAL_ERROR | + UFSHCD_QUIRK_SKIP_DEF_UNIPRO_TIMEOUT_SETTING, + .opts = EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL | + EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR | + EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX, + .drv_init = exynosauto_ufs_drv_init, + .post_hce_enable = exynosauto_ufs_post_hce_enable, + .pre_link = exynosauto_ufs_pre_link, + .pre_pwr_change = exynosauto_ufs_pre_pwr_change, + .post_pwr_change = exynosauto_ufs_post_pwr_change, +}; + +static struct exynos_ufs_drv_data exynosauto_ufs_vh_drvs = { + .vops = &ufs_hba_exynosauto_vh_ops, + .quirks = UFSHCD_QUIRK_PRDT_BYTE_GRAN | + UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR | + UFSHCD_QUIRK_BROKEN_OCS_FATAL_ERROR | + UFSHCI_QUIRK_BROKEN_HCE | + UFSHCD_QUIRK_BROKEN_UIC_CMD | + UFSHCD_QUIRK_SKIP_PH_CONFIGURATION | + UFSHCD_QUIRK_SKIP_DEF_UNIPRO_TIMEOUT_SETTING, + .opts = EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX, +}; + +static struct exynos_ufs_drv_data exynos_ufs_drvs = { .uic_attr = &exynos7_uic_attr, .quirks = UFSHCD_QUIRK_PRDT_BYTE_GRAN | UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR | @@ -1261,8 +1586,7 @@ struct exynos_ufs_drv_data exynos_ufs_drvs = { EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL | EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX | EXYNOS_UFS_OPT_SKIP_CONNECTION_ESTAB | - EXYNOS_UFS_OPT_USE_SW_HIBERN8_TIMER | - UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE, + EXYNOS_UFS_OPT_USE_SW_HIBERN8_TIMER, .drv_init = exynos7_ufs_drv_init, .pre_link = exynos7_ufs_pre_link, .post_link = exynos7_ufs_post_link, @@ -1273,15 +1597,18 @@ struct exynos_ufs_drv_data exynos_ufs_drvs = { static const struct of_device_id exynos_ufs_of_match[] = { { .compatible = "samsung,exynos7-ufs", .data = &exynos_ufs_drvs }, + { .compatible = "samsung,exynosautov9-ufs", + .data = &exynosauto_ufs_drvs }, + { .compatible = "samsung,exynosautov9-ufs-vh", + .data = &exynosauto_ufs_vh_drvs }, {}, }; static const struct dev_pm_ops exynos_ufs_pm_ops = { - .suspend = ufshcd_pltfrm_suspend, - .resume = ufshcd_pltfrm_resume, - .runtime_suspend = ufshcd_pltfrm_runtime_suspend, - .runtime_resume = ufshcd_pltfrm_runtime_resume, - .runtime_idle = ufshcd_pltfrm_runtime_idle, + SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume) + SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL) + .prepare = ufshcd_suspend_prepare, + .complete = ufshcd_resume_complete, }; static struct platform_driver exynos_ufs_pltform = { diff --git a/drivers/scsi/ufs/ufs-exynos.h b/drivers/scsi/ufs/ufs-exynos.h index 541b577c371ce2628aae1bc005cf2be335122ba4..1c33e5466082bf50e1610bc297449eafb6549725 100644 --- a/drivers/scsi/ufs/ufs-exynos.h +++ b/drivers/scsi/ufs/ufs-exynos.h @@ -56,6 +56,22 @@ #define TX_GRAN_NVAL_10_08 0x0296 #define TX_GRAN_NVAL_H(v) (((v) >> 8) & 0x3) +#define VND_TX_CLK_PRD 0xAA +#define VND_TX_CLK_PRD_EN 0xA9 +#define VND_TX_LINERESET_PVALUE0 0xAD +#define VND_TX_LINERESET_PVALUE1 0xAC +#define VND_TX_LINERESET_PVALUE2 0xAB + +#define TX_LINE_RESET_TIME 3200 + +#define VND_RX_CLK_PRD 0x12 +#define VND_RX_CLK_PRD_EN 0x11 +#define VND_RX_LINERESET_VALUE0 0x1D +#define VND_RX_LINERESET_VALUE1 0x1C +#define VND_RX_LINERESET_VALUE2 0x1B + +#define RX_LINE_RESET_TIME 1000 + #define RX_FILLER_ENABLE 0x0316 #define RX_FILLER_EN (1 << 1) #define RX_LINERESET_VAL 0x0317 @@ -90,19 +106,6 @@ struct exynos_ufs; #define SLOW 1 #define FAST 2 -#define UFS_EXYNOS_LIMIT_NUM_LANES_RX 2 -#define UFS_EXYNOS_LIMIT_NUM_LANES_TX 2 -#define UFS_EXYNOS_LIMIT_HSGEAR_RX UFS_HS_G3 -#define UFS_EXYNOS_LIMIT_HSGEAR_TX UFS_HS_G3 -#define UFS_EXYNOS_LIMIT_PWMGEAR_RX UFS_PWM_G4 -#define UFS_EXYNOS_LIMIT_PWMGEAR_TX UFS_PWM_G4 -#define UFS_EXYNOS_LIMIT_RX_PWR_PWM SLOW_MODE -#define UFS_EXYNOS_LIMIT_TX_PWR_PWM SLOW_MODE -#define UFS_EXYNOS_LIMIT_RX_PWR_HS FAST_MODE -#define UFS_EXYNOS_LIMIT_TX_PWR_HS FAST_MODE -#define UFS_EXYNOS_LIMIT_HS_RATE PA_HS_MODE_B -#define UFS_EXYNOS_LIMIT_DESIRED_MODE FAST - #define RX_ADV_FINE_GRAN_SUP_EN 0x1 #define RX_ADV_FINE_GRAN_STEP_VAL 0x3 #define RX_ADV_MIN_ACTV_TIME_CAP 0x9 @@ -112,7 +115,7 @@ struct exynos_ufs; #define PA_HIBERN8TIME_VAL 0x20 #define PCLK_AVAIL_MIN 70000000 -#define PCLK_AVAIL_MAX 133000000 +#define PCLK_AVAIL_MAX 167000000 struct exynos_ufs_uic_attr { /* TX Attributes */ @@ -155,7 +158,7 @@ struct exynos_ufs_uic_attr { }; struct exynos_ufs_drv_data { - char *compatible; + const struct ufs_hba_variant_ops *vops; struct exynos_ufs_uic_attr *uic_attr; unsigned int quirks; unsigned int opts; @@ -167,6 +170,8 @@ struct exynos_ufs_drv_data { struct ufs_pa_layer_attr *pwr); int (*post_pwr_change)(struct exynos_ufs *ufs, struct ufs_pa_layer_attr *pwr); + int (*pre_hce_enable)(struct exynos_ufs *ufs); + int (*post_hce_enable)(struct exynos_ufs *ufs); }; struct ufs_phy_time_cfg { @@ -204,7 +209,9 @@ struct exynos_ufs { struct ufs_pa_layer_attr dev_req_params; struct ufs_phy_time_cfg t_cfg; ktime_t entry_hibern8_t; - struct exynos_ufs_drv_data *drv_data; + const struct exynos_ufs_drv_data *drv_data; + struct regmap *sysreg; + u32 shareability_reg_offset; u32 opts; #define EXYNOS_UFS_OPT_HAS_APB_CLK_CTRL BIT(0) @@ -212,6 +219,7 @@ struct exynos_ufs { #define EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL BIT(2) #define EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX BIT(3) #define EXYNOS_UFS_OPT_USE_SW_HIBERN8_TIMER BIT(4) +#define EXYNOS_UFS_OPT_SKIP_CONFIG_PHY_ATTR BIT(5) }; #define for_each_ufs_rx_lane(ufs, i) \ @@ -258,30 +266,4 @@ static inline void exynos_ufs_disable_dbg_mode(struct ufs_hba *hba) ufshcd_dme_set(hba, UIC_ARG_MIB(PA_DBG_MODE), FALSE); } -struct exynos_ufs_drv_data exynos_ufs_drvs; - -struct exynos_ufs_uic_attr exynos7_uic_attr = { - .tx_trailingclks = 0x10, - .tx_dif_p_nsec = 3000000, /* unit: ns */ - .tx_dif_n_nsec = 1000000, /* unit: ns */ - .tx_high_z_cnt_nsec = 20000, /* unit: ns */ - .tx_base_unit_nsec = 100000, /* unit: ns */ - .tx_gran_unit_nsec = 4000, /* unit: ns */ - .tx_sleep_cnt = 1000, /* unit: ns */ - .tx_min_activatetime = 0xa, - .rx_filler_enable = 0x2, - .rx_dif_p_nsec = 1000000, /* unit: ns */ - .rx_hibern8_wait_nsec = 4000000, /* unit: ns */ - .rx_base_unit_nsec = 100000, /* unit: ns */ - .rx_gran_unit_nsec = 4000, /* unit: ns */ - .rx_sleep_cnt = 1280, /* unit: ns */ - .rx_stall_cnt = 320, /* unit: ns */ - .rx_hs_g1_sync_len_cap = SYNC_LEN_COARSE(0xf), - .rx_hs_g2_sync_len_cap = SYNC_LEN_COARSE(0xf), - .rx_hs_g3_sync_len_cap = SYNC_LEN_COARSE(0xf), - .rx_hs_g1_prep_sync_len_cap = PREP_LEN(0xf), - .rx_hs_g2_prep_sync_len_cap = PREP_LEN(0xf), - .rx_hs_g3_prep_sync_len_cap = PREP_LEN(0xf), - .pa_dbg_option_suite = 0x30103, -}; #endif /* _UFS_EXYNOS_H_ */ diff --git a/drivers/scsi/ufs/ufs-fault-injection.c b/drivers/scsi/ufs/ufs-fault-injection.c new file mode 100644 index 0000000000000000000000000000000000000000..7ac7c4e7ff831518921d96c1c62db67ba8ac5d5a --- /dev/null +++ b/drivers/scsi/ufs/ufs-fault-injection.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include +#include +#include +#include +#include "ufs-fault-injection.h" + +static int ufs_fault_get(char *buffer, const struct kernel_param *kp); +static int ufs_fault_set(const char *val, const struct kernel_param *kp); + +static const struct kernel_param_ops ufs_fault_ops = { + .get = ufs_fault_get, + .set = ufs_fault_set, +}; + +enum { FAULT_INJ_STR_SIZE = 80 }; + +/* + * For more details about fault injection, please refer to + * Documentation/fault-injection/fault-injection.rst. + */ +static char g_trigger_eh_str[FAULT_INJ_STR_SIZE]; +module_param_cb(trigger_eh, &ufs_fault_ops, g_trigger_eh_str, 0644); +MODULE_PARM_DESC(trigger_eh, + "Fault injection. trigger_eh=,,,"); +static DECLARE_FAULT_ATTR(ufs_trigger_eh_attr); + +static char g_timeout_str[FAULT_INJ_STR_SIZE]; +module_param_cb(timeout, &ufs_fault_ops, g_timeout_str, 0644); +MODULE_PARM_DESC(timeout, + "Fault injection. timeout=,,,"); +static DECLARE_FAULT_ATTR(ufs_timeout_attr); + +static int ufs_fault_get(char *buffer, const struct kernel_param *kp) +{ + const char *fault_str = kp->arg; + + return sysfs_emit(buffer, "%s\n", fault_str); +} + +static int ufs_fault_set(const char *val, const struct kernel_param *kp) +{ + struct fault_attr *attr = NULL; + + if (kp->arg == g_trigger_eh_str) + attr = &ufs_trigger_eh_attr; + else if (kp->arg == g_timeout_str) + attr = &ufs_timeout_attr; + + if (WARN_ON_ONCE(!attr)) + return -EINVAL; + + if (!setup_fault_attr(attr, (char *)val)) + return -EINVAL; + + strlcpy(kp->arg, val, FAULT_INJ_STR_SIZE); + + return 0; +} + +bool ufs_trigger_eh(void) +{ + return should_fail(&ufs_trigger_eh_attr, 1); +} + +bool ufs_fail_completion(void) +{ + return should_fail(&ufs_timeout_attr, 1); +} diff --git a/drivers/scsi/ufs/ufs-fault-injection.h b/drivers/scsi/ufs/ufs-fault-injection.h new file mode 100644 index 0000000000000000000000000000000000000000..6d0cd8e10c87aa6ccf28da744b43bc4a7553b55c --- /dev/null +++ b/drivers/scsi/ufs/ufs-fault-injection.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _UFS_FAULT_INJECTION_H +#define _UFS_FAULT_INJECTION_H + +#include +#include + +#ifdef CONFIG_SCSI_UFS_FAULT_INJECTION +bool ufs_trigger_eh(void); +bool ufs_fail_completion(void); +#else +static inline bool ufs_trigger_eh(void) +{ + return false; +} + +static inline bool ufs_fail_completion(void) +{ + return false; +} +#endif + +#endif /* _UFS_FAULT_INJECTION_H */ diff --git a/drivers/scsi/ufs/ufs-hisi.c b/drivers/scsi/ufs/ufs-hisi.c index 55b7161d7697791d9b65a7e513a03ab27ef255af..ab1a7ebd89b1ce808ead593ace5d068eaf6f88da 100644 --- a/drivers/scsi/ufs/ufs-hisi.c +++ b/drivers/scsi/ufs/ufs-hisi.c @@ -293,18 +293,7 @@ static int ufs_hisi_link_startup_notify(struct ufs_hba *hba, static void ufs_hisi_set_dev_cap(struct ufs_dev_params *hisi_param) { - hisi_param->rx_lanes = UFS_HISI_LIMIT_NUM_LANES_RX; - hisi_param->tx_lanes = UFS_HISI_LIMIT_NUM_LANES_TX; - hisi_param->hs_rx_gear = UFS_HISI_LIMIT_HSGEAR_RX; - hisi_param->hs_tx_gear = UFS_HISI_LIMIT_HSGEAR_TX; - hisi_param->pwm_rx_gear = UFS_HISI_LIMIT_PWMGEAR_RX; - hisi_param->pwm_tx_gear = UFS_HISI_LIMIT_PWMGEAR_TX; - hisi_param->rx_pwr_pwm = UFS_HISI_LIMIT_RX_PWR_PWM; - hisi_param->tx_pwr_pwm = UFS_HISI_LIMIT_TX_PWR_PWM; - hisi_param->rx_pwr_hs = UFS_HISI_LIMIT_RX_PWR_HS; - hisi_param->tx_pwr_hs = UFS_HISI_LIMIT_TX_PWR_HS; - hisi_param->hs_rate = UFS_HISI_LIMIT_HS_RATE; - hisi_param->desired_working_mode = UFS_HISI_LIMIT_DESIRED_MODE; + ufshcd_init_pwr_dev_param(hisi_param); } static void ufs_hisi_pwr_change_pre_change(struct ufs_hba *hba) @@ -407,11 +396,21 @@ out: return ret; } -static int ufs_hisi_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) +static int ufs_hisi_suspend_prepare(struct device *dev) +{ + /* RPM and SPM are different. Refer ufs_hisi_suspend() */ + return __ufshcd_suspend_prepare(dev, false); +} + +static int ufs_hisi_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, + enum ufs_notify_change_status status) { struct ufs_hisi_host *host = ufshcd_get_variant(hba); - if (ufshcd_is_runtime_pm(pm_op)) + if (status == PRE_CHANGE) + return 0; + + if (pm_op == UFS_RUNTIME_PM) return 0; if (host->in_suspend) { @@ -583,11 +582,10 @@ static int ufs_hisi_remove(struct platform_device *pdev) } static const struct dev_pm_ops ufs_hisi_pm_ops = { - .suspend = ufshcd_pltfrm_suspend, - .resume = ufshcd_pltfrm_resume, - .runtime_suspend = ufshcd_pltfrm_runtime_suspend, - .runtime_resume = ufshcd_pltfrm_runtime_resume, - .runtime_idle = ufshcd_pltfrm_runtime_idle, + SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume) + SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL) + .prepare = ufs_hisi_suspend_prepare, + .complete = ufshcd_resume_complete, }; static struct platform_driver ufs_hisi_pltform = { diff --git a/drivers/scsi/ufs/ufs-hisi.h b/drivers/scsi/ufs/ufs-hisi.h index 3231d3d81c985798e85ab95b412bf1792b7591ca..5a90c0f4e90c7cadad336184b605e9c42f54e9b3 100644 --- a/drivers/scsi/ufs/ufs-hisi.h +++ b/drivers/scsi/ufs/ufs-hisi.h @@ -76,19 +76,6 @@ enum { #define SLOW 1 #define FAST 2 -#define UFS_HISI_LIMIT_NUM_LANES_RX 2 -#define UFS_HISI_LIMIT_NUM_LANES_TX 2 -#define UFS_HISI_LIMIT_HSGEAR_RX UFS_HS_G3 -#define UFS_HISI_LIMIT_HSGEAR_TX UFS_HS_G3 -#define UFS_HISI_LIMIT_PWMGEAR_RX UFS_PWM_G4 -#define UFS_HISI_LIMIT_PWMGEAR_TX UFS_PWM_G4 -#define UFS_HISI_LIMIT_RX_PWR_PWM SLOW_MODE -#define UFS_HISI_LIMIT_TX_PWR_PWM SLOW_MODE -#define UFS_HISI_LIMIT_RX_PWR_HS FAST_MODE -#define UFS_HISI_LIMIT_TX_PWR_HS FAST_MODE -#define UFS_HISI_LIMIT_HS_RATE PA_HS_MODE_B -#define UFS_HISI_LIMIT_DESIRED_MODE FAST - #define UFS_HISI_CAP_RESERVED BIT(0) #define UFS_HISI_CAP_PHY10nm BIT(1) diff --git a/drivers/scsi/ufs/ufs-hwmon.c b/drivers/scsi/ufs/ufs-hwmon.c new file mode 100644 index 0000000000000000000000000000000000000000..74855491dc8f6c76f6afb5a858bd5da5c69b115d --- /dev/null +++ b/drivers/scsi/ufs/ufs-hwmon.c @@ -0,0 +1,210 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * UFS hardware monitoring support + * Copyright (c) 2021, Western Digital Corporation + */ + +#include +#include + +#include "ufshcd.h" + +struct ufs_hwmon_data { + struct ufs_hba *hba; + u8 mask; +}; + +static int ufs_read_temp_enable(struct ufs_hba *hba, u8 mask, long *val) +{ + u32 ee_mask; + int err; + + err = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR, QUERY_ATTR_IDN_EE_CONTROL, 0, 0, + &ee_mask); + if (err) + return err; + + *val = (mask & ee_mask & MASK_EE_TOO_HIGH_TEMP) || (mask & ee_mask & MASK_EE_TOO_LOW_TEMP); + + return 0; +} + +static int ufs_get_temp(struct ufs_hba *hba, enum attr_idn idn, long *val) +{ + u32 value; + int err; + + err = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR, idn, 0, 0, &value); + if (err) + return err; + + if (value == 0) + return -ENODATA; + + *val = ((long)value - 80) * MILLIDEGREE_PER_DEGREE; + + return 0; +} + +static int ufs_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, + long *val) +{ + struct ufs_hwmon_data *data = dev_get_drvdata(dev); + struct ufs_hba *hba = data->hba; + int err; + + down(&hba->host_sem); + + if (!ufshcd_is_user_access_allowed(hba)) { + up(&hba->host_sem); + return -EBUSY; + } + + ufshcd_rpm_get_sync(hba); + + switch (attr) { + case hwmon_temp_enable: + err = ufs_read_temp_enable(hba, data->mask, val); + + break; + case hwmon_temp_crit: + err = ufs_get_temp(hba, QUERY_ATTR_IDN_HIGH_TEMP_BOUND, val); + + break; + case hwmon_temp_lcrit: + err = ufs_get_temp(hba, QUERY_ATTR_IDN_LOW_TEMP_BOUND, val); + + break; + case hwmon_temp_input: + err = ufs_get_temp(hba, QUERY_ATTR_IDN_CASE_ROUGH_TEMP, val); + + break; + default: + err = -EOPNOTSUPP; + + break; + } + + ufshcd_rpm_put_sync(hba); + + up(&hba->host_sem); + + return err; +} + +static int ufs_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, + long val) +{ + struct ufs_hwmon_data *data = dev_get_drvdata(dev); + struct ufs_hba *hba = data->hba; + int err; + + if (attr != hwmon_temp_enable) + return -EINVAL; + + if (val != 0 && val != 1) + return -EINVAL; + + down(&hba->host_sem); + + if (!ufshcd_is_user_access_allowed(hba)) { + up(&hba->host_sem); + return -EBUSY; + } + + ufshcd_rpm_get_sync(hba); + + if (val == 1) + err = ufshcd_update_ee_usr_mask(hba, MASK_EE_URGENT_TEMP, 0); + else + err = ufshcd_update_ee_usr_mask(hba, 0, MASK_EE_URGENT_TEMP); + + ufshcd_rpm_put_sync(hba); + + up(&hba->host_sem); + + return err; +} + +static umode_t ufs_hwmon_is_visible(const void *_data, enum hwmon_sensor_types type, u32 attr, + int channel) +{ + if (type != hwmon_temp) + return 0; + + switch (attr) { + case hwmon_temp_enable: + return 0644; + case hwmon_temp_crit: + case hwmon_temp_lcrit: + case hwmon_temp_input: + return 0444; + default: + break; + } + return 0; +} + +static const struct hwmon_channel_info *ufs_hwmon_info[] = { + HWMON_CHANNEL_INFO(temp, HWMON_T_ENABLE | HWMON_T_INPUT | HWMON_T_CRIT | HWMON_T_LCRIT), + NULL +}; + +static const struct hwmon_ops ufs_hwmon_ops = { + .is_visible = ufs_hwmon_is_visible, + .read = ufs_hwmon_read, + .write = ufs_hwmon_write, +}; + +static const struct hwmon_chip_info ufs_hwmon_hba_info = { + .ops = &ufs_hwmon_ops, + .info = ufs_hwmon_info, +}; + +void ufs_hwmon_probe(struct ufs_hba *hba, u8 mask) +{ + struct device *dev = hba->dev; + struct ufs_hwmon_data *data; + struct device *hwmon; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return; + + data->hba = hba; + data->mask = mask; + + hwmon = hwmon_device_register_with_info(dev, "ufs", data, &ufs_hwmon_hba_info, NULL); + if (IS_ERR(hwmon)) { + dev_warn(dev, "Failed to instantiate hwmon device\n"); + kfree(data); + return; + } + + hba->hwmon_device = hwmon; +} + +void ufs_hwmon_remove(struct ufs_hba *hba) +{ + struct ufs_hwmon_data *data; + + if (!hba->hwmon_device) + return; + + data = dev_get_drvdata(hba->hwmon_device); + hwmon_device_unregister(hba->hwmon_device); + hba->hwmon_device = NULL; + kfree(data); +} + +void ufs_hwmon_notify_event(struct ufs_hba *hba, u8 ee_mask) +{ + if (!hba->hwmon_device) + return; + + if (ee_mask & MASK_EE_TOO_HIGH_TEMP) + hwmon_notify_event(hba->hwmon_device, hwmon_temp, hwmon_temp_max_alarm, 0); + + if (ee_mask & MASK_EE_TOO_LOW_TEMP) + hwmon_notify_event(hba->hwmon_device, hwmon_temp, hwmon_temp_min_alarm, 0); +} diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index ca0ebb2c9b965610dd2c09108f1750f82f8daee6..86a938075f308bd41d568127b93214fb129154e2 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "ufshcd.h" @@ -246,9 +247,9 @@ static int ufs_mtk_setup_ref_clk(struct ufs_hba *hba, bool on) if (on) { ufs_mtk_ref_clk_notify(on, res); - ufshcd_delay_us(host->ref_clk_ungating_wait_us, 10); ufshcd_writel(hba, REFCLK_REQUEST, REG_UFS_REFCLK_CTRL); } else { + ufshcd_delay_us(host->ref_clk_gating_wait_us, 10); ufshcd_writel(hba, REFCLK_RELEASE, REG_UFS_REFCLK_CTRL); } @@ -273,16 +274,16 @@ static int ufs_mtk_setup_ref_clk(struct ufs_hba *hba, bool on) out: host->ref_clk_enabled = on; - if (!on) { - ufshcd_delay_us(host->ref_clk_gating_wait_us, 10); + if (on) + ufshcd_delay_us(host->ref_clk_ungating_wait_us, 10); + else ufs_mtk_ref_clk_notify(on, res); - } return 0; } static void ufs_mtk_setup_ref_clk_wait_us(struct ufs_hba *hba, - u16 gating_us, u16 ungating_us) + u16 gating_us) { struct ufs_mtk_host *host = ufshcd_get_variant(hba); @@ -293,7 +294,62 @@ static void ufs_mtk_setup_ref_clk_wait_us(struct ufs_hba *hba, host->ref_clk_gating_wait_us = gating_us; } - host->ref_clk_ungating_wait_us = ungating_us; + host->ref_clk_ungating_wait_us = REFCLK_DEFAULT_WAIT_US; +} + +static void ufs_mtk_dbg_sel(struct ufs_hba *hba) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + + if (((host->ip_ver >> 16) & 0xFF) >= 0x36) { + ufshcd_writel(hba, 0x820820, REG_UFS_DEBUG_SEL); + ufshcd_writel(hba, 0x0, REG_UFS_DEBUG_SEL_B0); + ufshcd_writel(hba, 0x55555555, REG_UFS_DEBUG_SEL_B1); + ufshcd_writel(hba, 0xaaaaaaaa, REG_UFS_DEBUG_SEL_B2); + ufshcd_writel(hba, 0xffffffff, REG_UFS_DEBUG_SEL_B3); + } else { + ufshcd_writel(hba, 0x20, REG_UFS_DEBUG_SEL); + } +} + +static void ufs_mtk_wait_idle_state(struct ufs_hba *hba, + unsigned long retry_ms) +{ + u64 timeout, time_checked; + u32 val, sm; + bool wait_idle; + + /* cannot use plain ktime_get() in suspend */ + timeout = ktime_get_mono_fast_ns() + retry_ms * 1000000UL; + + /* wait a specific time after check base */ + udelay(10); + wait_idle = false; + + do { + time_checked = ktime_get_mono_fast_ns(); + ufs_mtk_dbg_sel(hba); + val = ufshcd_readl(hba, REG_UFS_PROBE); + + sm = val & 0x1f; + + /* + * if state is in H8 enter and H8 enter confirm + * wait until return to idle state. + */ + if ((sm >= VS_HIB_ENTER) && (sm <= VS_HIB_EXIT)) { + wait_idle = true; + udelay(50); + continue; + } else if (!wait_idle) + break; + + if (wait_idle && (sm == VS_HCE_BASE)) + break; + } while (time_checked < timeout); + + if (wait_idle && sm != VS_HCE_BASE) + dev_info(hba->dev, "wait idle tmo: 0x%x\n", val); } static int ufs_mtk_wait_link_state(struct ufs_hba *hba, u32 state, @@ -305,7 +361,7 @@ static int ufs_mtk_wait_link_state(struct ufs_hba *hba, u32 state, timeout = ktime_add_ms(ktime_get(), max_wait_ms); do { time_checked = ktime_get(); - ufshcd_writel(hba, 0x20, REG_UFS_DEBUG_SEL); + ufs_mtk_dbg_sel(hba); val = ufshcd_readl(hba, REG_UFS_PROBE); val = val >> 28; @@ -501,7 +557,7 @@ static void ufs_mtk_init_va09_pwr_ctrl(struct ufs_hba *hba) struct ufs_mtk_host *host = ufshcd_get_variant(hba); host->reg_va09 = regulator_get(hba->dev, "va09"); - if (!host->reg_va09) + if (IS_ERR(host->reg_va09)) dev_info(hba->dev, "failed to get va09"); else host->caps |= UFS_MTK_CAP_VA09_PWR_CTRL; @@ -527,6 +583,19 @@ static void ufs_mtk_init_host_caps(struct ufs_hba *hba) dev_info(hba->dev, "caps: 0x%x", host->caps); } +static void ufs_mtk_scale_perf(struct ufs_hba *hba, bool up) +{ + struct ufs_mtk_host *host = ufshcd_get_variant(hba); + + ufs_mtk_boost_crypt(hba, up); + ufs_mtk_setup_ref_clk(hba, up); + + if (up) + phy_power_on(host->mphy); + else + phy_power_off(host->mphy); +} + /** * ufs_mtk_setup_clocks - enables/disable clocks * @hba: host controller instance @@ -568,15 +637,10 @@ static int ufs_mtk_setup_clocks(struct ufs_hba *hba, bool on, clk_pwr_off = true; } - if (clk_pwr_off) { - ufs_mtk_boost_crypt(hba, on); - ufs_mtk_setup_ref_clk(hba, on); - phy_power_off(host->mphy); - } + if (clk_pwr_off) + ufs_mtk_scale_perf(hba, false); } else if (on && status == POST_CHANGE) { - phy_power_on(host->mphy); - ufs_mtk_setup_ref_clk(hba, on); - ufs_mtk_boost_crypt(hba, on); + ufs_mtk_scale_perf(hba, true); } return ret; @@ -681,6 +745,8 @@ static int ufs_mtk_init(struct ufs_hba *hba) ufs_mtk_mphy_power_on(hba, true); ufs_mtk_setup_clocks(hba, true, POST_CHANGE); + host->ip_ver = ufshcd_readl(hba, REG_UFS_MTK_IP_VER); + goto out; out_variant_clear: @@ -695,22 +761,11 @@ static int ufs_mtk_pre_pwr_change(struct ufs_hba *hba, { struct ufs_mtk_host *host = ufshcd_get_variant(hba); struct ufs_dev_params host_cap; - u32 adapt_val; int ret; - host_cap.tx_lanes = UFS_MTK_LIMIT_NUM_LANES_TX; - host_cap.rx_lanes = UFS_MTK_LIMIT_NUM_LANES_RX; - host_cap.hs_rx_gear = UFS_MTK_LIMIT_HSGEAR_RX; - host_cap.hs_tx_gear = UFS_MTK_LIMIT_HSGEAR_TX; - host_cap.pwm_rx_gear = UFS_MTK_LIMIT_PWMGEAR_RX; - host_cap.pwm_tx_gear = UFS_MTK_LIMIT_PWMGEAR_TX; - host_cap.rx_pwr_pwm = UFS_MTK_LIMIT_RX_PWR_PWM; - host_cap.tx_pwr_pwm = UFS_MTK_LIMIT_TX_PWR_PWM; - host_cap.rx_pwr_hs = UFS_MTK_LIMIT_RX_PWR_HS; - host_cap.tx_pwr_hs = UFS_MTK_LIMIT_TX_PWR_HS; - host_cap.hs_rate = UFS_MTK_LIMIT_HS_RATE; - host_cap.desired_working_mode = - UFS_MTK_LIMIT_DESIRED_MODE; + ufshcd_init_pwr_dev_param(&host_cap); + host_cap.hs_rx_gear = UFS_HS_G4; + host_cap.hs_tx_gear = UFS_HS_G4; ret = ufshcd_get_pwr_dev_param(&host_cap, dev_max_params, @@ -721,13 +776,9 @@ static int ufs_mtk_pre_pwr_change(struct ufs_hba *hba, } if (host->hw_ver.major >= 3) { - if (dev_req_params->gear_tx == UFS_HS_G4) - adapt_val = PA_INITIAL_ADAPT; - else - adapt_val = PA_NO_ADAPT; - ufshcd_dme_set(hba, - UIC_ARG_MIB(PA_TXHSADAPTTYPE), - adapt_val); + ret = ufshcd_dme_configure_adapt(hba, + dev_req_params->gear_tx, + PA_INITIAL_ADAPT); } return ret; @@ -755,14 +806,14 @@ static int ufs_mtk_pwr_change_notify(struct ufs_hba *hba, return ret; } -static int ufs_mtk_unipro_set_pm(struct ufs_hba *hba, bool lpm) +static int ufs_mtk_unipro_set_lpm(struct ufs_hba *hba, bool lpm) { int ret; struct ufs_mtk_host *host = ufshcd_get_variant(hba); ret = ufshcd_dme_set(hba, UIC_ARG_MIB_SEL(VS_UNIPROPOWERDOWNCONTROL, 0), - lpm); + lpm ? 1 : 0); if (!ret || !lpm) { /* * Forcibly set as non-LPM mode if UIC commands is failed @@ -782,7 +833,7 @@ static int ufs_mtk_pre_link(struct ufs_hba *hba) ufs_mtk_get_controller_version(hba); - ret = ufs_mtk_unipro_set_pm(hba, false); + ret = ufs_mtk_unipro_set_lpm(hba, false); if (ret) return ret; @@ -829,12 +880,10 @@ static int ufs_mtk_post_link(struct ufs_hba *hba) /* enable unipro clock gating feature */ ufs_mtk_cfg_unipro_cg(hba, true); - /* configure auto-hibern8 timer to 10ms */ - if (ufshcd_is_auto_hibern8_supported(hba)) { - ufshcd_auto_hibern8_update(hba, - FIELD_PREP(UFSHCI_AHIBERN8_TIMER_MASK, 10) | - FIELD_PREP(UFSHCI_AHIBERN8_SCALE_MASK, 3)); - } + /* will be configured during probe hba */ + if (ufshcd_is_auto_hibern8_supported(hba)) + hba->ahit = FIELD_PREP(UFSHCI_AHIBERN8_TIMER_MASK, 10) | + FIELD_PREP(UFSHCI_AHIBERN8_SCALE_MASK, 3); ufs_mtk_setup_clk_gating(hba); @@ -897,7 +946,7 @@ static int ufs_mtk_link_set_hpm(struct ufs_hba *hba) if (err) return err; - err = ufs_mtk_unipro_set_pm(hba, false); + err = ufs_mtk_unipro_set_lpm(hba, false); if (err) return err; @@ -918,10 +967,10 @@ static int ufs_mtk_link_set_lpm(struct ufs_hba *hba) { int err; - err = ufs_mtk_unipro_set_pm(hba, true); + err = ufs_mtk_unipro_set_lpm(hba, true); if (err) { /* Resume UniPro state for following error recovery */ - ufs_mtk_unipro_set_pm(hba, false); + ufs_mtk_unipro_set_lpm(hba, false); return err; } @@ -941,11 +990,37 @@ static void ufs_mtk_vreg_set_lpm(struct ufs_hba *hba, bool lpm) REGULATOR_MODE_NORMAL); } -static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) +static void ufs_mtk_auto_hibern8_disable(struct ufs_hba *hba) +{ + unsigned long flags; + int ret; + + /* disable auto-hibern8 */ + spin_lock_irqsave(hba->host->host_lock, flags); + ufshcd_writel(hba, 0, REG_AUTO_HIBERNATE_IDLE_TIMER); + spin_unlock_irqrestore(hba->host->host_lock, flags); + + /* wait host return to idle state when auto-hibern8 off */ + ufs_mtk_wait_idle_state(hba, 5); + + ret = ufs_mtk_wait_link_state(hba, VS_LINK_UP, 100); + if (ret) + dev_warn(hba->dev, "exit h8 state fail, ret=%d\n", ret); +} + +static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, + enum ufs_notify_change_status status) { int err; struct arm_smccc_res res; + if (status == PRE_CHANGE) { + if (!ufshcd_is_auto_hibern8_supported(hba)) + return 0; + ufs_mtk_auto_hibern8_disable(hba); + return 0; + } + if (ufshcd_is_link_hibern8(hba)) { err = ufs_mtk_link_set_lpm(hba); if (err) @@ -1010,7 +1085,7 @@ static void ufs_mtk_dbg_register_dump(struct ufs_hba *hba) "MPHY Ctrl "); /* Direct debugging information to REG_MTK_PROBE */ - ufshcd_writel(hba, 0x20, REG_UFS_DEBUG_SEL); + ufs_mtk_dbg_sel(hba); ufshcd_dump_regs(hba, REG_UFS_PROBE, 0x4, "Debug Probe "); } @@ -1028,11 +1103,14 @@ static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba) * requirements. */ if (mid == UFS_VENDOR_SAMSUNG) - ufs_mtk_setup_ref_clk_wait_us(hba, 1, 1); + ufs_mtk_setup_ref_clk_wait_us(hba, 1); else if (mid == UFS_VENDOR_SKHYNIX) - ufs_mtk_setup_ref_clk_wait_us(hba, 30, 30); + ufs_mtk_setup_ref_clk_wait_us(hba, 30); else if (mid == UFS_VENDOR_TOSHIBA) - ufs_mtk_setup_ref_clk_wait_us(hba, 100, 32); + ufs_mtk_setup_ref_clk_wait_us(hba, 100); + else + ufs_mtk_setup_ref_clk_wait_us(hba, + REFCLK_DEFAULT_WAIT_US); return 0; } @@ -1111,6 +1189,7 @@ static int ufs_mtk_probe(struct platform_device *pdev) } link = device_link_add(dev, &reset_pdev->dev, DL_FLAG_AUTOPROBE_CONSUMER); + put_device(&reset_pdev->dev); if (!link) { dev_notice(dev, "add reset device_link fail\n"); goto skip_reset; @@ -1149,11 +1228,10 @@ static int ufs_mtk_remove(struct platform_device *pdev) } static const struct dev_pm_ops ufs_mtk_pm_ops = { - .suspend = ufshcd_pltfrm_suspend, - .resume = ufshcd_pltfrm_resume, - .runtime_suspend = ufshcd_pltfrm_runtime_suspend, - .runtime_resume = ufshcd_pltfrm_runtime_resume, - .runtime_idle = ufshcd_pltfrm_runtime_idle, + SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume) + SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL) + .prepare = ufshcd_suspend_prepare, + .complete = ufshcd_resume_complete, }; static struct platform_driver ufs_mtk_pltform = { diff --git a/drivers/scsi/ufs/ufs-mediatek.h b/drivers/scsi/ufs/ufs-mediatek.h index ccfcc445f70cae61bbf23d0f61e1e3bd7024f324..414dca86c09ff6d58f4c7cc22c68436d31fcaae1 100644 --- a/drivers/scsi/ufs/ufs-mediatek.h +++ b/drivers/scsi/ufs/ufs-mediatek.h @@ -15,9 +15,14 @@ #define REG_UFS_REFCLK_CTRL 0x144 #define REG_UFS_EXTREG 0x2100 #define REG_UFS_MPHYCTRL 0x2200 +#define REG_UFS_MTK_IP_VER 0x2240 #define REG_UFS_REJECT_MON 0x22AC #define REG_UFS_DEBUG_SEL 0x22C0 #define REG_UFS_PROBE 0x22C8 +#define REG_UFS_DEBUG_SEL_B0 0x22D0 +#define REG_UFS_DEBUG_SEL_B1 0x22D4 +#define REG_UFS_DEBUG_SEL_B2 0x22D8 +#define REG_UFS_DEBUG_SEL_B3 0x22DC /* * Ref-clk control @@ -29,22 +34,7 @@ #define REFCLK_ACK BIT(1) #define REFCLK_REQ_TIMEOUT_US 3000 - -/* - * Vendor specific pre-defined parameters - */ -#define UFS_MTK_LIMIT_NUM_LANES_RX 2 -#define UFS_MTK_LIMIT_NUM_LANES_TX 2 -#define UFS_MTK_LIMIT_HSGEAR_RX UFS_HS_G4 -#define UFS_MTK_LIMIT_HSGEAR_TX UFS_HS_G4 -#define UFS_MTK_LIMIT_PWMGEAR_RX UFS_PWM_G4 -#define UFS_MTK_LIMIT_PWMGEAR_TX UFS_PWM_G4 -#define UFS_MTK_LIMIT_RX_PWR_PWM SLOW_MODE -#define UFS_MTK_LIMIT_TX_PWR_PWM SLOW_MODE -#define UFS_MTK_LIMIT_RX_PWR_HS FAST_MODE -#define UFS_MTK_LIMIT_TX_PWR_HS FAST_MODE -#define UFS_MTK_LIMIT_HS_RATE PA_HS_MODE_B -#define UFS_MTK_LIMIT_DESIRED_MODE UFS_HS_MODE +#define REFCLK_DEFAULT_WAIT_US 32 /* * Other attributes @@ -65,6 +55,26 @@ enum { VS_LINK_CFG = 5, }; +/* + * Vendor specific host controller state + */ +enum { + VS_HCE_RESET = 0, + VS_HCE_BASE = 1, + VS_HCE_OOCPR_WAIT = 2, + VS_HCE_DME_RESET = 3, + VS_HCE_MIDDLE = 4, + VS_HCE_DME_ENABLE = 5, + VS_HCE_DEFAULTS = 6, + VS_HIB_IDLEEN = 7, + VS_HIB_ENTER = 8, + VS_HIB_ENTER_CONF = 9, + VS_HIB_MIDDLE = 10, + VS_HIB_WAITTIMER = 11, + VS_HIB_EXIT_CONF = 12, + VS_HIB_EXIT = 13, +}; + /* * SiP commands */ @@ -117,18 +127,19 @@ struct ufs_mtk_hw_ver { struct ufs_mtk_host { struct phy *mphy; struct regulator *reg_va09; - struct ufs_mtk_hw_ver hw_ver; struct reset_control *hci_reset; struct reset_control *unipro_reset; struct reset_control *crypto_reset; struct ufs_hba *hba; struct ufs_mtk_crypt_cfg *crypt; + struct ufs_mtk_hw_ver hw_ver; enum ufs_mtk_host_caps caps; bool mphy_powered_on; bool unipro_lpm; bool ref_clk_enabled; u16 ref_clk_ungating_wait_us; u16 ref_clk_gating_wait_us; + u32 ip_ver; }; #endif /* !_UFS_MEDIATEK_H */ diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 9fe3b93440f6bffb139789be911791a81dc35c9f..0d2e950d0865eb7f629e4d5c901fa252d38d44bb 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -589,11 +589,15 @@ static void ufs_qcom_device_reset_ctrl(struct ufs_hba *hba, bool asserted) gpiod_set_value_cansleep(host->device_reset, asserted); } -static int ufs_qcom_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) +static int ufs_qcom_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op, + enum ufs_notify_change_status status) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); struct phy *phy = host->generic_phy; + if (status == PRE_CHANGE) + return 0; + if (ufs_qcom_is_link_off(hba)) { /* * Disable the tx/rx lane symbol clocks before PHY is @@ -715,19 +719,8 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, switch (status) { case PRE_CHANGE: - ufs_qcom_cap.tx_lanes = UFS_QCOM_LIMIT_NUM_LANES_TX; - ufs_qcom_cap.rx_lanes = UFS_QCOM_LIMIT_NUM_LANES_RX; - ufs_qcom_cap.hs_rx_gear = UFS_QCOM_LIMIT_HSGEAR_RX; - ufs_qcom_cap.hs_tx_gear = UFS_QCOM_LIMIT_HSGEAR_TX; - ufs_qcom_cap.pwm_rx_gear = UFS_QCOM_LIMIT_PWMGEAR_RX; - ufs_qcom_cap.pwm_tx_gear = UFS_QCOM_LIMIT_PWMGEAR_TX; - ufs_qcom_cap.rx_pwr_pwm = UFS_QCOM_LIMIT_RX_PWR_PWM; - ufs_qcom_cap.tx_pwr_pwm = UFS_QCOM_LIMIT_TX_PWR_PWM; - ufs_qcom_cap.rx_pwr_hs = UFS_QCOM_LIMIT_RX_PWR_HS; - ufs_qcom_cap.tx_pwr_hs = UFS_QCOM_LIMIT_TX_PWR_HS; + ufshcd_init_pwr_dev_param(&ufs_qcom_cap); ufs_qcom_cap.hs_rate = UFS_QCOM_LIMIT_HS_RATE; - ufs_qcom_cap.desired_working_mode = - UFS_QCOM_LIMIT_DESIRED_MODE; if (host->hw_ver.major == 0x1) { /* @@ -758,17 +751,9 @@ static int ufs_qcom_pwr_change_notify(struct ufs_hba *hba, ufs_qcom_dev_ref_clk_ctrl(host, true); if (host->hw_ver.major >= 0x4) { - if (dev_req_params->gear_tx == UFS_HS_G4) { - /* INITIAL ADAPT */ - ufshcd_dme_set(hba, - UIC_ARG_MIB(PA_TXHSADAPTTYPE), - PA_INITIAL_ADAPT); - } else { - /* NO ADAPT */ - ufshcd_dme_set(hba, - UIC_ARG_MIB(PA_TXHSADAPTTYPE), - PA_NO_ADAPT); - } + ufshcd_dme_configure_adapt(hba, + dev_req_params->gear_tx, + PA_INITIAL_ADAPT); } break; case POST_CHANGE: @@ -887,6 +872,7 @@ static void ufs_qcom_set_caps(struct ufs_hba *hba) hba->caps |= UFSHCD_CAP_AUTO_BKOPS_SUSPEND; hba->caps |= UFSHCD_CAP_WB_EN; hba->caps |= UFSHCD_CAP_CRYPTO; + hba->caps |= UFSHCD_CAP_AGGR_POWER_COLLAPSE; if (host->hw_ver.major >= 0x2) { host->caps = UFS_QCOM_CAP_QUNIPRO | @@ -906,7 +892,6 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on, enum ufs_notify_change_status status) { struct ufs_qcom_host *host = ufshcd_get_variant(hba); - int err = 0; /* * In case ufs_qcom_init() is not yet done, simply ignore. @@ -934,7 +919,7 @@ static int ufs_qcom_setup_clocks(struct ufs_hba *hba, bool on, break; } - return err; + return 0; } static int @@ -1001,6 +986,7 @@ static int ufs_qcom_init(struct ufs_hba *hba) struct platform_device *pdev = to_platform_device(dev); struct ufs_qcom_host *host; struct resource *res; + struct ufs_clk_info *clki; if (strlen(android_boot_dev) && strcmp(android_boot_dev, dev_name(dev))) return -ENODEV; @@ -1088,17 +1074,17 @@ static int ufs_qcom_init(struct ufs_hba *hba) if (res) { host->dev_ref_clk_ctrl_mmio = devm_ioremap_resource(dev, res); - if (IS_ERR(host->dev_ref_clk_ctrl_mmio)) { - dev_warn(dev, - "%s: could not map dev_ref_clk_ctrl_mmio, err %ld\n", - __func__, - PTR_ERR(host->dev_ref_clk_ctrl_mmio)); + if (IS_ERR(host->dev_ref_clk_ctrl_mmio)) host->dev_ref_clk_ctrl_mmio = NULL; - } host->dev_ref_clk_en_mask = BIT(5); } } + list_for_each_entry(clki, &hba->clk_list_head, list) { + if (!strcmp(clki->name, "core_clk_unipro")) + clki->keep_link_active = true; + } + err = ufs_qcom_init_lane_clks(host); if (err) goto out_variant_clear; @@ -1573,11 +1559,10 @@ MODULE_DEVICE_TABLE(acpi, ufs_qcom_acpi_match); #endif static const struct dev_pm_ops ufs_qcom_pm_ops = { - .suspend = ufshcd_pltfrm_suspend, - .resume = ufshcd_pltfrm_resume, - .runtime_suspend = ufshcd_pltfrm_runtime_suspend, - .runtime_resume = ufshcd_pltfrm_runtime_resume, - .runtime_idle = ufshcd_pltfrm_runtime_idle, + SET_SYSTEM_SLEEP_PM_OPS(ufshcd_system_suspend, ufshcd_system_resume) + SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL) + .prepare = ufshcd_suspend_prepare, + .complete = ufshcd_resume_complete, }; static struct platform_driver ufs_qcom_pltform = { diff --git a/drivers/scsi/ufs/ufs-qcom.h b/drivers/scsi/ufs/ufs-qcom.h index 3f4922743b3e3000eac2b54006a06dda19ae625c..8208e3a3ef59d2b55f02695b645aae80c1c9cf37 100644 --- a/drivers/scsi/ufs/ufs-qcom.h +++ b/drivers/scsi/ufs/ufs-qcom.h @@ -27,18 +27,7 @@ #define SLOW 1 #define FAST 2 -#define UFS_QCOM_LIMIT_NUM_LANES_RX 2 -#define UFS_QCOM_LIMIT_NUM_LANES_TX 2 -#define UFS_QCOM_LIMIT_HSGEAR_RX UFS_HS_G3 -#define UFS_QCOM_LIMIT_HSGEAR_TX UFS_HS_G3 -#define UFS_QCOM_LIMIT_PWMGEAR_RX UFS_PWM_G4 -#define UFS_QCOM_LIMIT_PWMGEAR_TX UFS_PWM_G4 -#define UFS_QCOM_LIMIT_RX_PWR_PWM SLOW_MODE -#define UFS_QCOM_LIMIT_TX_PWR_PWM SLOW_MODE -#define UFS_QCOM_LIMIT_RX_PWR_HS FAST_MODE -#define UFS_QCOM_LIMIT_TX_PWR_HS FAST_MODE #define UFS_QCOM_LIMIT_HS_RATE PA_HS_MODE_B -#define UFS_QCOM_LIMIT_DESIRED_MODE FAST /* QCOM UFS host controller vendor specific registers */ enum { diff --git a/drivers/scsi/ufs/ufs-sysfs.c b/drivers/scsi/ufs/ufs-sysfs.c index 4c02591058c6b56832d8c4c9dc365db6962f1793..f59e569fcbc9a3a076719692754dd4b2cb2950fa 100644 --- a/drivers/scsi/ufs/ufs-sysfs.c +++ b/drivers/scsi/ufs/ufs-sysfs.c @@ -11,7 +11,7 @@ #include -static const char *ufschd_uic_link_state_to_string( +static const char *ufshcd_uic_link_state_to_string( enum uic_link_state state) { switch (state) { @@ -23,13 +23,14 @@ static const char *ufschd_uic_link_state_to_string( } } -static const char *ufschd_ufs_dev_pwr_mode_to_string( +static const char *ufshcd_ufs_dev_pwr_mode_to_string( enum ufs_dev_pwr_mode state) { switch (state) { case UFS_ACTIVE_PWR_MODE: return "ACTIVE"; case UFS_SLEEP_PWR_MODE: return "SLEEP"; case UFS_POWERDOWN_PWR_MODE: return "POWERDOWN"; + case UFS_DEEPSLEEP_PWR_MODE: return "DEEPSLEEP"; default: return "UNKNOWN"; } } @@ -40,6 +41,7 @@ static inline ssize_t ufs_sysfs_pm_lvl_store(struct device *dev, bool rpm) { struct ufs_hba *hba = dev_get_drvdata(dev); + struct ufs_dev_info *dev_info = &hba->dev_info; unsigned long flags, value; if (kstrtoul(buf, 0, &value)) @@ -48,6 +50,11 @@ static inline ssize_t ufs_sysfs_pm_lvl_store(struct device *dev, if (value >= UFS_PM_LVL_MAX) return -EINVAL; + if (ufs_pm_lvl_states[value].dev_state == UFS_DEEPSLEEP_PWR_MODE && + (!(hba->caps & UFSHCD_CAP_DEEPSLEEP) || + !(dev_info->wspecversion >= 0x310))) + return -EINVAL; + spin_lock_irqsave(hba->host->host_lock, flags); if (rpm) hba->rpm_lvl = value; @@ -76,7 +83,7 @@ static ssize_t rpm_target_dev_state_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return sysfs_emit(buf, "%s\n", ufschd_ufs_dev_pwr_mode_to_string( + return sysfs_emit(buf, "%s\n", ufshcd_ufs_dev_pwr_mode_to_string( ufs_pm_lvl_states[hba->rpm_lvl].dev_state)); } @@ -85,7 +92,7 @@ static ssize_t rpm_target_link_state_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return sysfs_emit(buf, "%s\n", ufschd_uic_link_state_to_string( + return sysfs_emit(buf, "%s\n", ufshcd_uic_link_state_to_string( ufs_pm_lvl_states[hba->rpm_lvl].link_state)); } @@ -108,7 +115,7 @@ static ssize_t spm_target_dev_state_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return sysfs_emit(buf, "%s\n", ufschd_ufs_dev_pwr_mode_to_string( + return sysfs_emit(buf, "%s\n", ufshcd_ufs_dev_pwr_mode_to_string( ufs_pm_lvl_states[hba->spm_lvl].dev_state)); } @@ -117,7 +124,7 @@ static ssize_t spm_target_link_state_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return sysfs_emit(buf, "%s\n", ufschd_uic_link_state_to_string( + return sysfs_emit(buf, "%s\n", ufshcd_uic_link_state_to_string( ufs_pm_lvl_states[hba->spm_lvl].link_state)); } @@ -149,18 +156,29 @@ static ssize_t auto_hibern8_show(struct device *dev, struct device_attribute *attr, char *buf) { u32 ahit; + int ret; struct ufs_hba *hba = dev_get_drvdata(dev); if (!ufshcd_is_auto_hibern8_supported(hba)) return -EOPNOTSUPP; + down(&hba->host_sem); + if (!ufshcd_is_user_access_allowed(hba)) { + ret = -EBUSY; + goto out; + } + pm_runtime_get_sync(hba->dev); ufshcd_hold(hba, false); ahit = ufshcd_readl(hba, REG_AUTO_HIBERNATE_IDLE_TIMER); ufshcd_release(hba); pm_runtime_put_sync(hba->dev); - return sysfs_emit(buf, "%d\n", ufshcd_ahit_to_us(ahit)); + ret = sysfs_emit(buf, "%d\n", ufshcd_ahit_to_us(ahit)); + +out: + up(&hba->host_sem); + return ret; } static ssize_t auto_hibern8_store(struct device *dev, @@ -169,6 +187,7 @@ static ssize_t auto_hibern8_store(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); unsigned int timer; + int ret = 0; if (!ufshcd_is_auto_hibern8_supported(hba)) return -EOPNOTSUPP; @@ -179,9 +198,61 @@ static ssize_t auto_hibern8_store(struct device *dev, if (timer > UFSHCI_AHIBERN8_MAX) return -EINVAL; + down(&hba->host_sem); + if (!ufshcd_is_user_access_allowed(hba)) { + ret = -EBUSY; + goto out; + } + ufshcd_auto_hibern8_update(hba, ufshcd_us_to_ahit(timer)); - return count; +out: + up(&hba->host_sem); + return ret ? ret : count; +} + +static ssize_t wb_on_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", hba->dev_info.wb_enabled); +} + +static ssize_t wb_on_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + unsigned int wb_enable; + ssize_t res; + + if (!ufshcd_is_wb_allowed(hba) || ufshcd_is_clkscaling_supported(hba)) { + /* + * If the platform supports UFSHCD_CAP_CLK_SCALING, turn WB + * on/off will be done while clock scaling up/down. + */ + dev_warn(dev, "To control WB through wb_on is not allowed!\n"); + return -EOPNOTSUPP; + } + + if (kstrtouint(buf, 0, &wb_enable)) + return -EINVAL; + + if (wb_enable != 0 && wb_enable != 1) + return -EINVAL; + + down(&hba->host_sem); + if (!ufshcd_is_user_access_allowed(hba)) { + res = -EBUSY; + goto out; + } + + ufshcd_rpm_get_sync(hba); + res = ufshcd_wb_toggle(hba, wb_enable); + ufshcd_rpm_put_sync(hba); +out: + up(&hba->host_sem); + return res < 0 ? res : count; } static DEVICE_ATTR_RW(rpm_lvl); @@ -191,6 +262,7 @@ static DEVICE_ATTR_RW(spm_lvl); static DEVICE_ATTR_RO(spm_target_dev_state); static DEVICE_ATTR_RO(spm_target_link_state); static DEVICE_ATTR_RW(auto_hibern8); +static DEVICE_ATTR_RW(wb_on); static struct attribute *ufs_sysfs_ufshcd_attrs[] = { &dev_attr_rpm_lvl.attr, @@ -200,6 +272,7 @@ static struct attribute *ufs_sysfs_ufshcd_attrs[] = { &dev_attr_spm_target_dev_state.attr, &dev_attr_spm_target_link_state.attr, &dev_attr_auto_hibern8.attr, + &dev_attr_wb_on.attr, NULL }; @@ -456,12 +529,21 @@ static ssize_t ufs_sysfs_read_desc_param(struct ufs_hba *hba, if (param_size > 8) return -EINVAL; - pm_runtime_get_sync(hba->dev); + down(&hba->host_sem); + if (!ufshcd_is_user_access_allowed(hba)) { + ret = -EBUSY; + goto out; + } + + ufshcd_rpm_get_sync(hba); ret = ufshcd_read_desc_param(hba, desc_id, desc_index, param_offset, desc_buf, param_size); - pm_runtime_put_sync(hba->dev); - if (ret) - return -EINVAL; + ufshcd_rpm_put_sync(hba); + if (ret) { + ret = -EINVAL; + goto out; + } + switch (param_size) { case 1: ret = sysfs_emit(sysfs_buf, "0x%02X\n", *desc_buf); @@ -480,6 +562,8 @@ static ssize_t ufs_sysfs_read_desc_param(struct ufs_hba *hba, break; } +out: + up(&hba->host_sem); return ret; } @@ -834,10 +918,17 @@ static ssize_t _name##_show(struct device *dev, \ int desc_len = QUERY_DESC_MAX_SIZE; \ u8 *desc_buf; \ \ + down(&hba->host_sem); \ + if (!ufshcd_is_user_access_allowed(hba)) { \ + up(&hba->host_sem); \ + return -EBUSY; \ + } \ desc_buf = kzalloc(QUERY_DESC_MAX_SIZE, GFP_ATOMIC); \ - if (!desc_buf) \ - return -ENOMEM; \ - pm_runtime_get_sync(hba->dev); \ + if (!desc_buf) { \ + up(&hba->host_sem); \ + return -ENOMEM; \ + } \ + ufshcd_rpm_get_sync(hba); \ ret = ufshcd_query_descriptor_retry(hba, \ UPIU_QUERY_OPCODE_READ_DESC, QUERY_DESC_IDN_DEVICE, \ 0, 0, desc_buf, &desc_len); \ @@ -852,10 +943,11 @@ static ssize_t _name##_show(struct device *dev, \ SD_ASCII_STD); \ if (ret < 0) \ goto out; \ - ret = sysfs_emit(buf, "%s\n", desc_buf); \ + ret = sysfs_emit(buf, "%s\n", desc_buf); \ out: \ - pm_runtime_put_sync(hba->dev); \ + ufshcd_rpm_put_sync(hba); \ kfree(desc_buf); \ + up(&hba->host_sem); \ return ret; \ } \ static DEVICE_ATTR_RO(_name) @@ -882,8 +974,8 @@ static const struct attribute_group ufs_sysfs_string_descriptors_group = { static inline bool ufshcd_is_wb_flags(enum flag_idn idn) { - return ((idn >= QUERY_FLAG_IDN_WB_EN) && - (idn <= QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8)); + return idn >= QUERY_FLAG_IDN_WB_EN && + idn <= QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8; } #define UFS_FLAG(_name, _uname) \ @@ -894,15 +986,26 @@ static ssize_t _name##_show(struct device *dev, \ u8 index = 0; \ int ret; \ struct ufs_hba *hba = dev_get_drvdata(dev); \ + \ + down(&hba->host_sem); \ + if (!ufshcd_is_user_access_allowed(hba)) { \ + up(&hba->host_sem); \ + return -EBUSY; \ + } \ if (ufshcd_is_wb_flags(QUERY_FLAG_IDN##_uname)) \ index = ufshcd_wb_get_query_index(hba); \ - pm_runtime_get_sync(hba->dev); \ + ufshcd_rpm_get_sync(hba); \ ret = ufshcd_query_flag(hba, UPIU_QUERY_OPCODE_READ_FLAG, \ QUERY_FLAG_IDN##_uname, index, &flag); \ - pm_runtime_put_sync(hba->dev); \ - if (ret) \ - return -EINVAL; \ - return sysfs_emit(buf, "%s\n", flag ? "true" : "false"); \ + ufshcd_rpm_put_sync(hba); \ + if (ret) { \ + ret = -EINVAL; \ + goto out; \ + } \ + ret = sysfs_emit(buf, "%s\n", flag ? "true" : "false"); \ +out: \ + up(&hba->host_sem); \ + return ret; \ } \ static DEVICE_ATTR_RO(_name) @@ -942,8 +1045,8 @@ static const struct attribute_group ufs_sysfs_flags_group = { static inline bool ufshcd_is_wb_attrs(enum attr_idn idn) { - return ((idn >= QUERY_ATTR_IDN_WB_FLUSH_STATUS) && - (idn <= QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE)); + return idn >= QUERY_ATTR_IDN_WB_FLUSH_STATUS && + idn <= QUERY_ATTR_IDN_CURR_WB_BUFF_SIZE; } #define UFS_ATTRIBUTE(_name, _uname) \ @@ -954,15 +1057,26 @@ static ssize_t _name##_show(struct device *dev, \ u32 value; \ int ret; \ u8 index = 0; \ + \ + down(&hba->host_sem); \ + if (!ufshcd_is_user_access_allowed(hba)) { \ + up(&hba->host_sem); \ + return -EBUSY; \ + } \ if (ufshcd_is_wb_attrs(QUERY_ATTR_IDN##_uname)) \ index = ufshcd_wb_get_query_index(hba); \ - pm_runtime_get_sync(hba->dev); \ + ufshcd_rpm_get_sync(hba); \ ret = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR, \ QUERY_ATTR_IDN##_uname, index, 0, &value); \ - pm_runtime_put_sync(hba->dev); \ - if (ret) \ - return -EINVAL; \ - return sysfs_emit(buf, "0x%08X\n", value); \ + ufshcd_rpm_put_sync(hba); \ + if (ret) { \ + ret = -EINVAL; \ + goto out; \ + } \ + ret = sysfs_emit(buf, "0x%08X\n", value); \ +out: \ + up(&hba->host_sem); \ + return ret; \ } \ static DEVICE_ATTR_RO(_name) @@ -1051,6 +1165,7 @@ static DEVICE_ATTR_RO(_pname) #define UFS_UNIT_DESC_PARAM(_name, _uname, _size) \ UFS_LUN_DESC_PARAM(_name, _uname, UNIT, _size) +UFS_UNIT_DESC_PARAM(lu_enable, _LU_ENABLE, 1); UFS_UNIT_DESC_PARAM(boot_lun_id, _BOOT_LUN_ID, 1); UFS_UNIT_DESC_PARAM(lun_write_protect, _LU_WR_PROTECT, 1); UFS_UNIT_DESC_PARAM(lun_queue_depth, _LU_Q_DEPTH, 1); @@ -1069,8 +1184,8 @@ UFS_UNIT_DESC_PARAM(hpb_pinned_region_start_offset, _HPB_PIN_RGN_START_OFF, 2); UFS_UNIT_DESC_PARAM(hpb_number_pinned_regions, _HPB_NUM_PIN_RGNS, 2); UFS_UNIT_DESC_PARAM(wb_buf_alloc_units, _WB_BUF_ALLOC_UNITS, 4); - static struct attribute *ufs_sysfs_unit_descriptor[] = { + &dev_attr_lu_enable.attr, &dev_attr_boot_lun_id.attr, &dev_attr_lun_write_protect.attr, &dev_attr_lun_queue_depth.attr, @@ -1105,13 +1220,26 @@ static ssize_t dyn_cap_needed_attribute_show(struct device *dev, u8 lun = ufshcd_scsi_to_upiu_lun(sdev->lun); int ret; - pm_runtime_get_sync(hba->dev); + down(&hba->host_sem); + if (!ufshcd_is_user_access_allowed(hba)) { + ret = -EBUSY; + goto out; + } + + ufshcd_rpm_get_sync(hba); ret = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_READ_ATTR, QUERY_ATTR_IDN_DYN_CAP_NEEDED, lun, 0, &value); - pm_runtime_put_sync(hba->dev); - if (ret) - return -EINVAL; - return sysfs_emit(buf, "0x%08X\n", value); + ufshcd_rpm_put_sync(hba); + if (ret) { + ret = -EINVAL; + goto out; + } + + ret = sysfs_emit(buf, "0x%08X\n", value); + +out: + up(&hba->host_sem); + return ret; } static DEVICE_ATTR_RO(dyn_cap_needed_attribute); diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index 1ee80cda3d01cb532f5af42d3c53cb7e4316f4ee..280c56db3b12672dd3a59ed1cdb03eff50e645d0 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -44,6 +44,12 @@ /* WriteBooster buffer is available only for the logical unit from 0 to 7 */ #define UFS_UPIU_MAX_WB_LUN_ID 8 +/* + * WriteBooster buffer lifetime has a limit setted by vendor. + * If it is over the limit, WriteBooster feature will be disabled. + */ +#define UFS_WB_EXCEED_LIFETIME 0x0B + /* Well known logical unit id in LUN field of UPIU */ enum { UFS_UPIU_REPORT_LUNS_WLUN = 0x81, @@ -153,6 +159,9 @@ enum attr_idn { QUERY_ATTR_IDN_PSA_STATE = 0x15, QUERY_ATTR_IDN_PSA_DATA_SIZE = 0x16, QUERY_ATTR_IDN_REF_CLK_GATING_WAIT_TIME = 0x17, + QUERY_ATTR_IDN_CASE_ROUGH_TEMP = 0x18, + QUERY_ATTR_IDN_HIGH_TEMP_BOUND = 0x19, + QUERY_ATTR_IDN_LOW_TEMP_BOUND = 0x1A, QUERY_ATTR_IDN_WB_FLUSH_STATUS = 0x1C, QUERY_ATTR_IDN_AVAIL_WB_BUFF_SIZE = 0x1D, QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST = 0x1E, @@ -339,12 +348,14 @@ enum { /* Possible values for dExtendedUFSFeaturesSupport */ enum { + UFS_DEV_LOW_TEMP_NOTIF = BIT(4), + UFS_DEV_HIGH_TEMP_NOTIF = BIT(5), + UFS_DEV_EXT_TEMP_NOTIF = BIT(6), UFS_DEV_HPB_SUPPORT = BIT(7), UFS_DEV_WRITE_BOOSTER_SUP = BIT(8), }; #define UFS_DEV_HPB_SUPPORT_VERSION 0x310 -#define POWER_DESC_MAX_SIZE 0x62 #define POWER_DESC_MAX_ACTV_ICC_LVLS 16 /* Attribute bActiveICCLevel parameter bit masks definitions */ @@ -363,9 +374,16 @@ enum power_desc_param_offset { /* Exception event mask values */ enum { - MASK_EE_STATUS = 0xFFFF, - MASK_EE_URGENT_BKOPS = (1 << 2), + MASK_EE_STATUS = 0xFFFF, + MASK_EE_DYNCAP_EVENT = BIT(0), + MASK_EE_SYSPOOL_EVENT = BIT(1), + MASK_EE_URGENT_BKOPS = BIT(2), + MASK_EE_TOO_HIGH_TEMP = BIT(3), + MASK_EE_TOO_LOW_TEMP = BIT(4), + MASK_EE_WRITEBOOSTER_EVENT = BIT(5), + MASK_EE_PERFORMANCE_THROTTLING = BIT(6), }; +#define MASK_EE_URGENT_TEMP (MASK_EE_TOO_HIGH_TEMP | MASK_EE_TOO_LOW_TEMP) /* Background operation status */ enum bkops_status { @@ -456,6 +474,7 @@ enum ufs_dev_pwr_mode { UFS_ACTIVE_PWR_MODE = 1, UFS_SLEEP_PWR_MODE = 2, UFS_POWERDOWN_PWR_MODE = 3, + UFS_DEEPSLEEP_PWR_MODE = 4, }; #define UFS_WB_BUF_REMAIN_PERCENT(val) ((val) / 10) @@ -577,27 +596,49 @@ struct ufs_vreg_info { }; struct ufs_dev_info { - bool f_power_on_wp_en; + bool f_power_on_wp_en; /* Keeps information if any of the LU is power on write protected */ - bool is_lu_power_on_wp; + bool is_lu_power_on_wp; /* Maximum number of general LU supported by the UFS device */ - u8 max_lu_supported; - u8 wb_dedicated_lu; - u16 wmanufacturerid; + u8 max_lu_supported; + u16 wmanufacturerid; /*UFS device Product Name */ - u8 *model; - u16 wspecversion; - u32 clk_gating_wait_us; - u32 d_ext_ufs_feature_sup; - u8 b_wb_buffer_type; - u32 d_wb_alloc_units; - bool b_rpm_dev_flush_capable; - u8 b_presrv_uspc_en; + u8 *model; + u16 wspecversion; + u32 clk_gating_wait_us; + /* UFS HPB related flag */ bool hpb_enabled; + + /* UFS WB related flags */ + bool wb_enabled; + bool wb_buf_flush_enabled; + u8 wb_dedicated_lu; + u8 wb_buffer_type; + + bool b_rpm_dev_flush_capable; + u8 b_presrv_uspc_en; + ANDROID_KABI_RESERVE(1); }; +/* + * This enum is used in string mapping in include/trace/events/ufs.h. + */ +enum ufs_trace_str_t { + UFS_CMD_SEND, UFS_CMD_COMP, UFS_DEV_COMP, + UFS_QUERY_SEND, UFS_QUERY_COMP, UFS_QUERY_ERR, + UFS_TM_SEND, UFS_TM_COMP, UFS_TM_ERR +}; + +/* + * Transaction Specific Fields (TSF) type in the UPIU package, this enum is + * used in include/trace/events/ufs.h for UFS command trace. + */ +enum ufs_trace_tsf_t { + UFS_TSF_CDB, UFS_TSF_OSF, UFS_TSF_TM_INPUT, UFS_TSF_TM_OUTPUT +}; + /** * ufs_is_valid_unit_desc_lun - checks if the given LUN has a unit descriptor * @dev_info: pointer of instance of struct ufs_dev_info diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c index 5b2bc1a6f9226c2b1cfd925b9a6d83d014f9f839..39bf204c6ec3e4e63e2aedce487e798f075fb1d7 100644 --- a/drivers/scsi/ufs/ufs_bsg.c +++ b/drivers/scsi/ufs/ufs_bsg.c @@ -97,7 +97,7 @@ static int ufs_bsg_request(struct bsg_job *job) bsg_reply->reply_payload_rcv_len = 0; - pm_runtime_get_sync(hba->dev); + ufshcd_rpm_get_sync(hba); msgcode = bsg_request->msgcode; switch (msgcode) { @@ -106,7 +106,7 @@ static int ufs_bsg_request(struct bsg_job *job) ret = ufs_bsg_alloc_desc_buffer(hba, job, &desc_buff, &desc_len, desc_op); if (ret) { - pm_runtime_put_sync(hba->dev); + ufshcd_rpm_put_sync(hba); goto out; } @@ -138,7 +138,7 @@ static int ufs_bsg_request(struct bsg_job *job) break; } - pm_runtime_put_sync(hba->dev); + ufshcd_rpm_put_sync(hba); if (!desc_buff) goto out; diff --git a/drivers/scsi/ufs/ufs_quirks.h b/drivers/scsi/ufs/ufs_quirks.h index 07f559ac5883a9011c01cc426b0307a199f0be9a..35ec9ea79869af3c69e1c5fd7eccab81da528b32 100644 --- a/drivers/scsi/ufs/ufs_quirks.h +++ b/drivers/scsi/ufs/ufs_quirks.h @@ -116,4 +116,10 @@ struct ufs_dev_fix { */ #define UFS_DEVICE_QUIRK_DELAY_AFTER_LPM (1 << 11) +/* + * Some UFS devices require L2P entry should be swapped before being sent to the + * UFS device for HPB READ command. + */ +#define UFS_DEVICE_QUIRK_SWAP_L2P_ENTRY_FOR_HPB_READ (1 << 12) + #endif /* UFS_QUIRKS_H_ */ diff --git a/drivers/scsi/ufs/ufshcd-crypto.c b/drivers/scsi/ufs/ufshcd-crypto.c index 7604c0744746413a0028c4510a3327263d5d0d85..072ef2790a71c81dbb1c657962cc20072f8aac8f 100644 --- a/drivers/scsi/ufs/ufshcd-crypto.c +++ b/drivers/scsi/ufs/ufshcd-crypto.c @@ -198,7 +198,7 @@ int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba) err = devm_blk_ksm_init(hba->dev, &hba->ksm, hba->crypto_capabilities.config_count + 1); if (err) - goto out_free_caps; + goto out; hba->ksm.ksm_ll_ops = ufshcd_ksm_ops; /* UFS only supports 8 bytes for any DUN */ @@ -225,8 +225,6 @@ int ufshcd_hba_init_crypto_capabilities(struct ufs_hba *hba) return 0; -out_free_caps: - devm_kfree(hba->dev, hba->crypto_cap_array); out: /* Indicate that init failed by clearing UFSHCD_CAP_CRYPTO */ hba->caps &= ~UFSHCD_CAP_CRYPTO; diff --git a/drivers/scsi/ufs/ufshcd-dwc.c b/drivers/scsi/ufs/ufshcd-dwc.c index 6a901da2d15a15b14fc6179c098ff4c937d339f9..5bb9d3a88795c305e2fb73e248b5b1132bfa4e8f 100644 --- a/drivers/scsi/ufs/ufshcd-dwc.c +++ b/drivers/scsi/ufs/ufshcd-dwc.c @@ -120,13 +120,10 @@ int ufshcd_dwc_link_startup_notify(struct ufs_hba *hba, if (status == PRE_CHANGE) { ufshcd_dwc_program_clk_div(hba, DWC_UFS_REG_HCLKDIV_DIV_125); - if (hba->vops->phy_initialization) { - err = hba->vops->phy_initialization(hba); - if (err) { - dev_err(hba->dev, "Phy setup failed (%d)\n", - err); - goto out; - } + err = ufshcd_vops_phy_initialization(hba); + if (err) { + dev_err(hba->dev, "Phy setup failed (%d)\n", err); + goto out; } } else { /* POST_CHANGE */ err = ufshcd_dwc_link_is_up(hba); diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c index fadd566025b86ab4ffe18aefd366823ea6f1ae98..f76692053ca17813a1d237fb35aed0e327a832ea 100644 --- a/drivers/scsi/ufs/ufshcd-pci.c +++ b/drivers/scsi/ufs/ufshcd-pci.c @@ -15,13 +15,89 @@ #include #include #include +#include +#include +#include + +struct ufs_host { + void (*late_init)(struct ufs_hba *hba); +}; + +enum { + INTEL_DSM_FNS = 0, + INTEL_DSM_RESET = 1, +}; struct intel_host { + struct ufs_host ufs_host; + u32 dsm_fns; u32 active_ltr; u32 idle_ltr; struct dentry *debugfs_root; + struct gpio_desc *reset_gpio; }; +static const guid_t intel_dsm_guid = + GUID_INIT(0x1A4832A0, 0x7D03, 0x43CA, + 0xB0, 0x20, 0xF6, 0xDC, 0xD1, 0x2A, 0x19, 0x50); + +static int __intel_dsm(struct intel_host *intel_host, struct device *dev, + unsigned int fn, u32 *result) +{ + union acpi_object *obj; + int err = 0; + size_t len; + + obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &intel_dsm_guid, 0, fn, NULL); + if (!obj) + return -EOPNOTSUPP; + + if (obj->type != ACPI_TYPE_BUFFER || obj->buffer.length < 1) { + err = -EINVAL; + goto out; + } + + len = min_t(size_t, obj->buffer.length, 4); + + *result = 0; + memcpy(result, obj->buffer.pointer, len); +out: + ACPI_FREE(obj); + + return err; +} + +static int intel_dsm(struct intel_host *intel_host, struct device *dev, + unsigned int fn, u32 *result) +{ + if (fn > 31 || !(intel_host->dsm_fns & (1 << fn))) + return -EOPNOTSUPP; + + return __intel_dsm(intel_host, dev, fn, result); +} + +static void intel_dsm_init(struct intel_host *intel_host, struct device *dev) +{ + int err; + + err = __intel_dsm(intel_host, dev, INTEL_DSM_FNS, &intel_host->dsm_fns); + dev_dbg(dev, "DSM fns %#x, error %d\n", intel_host->dsm_fns, err); +} + +static int ufs_intel_hce_enable_notify(struct ufs_hba *hba, + enum ufs_notify_change_status status) +{ + /* Cannot enable ICE until after HC enable */ + if (status == POST_CHANGE && hba->caps & UFSHCD_CAP_CRYPTO) { + u32 hce = ufshcd_readl(hba, REG_CONTROLLER_ENABLE); + + hce |= CRYPTO_GENERAL_ENABLE; + ufshcd_writel(hba, hce, REG_CONTROLLER_ENABLE); + } + + return 0; +} + static int ufs_intel_disable_lcc(struct ufs_hba *hba) { u32 attr = UIC_ARG_MIB(PA_LOCAL_TX_LCC_ENABLE); @@ -52,6 +128,81 @@ static int ufs_intel_link_startup_notify(struct ufs_hba *hba, return err; } +static int ufs_intel_set_lanes(struct ufs_hba *hba, u32 lanes) +{ + struct ufs_pa_layer_attr pwr_info = hba->pwr_info; + int ret; + + pwr_info.lane_rx = lanes; + pwr_info.lane_tx = lanes; + ret = ufshcd_config_pwr_mode(hba, &pwr_info); + if (ret) + dev_err(hba->dev, "%s: Setting %u lanes, err = %d\n", + __func__, lanes, ret); + return ret; +} + +static int ufs_intel_lkf_pwr_change_notify(struct ufs_hba *hba, + enum ufs_notify_change_status status, + struct ufs_pa_layer_attr *dev_max_params, + struct ufs_pa_layer_attr *dev_req_params) +{ + int err = 0; + + switch (status) { + case PRE_CHANGE: + if (ufshcd_is_hs_mode(dev_max_params) && + (hba->pwr_info.lane_rx != 2 || hba->pwr_info.lane_tx != 2)) + ufs_intel_set_lanes(hba, 2); + memcpy(dev_req_params, dev_max_params, sizeof(*dev_req_params)); + break; + case POST_CHANGE: + if (ufshcd_is_hs_mode(dev_req_params)) { + u32 peer_granularity; + + usleep_range(1000, 1250); + err = ufshcd_dme_peer_get(hba, UIC_ARG_MIB(PA_GRANULARITY), + &peer_granularity); + } + break; + default: + break; + } + + return err; +} + +static int ufs_intel_lkf_apply_dev_quirks(struct ufs_hba *hba) +{ + u32 granularity, peer_granularity; + u32 pa_tactivate, peer_pa_tactivate; + int ret; + + ret = ufshcd_dme_get(hba, UIC_ARG_MIB(PA_GRANULARITY), &granularity); + if (ret) + goto out; + + ret = ufshcd_dme_peer_get(hba, UIC_ARG_MIB(PA_GRANULARITY), &peer_granularity); + if (ret) + goto out; + + ret = ufshcd_dme_get(hba, UIC_ARG_MIB(PA_TACTIVATE), &pa_tactivate); + if (ret) + goto out; + + ret = ufshcd_dme_peer_get(hba, UIC_ARG_MIB(PA_TACTIVATE), &peer_pa_tactivate); + if (ret) + goto out; + + if (granularity == peer_granularity) { + u32 new_peer_pa_tactivate = pa_tactivate + 2; + + ret = ufshcd_dme_peer_set(hba, UIC_ARG_MIB(PA_TACTIVATE), new_peer_pa_tactivate); + } +out: + return ret; +} + #define INTEL_ACTIVELTR 0x804 #define INTEL_IDLELTR 0x808 @@ -144,6 +295,41 @@ static void intel_remove_debugfs(struct ufs_hba *hba) debugfs_remove_recursive(host->debugfs_root); } +static int ufs_intel_device_reset(struct ufs_hba *hba) +{ + struct intel_host *host = ufshcd_get_variant(hba); + + if (host->dsm_fns & INTEL_DSM_RESET) { + u32 result = 0; + int err; + + err = intel_dsm(host, hba->dev, INTEL_DSM_RESET, &result); + if (!err && !result) + err = -EIO; + if (err) + dev_err(hba->dev, "%s: DSM error %d result %u\n", + __func__, err, result); + return err; + } + + if (!host->reset_gpio) + return -EOPNOTSUPP; + + gpiod_set_value_cansleep(host->reset_gpio, 1); + usleep_range(10, 15); + + gpiod_set_value_cansleep(host->reset_gpio, 0); + usleep_range(10, 15); + + return 0; +} + +static struct gpio_desc *ufs_intel_get_reset_gpio(struct device *dev) +{ + /* GPIO in _DSD has active low setting */ + return devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); +} + static int ufs_intel_common_init(struct ufs_hba *hba) { struct intel_host *host; @@ -154,6 +340,23 @@ static int ufs_intel_common_init(struct ufs_hba *hba) if (!host) return -ENOMEM; ufshcd_set_variant(hba, host); + intel_dsm_init(host, hba->dev); + if (host->dsm_fns & INTEL_DSM_RESET) { + if (hba->vops->device_reset) + hba->caps |= UFSHCD_CAP_DEEPSLEEP; + } else { + if (hba->vops->device_reset) + host->reset_gpio = ufs_intel_get_reset_gpio(hba->dev); + if (IS_ERR(host->reset_gpio)) { + dev_err(hba->dev, "%s: failed to get reset GPIO, error %ld\n", + __func__, PTR_ERR(host->reset_gpio)); + host->reset_gpio = NULL; + } + if (host->reset_gpio) { + gpiod_set_value_cansleep(host->reset_gpio, 0); + hba->caps |= UFSHCD_CAP_DEEPSLEEP; + } + } intel_ltr_expose(hba->dev); intel_add_debugfs(hba); return 0; @@ -167,20 +370,6 @@ static void ufs_intel_common_exit(struct ufs_hba *hba) static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op) { - /* - * To support S4 (suspend-to-disk) with spm_lvl other than 5, the base - * address registers must be restored because the restore kernel can - * have used different addresses. - */ - ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr), - REG_UTP_TRANSFER_REQ_LIST_BASE_L); - ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr), - REG_UTP_TRANSFER_REQ_LIST_BASE_H); - ufshcd_writel(hba, lower_32_bits(hba->utmrdl_dma_addr), - REG_UTP_TASK_REQ_LIST_BASE_L); - ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr), - REG_UTP_TASK_REQ_LIST_BASE_H); - if (ufshcd_is_link_hibern8(hba)) { int ret = ufshcd_uic_hibern8_exit(hba); @@ -206,6 +395,39 @@ static int ufs_intel_ehl_init(struct ufs_hba *hba) return ufs_intel_common_init(hba); } +static void ufs_intel_lkf_late_init(struct ufs_hba *hba) +{ + /* LKF always needs a full reset, so set PM accordingly */ + if (hba->caps & UFSHCD_CAP_DEEPSLEEP) { + hba->spm_lvl = UFS_PM_LVL_6; + hba->rpm_lvl = UFS_PM_LVL_6; + } else { + hba->spm_lvl = UFS_PM_LVL_5; + hba->rpm_lvl = UFS_PM_LVL_5; + } +} + +static int ufs_intel_lkf_init(struct ufs_hba *hba) +{ + struct ufs_host *ufs_host; + int err; + + hba->nop_out_timeout = 200; + hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; + hba->caps |= UFSHCD_CAP_CRYPTO; + err = ufs_intel_common_init(hba); + ufs_host = ufshcd_get_variant(hba); + ufs_host->late_init = ufs_intel_lkf_late_init; + return err; +} + +static int ufs_intel_adl_init(struct ufs_hba *hba) +{ + hba->nop_out_timeout = 200; + hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8; + return ufs_intel_common_init(hba); +} + static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = { .name = "intel-pci", .init = ufs_intel_common_init, @@ -222,70 +444,38 @@ static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = { .resume = ufs_intel_resume, }; -#ifdef CONFIG_PM_SLEEP -/** - * ufshcd_pci_suspend - suspend power management function - * @dev: pointer to PCI device handle - * - * Returns 0 if successful - * Returns non-zero otherwise - */ -static int ufshcd_pci_suspend(struct device *dev) -{ - return ufshcd_system_suspend(dev_get_drvdata(dev)); -} +static struct ufs_hba_variant_ops ufs_intel_lkf_hba_vops = { + .name = "intel-pci", + .init = ufs_intel_lkf_init, + .exit = ufs_intel_common_exit, + .hce_enable_notify = ufs_intel_hce_enable_notify, + .link_startup_notify = ufs_intel_link_startup_notify, + .pwr_change_notify = ufs_intel_lkf_pwr_change_notify, + .apply_dev_quirks = ufs_intel_lkf_apply_dev_quirks, + .resume = ufs_intel_resume, + .device_reset = ufs_intel_device_reset, +}; -/** - * ufshcd_pci_resume - resume power management function - * @dev: pointer to PCI device handle - * - * Returns 0 if successful - * Returns non-zero otherwise - */ -static int ufshcd_pci_resume(struct device *dev) -{ - return ufshcd_system_resume(dev_get_drvdata(dev)); -} +static struct ufs_hba_variant_ops ufs_intel_adl_hba_vops = { + .name = "intel-pci", + .init = ufs_intel_adl_init, + .exit = ufs_intel_common_exit, + .link_startup_notify = ufs_intel_link_startup_notify, + .resume = ufs_intel_resume, + .device_reset = ufs_intel_device_reset, +}; -/** - * ufshcd_pci_poweroff - suspend-to-disk poweroff function - * @dev: pointer to PCI device handle - * - * Returns 0 if successful - * Returns non-zero otherwise - */ -static int ufshcd_pci_poweroff(struct device *dev) +#ifdef CONFIG_PM_SLEEP +static int ufshcd_pci_restore(struct device *dev) { struct ufs_hba *hba = dev_get_drvdata(dev); - int spm_lvl = hba->spm_lvl; - int ret; - /* - * For poweroff we need to set the UFS device to PowerDown mode. - * Force spm_lvl to ensure that. - */ - hba->spm_lvl = 5; - ret = ufshcd_system_suspend(hba); - hba->spm_lvl = spm_lvl; - return ret; -} + /* Force a full reset and restore */ + ufshcd_set_link_off(hba); -#endif /* !CONFIG_PM_SLEEP */ - -#ifdef CONFIG_PM -static int ufshcd_pci_runtime_suspend(struct device *dev) -{ - return ufshcd_runtime_suspend(dev_get_drvdata(dev)); + return ufshcd_system_resume(dev); } -static int ufshcd_pci_runtime_resume(struct device *dev) -{ - return ufshcd_runtime_resume(dev_get_drvdata(dev)); -} -static int ufshcd_pci_runtime_idle(struct device *dev) -{ - return ufshcd_runtime_idle(dev_get_drvdata(dev)); -} -#endif /* !CONFIG_PM */ +#endif /** * ufshcd_pci_shutdown - main function to put the controller in reset state @@ -321,6 +511,7 @@ static void ufshcd_pci_remove(struct pci_dev *pdev) static int ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + struct ufs_host *ufs_host; struct ufs_hba *hba; void __iomem *mmio_base; int err; @@ -347,8 +538,6 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - pci_set_drvdata(pdev, hba); - hba->vops = (struct ufs_hba_variant_ops *)id->driver_data; err = ufshcd_init(hba, mmio_base, pdev->irq); @@ -358,6 +547,10 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } + ufs_host = ufshcd_get_variant(hba); + if (ufs_host && ufs_host->late_init) + ufs_host->late_init(hba); + pm_runtime_put_noidle(&pdev->dev); pm_runtime_allow(&pdev->dev); @@ -365,17 +558,17 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) } static const struct dev_pm_ops ufshcd_pci_pm_ops = { + SET_RUNTIME_PM_OPS(ufshcd_runtime_suspend, ufshcd_runtime_resume, NULL) #ifdef CONFIG_PM_SLEEP - .suspend = ufshcd_pci_suspend, - .resume = ufshcd_pci_resume, - .freeze = ufshcd_pci_suspend, - .thaw = ufshcd_pci_resume, - .poweroff = ufshcd_pci_poweroff, - .restore = ufshcd_pci_resume, + .suspend = ufshcd_system_suspend, + .resume = ufshcd_system_resume, + .freeze = ufshcd_system_suspend, + .thaw = ufshcd_system_resume, + .poweroff = ufshcd_system_suspend, + .restore = ufshcd_pci_restore, + .prepare = ufshcd_suspend_prepare, + .complete = ufshcd_resume_complete, #endif - SET_RUNTIME_PM_OPS(ufshcd_pci_runtime_suspend, - ufshcd_pci_runtime_resume, - ufshcd_pci_runtime_idle) }; static const struct pci_device_id ufshcd_pci_tbl[] = { @@ -383,6 +576,9 @@ static const struct pci_device_id ufshcd_pci_tbl[] = { { PCI_VDEVICE(INTEL, 0x9DFA), (kernel_ulong_t)&ufs_intel_cnl_hba_vops }, { PCI_VDEVICE(INTEL, 0x4B41), (kernel_ulong_t)&ufs_intel_ehl_hba_vops }, { PCI_VDEVICE(INTEL, 0x4B43), (kernel_ulong_t)&ufs_intel_ehl_hba_vops }, + { PCI_VDEVICE(INTEL, 0x98FA), (kernel_ulong_t)&ufs_intel_lkf_hba_vops }, + { PCI_VDEVICE(INTEL, 0x51FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops }, + { PCI_VDEVICE(INTEL, 0x54FF), (kernel_ulong_t)&ufs_intel_adl_hba_vops }, { } /* terminate list */ }; diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index 4839a109216c6c2ae7778c5eabd11da1f53984da..87975d1a21c8b1f2f227355181bd5b3d546e0d93 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -91,7 +91,12 @@ static int ufshcd_parse_clock_info(struct ufs_hba *hba) clki->min_freq = clkfreq[i]; clki->max_freq = clkfreq[i+1]; - clki->name = kstrdup(name, GFP_KERNEL); + clki->name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!clki->name) { + ret = -ENOMEM; + goto out; + } + if (!strcmp(name, "ref_clk")) clki->keep_link_active = true; dev_dbg(dev, "%s: min %u max %u name %s\n", "freq-table-hz", @@ -106,7 +111,6 @@ out: static int ufshcd_populate_vreg(struct device *dev, const char *name, struct ufs_vreg **out_vreg) { - int ret = 0; char prop_name[MAX_PROP_SIZE]; struct ufs_vreg *vreg = NULL; struct device_node *np = dev->of_node; @@ -127,7 +131,9 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name, if (!vreg) return -ENOMEM; - vreg->name = kstrdup(name, GFP_KERNEL); + vreg->name = devm_kstrdup(dev, name, GFP_KERNEL); + if (!vreg->name) + return -ENOMEM; snprintf(prop_name, MAX_PROP_SIZE, "%s-max-microamp", name); if (of_property_read_u32(np, prop_name, &vreg->max_uA)) { @@ -135,9 +141,8 @@ static int ufshcd_populate_vreg(struct device *dev, const char *name, vreg->max_uA = 0; } out: - if (!ret) - *out_vreg = vreg; - return ret; + *out_vreg = vreg; + return 0; } /** @@ -172,53 +177,6 @@ out: return err; } -#ifdef CONFIG_PM -/** - * ufshcd_pltfrm_suspend - suspend power management function - * @dev: pointer to device handle - * - * Returns 0 if successful - * Returns non-zero otherwise - */ -int ufshcd_pltfrm_suspend(struct device *dev) -{ - return ufshcd_system_suspend(dev_get_drvdata(dev)); -} -EXPORT_SYMBOL_GPL(ufshcd_pltfrm_suspend); - -/** - * ufshcd_pltfrm_resume - resume power management function - * @dev: pointer to device handle - * - * Returns 0 if successful - * Returns non-zero otherwise - */ -int ufshcd_pltfrm_resume(struct device *dev) -{ - return ufshcd_system_resume(dev_get_drvdata(dev)); -} -EXPORT_SYMBOL_GPL(ufshcd_pltfrm_resume); - -int ufshcd_pltfrm_runtime_suspend(struct device *dev) -{ - return ufshcd_runtime_suspend(dev_get_drvdata(dev)); -} -EXPORT_SYMBOL_GPL(ufshcd_pltfrm_runtime_suspend); - -int ufshcd_pltfrm_runtime_resume(struct device *dev) -{ - return ufshcd_runtime_resume(dev_get_drvdata(dev)); -} -EXPORT_SYMBOL_GPL(ufshcd_pltfrm_runtime_resume); - -int ufshcd_pltfrm_runtime_idle(struct device *dev) -{ - return ufshcd_runtime_idle(dev_get_drvdata(dev)); -} -EXPORT_SYMBOL_GPL(ufshcd_pltfrm_runtime_idle); - -#endif /* CONFIG_PM */ - void ufshcd_pltfrm_shutdown(struct platform_device *pdev) { ufshcd_shutdown((struct ufs_hba *)platform_get_drvdata(pdev)); @@ -337,6 +295,23 @@ int ufshcd_get_pwr_dev_param(struct ufs_dev_params *pltfrm_param, } EXPORT_SYMBOL_GPL(ufshcd_get_pwr_dev_param); +void ufshcd_init_pwr_dev_param(struct ufs_dev_params *dev_param) +{ + dev_param->tx_lanes = 2; + dev_param->rx_lanes = 2; + dev_param->hs_rx_gear = UFS_HS_G3; + dev_param->hs_tx_gear = UFS_HS_G3; + dev_param->pwm_rx_gear = UFS_PWM_G4; + dev_param->pwm_tx_gear = UFS_PWM_G4; + dev_param->rx_pwr_pwm = SLOW_MODE; + dev_param->tx_pwr_pwm = SLOW_MODE; + dev_param->rx_pwr_hs = FAST_MODE; + dev_param->tx_pwr_hs = FAST_MODE; + dev_param->hs_rate = PA_HS_MODE_B; + dev_param->desired_working_mode = UFS_HS_MODE; +} +EXPORT_SYMBOL_GPL(ufshcd_init_pwr_dev_param); + /** * ufshcd_pltfrm_init - probe routine of the driver * @pdev: pointer to Platform device handle @@ -393,8 +368,6 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, goto dealloc_host; } - platform_set_drvdata(pdev, hba); - pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.h b/drivers/scsi/ufs/ufshcd-pltfrm.h index b79cdf9129a08ad46cf65075001bf1f74df9c514..c33e28ac6ef621d310b82178afb68dbca4253c5a 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.h +++ b/drivers/scsi/ufs/ufshcd-pltfrm.h @@ -28,26 +28,9 @@ struct ufs_dev_params { int ufshcd_get_pwr_dev_param(struct ufs_dev_params *dev_param, struct ufs_pa_layer_attr *dev_max, struct ufs_pa_layer_attr *agreed_pwr); +void ufshcd_init_pwr_dev_param(struct ufs_dev_params *dev_param); int ufshcd_pltfrm_init(struct platform_device *pdev, const struct ufs_hba_variant_ops *vops); void ufshcd_pltfrm_shutdown(struct platform_device *pdev); -#ifdef CONFIG_PM - -int ufshcd_pltfrm_suspend(struct device *dev); -int ufshcd_pltfrm_resume(struct device *dev); -int ufshcd_pltfrm_runtime_suspend(struct device *dev); -int ufshcd_pltfrm_runtime_resume(struct device *dev); -int ufshcd_pltfrm_runtime_idle(struct device *dev); - -#else /* !CONFIG_PM */ - -#define ufshcd_pltfrm_suspend NULL -#define ufshcd_pltfrm_resume NULL -#define ufshcd_pltfrm_runtime_suspend NULL -#define ufshcd_pltfrm_runtime_resume NULL -#define ufshcd_pltfrm_runtime_idle NULL - -#endif /* CONFIG_PM */ - #endif /* UFSHCD_PLTFRM_H_ */ diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index eb2baf3b69b67c39458edfa4cebbac98e06d29f1..9b0f7e76c0f3b22fecfc29c7f2404af3c33260d7 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -16,16 +16,17 @@ #include #include #include +#include #include "ufshcd.h" #include "ufs_quirks.h" #include "unipro.h" #include "ufs-sysfs.h" #include "ufs-debugfs.h" +#include "ufs-fault-injection.h" #include "ufs_bsg.h" #include "ufshcd-crypto.h" #include "ufshpb.h" #include -#include #define CREATE_TRACE_POINTS #include @@ -64,6 +65,9 @@ /* maximum number of reset retries before giving up */ #define MAX_HOST_RESET_RETRIES 5 +/* Maximum number of error handler retries before giving up */ +#define MAX_ERR_HANDLER_RETRIES 5 + /* Expose the flag value from utp_upiu_query.value */ #define MASK_QUERY_UPIU_FLAG_LOC 0xFF @@ -125,17 +129,17 @@ EXPORT_SYMBOL_GPL(ufshcd_dump_regs); enum { UFSHCD_MAX_CHANNEL = 0, UFSHCD_MAX_ID = 1, - UFSHCD_CMD_PER_LUN = 32, - UFSHCD_CAN_QUEUE = 32, + UFSHCD_NUM_RESERVED = 1, + UFSHCD_CMD_PER_LUN = 32 - UFSHCD_NUM_RESERVED, + UFSHCD_CAN_QUEUE = 32 - UFSHCD_NUM_RESERVED, }; -/* UFSHCD states */ -enum { - UFSHCD_STATE_RESET, - UFSHCD_STATE_ERROR, - UFSHCD_STATE_OPERATIONAL, - UFSHCD_STATE_EH_SCHEDULED_FATAL, - UFSHCD_STATE_EH_SCHEDULED_NON_FATAL, +static const char *const ufshcd_state_name[] = { + [UFSHCD_STATE_RESET] = "reset", + [UFSHCD_STATE_OPERATIONAL] = "operational", + [UFSHCD_STATE_ERROR] = "error", + [UFSHCD_STATE_EH_SCHEDULED_FATAL] = "eh_fatal", + [UFSHCD_STATE_EH_SCHEDULED_NON_FATAL] = "eh_non_fatal", }; /* UFSHCD error handling flags */ @@ -162,12 +166,17 @@ enum { ((h)->eh_flags &= ~UFSHCD_EH_IN_PROGRESS) struct ufs_pm_lvl_states ufs_pm_lvl_states[] = { - {UFS_ACTIVE_PWR_MODE, UIC_LINK_ACTIVE_STATE}, - {UFS_ACTIVE_PWR_MODE, UIC_LINK_HIBERN8_STATE}, - {UFS_SLEEP_PWR_MODE, UIC_LINK_ACTIVE_STATE}, - {UFS_SLEEP_PWR_MODE, UIC_LINK_HIBERN8_STATE}, - {UFS_POWERDOWN_PWR_MODE, UIC_LINK_HIBERN8_STATE}, - {UFS_POWERDOWN_PWR_MODE, UIC_LINK_OFF_STATE}, + [UFS_PM_LVL_0] = {UFS_ACTIVE_PWR_MODE, UIC_LINK_ACTIVE_STATE}, + [UFS_PM_LVL_1] = {UFS_ACTIVE_PWR_MODE, UIC_LINK_HIBERN8_STATE}, + [UFS_PM_LVL_2] = {UFS_SLEEP_PWR_MODE, UIC_LINK_ACTIVE_STATE}, + [UFS_PM_LVL_3] = {UFS_SLEEP_PWR_MODE, UIC_LINK_HIBERN8_STATE}, + [UFS_PM_LVL_4] = {UFS_POWERDOWN_PWR_MODE, UIC_LINK_HIBERN8_STATE}, + [UFS_PM_LVL_5] = {UFS_POWERDOWN_PWR_MODE, UIC_LINK_OFF_STATE}, + /* + * For DeepSleep, the link is first put in hibern8 and then off. + * Leaving the link in hibern8 is not supported. + */ + [UFS_PM_LVL_6] = {UFS_DEEPSLEEP_PWR_MODE, UIC_LINK_OFF_STATE}, }; static inline enum ufs_dev_pwr_mode @@ -201,7 +210,8 @@ ufs_get_desired_pm_lvl_for_dev_link_state(enum ufs_dev_pwr_mode dev_state, static struct ufs_dev_fix ufs_fixups[] = { /* UFS cards deviations table */ UFS_FIX(UFS_VENDOR_MICRON, UFS_ANY_MODEL, - UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM), + UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM | + UFS_DEVICE_QUIRK_SWAP_L2P_ENTRY_FOR_HPB_READ), UFS_FIX(UFS_VENDOR_SAMSUNG, UFS_ANY_MODEL, UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM | UFS_DEVICE_QUIRK_HOST_PA_TACTIVATE | @@ -225,8 +235,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba); static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd); static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag); static void ufshcd_hba_exit(struct ufs_hba *hba); -static int ufshcd_clear_ua_wluns(struct ufs_hba *hba); -static int ufshcd_probe_hba(struct ufs_hba *hba, bool async); +static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params); static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on); static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba); static int ufshcd_host_reset_and_restore(struct ufs_hba *hba); @@ -237,25 +246,16 @@ static int ufshcd_scale_clks(struct ufs_hba *hba, bool scale_up); static irqreturn_t ufshcd_intr(int irq, void *__hba); static int ufshcd_change_power_mode(struct ufs_hba *hba, struct ufs_pa_layer_attr *pwr_mode); -static void ufshcd_schedule_eh_work(struct ufs_hba *hba); static int ufshcd_setup_hba_vreg(struct ufs_hba *hba, bool on); static int ufshcd_setup_vreg(struct ufs_hba *hba, bool on); static inline int ufshcd_config_vreg_hpm(struct ufs_hba *hba, struct ufs_vreg *vreg); static int ufshcd_try_to_abort_task(struct ufs_hba *hba, int tag); -static int ufshcd_wb_buf_flush_enable(struct ufs_hba *hba); -static int ufshcd_wb_buf_flush_disable(struct ufs_hba *hba); -static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable); -static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set); +static void ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set); static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable); static void ufshcd_hba_vreg_set_lpm(struct ufs_hba *hba); static void ufshcd_hba_vreg_set_hpm(struct ufs_hba *hba); -static inline bool ufshcd_valid_tag(struct ufs_hba *hba, int tag) -{ - return tag >= 0 && tag < hba->nutrs; -} - static inline void ufshcd_enable_irq(struct ufs_hba *hba) { if (!hba->is_irq_enabled) { @@ -274,20 +274,12 @@ static inline void ufshcd_disable_irq(struct ufs_hba *hba) static inline void ufshcd_wb_config(struct ufs_hba *hba) { - int ret; - if (!ufshcd_is_wb_allowed(hba)) return; - ret = ufshcd_wb_ctrl(hba, true); - if (ret) - dev_err(hba->dev, "%s: Enable WB failed: %d\n", __func__, ret); - else - dev_info(hba->dev, "%s: Write Booster Configured\n", __func__); - ret = ufshcd_wb_toggle_flush_during_h8(hba, true); - if (ret) - dev_err(hba->dev, "%s: En WB flush during H8: failed: %d\n", - __func__, ret); + ufshcd_wb_toggle(hba, true); + + ufshcd_wb_toggle_flush_during_h8(hba, true); if (!(hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL)) ufshcd_wb_toggle_flush(hba, true); } @@ -305,97 +297,130 @@ static void ufshcd_scsi_block_requests(struct ufs_hba *hba) } static void ufshcd_add_cmd_upiu_trace(struct ufs_hba *hba, unsigned int tag, - const char *str) + enum ufs_trace_str_t str_t) { struct utp_upiu_req *rq = hba->lrb[tag].ucd_req_ptr; + struct utp_upiu_header *header; + + if (!trace_ufshcd_upiu_enabled()) + return; + + if (str_t == UFS_CMD_SEND) + header = &rq->header; + else + header = &hba->lrb[tag].ucd_rsp_ptr->header; - trace_ufshcd_upiu(dev_name(hba->dev), str, &rq->header, &rq->sc.cdb); + trace_ufshcd_upiu(dev_name(hba->dev), str_t, header, &rq->sc.cdb, + UFS_TSF_CDB); } -static void ufshcd_add_query_upiu_trace(struct ufs_hba *hba, unsigned int tag, - const char *str) +static void ufshcd_add_query_upiu_trace(struct ufs_hba *hba, + enum ufs_trace_str_t str_t, + struct utp_upiu_req *rq_rsp) { - struct utp_upiu_req *rq = hba->lrb[tag].ucd_req_ptr; + if (!trace_ufshcd_upiu_enabled()) + return; - trace_ufshcd_upiu(dev_name(hba->dev), str, &rq->header, &rq->qr); + trace_ufshcd_upiu(dev_name(hba->dev), str_t, &rq_rsp->header, + &rq_rsp->qr, UFS_TSF_OSF); } +#undef EM +#undef EMe +#define EM(label, string) [label] = string, +#define EMe(label, string) [label] = string + +static const char *str_t_to_str[] = { + UFS_CMD_TRACE_STRINGS +}; + +#undef EMe +#undef EM + static void ufshcd_add_tm_upiu_trace(struct ufs_hba *hba, unsigned int tag, - const char *str) + enum ufs_trace_str_t str_t) { - int off = (int)tag - hba->nutrs; - struct utp_task_req_desc *descp = &hba->utmrdl_base_addr[off]; + struct utp_task_req_desc *descp = &hba->utmrdl_base_addr[tag]; + + if (!trace_ufshcd_upiu_enabled()) + return; - trace_android_vh_ufs_send_tm_command(hba, tag, str); - trace_ufshcd_upiu(dev_name(hba->dev), str, &descp->req_header, - &descp->input_param1); + trace_android_vh_ufs_send_tm_command(hba, tag, str_t_to_str[str_t]); + if (str_t == UFS_TM_SEND) + trace_ufshcd_upiu(dev_name(hba->dev), str_t, + &descp->upiu_req.req_header, + &descp->upiu_req.input_param1, + UFS_TSF_TM_INPUT); + else + trace_ufshcd_upiu(dev_name(hba->dev), str_t, + &descp->upiu_rsp.rsp_header, + &descp->upiu_rsp.output_param1, + UFS_TSF_TM_OUTPUT); } static void ufshcd_add_uic_command_trace(struct ufs_hba *hba, struct uic_command *ucmd, - const char *str) + enum ufs_trace_str_t str_t) { u32 cmd; - trace_android_vh_ufs_send_uic_command(hba, ucmd, str); + trace_android_vh_ufs_send_uic_command(hba, ucmd, str_t_to_str[str_t]); if (!trace_ufshcd_uic_command_enabled()) return; - if (!strcmp(str, "send")) + if (str_t == UFS_CMD_SEND) cmd = ucmd->command; else cmd = ufshcd_readl(hba, REG_UIC_COMMAND); - trace_ufshcd_uic_command(dev_name(hba->dev), str, cmd, + trace_ufshcd_uic_command(dev_name(hba->dev), str_t, cmd, ufshcd_readl(hba, REG_UIC_COMMAND_ARG_1), ufshcd_readl(hba, REG_UIC_COMMAND_ARG_2), ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3)); } -static void ufshcd_add_command_trace(struct ufs_hba *hba, - unsigned int tag, const char *str) +static void ufshcd_add_command_trace(struct ufs_hba *hba, unsigned int tag, + enum ufs_trace_str_t str_t) { - sector_t lba = -1; + u64 lba = 0; u8 opcode = 0, group_id = 0; u32 intr, doorbell; struct ufshcd_lrb *lrbp = &hba->lrb[tag]; struct scsi_cmnd *cmd = lrbp->cmd; + struct request *rq = scsi_cmd_to_rq(cmd); int transfer_len = -1; - if (!trace_ufshcd_command_enabled()) { - /* trace UPIU W/O tracing command */ - if (cmd) - ufshcd_add_cmd_upiu_trace(hba, tag, str); + if (!cmd) return; - } - if (cmd) { /* data phase exists */ - /* trace UPIU also */ - ufshcd_add_cmd_upiu_trace(hba, tag, str); - opcode = cmd->cmnd[0]; - if ((opcode == READ_10) || (opcode == WRITE_10)) { - /* - * Currently we only fully trace read(10) and write(10) - * commands - */ - if (cmd->request && cmd->request->bio) - lba = cmd->request->bio->bi_iter.bi_sector; - transfer_len = be32_to_cpu( - lrbp->ucd_req_ptr->sc.exp_data_transfer_len); - if (opcode == WRITE_10) - group_id = lrbp->cmd->cmnd[6]; - } else if (opcode == UNMAP) { - if (cmd->request) { - lba = scsi_get_lba(cmd); - transfer_len = blk_rq_bytes(cmd->request); - } - } + /* trace UPIU also */ + ufshcd_add_cmd_upiu_trace(hba, tag, str_t); + if (!trace_ufshcd_command_enabled()) + return; + + opcode = cmd->cmnd[0]; + + if (opcode == READ_10 || opcode == WRITE_10) { + /* + * Currently we only fully trace read(10) and write(10) commands + */ + transfer_len = + be32_to_cpu(lrbp->ucd_req_ptr->sc.exp_data_transfer_len); + lba = scsi_get_lba(cmd); + if (opcode == WRITE_10) + group_id = lrbp->cmd->cmnd[6]; + } else if (opcode == UNMAP) { + /* + * The number of Bytes to be unmapped beginning with the lba. + */ + transfer_len = blk_rq_bytes(rq); + lba = scsi_get_lba(cmd); } intr = ufshcd_readl(hba, REG_INTERRUPT_STATUS); doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); - trace_ufshcd_command(dev_name(hba->dev), str, tag, + trace_ufshcd_command(dev_name(hba->dev), str_t, tag, doorbell, transfer_len, intr, lba, opcode, group_id); } @@ -439,6 +464,8 @@ static void ufshcd_print_evt(struct ufs_hba *hba, u32 id, if (!found) dev_err(hba->dev, "No record of %s\n", err_name); + else + dev_err(hba->dev, "%s: total cnt=%llu\n", err_name, e->cnt); } static void ufshcd_print_evt_hist(struct ufs_hba *hba) @@ -575,7 +602,12 @@ static void ufshcd_print_pwr_info(struct ufs_hba *hba) "INVALID MODE", }; - dev_err(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n", + /* + * Using dev_dbg to avoid messages during runtime PM to avoid + * never-ending cycles of messages written back to storage by user space + * causing runtime resume, causing more messages and so on. + */ + dev_dbg(hba->dev, "%s:[RX, TX]: gear=[%d, %d], lane[%d, %d], pwr[%s, %s], rate = %d\n", __func__, hba->pwr_info.gear_rx, hba->pwr_info.gear_tx, hba->pwr_info.lane_rx, hba->pwr_info.lane_tx, @@ -584,6 +616,23 @@ static void ufshcd_print_pwr_info(struct ufs_hba *hba) hba->pwr_info.hs_rate); } +static void ufshcd_device_reset(struct ufs_hba *hba) +{ + int err; + + err = ufshcd_vops_device_reset(hba); + + if (!err) { + ufshcd_set_ufs_dev_active(hba); + if (ufshcd_is_wb_allowed(hba)) { + hba->dev_info.wb_enabled = false; + hba->dev_info.wb_buf_flush_enabled = false; + } + } + if (err != -EOPNOTSUPP) + ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err); +} + void ufshcd_delay_us(unsigned long us, unsigned long tolerance) { if (!us) @@ -692,7 +741,7 @@ static inline bool ufshcd_is_device_present(struct ufs_hba *hba) * This function is used to get the OCS field from UTRD * Returns the OCS field in the UTRD */ -static inline int ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp) +static enum utp_ocs ufshcd_get_tr_ocs(struct ufshcd_lrb *lrbp) { return le32_to_cpu(lrbp->utr_descriptor_ptr->header.dword_2) & MASK_OCS; } @@ -724,16 +773,6 @@ static inline void ufshcd_utmrl_clear(struct ufs_hba *hba, u32 pos) ufshcd_writel(hba, ~(1 << pos), REG_UTP_TASK_REQ_LIST_CLEAR); } -/** - * ufshcd_outstanding_req_clear - Clear a bit in outstanding request field - * @hba: per adapter instance - * @tag: position of the bit to be cleared - */ -static inline void ufshcd_outstanding_req_clear(struct ufs_hba *hba, int tag) -{ - clear_bit(tag, &hba->outstanding_reqs); -} - /** * ufshcd_get_lists_status - Check UCRDY, UTRLRDY and UTMRLRDY * @reg: Register value of host controller status @@ -1053,13 +1092,38 @@ static bool ufshcd_is_devfreq_scaling_required(struct ufs_hba *hba, return false; } +/* + * Determine the number of pending commands by counting the bits in the SCSI + * device budget maps. This approach has been selected because a bit is set in + * the budget map before scsi_host_queue_ready() checks the host_self_blocked + * flag. The host_self_blocked flag can be modified by calling + * scsi_block_requests() or scsi_unblock_requests(). + */ +static u32 ufshcd_pending_cmds(struct ufs_hba *hba) +{ + struct scsi_device *sdev; + u32 pending = 0; + + lockdep_assert_held(hba->host->host_lock); + __shost_for_each_device(sdev, hba->host) + pending += atomic_read(&sdev->device_busy); + + return pending; +} + +/* + * Wait until all pending SCSI commands and TMFs have finished or the timeout + * has expired. + * + * Return: 0 upon success; -EBUSY upon timeout. + */ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, u64 wait_timeout_us) { unsigned long flags; int ret = 0; u32 tm_doorbell; - u32 tr_doorbell; + u32 tr_pending; bool timeout = false, do_last_check = false; ktime_t start; @@ -1077,8 +1141,8 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, } tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL); - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); - if (!tm_doorbell && !tr_doorbell) { + tr_pending = ufshcd_pending_cmds(hba); + if (!tm_doorbell && !tr_pending) { timeout = false; break; } else if (do_last_check) { @@ -1086,7 +1150,7 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, } spin_unlock_irqrestore(hba->host->host_lock, flags); - schedule(); + io_schedule_timeout(msecs_to_jiffies(20)); if (ktime_to_us(ktime_sub(ktime_get(), start)) > wait_timeout_us) { timeout = true; @@ -1098,12 +1162,12 @@ static int ufshcd_wait_for_doorbell_clr(struct ufs_hba *hba, do_last_check = true; } spin_lock_irqsave(hba->host->host_lock, flags); - } while (tm_doorbell || tr_doorbell); + } while (tm_doorbell || tr_pending); if (timeout) { dev_err(hba->dev, "%s: timedout waiting for doorbell to clear (tm=0x%x, tr=0x%x)\n", - __func__, tm_doorbell, tr_doorbell); + __func__, tm_doorbell, tr_pending); ret = -EBUSY; } out: @@ -1157,9 +1221,14 @@ static int ufshcd_scale_gear(struct ufs_hba *hba, bool scale_up) return ret; } -static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba) +/* + * Wait until all pending SCSI commands and TMFs have finished or the timeout + * has expired. + * + * Return: 0 upon success; -EBUSY upon timeout. + */ +static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) { - #define DOORBELL_CLR_TOUT_US (1000 * 1000) /* 1 sec */ int ret = 0; /* * make sure that there are no outstanding requests when @@ -1168,8 +1237,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba) ufshcd_scsi_block_requests(hba); down_write(&hba->clk_scaling_lock); - if (!hba->clk_scaling.is_allowed || - ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) { + if (ufshcd_wait_for_doorbell_clr(hba, timeout_us)) { ret = -EBUSY; up_write(&hba->clk_scaling_lock); ufshcd_scsi_unblock_requests(hba); @@ -1207,10 +1275,18 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up) int ret = 0; bool is_writelock = true; - ret = ufshcd_clock_scaling_prepare(hba); + if (!hba->clk_scaling.is_allowed) + return -EBUSY; + + ret = ufshcd_clock_scaling_prepare(hba, 1 * USEC_PER_SEC); if (ret) return ret; + if (!hba->clk_scaling.is_allowed) { + ret = -EBUSY; + goto out_unprepare; + } + /* scale down the gear before scaling down clocks */ if (!scale_up) { ret = ufshcd_scale_gear(hba, false); @@ -1237,7 +1313,7 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up) /* Enable Write Booster if we have scaled up else disable it */ downgrade_write(&hba->clk_scaling_lock); is_writelock = false; - ufshcd_wb_ctrl(hba, scale_up); + ufshcd_wb_toggle(hba, scale_up); out_unprepare: ufshcd_clock_scaling_unprepare(hba, is_writelock); @@ -1341,25 +1417,6 @@ out: return ret; } -static bool ufshcd_is_busy(struct request *req, void *priv, bool reserved) -{ - int *busy = priv; - - WARN_ON_ONCE(reserved); - (*busy)++; - return false; -} - -/* Whether or not any tag is in use by a request that is in progress. */ -static bool ufshcd_any_tag_in_use(struct ufs_hba *hba) -{ - struct request_queue *q = hba->cmd_queue; - int busy = 0; - - blk_mq_tagset_busy_iter(q->tag_set, ufshcd_is_busy, &busy); - return busy; -} - static int ufshcd_devfreq_get_dev_status(struct device *dev, struct devfreq_dev_status *stat) { @@ -1509,7 +1566,7 @@ static ssize_t ufshcd_clkscale_enable_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", hba->clk_scaling.is_enabled); + return sysfs_emit(buf, "%d\n", hba->clk_scaling.is_enabled); } static ssize_t ufshcd_clkscale_enable_store(struct device *dev, @@ -1532,7 +1589,7 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev, if (value == hba->clk_scaling.is_enabled) goto out; - pm_runtime_get_sync(hba->dev); + ufshcd_rpm_get_sync(hba); ufshcd_hold(hba, false); hba->clk_scaling.is_enabled = value; @@ -1548,7 +1605,7 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev, } ufshcd_release(hba); - pm_runtime_put_sync(hba->dev); + ufshcd_rpm_put_sync(hba); out: up(&hba->host_sem); return err ? err : count; @@ -1643,6 +1700,26 @@ unblock_reqs: ufshcd_scsi_unblock_requests(hba); } +/* + * Block processing of new SCSI commands and wait until pending SCSI + * commands and TMFs have finished. ufshcd_exec_dev_cmd() and + * ufshcd_issue_devman_upiu_cmd() are not affected by this function. + * + * Return: 0 upon success; -EBUSY upon timeout. + */ +int ufshcd_freeze_scsi_devs(struct ufs_hba *hba, u64 timeout_us) +{ + return ufshcd_clock_scaling_prepare(hba, timeout_us); +} +EXPORT_SYMBOL_GPL(ufshcd_freeze_scsi_devs); + +/* Resume processing of SCSI commands. */ +void ufshcd_unfreeze_scsi_devs(struct ufs_hba *hba) +{ + ufshcd_clock_scaling_unprepare(hba, true); +} +EXPORT_SYMBOL_GPL(ufshcd_unfreeze_scsi_devs); + /** * ufshcd_hold - Enable clocks that were gated earlier due to ufshcd_release. * Also, exit from hibern8 mode and set the link as active. @@ -1655,7 +1732,8 @@ int ufshcd_hold(struct ufs_hba *hba, bool async) bool flush_result; unsigned long flags; - if (!ufshcd_is_clkgating_allowed(hba)) + if (!ufshcd_is_clkgating_allowed(hba) || + !hba->clk_gating.is_initialized) goto out; spin_lock_irqsave(hba->host->host_lock, flags); hba->clk_gating.active_reqs++; @@ -1758,7 +1836,7 @@ static void ufshcd_gate_work(struct work_struct *work) if (hba->clk_gating.active_reqs || hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL - || ufshcd_any_tag_in_use(hba) || hba->outstanding_tasks + || hba->outstanding_reqs || hba->outstanding_tasks || hba->active_uic_cmd || hba->uic_async_done) goto rel_lock; @@ -1815,7 +1893,7 @@ static void __ufshcd_release(struct ufs_hba *hba) if (hba->clk_gating.active_reqs || hba->clk_gating.is_suspended || hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL || - hba->outstanding_tasks || + hba->outstanding_tasks || !hba->clk_gating.is_initialized || hba->active_uic_cmd || hba->uic_async_done || hba->clk_gating.state == CLKS_OFF) return; @@ -1842,7 +1920,7 @@ static ssize_t ufshcd_clkgate_delay_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%lu\n", hba->clk_gating.delay_ms); + return sysfs_emit(buf, "%lu\n", hba->clk_gating.delay_ms); } static ssize_t ufshcd_clkgate_delay_store(struct device *dev, @@ -1865,7 +1943,7 @@ static ssize_t ufshcd_clkgate_enable_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", hba->clk_gating.is_enabled); + return sysfs_emit(buf, "%d\n", hba->clk_gating.is_enabled); } static ssize_t ufshcd_clkgate_enable_store(struct device *dev, @@ -1950,11 +2028,15 @@ static void ufshcd_exit_clk_gating(struct ufs_hba *hba) { if (!hba->clk_gating.is_initialized) return; + ufshcd_remove_clk_gating_sysfs(hba); - cancel_work_sync(&hba->clk_gating.ungate_work); - cancel_delayed_work_sync(&hba->clk_gating.gate_work); - destroy_workqueue(hba->clk_gating.clk_gating_workq); + + /* Ungate the clock if necessary. */ + ufshcd_hold(hba, false); hba->clk_gating.is_initialized = false; + ufshcd_release(hba); + + destroy_workqueue(hba->clk_gating.clk_gating_workq); } /* Must be called with host lock acquired */ @@ -2050,7 +2132,7 @@ static void ufshcd_update_monitor(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) spin_lock_irqsave(hba->host->host_lock, flags); if (dir >= 0 && hba->monitor.nr_queued[dir] > 0) { - struct request *req = lrbp->cmd->request; + struct request *req = scsi_cmd_to_rq(lrbp->cmd); struct ufs_hba_monitor *m = &hba->monitor; ktime_t now, inc, lat; @@ -2084,28 +2166,23 @@ static inline void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag) { struct ufshcd_lrb *lrbp = &hba->lrb[task_tag]; + unsigned long flags; lrbp->issue_time_stamp = ktime_get(); lrbp->compl_time_stamp = ktime_set(0, 0); - ufshcd_vops_setup_xfer_req(hba, task_tag, (lrbp->cmd ? true : false)); trace_android_vh_ufs_send_command(hba, lrbp); - ufshcd_add_command_trace(hba, task_tag, "send"); + ufshcd_add_command_trace(hba, task_tag, UFS_CMD_SEND); ufshcd_clk_scaling_start_busy(hba); if (unlikely(ufshcd_should_inform_monitor(hba, lrbp))) ufshcd_start_monitor(hba, lrbp); - if (ufshcd_has_utrlcnr(hba)) { - set_bit(task_tag, &hba->outstanding_reqs); - ufshcd_writel(hba, 1 << task_tag, - REG_UTP_TRANSFER_REQ_DOOR_BELL); - } else { - unsigned long flags; - spin_lock_irqsave(hba->host->host_lock, flags); - set_bit(task_tag, &hba->outstanding_reqs); - ufshcd_writel(hba, 1 << task_tag, - REG_UTP_TRANSFER_REQ_DOOR_BELL); - spin_unlock_irqrestore(hba->host->host_lock, flags); - } + spin_lock_irqsave(&hba->outstanding_lock, flags); + if (hba->vops && hba->vops->setup_xfer_req) + hba->vops->setup_xfer_req(hba, task_tag, !!lrbp->cmd); + __set_bit(task_tag, &hba->outstanding_reqs); + ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL); + spin_unlock_irqrestore(&hba->outstanding_lock, flags); + /* Make sure that doorbell is committed immediately */ wmb(); } @@ -2184,6 +2261,7 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) hba->nutrs = (hba->capabilities & MASK_TRANSFER_REQUESTS_SLOTS) + 1; hba->nutmrs = ((hba->capabilities & MASK_TASK_MANAGEMENT_REQUEST_SLOTS) >> 16) + 1; + hba->reserved_slot = hba->nutrs - 1; /* Read crypto capabilities */ err = ufshcd_hba_init_crypto_capabilities(hba); @@ -2220,15 +2298,15 @@ static inline u8 ufshcd_get_upmcrs(struct ufs_hba *hba) } /** - * ufshcd_dispatch_uic_cmd - Dispatch UIC commands to unipro layers + * ufshcd_dispatch_uic_cmd - Dispatch an UIC command to the Unipro layer * @hba: per adapter instance * @uic_cmd: UIC command - * - * Mutex must be held. */ static inline void ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) { + lockdep_assert_held(&hba->uic_cmd_mutex); + WARN_ON(hba->active_uic_cmd); hba->active_uic_cmd = uic_cmd; @@ -2238,7 +2316,7 @@ ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) ufshcd_writel(hba, uic_cmd->argument2, REG_UIC_COMMAND_ARG_2); ufshcd_writel(hba, uic_cmd->argument3, REG_UIC_COMMAND_ARG_3); - ufshcd_add_uic_command_trace(hba, uic_cmd, "send"); + ufshcd_add_uic_command_trace(hba, uic_cmd, UFS_CMD_SEND); /* Write UIC Cmd */ ufshcd_writel(hba, uic_cmd->command & COMMAND_OPCODE_MASK, @@ -2246,11 +2324,10 @@ ufshcd_dispatch_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) } /** - * ufshcd_wait_for_uic_cmd - Wait complectioin of UIC command + * ufshcd_wait_for_uic_cmd - Wait for completion of an UIC command * @hba: per adapter instance * @uic_cmd: UIC command * - * Must be called with mutex held. * Returns 0 only if success. */ static int @@ -2259,6 +2336,8 @@ ufshcd_wait_for_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) int ret; unsigned long flags; + lockdep_assert_held(&hba->uic_cmd_mutex); + if (wait_for_completion_timeout(&uic_cmd->done, msecs_to_jiffies(UIC_CMD_TIMEOUT))) { ret = uic_cmd->argument2 & MASK_UIC_COMMAND_RESULT; @@ -2288,14 +2367,15 @@ ufshcd_wait_for_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd) * @uic_cmd: UIC command * @completion: initialize the completion only if this is set to true * - * Identical to ufshcd_send_uic_cmd() expect mutex. Must be called - * with mutex held and host_lock locked. * Returns 0 only if success. */ static int __ufshcd_send_uic_cmd(struct ufs_hba *hba, struct uic_command *uic_cmd, bool completion) { + lockdep_assert_held(&hba->uic_cmd_mutex); + lockdep_assert_held(hba->host->host_lock); + if (!ufshcd_ready_for_uic_cmd(hba)) { dev_err(hba->dev, "Controller not ready to accept UIC commands\n"); @@ -2370,17 +2450,24 @@ static int ufshcd_map_sg(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) cpu_to_le16(sg_segments * hba->sg_entry_size); else lrbp->utr_descriptor_ptr->prd_table_length = - cpu_to_le16((u16) (sg_segments)); + cpu_to_le16(sg_segments); - prd = (struct ufshcd_sg_entry *)lrbp->ucd_prdt_ptr; + prd = lrbp->ucd_prdt_ptr; scsi_for_each_sg(cmd, sg, sg_segments, i) { - prd->size = - cpu_to_le32(((u32) sg_dma_len(sg))-1); - prd->base_addr = - cpu_to_le32(lower_32_bits(sg->dma_address)); - prd->upper_addr = - cpu_to_le32(upper_32_bits(sg->dma_address)); + const unsigned int len = sg_dma_len(sg); + + /* + * From the UFSHCI spec: "Data Byte Count (DBC): A '0' + * based value that indicates the length, in bytes, of + * the data block. A maximum of length of 256KB may + * exist for any entry. Bits 1:0 of this field shall be + * 11b to indicate Dword granularity. A value of '3' + * indicates 4 bytes, '7' indicates 8 bytes, etc." + */ + WARN_ONCE(len > 256 * 1024, "len = %#x\n", len); + prd->size = cpu_to_le32(len - 1); + prd->addr = cpu_to_le64(sg->dma_address); prd->reserved = 0; prd = (void *)prd + hba->sg_entry_size; } @@ -2639,6 +2726,42 @@ static inline u16 ufshcd_upiu_wlun_to_scsi_wlun(u8 upiu_wlun_id) return (upiu_wlun_id & ~UFS_UPIU_WLUN_ID) | SCSI_W_LUN_BASE; } +static inline bool is_device_wlun(struct scsi_device *sdev) +{ + return sdev->lun == + ufshcd_upiu_wlun_to_scsi_wlun(UFS_UPIU_UFS_DEVICE_WLUN); +} + +/* + * Associate the UFS controller queue with the default and poll HCTX types. + * Initialize the mq_map[] arrays. + */ +static int ufshcd_map_queues(struct Scsi_Host *shost) +{ + int i, ret; + + for (i = 0; i < shost->nr_maps; i++) { + struct blk_mq_queue_map *map = &shost->tag_set.map[i]; + + switch (i) { + case HCTX_TYPE_DEFAULT: + case HCTX_TYPE_POLL: + map->nr_queues = 1; + break; + case HCTX_TYPE_READ: + map->nr_queues = 0; + break; + default: + WARN_ON_ONCE(true); + } + map->queue_offset = 0; + ret = blk_mq_map_queues(map); + WARN_ON_ONCE(ret); + } + + return 0; +} + static void ufshcd_init_lrb(struct ufs_hba *hba, struct ufshcd_lrb *lrb, int i) { struct utp_transfer_cmd_desc *cmd_descp = (void *)hba->ucdl_base_addr + @@ -2670,27 +2793,34 @@ static void ufshcd_init_lrb(struct ufs_hba *hba, struct ufshcd_lrb *lrb, int i) */ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) { + struct ufs_hba *hba = shost_priv(host); + int tag = scsi_cmd_to_rq(cmd)->tag; struct ufshcd_lrb *lrbp; - struct ufs_hba *hba; - int tag; int err = 0; - hba = shost_priv(host); - - tag = cmd->request->tag; - if (!ufshcd_valid_tag(hba, tag)) { - dev_err(hba->dev, - "%s: invalid command tag %d: cmd=0x%p, cmd->request=0x%p", - __func__, tag, cmd, cmd->request); - BUG(); - } + WARN_ONCE(tag < 0 || tag >= hba->nutrs, "Invalid tag %d\n", tag); - if (!down_read_trylock(&hba->clk_scaling_lock)) - return SCSI_MLQUEUE_HOST_BUSY; + /* + * Allows the UFS error handler to wait for prior ufshcd_queuecommand() + * calls. + */ + rcu_read_lock(); switch (hba->ufshcd_state) { case UFSHCD_STATE_OPERATIONAL: + break; case UFSHCD_STATE_EH_SCHEDULED_NON_FATAL: + /* + * SCSI error handler can call ->queuecommand() while UFS error + * handler is in progress. Error interrupts could change the + * state from UFSHCD_STATE_RESET to + * UFSHCD_STATE_EH_SCHEDULED_NON_FATAL. Prevent requests + * being issued in that case. + */ + if (ufshcd_eh_in_progress(hba)) { + err = SCSI_MLQUEUE_HOST_BUSY; + goto out; + } break; case UFSHCD_STATE_EH_SCHEDULED_FATAL: /* @@ -2706,7 +2836,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) if (hba->pm_op_in_progress) { hba->force_reset = true; set_host_byte(cmd, DID_BAD_TARGET); - cmd->scsi_done(cmd); + scsi_done(cmd); goto out; } fallthrough; @@ -2715,13 +2845,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) goto out; case UFSHCD_STATE_ERROR: set_host_byte(cmd, DID_ERROR); - cmd->scsi_done(cmd); - goto out; - default: - dev_WARN_ONCE(hba->dev, 1, "%s: invalid state %d\n", - __func__, hba->ufshcd_state); - set_host_byte(cmd, DID_BAD_TARGET); - cmd->scsi_done(cmd); + scsi_done(cmd); goto out; } @@ -2744,7 +2868,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) lrbp->lun = ufshcd_scsi_to_upiu_lun(cmd->device->lun); lrbp->intr_cmd = !ufshcd_is_intr_aggr_allowed(hba) ? true : false; - ufshcd_prepare_lrbp_crypto(cmd->request, lrbp); + ufshcd_prepare_lrbp_crypto(scsi_cmd_to_rq(cmd), lrbp); trace_android_vh_ufs_prepare_command(hba, cmd->request, lrbp, &err); if (err) { @@ -2755,12 +2879,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) lrbp->req_abort_skip = false; - err = ufshpb_prep(hba, lrbp); - if (err == -EAGAIN) { - lrbp->cmd = NULL; - ufshcd_release(hba); - goto out; - } + ufshpb_prep(hba, lrbp); ufshcd_comp_scsi_upiu(hba, lrbp); @@ -2770,12 +2889,20 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd) ufshcd_release(hba); goto out; } - /* Make sure descriptors are ready before ringing the doorbell */ - wmb(); ufshcd_send_command(hba, tag); + out: - up_read(&hba->clk_scaling_lock); + rcu_read_unlock(); + + if (ufs_trigger_eh()) { + unsigned long flags; + + spin_lock_irqsave(hba->host->host_lock, flags); + ufshcd_schedule_eh_work(hba); + spin_unlock_irqrestore(hba->host->host_lock, flags); + } + return err; } @@ -2807,7 +2934,7 @@ ufshcd_clear_cmd(struct ufs_hba *hba, int tag) spin_unlock_irqrestore(hba->host->host_lock, flags); /* - * wait for for h/w to clear corresponding bit in door-bell. + * wait for h/w to clear corresponding bit in door-bell. * max. wait is 1 sec. */ err = ufshcd_wait_for_register(hba, @@ -2881,8 +3008,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, time_left = wait_for_completion_timeout(hba->dev_cmd.complete, msecs_to_jiffies(max_timeout)); - /* Make sure descriptors are ready before ringing the doorbell */ - wmb(); spin_lock_irqsave(hba->host->host_lock, flags); hba->dev_cmd.complete = NULL; if (likely(time_left)) { @@ -2904,7 +3029,9 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, * we also need to clear the outstanding_request * field in hba */ - ufshcd_outstanding_req_clear(hba, lrbp->task_tag); + spin_lock_irqsave(&hba->outstanding_lock, flags); + __clear_bit(lrbp->task_tag, &hba->outstanding_reqs); + spin_unlock_irqrestore(&hba->outstanding_lock, flags); } return err; @@ -2922,32 +3049,16 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, enum dev_cmd_type cmd_type, int timeout) { - struct request_queue *q = hba->cmd_queue; - struct request *req; + DECLARE_COMPLETION_ONSTACK(wait); + const u32 tag = hba->reserved_slot; struct ufshcd_lrb *lrbp; int err; - int tag; - struct completion wait; - down_read(&hba->clk_scaling_lock); + /* Protects use of hba->reserved_slot. */ + lockdep_assert_held(&hba->dev_cmd.lock); - /* - * Get free slot, sleep if slots are unavailable. - * Even though we use wait_event() which sleeps indefinitely, - * the maximum wait time is bounded by SCSI request timeout. - */ - req = blk_get_request(q, REQ_OP_DRV_OUT, 0); - if (IS_ERR(req)) { - err = PTR_ERR(req); - goto out_unlock; - } - tag = req->tag; - WARN_ON_ONCE(!ufshcd_valid_tag(hba, tag)); - /* Set the timeout such that the SCSI error handler is not activated. */ - req->timeout = msecs_to_jiffies(2 * timeout); - blk_mq_start_request(req); + down_read(&hba->clk_scaling_lock); - init_completion(&wait); lrbp = &hba->lrb[tag]; WARN_ON(lrbp->cmd); err = ufshcd_compose_dev_cmd(hba, lrbp, cmd_type, tag); @@ -2956,18 +3067,14 @@ static int ufshcd_exec_dev_cmd(struct ufs_hba *hba, hba->dev_cmd.complete = &wait; - ufshcd_add_query_upiu_trace(hba, tag, "query_send"); - /* Make sure descriptors are ready before ringing the doorbell */ - wmb(); + ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr); ufshcd_send_command(hba, tag); err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout); - ufshcd_add_query_upiu_trace(hba, tag, - err ? "query_complete_err" : "query_complete"); + ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP, + (struct utp_upiu_req *)lrbp->ucd_rsp_ptr); out: - blk_put_request(req); -out_unlock: up_read(&hba->clk_scaling_lock); return err; } @@ -3753,6 +3860,22 @@ static int ufshcd_dme_reset(struct ufs_hba *hba) return ret; } +int ufshcd_dme_configure_adapt(struct ufs_hba *hba, + int agreed_gear, + int adapt_val) +{ + int ret; + + if (agreed_gear != UFS_HS_G4) + adapt_val = PA_NO_ADAPT; + + ret = ufshcd_dme_set(hba, + UIC_ARG_MIB(PA_TXHSADAPTTYPE), + adapt_val); + return ret; +} +EXPORT_SYMBOL_GPL(ufshcd_dme_configure_adapt); + /** * ufshcd_dme_enable - UIC command for DME_ENABLE * @hba: per adapter instance @@ -3944,14 +4067,13 @@ EXPORT_SYMBOL_GPL(ufshcd_dme_get_attr); */ static int ufshcd_uic_pwr_ctrl(struct ufs_hba *hba, struct uic_command *cmd) { - struct completion uic_async_done; + DECLARE_COMPLETION_ONSTACK(uic_async_done); unsigned long flags; u8 status; int ret; bool reenable_intr = false; mutex_lock(&hba->uic_cmd_mutex); - init_completion(&uic_async_done); ufshcd_add_delay_before_dme_cmd(hba); spin_lock_irqsave(hba->host->host_lock, flags); @@ -4070,7 +4192,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba) spin_unlock_irqrestore(hba->host->host_lock, flags); /* Reset the attached device */ - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); ret = ufshcd_host_reset_and_restore(hba); @@ -4083,8 +4205,6 @@ int ufshcd_link_recovery(struct ufs_hba *hba) if (ret) dev_err(hba->dev, "%s: link recovery failed, err %d", __func__, ret); - else - ufshcd_clear_ua_wluns(hba); return ret; } @@ -4156,12 +4276,13 @@ void ufshcd_auto_hibern8_update(struct ufs_hba *hba, u32 ahit) } spin_unlock_irqrestore(hba->host->host_lock, flags); - if (update && !pm_runtime_suspended(hba->dev)) { - pm_runtime_get_sync(hba->dev); + if (update && + !pm_runtime_suspended(&hba->sdev_ufs_device->sdev_gendev)) { + ufshcd_rpm_get_sync(hba); ufshcd_hold(hba, false); ufshcd_auto_hibern8_enable(hba); ufshcd_release(hba); - pm_runtime_put(hba->dev); + ufshcd_rpm_put_sync(hba); } } EXPORT_SYMBOL_GPL(ufshcd_auto_hibern8_update); @@ -4750,7 +4871,7 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba) mutex_lock(&hba->dev_cmd.lock); for (retries = NOP_OUT_RETRIES; retries > 0; retries--) { err = ufshcd_exec_dev_cmd(hba, DEV_CMD_TYPE_NOP, - NOP_OUT_TIMEOUT); + hba->nop_out_timeout); if (!err || err == -ETIMEDOUT) break; @@ -4859,6 +4980,43 @@ static inline void ufshcd_get_lu_power_on_wp_status(struct ufs_hba *hba, } } +/** + * ufshcd_setup_links - associate link b/w device wlun and other luns + * @sdev: pointer to SCSI device + * @hba: pointer to ufs hba + */ +static void ufshcd_setup_links(struct ufs_hba *hba, struct scsi_device *sdev) +{ + struct device_link *link; + + /* + * Device wlun is the supplier & rest of the luns are consumers. + * This ensures that device wlun suspends after all other luns. + */ + if (hba->sdev_ufs_device) { + link = device_link_add(&sdev->sdev_gendev, + &hba->sdev_ufs_device->sdev_gendev, + DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE); + if (!link) { + dev_err(&sdev->sdev_gendev, "Failed establishing link - %s\n", + dev_name(&hba->sdev_ufs_device->sdev_gendev)); + return; + } + hba->luns_avail--; + /* Ignore REPORT_LUN wlun probing */ + if (hba->luns_avail == 1) { + ufshcd_rpm_put(hba); + return; + } + } else { + /* + * Device wlun is probed. The assumption is that WLUNs are + * scanned before other LUNs. + */ + hba->luns_avail--; + } +} + /** * ufshcd_slave_alloc - handle initial SCSI device configurations * @sdev: pointer to SCSI device @@ -4890,6 +5048,8 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev) ufshcd_get_lu_power_on_wp_status(hba, sdev); + ufshcd_setup_links(hba, sdev); + return 0; } @@ -4902,11 +5062,7 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev) */ static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth) { - struct ufs_hba *hba = shost_priv(sdev->host); - - if (depth > hba->nutrs) - depth = hba->nutrs; - return scsi_change_queue_depth(sdev, depth); + return scsi_change_queue_depth(sdev, min(depth, sdev->host->can_queue)); } static void ufshcd_hpb_destroy(struct ufs_hba *hba, struct scsi_device *sdev) @@ -4943,9 +5099,20 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1); if (hba->quirks & UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE) blk_queue_update_dma_alignment(q, PAGE_SIZE - 1); - - if (ufshcd_is_rpm_autosuspend_allowed(hba)) + /* + * Block runtime-pm until all consumers are added. + * Refer ufshcd_setup_links(). + */ + if (is_device_wlun(sdev)) + pm_runtime_get_noresume(&sdev->sdev_gendev); + else if (ufshcd_is_rpm_autosuspend_allowed(hba)) sdev->rpm_autosuspend = 1; + /* + * Do not print messages during runtime PM to avoid never-ending cycles + * of messages written back to storage by user space causing runtime + * resume, causing more messages and so on. + */ + sdev->silence_suspend = 1; ufshcd_crypto_setup_rq_keyslot_manager(hba, q); @@ -4961,6 +5128,7 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) static void ufshcd_slave_destroy(struct scsi_device *sdev) { struct ufs_hba *hba; + unsigned long flags; hba = shost_priv(sdev->host); @@ -4968,11 +5136,29 @@ static void ufshcd_slave_destroy(struct scsi_device *sdev) /* Drop the reference as it won't be needed anymore */ if (ufshcd_scsi_to_upiu_lun(sdev->lun) == UFS_UPIU_UFS_DEVICE_WLUN) { - unsigned long flags; - spin_lock_irqsave(hba->host->host_lock, flags); hba->sdev_ufs_device = NULL; spin_unlock_irqrestore(hba->host->host_lock, flags); + } else if (hba->sdev_ufs_device) { + struct device *supplier = NULL; + + /* Ensure UFS Device WLUN exists and does not disappear */ + spin_lock_irqsave(hba->host->host_lock, flags); + if (hba->sdev_ufs_device) { + supplier = &hba->sdev_ufs_device->sdev_gendev; + get_device(supplier); + } + spin_unlock_irqrestore(hba->host->host_lock, flags); + + if (supplier) { + /* + * If a LUN fails to probe (e.g. absent BOOT WLUN), the + * device will not have been registered but can still + * have a device link holding a reference to the device. + */ + device_link_remove(&sdev->sdev_gendev, supplier); + put_device(supplier); + } } } @@ -4993,9 +5179,7 @@ ufshcd_scsi_cmd_status(struct ufshcd_lrb *lrbp, int scsi_status) ufshcd_copy_sense_data(lrbp); fallthrough; case SAM_STAT_GOOD: - result |= DID_OK << 16 | - COMMAND_COMPLETE << 8 | - scsi_status; + result |= DID_OK << 16 | scsi_status; break; case SAM_STAT_TASK_SET_FULL: case SAM_STAT_BUSY: @@ -5023,7 +5207,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) { int result = 0; int scsi_status; - int ocs; + enum utp_ocs ocs; /* overall command status of utrd */ ocs = ufshcd_get_tr_ocs(lrbp); @@ -5067,15 +5251,9 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) */ if (!hba->pm_op_in_progress && !ufshcd_eh_in_progress(hba) && - ufshcd_is_exception_event(lrbp->ucd_rsp_ptr) && - schedule_work(&hba->eeh_work)) { - /* - * Prevent suspend once eeh_work is scheduled - * to avoid deadlock between ufshcd_suspend - * and exception event handler. - */ - pm_runtime_get_noresume(hba->dev); - } + ufshcd_is_exception_event(lrbp->ucd_rsp_ptr)) + /* Flushed in suspend */ + schedule_work(&hba->eeh_work); if (scsi_status == SAM_STAT_GOOD) ufshpb_rsp_upiu(hba, lrbp); @@ -5179,28 +5357,37 @@ static irqreturn_t ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status) if (retval == IRQ_HANDLED) ufshcd_add_uic_command_trace(hba, hba->active_uic_cmd, - "complete"); + UFS_CMD_COMP); spin_unlock(hba->host->host_lock); return retval; } +/* Release the resources allocated for processing a SCSI command. */ +static void ufshcd_release_scsi_cmd(struct ufs_hba *hba, + struct ufshcd_lrb *lrbp) +{ + struct scsi_cmnd *cmd = lrbp->cmd; + + scsi_dma_unmap(cmd); + ufshcd_crypto_clear_prdt(hba, lrbp); + lrbp->cmd = NULL; /* Mark the command as completed. */ + ufshcd_release(hba); + ufshcd_clk_scaling_update_busy(hba); +} + /** * __ufshcd_transfer_req_compl - handle SCSI and query command completion * @hba: per adapter instance - * @completed_reqs: requests to complete + * @completed_reqs: bitmask that indicates which requests to complete */ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, unsigned long completed_reqs) { struct ufshcd_lrb *lrbp; struct scsi_cmnd *cmd; - int result; int index; - bool update_scaling = false; for_each_set_bit(index, &completed_reqs, hba->nutrs) { - if (!test_and_clear_bit(index, &hba->outstanding_reqs)) - continue; lrbp = &hba->lrb[index]; lrbp->compl_time_stamp = ktime_get(); cmd = lrbp->cmd; @@ -5208,45 +5395,59 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba, if (unlikely(ufshcd_should_inform_monitor(hba, lrbp))) ufshcd_update_monitor(hba, lrbp); trace_android_vh_ufs_compl_command(hba, lrbp); - ufshcd_add_command_trace(hba, index, "complete"); - result = ufshcd_transfer_rsp_status(hba, lrbp); - scsi_dma_unmap(cmd); - cmd->result = result; - ufshcd_crypto_clear_prdt(hba, lrbp); - /* Mark completed command as NULL in LRB */ - lrbp->cmd = NULL; + ufshcd_add_command_trace(hba, index, UFS_CMD_COMP); + cmd->result = ufshcd_transfer_rsp_status(hba, lrbp); + ufshcd_release_scsi_cmd(hba, lrbp); /* Do not touch lrbp after scsi done */ - cmd->scsi_done(cmd); - ufshcd_release(hba); - update_scaling = true; + scsi_done(cmd); } else if (lrbp->command_type == UTP_CMD_TYPE_DEV_MANAGE || lrbp->command_type == UTP_CMD_TYPE_UFS_STORAGE) { if (hba->dev_cmd.complete) { trace_android_vh_ufs_compl_command(hba, lrbp); ufshcd_add_command_trace(hba, index, - "dev_complete"); + UFS_DEV_COMP); complete(hba->dev_cmd.complete); - update_scaling = true; + ufshcd_clk_scaling_update_busy(hba); } } - if (update_scaling) - ufshcd_clk_scaling_update_busy(hba); } } +/* + * Returns > 0 if one or more commands have been completed or 0 if no + * requests have been completed. + */ +static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num) +{ + struct ufs_hba *hba = shost_priv(shost); + unsigned long completed_reqs, flags; + u32 tr_doorbell; + + spin_lock_irqsave(&hba->outstanding_lock, flags); + tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); + completed_reqs = ~tr_doorbell & hba->outstanding_reqs; + WARN_ONCE(completed_reqs & ~hba->outstanding_reqs, + "completed: %#lx; outstanding: %#lx\n", completed_reqs, + hba->outstanding_reqs); + hba->outstanding_reqs &= ~completed_reqs; + spin_unlock_irqrestore(&hba->outstanding_lock, flags); + + if (completed_reqs) + __ufshcd_transfer_req_compl(hba, completed_reqs); + + return completed_reqs; +} + /** - * ufshcd_trc_handler - handle transfer requests completion + * ufshcd_transfer_req_compl - handle SCSI and query command completion * @hba: per adapter instance - * @use_utrlcnr: get completed requests from UTRLCNR * * Returns * IRQ_HANDLED - If interrupt is valid * IRQ_NONE - If invalid interrupt */ -static irqreturn_t ufshcd_trc_handler(struct ufs_hba *hba, bool use_utrlcnr) +static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba) { - unsigned long completed_reqs = 0; - /* Resetting interrupt aggregation counters first and reading the * DOOR_BELL afterward allows us to handle all the completed requests. * In order to prevent other interrupts starvation the DB is read once @@ -5258,31 +5459,56 @@ static irqreturn_t ufshcd_trc_handler(struct ufs_hba *hba, bool use_utrlcnr) !(hba->quirks & UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR)) ufshcd_reset_intr_aggr(hba); - if (use_utrlcnr) { - u32 utrlcnr; + if (ufs_fail_completion()) + return IRQ_HANDLED; - utrlcnr = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_LIST_COMPL); - if (utrlcnr) { - ufshcd_writel(hba, utrlcnr, - REG_UTP_TRANSFER_REQ_LIST_COMPL); - completed_reqs = utrlcnr; - } - } else { - unsigned long flags; - u32 tr_doorbell; + /* + * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we + * do not want polling to trigger spurious interrupt complaints. + */ + ufshcd_poll(hba->host, 0); - spin_lock_irqsave(hba->host->host_lock, flags); - tr_doorbell = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); - completed_reqs = tr_doorbell ^ hba->outstanding_reqs; - spin_unlock_irqrestore(hba->host->host_lock, flags); - } + return IRQ_HANDLED; +} - if (completed_reqs) { - __ufshcd_transfer_req_compl(hba, completed_reqs); - return IRQ_HANDLED; - } else { - return IRQ_NONE; +int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask) +{ + return ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, + QUERY_ATTR_IDN_EE_CONTROL, 0, 0, + &ee_ctrl_mask); +} + +int ufshcd_write_ee_control(struct ufs_hba *hba) +{ + int err; + + mutex_lock(&hba->ee_ctrl_mutex); + err = __ufshcd_write_ee_control(hba, hba->ee_ctrl_mask); + mutex_unlock(&hba->ee_ctrl_mutex); + if (err) + dev_err(hba->dev, "%s: failed to write ee control %d\n", + __func__, err); + return err; +} + +int ufshcd_update_ee_control(struct ufs_hba *hba, u16 *mask, u16 *other_mask, + u16 set, u16 clr) +{ + u16 new_mask, ee_ctrl_mask; + int err = 0; + + mutex_lock(&hba->ee_ctrl_mutex); + new_mask = (*mask & ~clr) | set; + ee_ctrl_mask = new_mask | *other_mask; + if (ee_ctrl_mask != hba->ee_ctrl_mask) + err = __ufshcd_write_ee_control(hba, ee_ctrl_mask); + /* Still need to update 'mask' even if 'ee_ctrl_mask' was unchanged */ + if (!err) { + hba->ee_ctrl_mask = ee_ctrl_mask; + *mask = new_mask; } + mutex_unlock(&hba->ee_ctrl_mutex); + return err; } /** @@ -5295,22 +5521,9 @@ static irqreturn_t ufshcd_trc_handler(struct ufs_hba *hba, bool use_utrlcnr) * * Returns zero on success, non-zero error value on failure. */ -static int ufshcd_disable_ee(struct ufs_hba *hba, u16 mask) +static inline int ufshcd_disable_ee(struct ufs_hba *hba, u16 mask) { - int err = 0; - u32 val; - - if (!(hba->ee_ctrl_mask & mask)) - goto out; - - val = hba->ee_ctrl_mask & ~mask; - val &= MASK_EE_STATUS; - err = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, - QUERY_ATTR_IDN_EE_CONTROL, 0, 0, &val); - if (!err) - hba->ee_ctrl_mask &= ~mask; -out: - return err; + return ufshcd_update_ee_drv_mask(hba, 0, mask); } /** @@ -5323,22 +5536,9 @@ out: * * Returns zero on success, non-zero error value on failure. */ -static int ufshcd_enable_ee(struct ufs_hba *hba, u16 mask) +static inline int ufshcd_enable_ee(struct ufs_hba *hba, u16 mask) { - int err = 0; - u32 val; - - if (hba->ee_ctrl_mask & mask) - goto out; - - val = hba->ee_ctrl_mask | mask; - val &= MASK_EE_STATUS; - err = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, - QUERY_ATTR_IDN_EE_CONTROL, 0, 0, &val); - if (!err) - hba->ee_ctrl_mask |= mask; -out: - return err; + return ufshcd_update_ee_drv_mask(hba, mask, 0); } /** @@ -5556,106 +5756,92 @@ out: __func__, err); } -static int ufshcd_wb_ctrl(struct ufs_hba *hba, bool enable) +static void ufshcd_temp_exception_event_handler(struct ufs_hba *hba, u16 status) { - int ret; - u8 index; - enum query_opcode opcode; - - if (!ufshcd_is_wb_allowed(hba)) - return 0; + u32 value; - if (!(enable ^ hba->wb_enabled)) - return 0; - if (enable) - opcode = UPIU_QUERY_OPCODE_SET_FLAG; - else - opcode = UPIU_QUERY_OPCODE_CLEAR_FLAG; + if (ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, + QUERY_ATTR_IDN_CASE_ROUGH_TEMP, 0, 0, &value)) + return; - index = ufshcd_wb_get_query_index(hba); - ret = ufshcd_query_flag_retry(hba, opcode, - QUERY_FLAG_IDN_WB_EN, index, NULL); - if (ret) { - dev_err(hba->dev, "%s write booster %s failed %d\n", - __func__, enable ? "enable" : "disable", ret); - return ret; - } + dev_info(hba->dev, "exception Tcase %d\n", value - 80); - hba->wb_enabled = enable; - dev_dbg(hba->dev, "%s write booster %s %d\n", - __func__, enable ? "enable" : "disable", ret); + ufs_hwmon_notify_event(hba, status & MASK_EE_URGENT_TEMP); - return ret; + /* + * A placeholder for the platform vendors to add whatever additional + * steps required + */ } -static int ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set) +static int __ufshcd_wb_toggle(struct ufs_hba *hba, bool set, enum flag_idn idn) { - int val; u8 index; - - if (set) - val = UPIU_QUERY_OPCODE_SET_FLAG; - else - val = UPIU_QUERY_OPCODE_CLEAR_FLAG; + enum query_opcode opcode = set ? UPIU_QUERY_OPCODE_SET_FLAG : + UPIU_QUERY_OPCODE_CLEAR_FLAG; index = ufshcd_wb_get_query_index(hba); - return ufshcd_query_flag_retry(hba, val, - QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8, - index, NULL); -} - -static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable) -{ - if (enable) - ufshcd_wb_buf_flush_enable(hba); - else - ufshcd_wb_buf_flush_disable(hba); - + return ufshcd_query_flag_retry(hba, opcode, idn, index, NULL); } -static int ufshcd_wb_buf_flush_enable(struct ufs_hba *hba) +int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable) { int ret; - u8 index; - if (!ufshcd_is_wb_allowed(hba) || hba->wb_buf_flush_enabled) + if (!ufshcd_is_wb_allowed(hba)) return 0; - index = ufshcd_wb_get_query_index(hba); - ret = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_SET_FLAG, - QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, - index, NULL); - if (ret) - dev_err(hba->dev, "%s WB - buf flush enable failed %d\n", - __func__, ret); - else - hba->wb_buf_flush_enabled = true; + if (!(enable ^ hba->dev_info.wb_enabled)) + return 0; + + ret = __ufshcd_wb_toggle(hba, enable, QUERY_FLAG_IDN_WB_EN); + if (ret) { + dev_err(hba->dev, "%s Write Booster %s failed %d\n", + __func__, enable ? "enable" : "disable", ret); + return ret; + } + + hba->dev_info.wb_enabled = enable; + dev_info(hba->dev, "%s Write Booster %s\n", + __func__, enable ? "enabled" : "disabled"); - dev_dbg(hba->dev, "WB - Flush enabled: %d\n", ret); return ret; } -static int ufshcd_wb_buf_flush_disable(struct ufs_hba *hba) +static void ufshcd_wb_toggle_flush_during_h8(struct ufs_hba *hba, bool set) { int ret; - u8 index; - if (!ufshcd_is_wb_allowed(hba) || !hba->wb_buf_flush_enabled) - return 0; + ret = __ufshcd_wb_toggle(hba, set, + QUERY_FLAG_IDN_WB_BUFF_FLUSH_DURING_HIBERN8); + if (ret) { + dev_err(hba->dev, "%s: WB-Buf Flush during H8 %s failed: %d\n", + __func__, set ? "enable" : "disable", ret); + return; + } + dev_dbg(hba->dev, "%s WB-Buf Flush during H8 %s\n", + __func__, set ? "enabled" : "disabled"); +} - index = ufshcd_wb_get_query_index(hba); - ret = ufshcd_query_flag_retry(hba, UPIU_QUERY_OPCODE_CLEAR_FLAG, - QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN, - index, NULL); +static inline void ufshcd_wb_toggle_flush(struct ufs_hba *hba, bool enable) +{ + int ret; + + if (!ufshcd_is_wb_allowed(hba) || + hba->dev_info.wb_buf_flush_enabled == enable) + return; + + ret = __ufshcd_wb_toggle(hba, enable, QUERY_FLAG_IDN_WB_BUFF_FLUSH_EN); if (ret) { - dev_warn(hba->dev, "%s: WB - buf flush disable failed %d\n", - __func__, ret); - } else { - hba->wb_buf_flush_enabled = false; - dev_dbg(hba->dev, "WB - Flush disabled: %d\n", ret); + dev_err(hba->dev, "%s WB-Buf Flush %s failed %d\n", __func__, + enable ? "enable" : "disable", ret); + return; } - return ret; + hba->dev_info.wb_buf_flush_enabled = enable; + + dev_dbg(hba->dev, "%s WB-Buf Flush %s\n", + __func__, enable ? "enabled" : "disabled"); } static bool ufshcd_wb_presrv_usrspc_keep_vcc_on(struct ufs_hba *hba, @@ -5687,6 +5873,47 @@ static bool ufshcd_wb_presrv_usrspc_keep_vcc_on(struct ufs_hba *hba, return false; } +static void ufshcd_wb_force_disable(struct ufs_hba *hba) +{ + if (!(hba->quirks & UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL)) + ufshcd_wb_toggle_flush(hba, false); + + ufshcd_wb_toggle_flush_during_h8(hba, false); + ufshcd_wb_toggle(hba, false); + hba->caps &= ~UFSHCD_CAP_WB_EN; + + dev_info(hba->dev, "%s: WB force disabled\n", __func__); +} + +static bool ufshcd_is_wb_buf_lifetime_available(struct ufs_hba *hba) +{ + u32 lifetime; + int ret; + u8 index; + + index = ufshcd_wb_get_query_index(hba); + ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, + QUERY_ATTR_IDN_WB_BUFF_LIFE_TIME_EST, + index, 0, &lifetime); + if (ret) { + dev_err(hba->dev, + "%s: bWriteBoosterBufferLifeTimeEst read failed %d\n", + __func__, ret); + return false; + } + + if (lifetime == UFS_WB_EXCEED_LIFETIME) { + dev_err(hba->dev, "%s: WB buf lifetime is exhausted 0x%02X\n", + __func__, lifetime); + return false; + } + + dev_dbg(hba->dev, "%s: WB buf lifetime is 0x%02X\n", + __func__, lifetime); + + return true; +} + static bool ufshcd_wb_need_flush(struct ufs_hba *hba) { int ret; @@ -5695,6 +5922,12 @@ static bool ufshcd_wb_need_flush(struct ufs_hba *hba) if (!ufshcd_is_wb_allowed(hba)) return false; + + if (!ufshcd_is_wb_buf_lifetime_available(hba)) { + ufshcd_wb_force_disable(hba); + return false; + } + /* * The ufs device needs the vcc to be ON to flush. * With user-space reduction enabled, it's enough to enable flush @@ -5736,8 +5969,8 @@ static void ufshcd_rpm_dev_flush_recheck_work(struct work_struct *work) * after a certain delay to recheck the threshold by next runtime * suspend. */ - pm_runtime_get_sync(hba->dev); - pm_runtime_put_sync(hba->dev); + ufshcd_rpm_get_sync(hba); + ufshcd_rpm_put_sync(hba); } /** @@ -5754,7 +5987,6 @@ static void ufshcd_exception_event_handler(struct work_struct *work) u32 status = 0; hba = container_of(work, struct ufs_hba, eeh_work); - pm_runtime_get_sync(hba->dev); ufshcd_scsi_block_requests(hba); err = ufshcd_get_ee_status(hba, &status); if (err) { @@ -5763,28 +5995,23 @@ static void ufshcd_exception_event_handler(struct work_struct *work) goto out; } - status &= hba->ee_ctrl_mask; + trace_ufshcd_exception_event(dev_name(hba->dev), status); - if (status & MASK_EE_URGENT_BKOPS) + if (status & hba->ee_drv_mask & MASK_EE_URGENT_BKOPS) ufshcd_bkops_exception_event_handler(hba); + if (status & hba->ee_drv_mask & MASK_EE_URGENT_TEMP) + ufshcd_temp_exception_event_handler(hba, status); + + ufs_debugfs_exception_event(hba, status); out: ufshcd_scsi_unblock_requests(hba); - /* - * pm_runtime_get_noresume is called while scheduling - * eeh_work to avoid suspend racing with exception work. - * Hence decrement usage counter using pm_runtime_put_noidle - * to allow suspend on completion of exception event handler. - */ - pm_runtime_put_noidle(hba->dev); - pm_runtime_put(hba->dev); - return; } /* Complete requests that have door-bell cleared */ static void ufshcd_complete_requests(struct ufs_hba *hba) { - ufshcd_trc_handler(hba, false); + ufshcd_transfer_req_compl(hba); ufshcd_tmc_handler(hba); } @@ -5866,9 +6093,10 @@ static inline bool ufshcd_is_saved_err_fatal(struct ufs_hba *hba) (hba->saved_err & (INT_FATAL_ERRORS | UFSHCD_UIC_HIBERN8_MASK)); } -/* host lock must be held before calling this func */ -static inline void ufshcd_schedule_eh_work(struct ufs_hba *hba) +void ufshcd_schedule_eh_work(struct ufs_hba *hba) { + lockdep_assert_held(hba->host->host_lock); + /* handle fatal errors only when link is not in error state */ if (hba->ufshcd_state != UFSHCD_STATE_ERROR) { if (hba->force_reset || ufshcd_is_link_broken(hba) || @@ -5902,12 +6130,13 @@ static void ufshcd_clk_scaling_suspend(struct ufs_hba *hba, bool suspend) static void ufshcd_err_handling_prepare(struct ufs_hba *hba) { - pm_runtime_get_sync(hba->dev); - if (pm_runtime_status_suspended(hba->dev) || hba->is_sys_suspended) { + ufshcd_rpm_get_sync(hba); + if (pm_runtime_status_suspended(&hba->sdev_ufs_device->sdev_gendev) || + hba->is_sys_suspended) { enum ufs_pm_op pm_op; /* - * Don't assume anything of pm_runtime_get_sync(), if + * Don't assume anything of resume, if * resume fails, irq and clocks can be OFF, and powers * can be OFF or in LPM. */ @@ -5931,8 +6160,7 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba) } ufshcd_scsi_block_requests(hba); /* Drain ufshcd_queuecommand() */ - down_write(&hba->clk_scaling_lock); - up_write(&hba->clk_scaling_lock); + synchronize_rcu(); cancel_work_sync(&hba->eeh_work); } @@ -5942,13 +6170,13 @@ static void ufshcd_err_handling_unprepare(struct ufs_hba *hba) ufshcd_release(hba); if (ufshcd_is_clkscaling_supported(hba)) ufshcd_clk_scaling_suspend(hba, false); - ufshcd_clear_ua_wluns(hba); - pm_runtime_put(hba->dev); + ufshcd_rpm_put(hba); } static inline bool ufshcd_err_handling_should_stop(struct ufs_hba *hba) { return (!hba->is_powered || hba->shutting_down || + !hba->sdev_ufs_device || hba->ufshcd_state == UFSHCD_STATE_ERROR || (!(hba->saved_err || hba->saved_uic_err || hba->force_reset || ufshcd_is_link_broken(hba)))); @@ -5964,14 +6192,18 @@ static void ufshcd_recover_pm_error(struct ufs_hba *hba) hba->is_sys_suspended = false; /* - * Set RPM status of hba device to RPM_ACTIVE, + * Set RPM status of wlun device to RPM_ACTIVE, * this also clears its runtime error. */ - ret = pm_runtime_set_active(hba->dev); + ret = pm_runtime_set_active(&hba->sdev_ufs_device->sdev_gendev); + + /* hba device might have a runtime error otherwise */ + if (ret) + ret = pm_runtime_set_active(hba->dev); /* - * If hba device had runtime error, we also need to resume those - * scsi devices under hba in case any of them has failed to be - * resumed due to hba runtime resume failure. This is to unblock + * If wlun device had runtime error, we also need to resume those + * consumer scsi devices in case any of them has failed to be + * resumed due to supplier runtime resume failure. This is to unblock * blk_queue_enter in case there are bios waiting inside it. */ if (!ret) { @@ -6011,16 +6243,25 @@ static bool ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba) */ static void ufshcd_err_handler(struct work_struct *work) { + int retries = MAX_ERR_HANDLER_RETRIES; struct ufs_hba *hba; unsigned long flags; - bool err_xfer = false; - bool err_tm = false; - int err = 0, pmc_err; + bool needs_restore; + bool needs_reset; + bool err_xfer; + bool err_tm; + int pmc_err; int tag; - bool needs_reset = false, needs_restore = false; hba = container_of(work, struct ufs_hba, eh_work); + dev_info(hba->dev, + "%s started; HBA state %s; powered %d; shutting down %d; saved_err = %d; saved_uic_err = %d; force_reset = %d%s\n", + __func__, ufshcd_state_name[hba->ufshcd_state], + hba->is_powered, hba->shutting_down, hba->saved_err, + hba->saved_uic_err, hba->force_reset, + ufshcd_is_link_broken(hba) ? "; link is broken" : ""); + down(&hba->host_sem); spin_lock_irqsave(hba->host->host_lock, flags); if (ufshcd_err_handling_should_stop(hba)) { @@ -6036,6 +6277,12 @@ static void ufshcd_err_handler(struct work_struct *work) /* Complete requests that have door-bell cleared by h/w */ ufshcd_complete_requests(hba); spin_lock_irqsave(hba->host->host_lock, flags); +again: + needs_restore = false; + needs_reset = false; + err_xfer = false; + err_tm = false; + if (hba->ufshcd_state != UFSHCD_STATE_ERROR) hba->ufshcd_state = UFSHCD_STATE_RESET; /* @@ -6109,6 +6356,8 @@ static void ufshcd_err_handler(struct work_struct *work) err_xfer = true; goto lock_skip_pending_xfer_clear; } + dev_err(hba->dev, "Aborted tag %d / CDB %#02x\n", tag, + hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1); } /* Clear pending task management requests */ @@ -6157,6 +6406,8 @@ lock_skip_pending_xfer_clear: do_reset: /* Fatal errors need reset */ if (needs_reset) { + int err; + hba->force_reset = false; spin_unlock_irqrestore(hba->host->host_lock, flags); err = ufshcd_reset_and_restore(hba); @@ -6176,10 +6427,20 @@ skip_err_handling: dev_err_ratelimited(hba->dev, "%s: exit: saved_err 0x%x saved_uic_err 0x%x", __func__, hba->saved_err, hba->saved_uic_err); } + /* Exit in an operational state or dead */ + if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL && + hba->ufshcd_state != UFSHCD_STATE_ERROR) { + if (--retries) + goto again; + hba->ufshcd_state = UFSHCD_STATE_ERROR; + } ufshcd_clear_eh_in_progress(hba); spin_unlock_irqrestore(hba->host->host_lock, flags); ufshcd_err_handling_unprepare(hba); up(&hba->host_sem); + + dev_info(hba->dev, "%s finished; HBA state %s\n", __func__, + ufshcd_state_name[hba->ufshcd_state]); } /** @@ -6353,27 +6614,6 @@ static irqreturn_t ufshcd_check_errors(struct ufs_hba *hba, u32 intr_status) return retval; } -struct ctm_info { - struct ufs_hba *hba; - unsigned long pending; - unsigned int ncpl; -}; - -static bool ufshcd_compl_tm(struct request *req, void *priv, bool reserved) -{ - struct ctm_info *const ci = priv; - struct completion *c; - - WARN_ON_ONCE(reserved); - if (test_bit(req->tag, &ci->pending)) - return true; - ci->ncpl++; - c = req->end_io_data; - if (c) - complete(c); - return true; -} - /** * ufshcd_tmc_handler - handle task management function completion * @hba: per adapter instance @@ -6384,18 +6624,23 @@ static bool ufshcd_compl_tm(struct request *req, void *priv, bool reserved) */ static irqreturn_t ufshcd_tmc_handler(struct ufs_hba *hba) { - unsigned long flags; - struct request_queue *q = hba->tmf_queue; - struct ctm_info ci = { - .hba = hba, - }; + unsigned long flags, pending, issued; + irqreturn_t ret = IRQ_NONE; + int tag; spin_lock_irqsave(hba->host->host_lock, flags); - ci.pending = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL); - blk_mq_tagset_busy_iter(q->tag_set, ufshcd_compl_tm, &ci); + pending = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL); + issued = hba->outstanding_tasks & ~pending; + for_each_set_bit(tag, &issued, hba->nutmrs) { + struct request *req = hba->tmf_rqs[tag]; + struct completion *c = req->end_io_data; + + complete(c); + ret = IRQ_HANDLED; + } spin_unlock_irqrestore(hba->host->host_lock, flags); - return ci.ncpl ? IRQ_HANDLED : IRQ_NONE; + return ret; } /** @@ -6421,7 +6666,7 @@ static irqreturn_t ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status) retval |= ufshcd_tmc_handler(hba); if (intr_status & UTP_TRANSFER_REQ_COMPL) - retval |= ufshcd_trc_handler(hba, ufshcd_has_utrlcnr(hba)); + retval |= ufshcd_transfer_req_compl(hba); return retval; } @@ -6455,8 +6700,7 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) while (intr_status && retries--) { enabled_intr_status = intr_status & ufshcd_readl(hba, REG_INTERRUPT_ENABLE); - if (intr_status) - ufshcd_writel(hba, intr_status, REG_INTERRUPT_STATUS); + ufshcd_writel(hba, intr_status, REG_INTERRUPT_STATUS); if (enabled_intr_status) retval |= ufshcd_sl_intr(hba, enabled_intr_status); @@ -6464,7 +6708,8 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba) } if (enabled_intr_status && retval == IRQ_NONE && - !ufshcd_eh_in_progress(hba)) { + (!(enabled_intr_status & UTP_TRANSFER_REQ_COMPL) || + hba->outstanding_reqs) && !ufshcd_eh_in_progress(hba)) { dev_err(hba->dev, "%s: Unhandled interrupt 0x%08x (0x%08x, 0x%08x)\n", __func__, intr_status, @@ -6493,6 +6738,10 @@ static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag) err = ufshcd_wait_for_register(hba, REG_UTP_TASK_REQ_DOOR_BELL, mask, 0, 1000, 1000); + + dev_err(hba->dev, "Clearing task management function with tag %d %s\n", + tag, err ? "succeeded" : "failed"); + out: return err; } @@ -6518,10 +6767,12 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, ufshcd_hold(hba, false); spin_lock_irqsave(host->host_lock, flags); - blk_mq_start_request(req); task_tag = req->tag; - treq->req_header.dword_0 |= cpu_to_be32(task_tag); + WARN_ONCE(task_tag < 0 || task_tag >= hba->nutmrs, "Invalid tag %d\n", + task_tag); + hba->tmf_rqs[req->tag] = req; + treq->upiu_req.req_header.dword_0 |= cpu_to_be32(task_tag); memcpy(hba->utmrdl_base_addr + task_tag, treq, sizeof(*treq)); ufshcd_vops_setup_task_mgmt(hba, task_tag, tm_function); @@ -6529,27 +6780,19 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, /* send command to the controller */ __set_bit(task_tag, &hba->outstanding_tasks); - /* Make sure descriptors are ready before ringing the task doorbell */ - wmb(); - ufshcd_writel(hba, 1 << task_tag, REG_UTP_TASK_REQ_DOOR_BELL); /* Make sure that doorbell is committed immediately */ wmb(); spin_unlock_irqrestore(host->host_lock, flags); - ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_send"); + ufshcd_add_tm_upiu_trace(hba, task_tag, UFS_TM_SEND); /* wait until the task management command is completed */ err = wait_for_completion_io_timeout(&wait, msecs_to_jiffies(TM_CMD_TIMEOUT)); if (!err) { - /* - * Make sure that ufshcd_compl_tm() does not trigger a - * use-after-free. - */ - req->end_io_data = NULL; - ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_complete_err"); + ufshcd_add_tm_upiu_trace(hba, task_tag, UFS_TM_ERR); dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n", __func__, tm_function); if (ufshcd_clear_tm_cmd(hba, task_tag)) @@ -6560,10 +6803,11 @@ static int __ufshcd_issue_tm_cmd(struct ufs_hba *hba, err = 0; memcpy(treq, hba->utmrdl_base_addr + task_tag, sizeof(*treq)); - ufshcd_add_tm_upiu_trace(hba, task_tag, "tm_complete"); + ufshcd_add_tm_upiu_trace(hba, task_tag, UFS_TM_COMP); } spin_lock_irqsave(hba->host->host_lock, flags); + hba->tmf_rqs[req->tag] = NULL; __clear_bit(task_tag, &hba->outstanding_tasks); spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -6587,23 +6831,24 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id, u8 tm_function, u8 *tm_response) { struct utp_task_req_desc treq = { { 0 }, }; - int ocs_value, err; + enum utp_ocs ocs_value; + int err; /* Configure task request descriptor */ treq.header.dword_0 = cpu_to_le32(UTP_REQ_DESC_INT_CMD); treq.header.dword_2 = cpu_to_le32(OCS_INVALID_COMMAND_STATUS); /* Configure task request UPIU */ - treq.req_header.dword_0 = cpu_to_be32(lun_id << 8) | + treq.upiu_req.req_header.dword_0 = cpu_to_be32(lun_id << 8) | cpu_to_be32(UPIU_TRANSACTION_TASK_REQ << 24); - treq.req_header.dword_1 = cpu_to_be32(tm_function << 16); + treq.upiu_req.req_header.dword_1 = cpu_to_be32(tm_function << 16); /* * The host shall provide the same value for LUN field in the basic * header and for Input Parameter. */ - treq.input_param1 = cpu_to_be32(lun_id); - treq.input_param2 = cpu_to_be32(task_id); + treq.upiu_req.input_param1 = cpu_to_be32(lun_id); + treq.upiu_req.input_param2 = cpu_to_be32(task_id); err = __ufshcd_issue_tm_cmd(hba, &treq, tm_function); if (err == -ETIMEDOUT) @@ -6614,7 +6859,7 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id, dev_err(hba->dev, "%s: failed, ocs = 0x%x\n", __func__, ocs_value); else if (tm_response) - *tm_response = be32_to_cpu(treq.output_param1) & + *tm_response = be32_to_cpu(treq.upiu_rsp.output_param1) & MASK_TM_SERVICE_RESP; return err; } @@ -6643,25 +6888,17 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, enum dev_cmd_type cmd_type, enum query_opcode desc_op) { - struct request_queue *q = hba->cmd_queue; - struct request *req; + DECLARE_COMPLETION_ONSTACK(wait); + const u32 tag = hba->reserved_slot; struct ufshcd_lrb *lrbp; int err = 0; - int tag; - struct completion wait; u8 upiu_flags; - down_read(&hba->clk_scaling_lock); + /* Protects use of hba->reserved_slot. */ + lockdep_assert_held(&hba->dev_cmd.lock); - req = blk_get_request(q, REQ_OP_DRV_OUT, 0); - if (IS_ERR(req)) { - err = PTR_ERR(req); - goto out_unlock; - } - tag = req->tag; - WARN_ON_ONCE(!ufshcd_valid_tag(hba, tag)); + down_read(&hba->clk_scaling_lock); - init_completion(&wait); lrbp = &hba->lrb[tag]; WARN_ON(lrbp->cmd); lrbp->cmd = NULL; @@ -6698,8 +6935,7 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, hba->dev_cmd.complete = &wait; - /* Make sure descriptors are ready before ringing the doorbell */ - wmb(); + ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr); ufshcd_send_command(hba, tag); /* @@ -6727,9 +6963,9 @@ static int ufshcd_issue_devman_upiu_cmd(struct ufs_hba *hba, err = -EINVAL; } } + ufshcd_add_query_upiu_trace(hba, err ? UFS_QUERY_ERR : UFS_QUERY_COMP, + (struct utp_upiu_req *)lrbp->ucd_rsp_ptr); - blk_put_request(req); -out_unlock: up_read(&hba->clk_scaling_lock); return err; } @@ -6759,7 +6995,7 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba, int err; enum dev_cmd_type cmd_type = DEV_CMD_TYPE_QUERY; struct utp_task_req_desc treq = { { 0 }, }; - int ocs_value; + enum utp_ocs ocs_value; u8 tm_f = be32_to_cpu(req_upiu->header.dword_1) >> 16 & MASK_TM_FUNC; switch (msgcode) { @@ -6780,7 +7016,7 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba, treq.header.dword_0 = cpu_to_le32(UTP_REQ_DESC_INT_CMD); treq.header.dword_2 = cpu_to_le32(OCS_INVALID_COMMAND_STATUS); - memcpy(&treq.req_header, req_upiu, sizeof(*req_upiu)); + memcpy(&treq.upiu_req, req_upiu, sizeof(*req_upiu)); err = __ufshcd_issue_tm_cmd(hba, &treq, tm_f); if (err == -ETIMEDOUT) @@ -6793,7 +7029,7 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba, break; } - memcpy(rsp_upiu, &treq.rsp_header, sizeof(*rsp_upiu)); + memcpy(rsp_upiu, &treq.upiu_rsp, sizeof(*rsp_upiu)); break; default: @@ -6837,7 +7073,7 @@ static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd) err = ufshcd_clear_cmd(hba, pos); if (err) break; - __ufshcd_transfer_req_compl(hba, pos); + __ufshcd_transfer_req_compl(hba, 1U << pos); } } @@ -6953,24 +7189,16 @@ out: */ static int ufshcd_abort(struct scsi_cmnd *cmd) { - struct Scsi_Host *host; - struct ufs_hba *hba; + struct Scsi_Host *host = cmd->device->host; + struct ufs_hba *hba = shost_priv(host); + int tag = scsi_cmd_to_rq(cmd)->tag; + struct ufshcd_lrb *lrbp = &hba->lrb[tag]; unsigned long flags; - unsigned int tag; - int err = FAILED, res; - struct ufshcd_lrb *lrbp; + int err = FAILED; + bool outstanding; u32 reg; - host = cmd->device->host; - hba = shost_priv(host); - tag = cmd->request->tag; - lrbp = &hba->lrb[tag]; - if (!ufshcd_valid_tag(hba, tag)) { - dev_err(hba->dev, - "%s: invalid command tag %d: cmd=0x%p, cmd->request=0x%p", - __func__, tag, cmd, cmd->request); - BUG(); - } + WARN_ONCE(tag < 0, "Invalid tag %d\n", tag); ufshcd_hold(hba, false); reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL); @@ -7037,13 +7265,25 @@ static int ufshcd_abort(struct scsi_cmnd *cmd) goto release; } - res = ufshcd_try_to_abort_task(hba, tag); - if (res) { - dev_err(hba->dev, "%s: failed with err %d\n", __func__, res); + err = ufshcd_try_to_abort_task(hba, tag); + if (err) { + dev_err(hba->dev, "%s: failed with err %d\n", __func__, err); ufshcd_set_req_abort_skip(hba, hba->outstanding_reqs); + err = FAILED; goto release; } + /* + * Clear the corresponding bit from outstanding_reqs since the command + * has been aborted successfully. + */ + spin_lock_irqsave(&hba->outstanding_lock, flags); + outstanding = __test_and_clear_bit(tag, &hba->outstanding_reqs); + spin_unlock_irqrestore(&hba->outstanding_lock, flags); + + if (outstanding) + ufshcd_release_scsi_cmd(hba, lrbp); + err = SUCCESS; release: @@ -7066,11 +7306,11 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) { int err; - ufshpb_reset_host(hba); /* * Stop the host controller and complete the requests * cleared by h/w */ + ufshpb_reset_host(hba); ufshcd_hba_stop(hba); hba->silence_err_logs = true; ufshcd_complete_requests(hba); @@ -7102,31 +7342,41 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) */ static int ufshcd_reset_and_restore(struct ufs_hba *hba) { - u32 saved_err; - u32 saved_uic_err; + u32 saved_err = 0; + u32 saved_uic_err = 0; int err = 0; unsigned long flags; int retries = MAX_HOST_RESET_RETRIES; - /* - * This is a fresh start, cache and clear saved error first, - * in case new error generated during reset and restore. - */ spin_lock_irqsave(hba->host->host_lock, flags); - saved_err = hba->saved_err; - saved_uic_err = hba->saved_uic_err; - hba->saved_err = 0; - hba->saved_uic_err = 0; - spin_unlock_irqrestore(hba->host->host_lock, flags); - do { + /* + * This is a fresh start, cache and clear saved error first, + * in case new error generated during reset and restore. + */ + saved_err |= hba->saved_err; + saved_uic_err |= hba->saved_uic_err; + hba->saved_err = 0; + hba->saved_uic_err = 0; + hba->force_reset = false; + hba->ufshcd_state = UFSHCD_STATE_RESET; + spin_unlock_irqrestore(hba->host->host_lock, flags); + /* Reset the attached device */ - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); err = ufshcd_host_reset_and_restore(hba); + + spin_lock_irqsave(hba->host->host_lock, flags); + if (err) + continue; + /* Do not exit unless operational or dead */ + if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL && + hba->ufshcd_state != UFSHCD_STATE_ERROR && + hba->ufshcd_state != UFSHCD_STATE_EH_SCHEDULED_NON_FATAL) + err = -EAGAIN; } while (err && --retries); - spin_lock_irqsave(hba->host->host_lock, flags); /* * Inform scsi mid-layer that we did reset and allow to handle * Unit Attention properly. @@ -7218,7 +7468,7 @@ static u32 ufshcd_get_max_icc_level(int sup_curr_uA, u32 start_scan, char *buff) } /** - * ufshcd_calc_icc_level - calculate the max ICC level + * ufshcd_find_max_sup_active_icc_level - calculate the max ICC level * In case regulators are not initialized we'll return 0 * @hba: per-adapter instance * @desc_buf: power descriptor buffer to extract ICC levels from. @@ -7233,25 +7483,31 @@ static u32 ufshcd_find_max_sup_active_icc_level(struct ufs_hba *hba, if (!hba->vreg_info.vcc || !hba->vreg_info.vccq || !hba->vreg_info.vccq2) { - dev_err(hba->dev, + /* + * Using dev_dbg to avoid messages during runtime PM to avoid + * never-ending cycles of messages written back to storage by + * user space causing runtime resume, causing more messages and + * so on. + */ + dev_dbg(hba->dev, "%s: Regulator capability was not set, actvIccLevel=%d", __func__, icc_level); goto out; } - if (hba->vreg_info.vcc && hba->vreg_info.vcc->max_uA) + if (hba->vreg_info.vcc->max_uA) icc_level = ufshcd_get_max_icc_level( hba->vreg_info.vcc->max_uA, POWER_DESC_MAX_ACTV_ICC_LVLS - 1, &desc_buf[PWR_DESC_ACTIVE_LVLS_VCC_0]); - if (hba->vreg_info.vccq && hba->vreg_info.vccq->max_uA) + if (hba->vreg_info.vccq->max_uA) icc_level = ufshcd_get_max_icc_level( hba->vreg_info.vccq->max_uA, icc_level, &desc_buf[PWR_DESC_ACTIVE_LVLS_VCCQ_0]); - if (hba->vreg_info.vccq2 && hba->vreg_info.vccq2->max_uA) + if (hba->vreg_info.vccq2->max_uA) icc_level = ufshcd_get_max_icc_level( hba->vreg_info.vccq2->max_uA, icc_level, @@ -7296,6 +7552,16 @@ out: kfree(desc_buf); } +static inline void ufshcd_blk_pm_runtime_init(struct scsi_device *sdev) +{ + scsi_autopm_get_device(sdev); + blk_pm_runtime_init(sdev->request_queue, &sdev->sdev_gendev); + if (sdev->rpm_autosuspend) + pm_runtime_set_autosuspend_delay(&sdev->sdev_gendev, + RPM_AUTOSUSPEND_DELAY_MS); + scsi_autopm_put_device(sdev); +} + /** * ufshcd_scsi_add_wlus - Adds required W-LUs * @hba: per-adapter instance @@ -7325,7 +7591,7 @@ out: static int ufshcd_scsi_add_wlus(struct ufs_hba *hba) { int ret = 0; - struct scsi_device *sdev_boot; + struct scsi_device *sdev_boot, *sdev_rpmb; hba->sdev_ufs_device = __scsi_add_device(hba->host, 0, 0, ufshcd_upiu_wlun_to_scsi_wlun(UFS_UPIU_UFS_DEVICE_WLUN), NULL); @@ -7336,20 +7602,23 @@ static int ufshcd_scsi_add_wlus(struct ufs_hba *hba) } scsi_device_put(hba->sdev_ufs_device); - hba->sdev_rpmb = __scsi_add_device(hba->host, 0, 0, + sdev_rpmb = __scsi_add_device(hba->host, 0, 0, ufshcd_upiu_wlun_to_scsi_wlun(UFS_UPIU_RPMB_WLUN), NULL); - if (IS_ERR(hba->sdev_rpmb)) { - ret = PTR_ERR(hba->sdev_rpmb); + if (IS_ERR(sdev_rpmb)) { + ret = PTR_ERR(sdev_rpmb); goto remove_sdev_ufs_device; } - scsi_device_put(hba->sdev_rpmb); + ufshcd_blk_pm_runtime_init(sdev_rpmb); + scsi_device_put(sdev_rpmb); sdev_boot = __scsi_add_device(hba->host, 0, 0, ufshcd_upiu_wlun_to_scsi_wlun(UFS_UPIU_BOOT_WLUN), NULL); - if (IS_ERR(sdev_boot)) + if (IS_ERR(sdev_boot)) { dev_err(hba->dev, "%s: BOOT WLUN not found\n", __func__); - else + } else { + ufshcd_blk_pm_runtime_init(sdev_boot); scsi_device_put(sdev_boot); + } goto out; remove_sdev_ufs_device: @@ -7363,9 +7632,11 @@ static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf) struct ufs_dev_info *dev_info = &hba->dev_info; u8 lun; u32 d_lu_wb_buf_alloc; + u32 ext_ufs_feature; if (!ufshcd_is_wb_allowed(hba)) return; + /* * Probe WB only for UFS-2.2 and UFS-3.1 (and later) devices or * UFS devices with quirk UFS_DEVICE_QUIRK_SUPPORT_EXTENDED_FEATURES @@ -7380,30 +7651,25 @@ static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf) DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP + 4) goto wb_disabled; - dev_info->d_ext_ufs_feature_sup = - get_unaligned_be32(desc_buf + - DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP); + ext_ufs_feature = get_unaligned_be32(desc_buf + + DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP); - if (!(dev_info->d_ext_ufs_feature_sup & UFS_DEV_WRITE_BOOSTER_SUP)) + if (!(ext_ufs_feature & UFS_DEV_WRITE_BOOSTER_SUP)) goto wb_disabled; /* - * WB may be supported but not configured while provisioning. - * The spec says, in dedicated wb buffer mode, - * a max of 1 lun would have wb buffer configured. - * Now only shared buffer mode is supported. + * WB may be supported but not configured while provisioning. The spec + * says, in dedicated wb buffer mode, a max of 1 lun would have wb + * buffer configured. */ - dev_info->b_wb_buffer_type = - desc_buf[DEVICE_DESC_PARAM_WB_TYPE]; + dev_info->wb_buffer_type = desc_buf[DEVICE_DESC_PARAM_WB_TYPE]; dev_info->b_presrv_uspc_en = desc_buf[DEVICE_DESC_PARAM_WB_PRESRV_USRSPC_EN]; - if (dev_info->b_wb_buffer_type == WB_BUF_MODE_SHARED) { - dev_info->d_wb_alloc_units = - get_unaligned_be32(desc_buf + - DEVICE_DESC_PARAM_WB_SHARED_ALLOC_UNITS); - if (!dev_info->d_wb_alloc_units) + if (dev_info->wb_buffer_type == WB_BUF_MODE_SHARED) { + if (!get_unaligned_be32(desc_buf + + DEVICE_DESC_PARAM_WB_SHARED_ALLOC_UNITS)) goto wb_disabled; } else { for (lun = 0; lun < UFS_UPIU_MAX_WB_LUN_ID; lun++) { @@ -7422,12 +7688,39 @@ static void ufshcd_wb_probe(struct ufs_hba *hba, u8 *desc_buf) if (!d_lu_wb_buf_alloc) goto wb_disabled; } + + if (!ufshcd_is_wb_buf_lifetime_available(hba)) + goto wb_disabled; + return; wb_disabled: hba->caps &= ~UFSHCD_CAP_WB_EN; } +static void ufshcd_temp_notif_probe(struct ufs_hba *hba, u8 *desc_buf) +{ + struct ufs_dev_info *dev_info = &hba->dev_info; + u32 ext_ufs_feature; + u8 mask = 0; + + if (!(hba->caps & UFSHCD_CAP_TEMP_NOTIF) || dev_info->wspecversion < 0x300) + return; + + ext_ufs_feature = get_unaligned_be32(desc_buf + DEVICE_DESC_PARAM_EXT_UFS_FEATURE_SUP); + + if (ext_ufs_feature & UFS_DEV_LOW_TEMP_NOTIF) + mask |= MASK_EE_TOO_LOW_TEMP; + + if (ext_ufs_feature & UFS_DEV_HIGH_TEMP_NOTIF) + mask |= MASK_EE_TOO_HIGH_TEMP; + + if (mask) { + ufshcd_enable_ee(hba, mask); + ufs_hwmon_probe(hba, mask); + } +} + void ufshcd_fixup_dev_quirks(struct ufs_hba *hba, struct ufs_dev_fix *fixups) { struct ufs_dev_fix *f; @@ -7516,10 +7809,15 @@ static int ufs_get_device_desc(struct ufs_hba *hba) goto out; } + hba->luns_avail = desc_buf[DEVICE_DESC_PARAM_NUM_LU] + + desc_buf[DEVICE_DESC_PARAM_NUM_WLU]; + ufs_fixup_device_setup(hba); ufshcd_wb_probe(hba, desc_buf); + ufshcd_temp_notif_probe(hba, desc_buf); + /* * ufshcd_read_string_desc returns size of the string * reset the error value @@ -7672,7 +7970,7 @@ static int ufshcd_quirk_tune_host_pa_tactivate(struct ufs_hba *hba) peer_pa_tactivate_us = peer_pa_tactivate * gran_to_us_table[peer_granularity - 1]; - if (pa_tactivate_us > peer_pa_tactivate_us) { + if (pa_tactivate_us >= peer_pa_tactivate_us) { u32 new_peer_pa_tactivate; new_peer_pa_tactivate = pa_tactivate_us / @@ -7863,8 +8161,6 @@ static int ufshcd_add_lus(struct ufs_hba *hba) if (ret) goto out; - ufshcd_clear_ua_wluns(hba); - /* Initialize devfreq after UFS device is detected */ if (ufshcd_is_clkscaling_supported(hba)) { memcpy(&hba->clk_scaling.saved_pwr_info.info, @@ -7890,75 +8186,18 @@ out: return ret; } -static int -ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp); - -static int ufshcd_clear_ua_wlun(struct ufs_hba *hba, u8 wlun) +/** + * ufshcd_probe_hba - probe hba to detect device and initialize it + * @hba: per-adapter instance + * @init_dev_params: whether or not to call ufshcd_device_params_init(). + * + * Execute link-startup and verify device initialization + */ +static int ufshcd_probe_hba(struct ufs_hba *hba, bool init_dev_params) { - struct scsi_device *sdp; + int ret; unsigned long flags; - int ret = 0; - - spin_lock_irqsave(hba->host->host_lock, flags); - if (wlun == UFS_UPIU_UFS_DEVICE_WLUN) - sdp = hba->sdev_ufs_device; - else if (wlun == UFS_UPIU_RPMB_WLUN) - sdp = hba->sdev_rpmb; - else - BUG(); - if (sdp) { - ret = scsi_device_get(sdp); - if (!ret && !scsi_device_online(sdp)) { - ret = -ENODEV; - scsi_device_put(sdp); - } - } else { - ret = -ENODEV; - } - spin_unlock_irqrestore(hba->host->host_lock, flags); - if (ret) - goto out_err; - - ret = ufshcd_send_request_sense(hba, sdp); - scsi_device_put(sdp); -out_err: - if (ret) - dev_err(hba->dev, "%s: UAC clear LU=%x ret = %d\n", - __func__, wlun, ret); - return ret; -} - -static int ufshcd_clear_ua_wluns(struct ufs_hba *hba) -{ - int ret = 0; - - if (!hba->wlun_dev_clr_ua) - goto out; - - ret = ufshcd_clear_ua_wlun(hba, UFS_UPIU_UFS_DEVICE_WLUN); - if (!ret) - ret = ufshcd_clear_ua_wlun(hba, UFS_UPIU_RPMB_WLUN); - if (!ret) - hba->wlun_dev_clr_ua = false; -out: - if (ret) - dev_err(hba->dev, "%s: Failed to clear UAC WLUNS ret = %d\n", - __func__, ret); - return ret; -} - -/** - * ufshcd_probe_hba - probe hba to detect device and initialize - * @hba: per-adapter instance - * @async: asynchronous execution or not - * - * Execute link-startup and verify device initialization - */ -static int ufshcd_probe_hba(struct ufs_hba *hba, bool async) -{ - int ret; - unsigned long flags; - ktime_t start = ktime_get(); + ktime_t start = ktime_get(); hba->ufshcd_state = UFSHCD_STATE_RESET; @@ -7966,7 +8205,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async) if (ret) goto out; - if (hba->quirks & UFSHCD_QUIRK_SKIP_INTERFACE_CONFIGURATION) + if (hba->quirks & UFSHCD_QUIRK_SKIP_PH_CONFIGURATION) goto out; /* Debug counters initialization */ @@ -7989,7 +8228,7 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async) * Initialize UFS device parameters used by driver, these * parameters are associated with UFS descriptors. */ - if (async) { + if (init_dev_params) { ret = ufshcd_device_params_init(hba); if (ret) goto out; @@ -8000,7 +8239,6 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async) /* UFS device is also active now */ ufshcd_set_ufs_dev_active(hba); ufshcd_force_reset_auto_bkops(hba); - hba->wlun_dev_clr_ua = true; /* Gear up to HS gear if supported */ if (hba->max_pwr_info.is_valid) { @@ -8028,10 +8266,14 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async) ufshcd_set_active_icc_lvl(hba); ufshcd_wb_config(hba); + if (hba->ee_usr_mask) + ufshcd_write_ee_control(hba); /* Enable Auto-Hibernate if configured */ ufshcd_auto_hibern8_enable(hba); ufshpb_reset(hba); + + trace_android_rvh_ufs_complete_init(hba); out: spin_lock_irqsave(hba->host->host_lock, flags); if (ret) @@ -8100,7 +8342,9 @@ static struct scsi_host_template ufshcd_driver_template = { .module = THIS_MODULE, .name = UFSHCD, .proc_name = UFSHCD, + .map_queues = ufshcd_map_queues, .queuecommand = ufshcd_queuecommand, + .mq_poll = ufshcd_poll, .slave_alloc = ufshcd_slave_alloc, .slave_configure = ufshcd_slave_configure, .slave_destroy = ufshcd_slave_destroy, @@ -8433,16 +8677,10 @@ static int ufshcd_variant_hba_init(struct ufs_hba *hba) goto out; err = ufshcd_vops_init(hba); - if (err) - goto out; - - err = ufshcd_vops_setup_regulators(hba, true); - if (err) - ufshcd_vops_exit(hba); -out: if (err) dev_err(hba->dev, "%s: variant %s init failed err %d\n", __func__, ufshcd_get_var_name(hba), err); +out: return err; } @@ -8451,8 +8689,6 @@ static void ufshcd_variant_hba_exit(struct ufs_hba *hba) if (!hba->vops) return; - ufshcd_vops_setup_regulators(hba, false); - ufshcd_vops_exit(hba); } @@ -8527,35 +8763,6 @@ static void ufshcd_hba_exit(struct ufs_hba *hba) } } -static int -ufshcd_send_request_sense(struct ufs_hba *hba, struct scsi_device *sdp) -{ - unsigned char cmd[6] = {REQUEST_SENSE, - 0, - 0, - 0, - UFS_SENSE_SIZE, - 0}; - char *buffer; - int ret; - - buffer = kzalloc(UFS_SENSE_SIZE, GFP_KERNEL); - if (!buffer) { - ret = -ENOMEM; - goto out; - } - - ret = scsi_execute(sdp, cmd, DMA_FROM_DEVICE, buffer, - UFS_SENSE_SIZE, NULL, NULL, - msecs_to_jiffies(1000), 3, 0, RQF_PM, NULL); - if (ret) - pr_err("%s: failed with err %d\n", __func__, ret); - - kfree(buffer); -out: - return ret; -} - /** * ufshcd_set_dev_pwr_mode - sends START STOP UNIT command to set device * power mode @@ -8563,7 +8770,7 @@ out: * @pwr_mode: device power mode to set * * Returns 0 if requested power mode is set successfully - * Returns non-zero if failed to set the requested power mode + * Returns < 0 if failed to set the requested power mode */ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, enum ufs_dev_pwr_mode pwr_mode) @@ -8572,7 +8779,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, struct scsi_sense_hdr sshdr; struct scsi_device *sdp; unsigned long flags; - int ret; + int ret, retries; spin_lock_irqsave(hba->host->host_lock, flags); sdp = hba->sdev_ufs_device; @@ -8597,7 +8804,6 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * handling context. */ hba->host->eh_noresume = 1; - ufshcd_clear_ua_wluns(hba); cmd[4] = pwr_mode << 4; @@ -8606,14 +8812,23 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba, * callbacks hence set the RQF_PM flag so that it doesn't resume the * already suspended childs. */ - ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, - START_STOP_TIMEOUT, 0, 0, RQF_PM, NULL); + for (retries = 3; retries > 0; --retries) { + ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr, + START_STOP_TIMEOUT, 0, 0, RQF_PM, NULL); + if (!scsi_status_is_check_condition(ret) || + !scsi_sense_valid(&sshdr) || + sshdr.sense_key != UNIT_ATTENTION) + break; + } if (ret) { sdev_printk(KERN_WARNING, sdp, "START_STOP failed for power mode: %d, result %x\n", pwr_mode, ret); - if (driver_byte(ret) == DRIVER_SENSE) - scsi_print_sense_hdr(sdp, NULL, &sshdr); + if (ret > 0) { + if (scsi_sense_valid(&sshdr)) + scsi_print_sense_hdr(sdp, NULL, &sshdr); + ret = -EIO; + } } if (!ret) @@ -8645,7 +8860,8 @@ static int ufshcd_link_state_transition(struct ufs_hba *hba, } /* * If autobkops is enabled, link can't be turned off because - * turning off the link would also turn off the device. + * turning off the link would also turn off the device, except in the + * case of DeepSleep where the device is expected to remain powered. */ else if ((req_link_state == UIC_LINK_OFF_STATE) && (!check_for_bkops || !hba->auto_bkops_enabled)) { @@ -8655,6 +8871,9 @@ static int ufshcd_link_state_transition(struct ufs_hba *hba, * put the link in low power mode is to send the DME end point * to device and then send the DME reset command to local * unipro. But putting the link in hibern8 is much faster. + * + * Note also that putting the link in Hibern8 is a requirement + * for entering DeepSleep. */ ret = ufshcd_uic_hibern8_enter(hba); if (ret) { @@ -8728,6 +8947,7 @@ static void ufshcd_vreg_set_lpm(struct ufs_hba *hba) usleep_range(5000, 5100); } +#ifdef CONFIG_PM static int ufshcd_vreg_set_hpm(struct ufs_hba *hba) { int ret = 0; @@ -8755,6 +8975,7 @@ vcc_disable: out: return ret; } +#endif /* CONFIG_PM */ static void ufshcd_hba_vreg_set_lpm(struct ufs_hba *hba) { @@ -8768,32 +8989,17 @@ static void ufshcd_hba_vreg_set_hpm(struct ufs_hba *hba) ufshcd_setup_hba_vreg(hba, true); } -/** - * ufshcd_suspend - helper function for suspend operations - * @hba: per adapter instance - * @pm_op: desired low power operation type - * - * This function will try to put the UFS device and link into low power - * mode based on the "rpm_lvl" (Runtime PM level) or "spm_lvl" - * (System PM level). - * - * If this function is called during shutdown, it will make sure that - * both UFS device and UFS link is powered off. - * - * NOTE: UFS device & link must be active before we enter in this function. - * - * Returns 0 for success and non-zero for failure - */ -static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) +static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) { int ret = 0; + int check_for_bkops; enum ufs_pm_level pm_lvl; enum ufs_dev_pwr_mode req_dev_pwr_mode; enum uic_link_state req_link_state; - hba->pm_op_in_progress = 1; - if (!ufshcd_is_shutdown_pm(pm_op)) { - pm_lvl = ufshcd_is_runtime_pm(pm_op) ? + hba->pm_op_in_progress = true; + if (pm_op != UFS_SHUTDOWN_PM) { + pm_lvl = pm_op == UFS_RUNTIME_PM ? hba->rpm_lvl : hba->spm_lvl; req_dev_pwr_mode = ufs_get_pm_lvl_to_dev_pwr_mode(pm_lvl); req_link_state = ufs_get_pm_lvl_to_link_pwr_state(pm_lvl); @@ -8816,20 +9022,20 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (req_dev_pwr_mode == UFS_ACTIVE_PWR_MODE && req_link_state == UIC_LINK_ACTIVE_STATE) { - goto disable_clks; + goto vops_suspend; } if ((req_dev_pwr_mode == hba->curr_dev_pwr_mode) && (req_link_state == hba->uic_link_state)) - goto enable_gating; + goto enable_scaling; /* UFS device & link must be active before we enter in this function */ if (!ufshcd_is_ufs_dev_active(hba) || !ufshcd_is_link_active(hba)) { ret = -EINVAL; - goto enable_gating; + goto enable_scaling; } - if (ufshcd_is_runtime_pm(pm_op)) { + if (pm_op == UFS_RUNTIME_PM) { if (ufshcd_can_autobkops_during_suspend(hba)) { /* * The device is idle with no requests in the queue, @@ -8838,7 +9044,7 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) */ ret = ufshcd_urgent_bkops(hba); if (ret) - goto enable_gating; + goto enable_scaling; } else { /* make sure that auto bkops is disabled */ ufshcd_disable_auto_bkops(hba); @@ -8858,114 +9064,92 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) flush_work(&hba->eeh_work); + ret = ufshcd_vops_suspend(hba, pm_op, PRE_CHANGE); + if (ret) + goto enable_scaling; + if (req_dev_pwr_mode != hba->curr_dev_pwr_mode) { - if ((ufshcd_is_runtime_pm(pm_op) && !hba->auto_bkops_enabled) || - !ufshcd_is_runtime_pm(pm_op)) { + if (pm_op != UFS_RUNTIME_PM) /* ensure that bkops is disabled */ ufshcd_disable_auto_bkops(hba); - } if (!hba->dev_info.b_rpm_dev_flush_capable) { ret = ufshcd_set_dev_pwr_mode(hba, req_dev_pwr_mode); if (ret) - goto enable_gating; + goto enable_scaling; } } - ret = ufshcd_link_state_transition(hba, req_link_state, 1); + /* + * In the case of DeepSleep, the device is expected to remain powered + * with the link off, so do not check for bkops. + */ + check_for_bkops = !ufshcd_is_ufs_dev_deepsleep(hba); + ret = ufshcd_link_state_transition(hba, req_link_state, check_for_bkops); if (ret) goto set_dev_active; -disable_clks: +vops_suspend: /* * Call vendor specific suspend callback. As these callbacks may access * vendor specific host controller register space call them before the * host clocks are ON. */ - ret = ufshcd_vops_suspend(hba, pm_op); + ret = ufshcd_vops_suspend(hba, pm_op, POST_CHANGE); if (ret) goto set_link_active; - /* - * Disable the host irq as host controller as there won't be any - * host controller transaction expected till resume. - */ - ufshcd_disable_irq(hba); - - ufshcd_setup_clocks(hba, false); - - if (ufshcd_is_clkgating_allowed(hba)) { - hba->clk_gating.state = CLKS_OFF; - trace_ufshcd_clk_gating(dev_name(hba->dev), - hba->clk_gating.state); - } - - ufshcd_vreg_set_lpm(hba); - - /* Put the host controller in low power mode if possible */ - ufshcd_hba_vreg_set_lpm(hba); goto out; set_link_active: - ufshcd_vreg_set_hpm(hba); + /* + * Device hardware reset is required to exit DeepSleep. Also, for + * DeepSleep, the link is off so host reset and restore will be done + * further below. + */ + if (ufshcd_is_ufs_dev_deepsleep(hba)) { + ufshcd_device_reset(hba); + WARN_ON(!ufshcd_is_link_off(hba)); + } if (ufshcd_is_link_hibern8(hba) && !ufshcd_uic_hibern8_exit(hba)) ufshcd_set_link_active(hba); else if (ufshcd_is_link_off(hba)) ufshcd_host_reset_and_restore(hba); set_dev_active: + /* Can also get here needing to exit DeepSleep */ + if (ufshcd_is_ufs_dev_deepsleep(hba)) { + ufshcd_device_reset(hba); + ufshcd_host_reset_and_restore(hba); + } if (!ufshcd_set_dev_pwr_mode(hba, UFS_ACTIVE_PWR_MODE)) ufshcd_disable_auto_bkops(hba); -enable_gating: +enable_scaling: if (ufshcd_is_clkscaling_supported(hba)) ufshcd_clk_scaling_suspend(hba, false); - hba->clk_gating.is_suspended = false; hba->dev_info.b_rpm_dev_flush_capable = false; - ufshcd_clear_ua_wluns(hba); - ufshcd_release(hba); - ufshpb_resume(hba); out: if (hba->dev_info.b_rpm_dev_flush_capable) { schedule_delayed_work(&hba->rpm_dev_flush_recheck_work, msecs_to_jiffies(RPM_DEV_FLUSH_RECHECK_WORK_DELAY_MS)); } - hba->pm_op_in_progress = 0; - - if (ret) - ufshcd_update_evt_hist(hba, UFS_EVT_SUSPEND_ERR, (u32)ret); + if (ret) { + ufshcd_update_evt_hist(hba, UFS_EVT_WL_SUSP_ERR, (u32)ret); + hba->clk_gating.is_suspended = false; + ufshcd_release(hba); + ufshpb_resume(hba); + } + hba->pm_op_in_progress = false; return ret; } -/** - * ufshcd_resume - helper function for resume operations - * @hba: per adapter instance - * @pm_op: runtime PM or system PM - * - * This function basically brings the UFS device, UniPro link and controller - * to active state. - * - * Returns 0 for success and non-zero for failure - */ -static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) +#ifdef CONFIG_PM +static int __ufshcd_wl_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) { int ret; - enum uic_link_state old_link_state; - - hba->pm_op_in_progress = 1; - old_link_state = hba->uic_link_state; + enum uic_link_state old_link_state = hba->uic_link_state; - ufshcd_hba_vreg_set_hpm(hba); - ret = ufshcd_vreg_set_hpm(hba); - if (ret) - goto out; - - /* Make sure clocks are enabled before accessing controller */ - ret = ufshcd_setup_clocks(hba, true); - if (ret) - goto disable_vreg; - - /* enable the host irq as host controller would be active soon */ - ufshcd_enable_irq(hba); + hba->pm_op_in_progress = true; /* * Call vendor specific resume callback. As these callbacks may access @@ -8974,7 +9158,10 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) */ ret = ufshcd_vops_resume(hba, pm_op); if (ret) - goto disable_irq_and_vops_clks; + goto out; + + /* For DeepSleep, the only supported option is to have the link off */ + WARN_ON(ufshcd_is_ufs_dev_deepsleep(hba) && !ufshcd_is_link_off(hba)); if (ufshcd_is_link_hibern8(hba)) { ret = ufshcd_uic_hibern8_exit(hba); @@ -8989,6 +9176,8 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) /* * A full initialization of the host and the device is * required since the link was put to off during suspend. + * Note, in the case of DeepSleep, the device will exit + * DeepSleep due to device reset. */ ret = ufshcd_reset_and_restore(hba); /* @@ -9014,153 +9203,306 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) */ ufshcd_urgent_bkops(hba); - hba->clk_gating.is_suspended = false; + if (hba->ee_usr_mask) + ufshcd_write_ee_control(hba); if (ufshcd_is_clkscaling_supported(hba)) ufshcd_clk_scaling_suspend(hba, false); - /* Enable Auto-Hibernate if configured */ - ufshcd_auto_hibern8_enable(hba); - - ufshpb_resume(hba); - if (hba->dev_info.b_rpm_dev_flush_capable) { hba->dev_info.b_rpm_dev_flush_capable = false; cancel_delayed_work(&hba->rpm_dev_flush_recheck_work); } - ufshcd_clear_ua_wluns(hba); - - /* Schedule clock gating in case of no access to UFS device yet */ - ufshcd_release(hba); + /* Enable Auto-Hibernate if configured */ + ufshcd_auto_hibern8_enable(hba); + ufshpb_resume(hba); goto out; set_old_link_state: ufshcd_link_state_transition(hba, old_link_state, 0); vendor_suspend: - ufshcd_vops_suspend(hba, pm_op); -disable_irq_and_vops_clks: + ufshcd_vops_suspend(hba, pm_op, PRE_CHANGE); + ufshcd_vops_suspend(hba, pm_op, POST_CHANGE); +out: + if (ret) + ufshcd_update_evt_hist(hba, UFS_EVT_WL_RES_ERR, (u32)ret); + hba->clk_gating.is_suspended = false; + ufshcd_release(hba); + hba->pm_op_in_progress = false; + return ret; +} + +static int ufshcd_wl_runtime_suspend(struct device *dev) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct ufs_hba *hba; + int ret; + ktime_t start = ktime_get(); + + hba = shost_priv(sdev->host); + + ret = __ufshcd_wl_suspend(hba, UFS_RUNTIME_PM); + if (ret) + dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__, ret); + + trace_ufshcd_wl_runtime_suspend(dev_name(dev), ret, + ktime_to_us(ktime_sub(ktime_get(), start)), + hba->curr_dev_pwr_mode, hba->uic_link_state); + + return ret; +} + +static int ufshcd_wl_runtime_resume(struct device *dev) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct ufs_hba *hba; + int ret = 0; + ktime_t start = ktime_get(); + + hba = shost_priv(sdev->host); + + ret = __ufshcd_wl_resume(hba, UFS_RUNTIME_PM); + if (ret) + dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__, ret); + + trace_ufshcd_wl_runtime_resume(dev_name(dev), ret, + ktime_to_us(ktime_sub(ktime_get(), start)), + hba->curr_dev_pwr_mode, hba->uic_link_state); + + return ret; +} +#endif + +#ifdef CONFIG_PM_SLEEP +static int ufshcd_wl_suspend(struct device *dev) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct ufs_hba *hba; + int ret = 0; + ktime_t start = ktime_get(); + + hba = shost_priv(sdev->host); + down(&hba->host_sem); + + if (pm_runtime_suspended(dev)) + goto out; + + ret = __ufshcd_wl_suspend(hba, UFS_SYSTEM_PM); + if (ret) { + dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__, ret); + up(&hba->host_sem); + } + +out: + if (!ret) + hba->is_sys_suspended = true; + trace_ufshcd_wl_suspend(dev_name(dev), ret, + ktime_to_us(ktime_sub(ktime_get(), start)), + hba->curr_dev_pwr_mode, hba->uic_link_state); + + return ret; +} + +static int ufshcd_wl_resume(struct device *dev) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct ufs_hba *hba; + int ret = 0; + ktime_t start = ktime_get(); + + hba = shost_priv(sdev->host); + + if (pm_runtime_suspended(dev)) + goto out; + + ret = __ufshcd_wl_resume(hba, UFS_SYSTEM_PM); + if (ret) + dev_err(&sdev->sdev_gendev, "%s failed: %d\n", __func__, ret); +out: + trace_ufshcd_wl_resume(dev_name(dev), ret, + ktime_to_us(ktime_sub(ktime_get(), start)), + hba->curr_dev_pwr_mode, hba->uic_link_state); + if (!ret) + hba->is_sys_suspended = false; + up(&hba->host_sem); + return ret; +} +#endif + +static void ufshcd_wl_shutdown(struct device *dev) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct ufs_hba *hba; + + hba = shost_priv(sdev->host); + + down(&hba->host_sem); + hba->shutting_down = true; + up(&hba->host_sem); + + /* Turn on everything while shutting down */ + ufshcd_rpm_get_sync(hba); + scsi_device_quiesce(sdev); + shost_for_each_device(sdev, hba->host) { + if (sdev == hba->sdev_ufs_device) + continue; + scsi_device_quiesce(sdev); + } + __ufshcd_wl_suspend(hba, UFS_SHUTDOWN_PM); +} + +/** + * ufshcd_suspend - helper function for suspend operations + * @hba: per adapter instance + * + * This function will put disable irqs, turn off clocks + * and set vreg and hba-vreg in lpm mode. + */ +static int ufshcd_suspend(struct ufs_hba *hba) +{ + int ret; + + if (!hba->is_powered) + return 0; + /* + * Disable the host irq as host controller as there won't be any + * host controller transaction expected till resume. + */ ufshcd_disable_irq(hba); - ufshcd_setup_clocks(hba, false); + ret = ufshcd_setup_clocks(hba, false); + if (ret) { + ufshcd_enable_irq(hba); + return ret; + } if (ufshcd_is_clkgating_allowed(hba)) { hba->clk_gating.state = CLKS_OFF; trace_ufshcd_clk_gating(dev_name(hba->dev), hba->clk_gating.state); } + + ufshcd_vreg_set_lpm(hba); + /* Put the host controller in low power mode if possible */ + ufshcd_hba_vreg_set_lpm(hba); + return ret; +} + +#ifdef CONFIG_PM +/** + * ufshcd_resume - helper function for resume operations + * @hba: per adapter instance + * + * This function basically turns on the regulators, clocks and + * irqs of the hba. + * + * Returns 0 for success and non-zero for failure + */ +static int ufshcd_resume(struct ufs_hba *hba) +{ + int ret; + + if (!hba->is_powered) + return 0; + + ufshcd_hba_vreg_set_hpm(hba); + ret = ufshcd_vreg_set_hpm(hba); + if (ret) + goto out; + + /* Make sure clocks are enabled before accessing controller */ + ret = ufshcd_setup_clocks(hba, true); + if (ret) + goto disable_vreg; + + /* enable the host irq as host controller would be active soon */ + ufshcd_enable_irq(hba); + goto out; + disable_vreg: ufshcd_vreg_set_lpm(hba); out: - hba->pm_op_in_progress = 0; if (ret) ufshcd_update_evt_hist(hba, UFS_EVT_RESUME_ERR, (u32)ret); return ret; } +#endif /* CONFIG_PM */ +#ifdef CONFIG_PM_SLEEP /** - * ufshcd_system_suspend - system suspend routine - * @hba: per adapter instance + * ufshcd_system_suspend - system suspend callback + * @dev: Device associated with the UFS controller. * - * Check the description of ufshcd_suspend() function for more details. + * Executed before putting the system into a sleep state in which the contents + * of main memory are preserved. * * Returns 0 for success and non-zero for failure */ -int ufshcd_system_suspend(struct ufs_hba *hba) +int ufshcd_system_suspend(struct device *dev) { + struct ufs_hba *hba = dev_get_drvdata(dev); int ret = 0; ktime_t start = ktime_get(); - down(&hba->host_sem); - - if (!hba->is_powered) - return 0; - - cancel_delayed_work_sync(&hba->rpm_dev_flush_recheck_work); - - if ((ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl) == - hba->curr_dev_pwr_mode) && - (ufs_get_pm_lvl_to_link_pwr_state(hba->spm_lvl) == - hba->uic_link_state) && - pm_runtime_suspended(hba->dev) && - !hba->dev_info.b_rpm_dev_flush_capable) + if (pm_runtime_suspended(hba->dev)) goto out; - if (pm_runtime_suspended(hba->dev)) { - /* - * UFS device and/or UFS link low power states during runtime - * suspend seems to be different than what is expected during - * system suspend. Hence runtime resume the devic & link and - * let the system suspend low power states to take effect. - * TODO: If resume takes longer time, we might have optimize - * it in future by not resuming everything if possible. - */ - ret = ufshcd_runtime_resume(hba); - if (ret) - goto out; - } - - ret = ufshcd_suspend(hba, UFS_SYSTEM_PM); + ret = ufshcd_suspend(hba); out: trace_ufshcd_system_suspend(dev_name(hba->dev), ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); - if (!ret) - hba->is_sys_suspended = true; - else - up(&hba->host_sem); return ret; } EXPORT_SYMBOL(ufshcd_system_suspend); /** - * ufshcd_system_resume - system resume routine - * @hba: per adapter instance + * ufshcd_system_resume - system resume callback + * @dev: Device associated with the UFS controller. + * + * Executed after waking the system up from a sleep state in which the contents + * of main memory were preserved. * * Returns 0 for success and non-zero for failure */ - -int ufshcd_system_resume(struct ufs_hba *hba) +int ufshcd_system_resume(struct device *dev) { - int ret = 0; + struct ufs_hba *hba = dev_get_drvdata(dev); ktime_t start = ktime_get(); + int ret = 0; - if (!hba->is_powered || pm_runtime_suspended(hba->dev)) - /* - * Let the runtime resume take care of resuming - * if runtime suspended. - */ + if (pm_runtime_suspended(hba->dev)) goto out; - else - ret = ufshcd_resume(hba, UFS_SYSTEM_PM); + + ret = ufshcd_resume(hba); + out: trace_ufshcd_system_resume(dev_name(hba->dev), ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); - if (!ret) - hba->is_sys_suspended = false; - up(&hba->host_sem); + return ret; } EXPORT_SYMBOL(ufshcd_system_resume); +#endif /* CONFIG_PM_SLEEP */ +#ifdef CONFIG_PM /** - * ufshcd_runtime_suspend - runtime suspend routine - * @hba: per adapter instance + * ufshcd_runtime_suspend - runtime suspend callback + * @dev: Device associated with the UFS controller. * * Check the description of ufshcd_suspend() function for more details. * * Returns 0 for success and non-zero for failure */ -int ufshcd_runtime_suspend(struct ufs_hba *hba) +int ufshcd_runtime_suspend(struct device *dev) { - int ret = 0; + struct ufs_hba *hba = dev_get_drvdata(dev); + int ret; ktime_t start = ktime_get(); - if (!hba->is_powered) - goto out; - else - ret = ufshcd_suspend(hba, UFS_RUNTIME_PM); -out: + ret = ufshcd_suspend(hba); + trace_ufshcd_runtime_suspend(dev_name(hba->dev), ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); @@ -9170,76 +9512,48 @@ EXPORT_SYMBOL(ufshcd_runtime_suspend); /** * ufshcd_runtime_resume - runtime resume routine - * @hba: per adapter instance + * @dev: Device associated with the UFS controller. * - * This function basically brings the UFS device, UniPro link and controller + * This function basically brings controller * to active state. Following operations are done in this function: * * 1. Turn on all the controller related clocks - * 2. Bring the UniPro link out of Hibernate state - * 3. If UFS device is in sleep state, turn ON VCC rail and bring the UFS device - * to active state. - * 4. If auto-bkops is enabled on the device, disable it. - * - * So following would be the possible power state after this function return - * successfully: - * S1: UFS device in Active state with VCC rail ON - * UniPro link in Active state - * All the UFS/UniPro controller clocks are ON - * - * Returns 0 for success and non-zero for failure + * 2. Turn ON VCC rail */ -int ufshcd_runtime_resume(struct ufs_hba *hba) +int ufshcd_runtime_resume(struct device *dev) { - int ret = 0; + struct ufs_hba *hba = dev_get_drvdata(dev); + int ret; ktime_t start = ktime_get(); - if (!hba->is_powered) - goto out; - else - ret = ufshcd_resume(hba, UFS_RUNTIME_PM); -out: + ret = ufshcd_resume(hba); + trace_ufshcd_runtime_resume(dev_name(hba->dev), ret, ktime_to_us(ktime_sub(ktime_get(), start)), hba->curr_dev_pwr_mode, hba->uic_link_state); return ret; } EXPORT_SYMBOL(ufshcd_runtime_resume); - -int ufshcd_runtime_idle(struct ufs_hba *hba) -{ - return 0; -} -EXPORT_SYMBOL(ufshcd_runtime_idle); +#endif /* CONFIG_PM */ /** * ufshcd_shutdown - shutdown routine * @hba: per adapter instance * - * This function would power off both UFS device and UFS link. + * This function would turn off both UFS device and UFS hba + * regulators. It would also disable clocks. * * Returns 0 always to allow force shutdown even in case of errors. */ int ufshcd_shutdown(struct ufs_hba *hba) { - int ret = 0; - - down(&hba->host_sem); - hba->shutting_down = true; - up(&hba->host_sem); - - if (!hba->is_powered) - goto out; - if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) goto out; pm_runtime_get_sync(hba->dev); - ret = ufshcd_suspend(hba, UFS_SHUTDOWN_PM); + ufshcd_suspend(hba); out: - if (ret) - dev_err(hba->dev, "%s failed, err %d\n", __func__, ret); hba->is_powered = false; /* allow force shutdown even in case of errors */ return 0; @@ -9253,12 +9567,14 @@ EXPORT_SYMBOL(ufshcd_shutdown); */ void ufshcd_remove(struct ufs_hba *hba) { + if (hba->sdev_ufs_device) + ufshcd_rpm_get_sync(hba); + ufs_hwmon_remove(hba); ufs_bsg_remove(hba); ufshpb_remove(hba); ufs_sysfs_remove_nodes(hba->dev); blk_cleanup_queue(hba->tmf_queue); blk_mq_free_tag_set(&hba->tmf_tag_set); - blk_cleanup_queue(hba->cmd_queue); scsi_remove_host(hba->host); /* disable interrupts */ ufshcd_disable_intr(hba, hba->intr_mask); @@ -9319,14 +9635,17 @@ int ufshcd_alloc_host(struct device *dev, struct ufs_hba **hba_handle) err = -ENOMEM; goto out_error; } + host->nr_maps = HCTX_TYPE_POLL + 1; hba = shost_priv(host); hba->host = host; hba->dev = dev; - *hba_handle = hba; hba->dev_ref_clk_freq = REF_CLK_FREQ_INVAL; + hba->nop_out_timeout = NOP_OUT_TIMEOUT; hba->sg_entry_size = sizeof(struct ufshcd_sg_entry); - INIT_LIST_HEAD(&hba->clk_list_head); + spin_lock_init(&hba->outstanding_lock); + + *hba_handle = hba; out_error: return err; @@ -9359,6 +9678,13 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) struct device *dev = hba->dev; char eh_wq_name[sizeof("ufs_eh_wq_00")]; + /* + * dev_set_drvdata() must be called before any callbacks are registered + * that use dev_get_drvdata() (frequency scaling, clock scaling, hwmon, + * sysfs). + */ + dev_set_drvdata(dev, hba); + if (!mmio_base) { dev_err(hba->dev, "Invalid memory reference for mmio_base is NULL\n"); @@ -9382,10 +9708,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Get UFS version supported by the controller */ hba->ufs_version = ufshcd_get_ufs_version(hba); - if (hba->ufs_version < ufshci_version(1, 0)) - dev_err(hba->dev, "invalid UFS version 0x%x\n", - hba->ufs_version); - /* Get Interrupt bit mask per version */ hba->intr_mask = ufshcd_get_intr_mask(hba); @@ -9405,8 +9727,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Configure LRB */ ufshcd_host_memory_configure(hba); - host->can_queue = hba->nutrs; - host->cmd_per_lun = hba->nutrs; + host->can_queue = hba->nutrs - UFSHCD_NUM_RESERVED; + host->cmd_per_lun = hba->nutrs - UFSHCD_NUM_RESERVED; host->max_id = UFSHCD_MAX_ID; host->max_lun = UFS_MAX_LUNS; host->max_channel = UFSHCD_MAX_CHANNEL; @@ -9421,7 +9743,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) hba->eh_wq = create_singlethread_workqueue(eh_wq_name); if (!hba->eh_wq) { dev_err(hba->dev, "%s: failed to create eh workqueue\n", - __func__); + __func__); err = -ENOMEM; goto out_disable; } @@ -9436,6 +9758,9 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) /* Initialize mutex for device management commands */ mutex_init(&hba->dev_cmd.lock); + /* Initialize mutex for exception event control */ + mutex_init(&hba->ee_ctrl_mutex); + init_rwsem(&hba->clk_scaling_lock); ufshcd_init_clk_gating(hba); @@ -9471,12 +9796,6 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) goto out_disable; } - hba->cmd_queue = blk_mq_init_queue(&hba->host->tag_set); - if (IS_ERR(hba->cmd_queue)) { - err = PTR_ERR(hba->cmd_queue); - goto out_remove_scsi_host; - } - hba->tmf_tag_set = (struct blk_mq_tag_set) { .nr_hw_queues = 1, .queue_depth = hba->nutmrs, @@ -9485,15 +9804,21 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) }; err = blk_mq_alloc_tag_set(&hba->tmf_tag_set); if (err < 0) - goto free_cmd_queue; + goto out_remove_scsi_host; hba->tmf_queue = blk_mq_init_queue(&hba->tmf_tag_set); if (IS_ERR(hba->tmf_queue)) { err = PTR_ERR(hba->tmf_queue); goto free_tmf_tag_set; } + hba->tmf_rqs = devm_kcalloc(hba->dev, hba->nutmrs, + sizeof(*hba->tmf_rqs), GFP_KERNEL); + if (!hba->tmf_rqs) { + err = -ENOMEM; + goto free_tmf_queue; + } /* Reset the attached device */ - ufshcd_vops_device_reset(hba); + ufshcd_device_reset(hba); ufshcd_init_crypto(hba); @@ -9548,8 +9873,6 @@ free_tmf_queue: blk_cleanup_queue(hba->tmf_queue); free_tmf_tag_set: blk_mq_free_tag_set(&hba->tmf_tag_set); -free_cmd_queue: - blk_cleanup_queue(hba->cmd_queue); out_remove_scsi_host: scsi_remove_host(hba->host); out_disable: @@ -9560,15 +9883,155 @@ out_error: } EXPORT_SYMBOL_GPL(ufshcd_init); +void ufshcd_resume_complete(struct device *dev) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + + if (hba->complete_put) { + ufshcd_rpm_put(hba); + hba->complete_put = false; + } +} +EXPORT_SYMBOL_GPL(ufshcd_resume_complete); + +static bool ufshcd_rpm_ok_for_spm(struct ufs_hba *hba) +{ + struct device *dev = &hba->sdev_ufs_device->sdev_gendev; + enum ufs_dev_pwr_mode dev_pwr_mode; + enum uic_link_state link_state; + unsigned long flags; + bool res; + + spin_lock_irqsave(&dev->power.lock, flags); + dev_pwr_mode = ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl); + link_state = ufs_get_pm_lvl_to_link_pwr_state(hba->spm_lvl); + res = pm_runtime_suspended(dev) && + hba->curr_dev_pwr_mode == dev_pwr_mode && + hba->uic_link_state == link_state && + !hba->dev_info.b_rpm_dev_flush_capable; + spin_unlock_irqrestore(&dev->power.lock, flags); + + return res; +} + +int __ufshcd_suspend_prepare(struct device *dev, bool rpm_ok_for_spm) +{ + struct ufs_hba *hba = dev_get_drvdata(dev); + int ret; + + /* + * SCSI assumes that runtime-pm and system-pm for scsi drivers + * are same. And it doesn't wake up the device for system-suspend + * if it's runtime suspended. But ufs doesn't follow that. + * Refer ufshcd_resume_complete() + */ + if (hba->sdev_ufs_device) { + /* Prevent runtime suspend */ + ufshcd_rpm_get_noresume(hba); + /* + * Check if already runtime suspended in same state as system + * suspend would be. + */ + if (!rpm_ok_for_spm || !ufshcd_rpm_ok_for_spm(hba)) { + /* RPM state is not ok for SPM, so runtime resume */ + ret = ufshcd_rpm_resume(hba); + if (ret < 0 && ret != -EACCES) { + ufshcd_rpm_put(hba); + return ret; + } + } + hba->complete_put = true; + } + return 0; +} +EXPORT_SYMBOL_GPL(__ufshcd_suspend_prepare); + +int ufshcd_suspend_prepare(struct device *dev) +{ + return __ufshcd_suspend_prepare(dev, true); +} +EXPORT_SYMBOL_GPL(ufshcd_suspend_prepare); + +#ifdef CONFIG_PM_SLEEP +static int ufshcd_wl_poweroff(struct device *dev) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct ufs_hba *hba = shost_priv(sdev->host); + + __ufshcd_wl_suspend(hba, UFS_SHUTDOWN_PM); + return 0; +} +#endif + +static int ufshcd_wl_probe(struct device *dev) +{ + struct scsi_device *sdev = to_scsi_device(dev); + + if (!is_device_wlun(sdev)) + return -ENODEV; + + blk_pm_runtime_init(sdev->request_queue, dev); + pm_runtime_set_autosuspend_delay(dev, 0); + pm_runtime_allow(dev); + + return 0; +} + +static int ufshcd_wl_remove(struct device *dev) +{ + pm_runtime_forbid(dev); + return 0; +} + +static const struct dev_pm_ops ufshcd_wl_pm_ops = { +#ifdef CONFIG_PM_SLEEP + .suspend = ufshcd_wl_suspend, + .resume = ufshcd_wl_resume, + .freeze = ufshcd_wl_suspend, + .thaw = ufshcd_wl_resume, + .poweroff = ufshcd_wl_poweroff, + .restore = ufshcd_wl_resume, +#endif + SET_RUNTIME_PM_OPS(ufshcd_wl_runtime_suspend, ufshcd_wl_runtime_resume, NULL) +}; + +/* + * ufs_dev_wlun_template - describes ufs device wlun + * ufs-device wlun - used to send pm commands + * All luns are consumers of ufs-device wlun. + * + * Currently, no sd driver is present for wluns. + * Hence the no specific pm operations are performed. + * With ufs design, SSU should be sent to ufs-device wlun. + * Hence register a scsi driver for ufs wluns only. + */ +static struct scsi_driver ufs_dev_wlun_template = { + .gendrv = { + .name = "ufs_device_wlun", + .owner = THIS_MODULE, + .probe = ufshcd_wl_probe, + .remove = ufshcd_wl_remove, + .pm = &ufshcd_wl_pm_ops, + .shutdown = ufshcd_wl_shutdown, + }, +}; + static int __init ufshcd_core_init(void) { + int ret; + ufs_debugfs_init(); - return 0; + + ret = scsi_register_driver(&ufs_dev_wlun_template.gendrv); + if (ret) + ufs_debugfs_exit(); + return ret; } static void __exit ufshcd_core_exit(void) { ufs_debugfs_exit(); + scsi_unregister_driver(&ufs_dev_wlun_template.gendrv); } module_init(ufshcd_core_init); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index f4d0fb6fdd972db7feab2c181760284a1ede8a9f..4fc5deaa99dd62ddf8be120687a8bfcbd3f183f4 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -73,6 +73,8 @@ enum ufs_event_type { UFS_EVT_LINK_STARTUP_FAIL, UFS_EVT_RESUME_ERR, UFS_EVT_SUSPEND_ERR, + UFS_EVT_WL_SUSP_ERR, + UFS_EVT_WL_RES_ERR, /* abnormal events */ UFS_EVT_DEV_RESET, @@ -107,10 +109,6 @@ enum ufs_pm_op { UFS_SHUTDOWN_PM, }; -#define ufshcd_is_runtime_pm(op) ((op) == UFS_RUNTIME_PM) -#define ufshcd_is_system_pm(op) ((op) == UFS_SYSTEM_PM) -#define ufshcd_is_shutdown_pm(op) ((op) == UFS_SHUTDOWN_PM) - /* Host <-> Device UniPro Link state */ enum uic_link_state { UIC_LINK_OFF_STATE = 0, /* Link powered down or disabled */ @@ -140,24 +138,31 @@ enum uic_link_state { ((h)->curr_dev_pwr_mode = UFS_SLEEP_PWR_MODE) #define ufshcd_set_ufs_dev_poweroff(h) \ ((h)->curr_dev_pwr_mode = UFS_POWERDOWN_PWR_MODE) +#define ufshcd_set_ufs_dev_deepsleep(h) \ + ((h)->curr_dev_pwr_mode = UFS_DEEPSLEEP_PWR_MODE) #define ufshcd_is_ufs_dev_active(h) \ ((h)->curr_dev_pwr_mode == UFS_ACTIVE_PWR_MODE) #define ufshcd_is_ufs_dev_sleep(h) \ ((h)->curr_dev_pwr_mode == UFS_SLEEP_PWR_MODE) #define ufshcd_is_ufs_dev_poweroff(h) \ ((h)->curr_dev_pwr_mode == UFS_POWERDOWN_PWR_MODE) +#define ufshcd_is_ufs_dev_deepsleep(h) \ + ((h)->curr_dev_pwr_mode == UFS_DEEPSLEEP_PWR_MODE) /* * UFS Power management levels. - * Each level is in increasing order of power savings. + * Each level is in increasing order of power savings, except DeepSleep + * which is lower than PowerDown with power on but not PowerDown with + * power off. */ enum ufs_pm_level { - UFS_PM_LVL_0, /* UFS_ACTIVE_PWR_MODE, UIC_LINK_ACTIVE_STATE */ - UFS_PM_LVL_1, /* UFS_ACTIVE_PWR_MODE, UIC_LINK_HIBERN8_STATE */ - UFS_PM_LVL_2, /* UFS_SLEEP_PWR_MODE, UIC_LINK_ACTIVE_STATE */ - UFS_PM_LVL_3, /* UFS_SLEEP_PWR_MODE, UIC_LINK_HIBERN8_STATE */ - UFS_PM_LVL_4, /* UFS_POWERDOWN_PWR_MODE, UIC_LINK_HIBERN8_STATE */ - UFS_PM_LVL_5, /* UFS_POWERDOWN_PWR_MODE, UIC_LINK_OFF_STATE */ + UFS_PM_LVL_0, + UFS_PM_LVL_1, + UFS_PM_LVL_2, + UFS_PM_LVL_3, + UFS_PM_LVL_4, + UFS_PM_LVL_5, + UFS_PM_LVL_6, UFS_PM_LVL_MAX }; @@ -298,7 +303,6 @@ struct ufs_pwr_mode_info { * @get_ufs_hci_version: called to get UFS HCI version * @clk_scale_notify: notifies that clks are scaled up/down * @setup_clocks: called before touching any of the controller registers - * @setup_regulators: called before accessing the host controller * @hce_enable_notify: called before and after HCE enable bit is set to allow * variant specific Uni-Pro initialization. * @link_startup_notify: called before and after Link startup is carried out @@ -329,7 +333,6 @@ struct ufs_hba_variant_ops { enum ufs_notify_change_status); int (*setup_clocks)(struct ufs_hba *, bool, enum ufs_notify_change_status); - int (*setup_regulators)(struct ufs_hba *, bool); int (*hce_enable_notify)(struct ufs_hba *, enum ufs_notify_change_status); int (*link_startup_notify)(struct ufs_hba *, @@ -338,13 +341,15 @@ struct ufs_hba_variant_ops { enum ufs_notify_change_status status, struct ufs_pa_layer_attr *, struct ufs_pa_layer_attr *); - void (*setup_xfer_req)(struct ufs_hba *, int, bool); + void (*setup_xfer_req)(struct ufs_hba *hba, int tag, + bool is_scsi_cmd); void (*setup_task_mgmt)(struct ufs_hba *, int, u8); void (*hibern8_notify)(struct ufs_hba *, enum uic_cmd_dme, enum ufs_notify_change_status); int (*apply_dev_quirks)(struct ufs_hba *hba); void (*fixup_dev_quirks)(struct ufs_hba *hba); - int (*suspend)(struct ufs_hba *, enum ufs_pm_op); + int (*suspend)(struct ufs_hba *, enum ufs_pm_op, + enum ufs_notify_change_status); int (*resume)(struct ufs_hba *, enum ufs_pm_op); void (*dbg_register_dump)(struct ufs_hba *hba); int (*phy_initialization)(struct ufs_hba *); @@ -485,6 +490,27 @@ struct ufs_stats { struct ufs_event_hist event[UFS_EVT_CNT]; }; +/** + * enum ufshcd_state - UFS host controller state + * @UFSHCD_STATE_RESET: Link is not operational. Postpone SCSI command + * processing. + * @UFSHCD_STATE_OPERATIONAL: The host controller is operational and can process + * SCSI commands. + * @UFSHCD_STATE_EH_SCHEDULED_NON_FATAL: The error handler has been scheduled. + * SCSI commands may be submitted to the controller. + * @UFSHCD_STATE_EH_SCHEDULED_FATAL: The error handler has been scheduled. Fail + * newly submitted SCSI commands with error code DID_BAD_TARGET. + * @UFSHCD_STATE_ERROR: An unrecoverable error occurred, e.g. link recovery + * failed. Fail all SCSI commands with error code DID_ERROR. + */ +enum ufshcd_state { + UFSHCD_STATE_RESET, + UFSHCD_STATE_OPERATIONAL, + UFSHCD_STATE_EH_SCHEDULED_NON_FATAL, + UFSHCD_STATE_EH_SCHEDULED_FATAL, + UFSHCD_STATE_ERROR, +}; + enum ufshcd_quirks { /* Interrupt aggregation support is broken */ UFSHCD_QUIRK_BROKEN_INTR_AGGR = 1 << 0, @@ -585,9 +611,9 @@ enum ufshcd_quirks { /* * This quirk needs to be enabled if the host controller cannot - * support interface configuration. + * support physical host configuration. */ - UFSHCD_QUIRK_SKIP_INTERFACE_CONFIGURATION = 1 << 16, + UFSHCD_QUIRK_SKIP_PH_CONFIGURATION = 1 << 16, /* * This quirk needs to be enabled if the host controller supports inline @@ -668,6 +694,20 @@ enum ufshcd_caps { * This would increase power savings. */ UFSHCD_CAP_AGGR_POWER_COLLAPSE = 1 << 9, + + /* + * This capability allows the host controller driver to use DeepSleep, + * if it is supported by the UFS device. The host controller driver must + * support device hardware reset via the hba->device_reset() callback, + * in order to exit DeepSleep state. + */ + UFSHCD_CAP_DEEPSLEEP = 1 << 10, + + /* + * This capability allows the host controller driver to use temperature + * notification if it is supported by the UFS device. + */ + UFSHCD_CAP_TEMP_NOTIF = 1 << 11, }; struct ufs_hba_variant_params { @@ -733,23 +773,24 @@ struct ufs_hba_monitor { * @host: Scsi_Host instance of the driver * @dev: device handle * @lrb: local reference block - * @cmd_queue: Used to allocate command tags from hba->host->tag_set. * @outstanding_tasks: Bits representing outstanding task requests + * @outstanding_lock: Protects @outstanding_reqs. * @outstanding_reqs: Bits representing outstanding transfer requests * @capabilities: UFS Controller Capabilities * @nutrs: Transfer Request Queue depth supported by controller * @nutmrs: Task Management Queue depth supported by controller + * @reserved_slot: Used to submit device commands. Protected by @dev_cmd.lock. * @ufs_version: UFS Version to which controller complies * @vops: pointer to variant specific operations * @priv: pointer to variant specific private data * @sg_entry_size: size of struct ufshcd_sg_entry (may include variant fields) * @irq: Irq number of the controller * @active_uic_cmd: handle of active UIC command - * @uic_cmd_mutex: mutex for uic command + * @uic_cmd_mutex: mutex for UIC command * @tmf_tag_set: TMF tag set. * @tmf_queue: Used to allocate TMF tags. * @pwr_done: completion for power mode change - * @ufshcd_state: UFSHCD states + * @ufshcd_state: UFSHCD state * @eh_flags: Error handling flags * @intr_mask: Interrupt Mask Bits * @ee_ctrl_mask: Exception event control mask @@ -773,6 +814,7 @@ struct ufs_hba_monitor { * @clk_list_head: UFS host controller clocks list node head * @pwr_info: holds current power mode * @max_pwr_info: keeps the device max valid pwm + * @clk_scaling_lock: used to serialize device commands and clock scaling * @desc_size: descriptor sizes reported by device * @urgent_bkops_lvl: keeps track of urgent bkops level for device * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for @@ -798,13 +840,15 @@ struct ufs_hba { struct Scsi_Host *host; struct device *dev; - struct request_queue *cmd_queue; /* * This field is to keep a reference to "scsi_device" corresponding to * "UFS device" W-LU. */ struct scsi_device *sdev_ufs_device; - struct scsi_device *sdev_rpmb; + +#ifdef CONFIG_SCSI_UFS_HWMON + struct device *hwmon_device; +#endif enum ufs_dev_pwr_mode curr_dev_pwr_mode; enum uic_link_state uic_link_state; @@ -822,11 +866,13 @@ struct ufs_hba { struct ufshcd_lrb *lrb; unsigned long outstanding_tasks; + spinlock_t outstanding_lock; unsigned long outstanding_reqs; u32 capabilities; int nutrs; int nutmrs; + u32 reserved_slot; u32 ufs_version; const struct ufs_hba_variant_ops *vops; struct ufs_hba_variant_params *vps; @@ -843,15 +889,19 @@ struct ufs_hba { struct blk_mq_tag_set tmf_tag_set; struct request_queue *tmf_queue; + struct request **tmf_rqs; struct uic_command *active_uic_cmd; struct mutex uic_cmd_mutex; struct completion *uic_async_done; - u32 ufshcd_state; + enum ufshcd_state ufshcd_state; u32 eh_flags; u32 intr_mask; - u16 ee_ctrl_mask; + u16 ee_ctrl_mask; /* Exception event mask */ + u16 ee_drv_mask; /* Exception event mask for driver */ + u16 ee_usr_mask; /* Exception event mask for user (via debugfs) */ + struct mutex ee_ctrl_mutex; bool is_powered; bool shutting_down; struct semaphore host_sem; @@ -874,6 +924,7 @@ struct ufs_hba { /* Device management request data */ struct ufs_dev_cmd dev_cmd; ktime_t last_dme_cmd_tstamp; + int nop_out_timeout; /* Keeps information of the UFS device connected to this host */ struct ufs_dev_info dev_info; @@ -881,8 +932,6 @@ struct ufs_hba { struct ufs_vreg_info vreg_info; struct list_head clk_list_head; - bool wlun_dev_clr_ua; - /* Number of requests aborts */ int req_abort_count; @@ -908,8 +957,6 @@ struct ufs_hba { struct device bsg_dev; struct request_queue *bsg_queue; - bool wb_buf_flush_enabled; - bool wb_enabled; struct delayed_work rpm_dev_flush_recheck_work; #ifdef CONFIG_SCSI_UFS_HPB @@ -926,7 +973,11 @@ struct ufs_hba { #endif #ifdef CONFIG_DEBUG_FS struct dentry *debugfs_root; + struct delayed_work debugfs_ee_work; + u32 debugfs_ee_rate_limit_ms; #endif + u32 luns_avail; + bool complete_put; ANDROID_KABI_RESERVE(1); ANDROID_KABI_RESERVE(2); @@ -958,16 +1009,8 @@ static inline bool ufshcd_is_rpm_autosuspend_allowed(struct ufs_hba *hba) static inline bool ufshcd_is_intr_aggr_allowed(struct ufs_hba *hba) { -/* DWC UFS Core has the Interrupt aggregation feature but is not detectable*/ -#ifndef CONFIG_SCSI_UFS_DWC - if ((hba->caps & UFSHCD_CAP_INTR_AGGR) && - !(hba->quirks & UFSHCD_QUIRK_BROKEN_INTR_AGGR)) - return true; - else - return false; -#else -return true; -#endif + return (hba->caps & UFSHCD_CAP_INTR_AGGR) && + !(hba->quirks & UFSHCD_QUIRK_BROKEN_INTR_AGGR); } static inline bool ufshcd_can_aggressive_pc(struct ufs_hba *hba) @@ -1022,10 +1065,11 @@ static inline void ufshcd_rmwl(struct ufs_hba *hba, u32 mask, u32 val, u32 reg) int ufshcd_alloc_host(struct device *, struct ufs_hba **); void ufshcd_dealloc_host(struct ufs_hba *); int ufshcd_hba_enable(struct ufs_hba *hba); -int ufshcd_init(struct ufs_hba * , void __iomem * , unsigned int); +int ufshcd_init(struct ufs_hba *, void __iomem *, unsigned int); int ufshcd_link_recovery(struct ufs_hba *hba); int ufshcd_make_hba_operational(struct ufs_hba *hba); void ufshcd_remove(struct ufs_hba *); +int ufshcd_uic_hibern8_enter(struct ufs_hba *hba); int ufshcd_uic_hibern8_exit(struct ufs_hba *hba); void ufshcd_delay_us(unsigned long us, unsigned long tolerance); int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask, @@ -1034,6 +1078,7 @@ int ufshcd_wait_for_register(struct ufs_hba *hba, u32 reg, u32 mask, void ufshcd_parse_dev_ref_clk_freq(struct ufs_hba *hba, struct clk *refclk); void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val); void ufshcd_hba_stop(struct ufs_hba *hba); +void ufshcd_schedule_eh_work(struct ufs_hba *hba); static inline void check_upiu_size(void) { @@ -1069,17 +1114,33 @@ static inline bool ufshcd_keep_autobkops_enabled_except_suspend( static inline u8 ufshcd_wb_get_query_index(struct ufs_hba *hba) { - if (hba->dev_info.b_wb_buffer_type == WB_BUF_MODE_LU_DEDICATED) + if (hba->dev_info.wb_buffer_type == WB_BUF_MODE_LU_DEDICATED) return hba->dev_info.wb_dedicated_lu; return 0; } -extern int ufshcd_runtime_suspend(struct ufs_hba *hba); -extern int ufshcd_runtime_resume(struct ufs_hba *hba); -extern int ufshcd_runtime_idle(struct ufs_hba *hba); -extern int ufshcd_system_suspend(struct ufs_hba *hba); -extern int ufshcd_system_resume(struct ufs_hba *hba); +#ifdef CONFIG_SCSI_UFS_HWMON +void ufs_hwmon_probe(struct ufs_hba *hba, u8 mask); +void ufs_hwmon_remove(struct ufs_hba *hba); +void ufs_hwmon_notify_event(struct ufs_hba *hba, u8 ee_mask); +#else +static inline void ufs_hwmon_probe(struct ufs_hba *hba, u8 mask) {} +static inline void ufs_hwmon_remove(struct ufs_hba *hba) {} +static inline void ufs_hwmon_notify_event(struct ufs_hba *hba, u8 ee_mask) {} +#endif + +#ifdef CONFIG_PM +extern int ufshcd_runtime_suspend(struct device *dev); +extern int ufshcd_runtime_resume(struct device *dev); +#endif +#ifdef CONFIG_PM_SLEEP +extern int ufshcd_system_suspend(struct device *dev); +extern int ufshcd_system_resume(struct device *dev); +#endif extern int ufshcd_shutdown(struct ufs_hba *hba); +extern int ufshcd_dme_configure_adapt(struct ufs_hba *hba, + int agreed_gear, + int adapt_val); extern int ufshcd_dme_set_attr(struct ufs_hba *hba, u32 attr_sel, u8 attr_set, u32 mib_val, u8 peer); extern int ufshcd_dme_get_attr(struct ufs_hba *hba, u32 attr_sel, @@ -1158,11 +1219,11 @@ int ufshcd_read_desc_param(struct ufs_hba *hba, u8 param_offset, u8 *param_read_buf, u8 param_size); +int ufshcd_query_attr_retry(struct ufs_hba *hba, enum query_opcode opcode, + enum attr_idn idn, u8 index, u8 selector, + u32 *attr_val); int ufshcd_query_attr(struct ufs_hba *hba, enum query_opcode opcode, enum attr_idn idn, u8 index, u8 selector, u32 *attr_val); -int ufshcd_query_attr_retry(struct ufs_hba *hba, - enum query_opcode opcode, enum attr_idn idn, u8 index, u8 selector, - u32 *attr_val); int ufshcd_query_flag(struct ufs_hba *hba, enum query_opcode opcode, enum flag_idn idn, u8 index, bool *flag_res); int ufshcd_query_flag_retry(struct ufs_hba *hba, @@ -1180,6 +1241,9 @@ int ufshcd_read_string_desc(struct ufs_hba *hba, u8 desc_index, int ufshcd_hold(struct ufs_hba *hba, bool async); void ufshcd_release(struct ufs_hba *hba); +int ufshcd_freeze_scsi_devs(struct ufs_hba *hba, u64 timeout_us); +void ufshcd_unfreeze_scsi_devs(struct ufs_hba *hba); + void ufshcd_map_desc_id_to_length(struct ufs_hba *hba, enum desc_idn desc_id, int *desc_length); @@ -1194,6 +1258,11 @@ int ufshcd_exec_raw_upiu_cmd(struct ufs_hba *hba, u8 *desc_buff, int *buff_len, enum query_opcode desc_op); +int ufshcd_wb_toggle(struct ufs_hba *hba, bool enable); +int ufshcd_suspend_prepare(struct device *dev); +int __ufshcd_suspend_prepare(struct device *dev, bool rpm_ok_for_spm); +void ufshcd_resume_complete(struct device *dev); + /* Wrapper functions for safely calling variant operations */ static inline const char *ufshcd_get_var_name(struct ufs_hba *hba) { @@ -1224,11 +1293,6 @@ static inline u32 ufshcd_vops_get_ufs_hci_version(struct ufs_hba *hba) return ufshcd_readl(hba, REG_UFS_VERSION); } -static inline bool ufshcd_has_utrlcnr(struct ufs_hba *hba) -{ - return (hba->ufs_version >= ufshci_version(3, 0)); -} - static inline int ufshcd_vops_clk_scale_notify(struct ufs_hba *hba, bool up, enum ufs_notify_change_status status) { @@ -1253,14 +1317,6 @@ static inline int ufshcd_vops_setup_clocks(struct ufs_hba *hba, bool on, return 0; } -static inline int ufshcd_vops_setup_regulators(struct ufs_hba *hba, bool status) -{ - if (hba->vops && hba->vops->setup_regulators) - return hba->vops->setup_regulators(hba, status); - - return 0; -} - static inline int ufshcd_vops_hce_enable_notify(struct ufs_hba *hba, bool status) { @@ -1278,8 +1334,16 @@ static inline int ufshcd_vops_link_startup_notify(struct ufs_hba *hba, return 0; } +static inline int ufshcd_vops_phy_initialization(struct ufs_hba *hba) +{ + if (hba->vops && hba->vops->phy_initialization) + return hba->vops->phy_initialization(hba); + + return 0; +} + static inline int ufshcd_vops_pwr_change_notify(struct ufs_hba *hba, - bool status, + enum ufs_notify_change_status status, struct ufs_pa_layer_attr *dev_max_params, struct ufs_pa_layer_attr *dev_req_params) { @@ -1290,18 +1354,6 @@ static inline int ufshcd_vops_pwr_change_notify(struct ufs_hba *hba, return -ENOTSUPP; } -static inline void ufshcd_vops_setup_xfer_req(struct ufs_hba *hba, int tag, - bool is_scsi_cmd) -{ - if (hba->vops && hba->vops->setup_xfer_req) { - unsigned long flags; - - spin_lock_irqsave(hba->host->host_lock, flags); - hba->vops->setup_xfer_req(hba, tag, is_scsi_cmd); - spin_unlock_irqrestore(hba->host->host_lock, flags); - } -} - static inline void ufshcd_vops_setup_task_mgmt(struct ufs_hba *hba, int tag, u8 tm_function) { @@ -1330,10 +1382,11 @@ static inline void ufshcd_vops_fixup_dev_quirks(struct ufs_hba *hba) hba->vops->fixup_dev_quirks(hba); } -static inline int ufshcd_vops_suspend(struct ufs_hba *hba, enum ufs_pm_op op) +static inline int ufshcd_vops_suspend(struct ufs_hba *hba, enum ufs_pm_op op, + enum ufs_notify_change_status status) { if (hba->vops && hba->vops->suspend) - return hba->vops->suspend(hba, op); + return hba->vops->suspend(hba, op, status); return 0; } @@ -1352,21 +1405,12 @@ static inline void ufshcd_vops_dbg_register_dump(struct ufs_hba *hba) hba->vops->dbg_register_dump(hba); } -static inline void ufshcd_vops_device_reset(struct ufs_hba *hba) +static inline int ufshcd_vops_device_reset(struct ufs_hba *hba) { - if (hba->vops && hba->vops->device_reset) { - int err = hba->vops->device_reset(hba); - - if (!err) { - ufshcd_set_ufs_dev_active(hba); - if (ufshcd_is_wb_allowed(hba)) { - hba->wb_enabled = false; - hba->wb_buf_flush_enabled = false; - } - } - if (err != -EOPNOTSUPP) - ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err); - } + if (hba->vops && hba->vops->device_reset) + return hba->vops->device_reset(hba); + + return -EOPNOTSUPP; } static inline void ufshcd_vops_config_scaling_param(struct ufs_hba *hba, @@ -1396,6 +1440,49 @@ static inline u8 ufshcd_scsi_to_upiu_lun(unsigned int scsi_lun) int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len, const char *prefix); -int ufshcd_uic_hibern8_enter(struct ufs_hba *hba); -int ufshcd_uic_hibern8_exit(struct ufs_hba *hba); + +int __ufshcd_write_ee_control(struct ufs_hba *hba, u32 ee_ctrl_mask); +int ufshcd_write_ee_control(struct ufs_hba *hba); +int ufshcd_update_ee_control(struct ufs_hba *hba, u16 *mask, u16 *other_mask, + u16 set, u16 clr); + +static inline int ufshcd_update_ee_drv_mask(struct ufs_hba *hba, + u16 set, u16 clr) +{ + return ufshcd_update_ee_control(hba, &hba->ee_drv_mask, + &hba->ee_usr_mask, set, clr); +} + +static inline int ufshcd_update_ee_usr_mask(struct ufs_hba *hba, + u16 set, u16 clr) +{ + return ufshcd_update_ee_control(hba, &hba->ee_usr_mask, + &hba->ee_drv_mask, set, clr); +} + +static inline int ufshcd_rpm_get_sync(struct ufs_hba *hba) +{ + return pm_runtime_get_sync(&hba->sdev_ufs_device->sdev_gendev); +} + +static inline int ufshcd_rpm_put_sync(struct ufs_hba *hba) +{ + return pm_runtime_put_sync(&hba->sdev_ufs_device->sdev_gendev); +} + +static inline void ufshcd_rpm_get_noresume(struct ufs_hba *hba) +{ + pm_runtime_get_noresume(&hba->sdev_ufs_device->sdev_gendev); +} + +static inline int ufshcd_rpm_resume(struct ufs_hba *hba) +{ + return pm_runtime_resume(&hba->sdev_ufs_device->sdev_gendev); +} + +static inline int ufshcd_rpm_put(struct ufs_hba *hba) +{ + return pm_runtime_put(&hba->sdev_ufs_device->sdev_gendev); +} + #endif /* End of Header */ diff --git a/drivers/scsi/ufs/ufshci.h b/drivers/scsi/ufs/ufshci.h index 757727dbab815c50c2c165dde2b59d2dfe3a4a91..745e1560919e9d754d998e2ce7cbe35e0886d694 100644 --- a/drivers/scsi/ufs/ufshci.h +++ b/drivers/scsi/ufs/ufshci.h @@ -39,7 +39,6 @@ enum { REG_UTP_TRANSFER_REQ_DOOR_BELL = 0x58, REG_UTP_TRANSFER_REQ_LIST_CLEAR = 0x5C, REG_UTP_TRANSFER_REQ_LIST_RUN_STOP = 0x60, - REG_UTP_TRANSFER_REQ_LIST_COMPL = 0x64, REG_UTP_TASK_REQ_LIST_BASE_L = 0x70, REG_UTP_TASK_REQ_LIST_BASE_H = 0x74, REG_UTP_TASK_REQ_DOOR_BELL = 0x78, @@ -143,7 +142,8 @@ static inline u32 ufshci_version(u32 major, u32 minor) #define INT_FATAL_ERRORS (DEVICE_FATAL_ERROR |\ CONTROLLER_FATAL_ERROR |\ SYSTEM_BUS_FATAL_ERROR |\ - CRYPTO_ENGINE_FATAL_ERROR) + CRYPTO_ENGINE_FATAL_ERROR |\ + UIC_LINK_LOST) /* HCS - Host Controller Status 30h */ #define DEVICE_PRESENT 0x1 @@ -390,7 +390,7 @@ enum { }; /* Overall command status values */ -enum { +enum utp_ocs { OCS_SUCCESS = 0x0, OCS_INVALID_CMD_TABLE_ATTR = 0x1, OCS_INVALID_PRDT_ATTR = 0x2, @@ -403,6 +403,9 @@ enum { OCS_INVALID_CRYPTO_CONFIG = 0x9, OCS_GENERAL_CRYPTO_ERROR = 0xA, OCS_INVALID_COMMAND_STATUS = 0x0F, +}; + +enum { MASK_OCS = 0x0F, }; @@ -413,14 +416,12 @@ enum { /** * struct ufshcd_sg_entry - UFSHCI PRD Entry - * @base_addr: Lower 32bit physical address DW-0 - * @upper_addr: Upper 32bit physical address DW-1 + * @addr: Physical address; DW-0 and DW-1. * @reserved: Reserved for future use DW-2 * @size: size of physical segment DW-3 */ struct ufshcd_sg_entry { - __le32 base_addr; - __le32 upper_addr; + __le64 addr; __le32 reserved; __le32 size; /* @@ -430,7 +431,7 @@ struct ufshcd_sg_entry { }; /** - * struct utp_transfer_cmd_desc - UFS Command Descriptor structure + * struct utp_transfer_cmd_desc - UTP Command Descriptor (UCD) * @command_upiu: Command UPIU Frame address * @response_upiu: Response UPIU Frame address * @prd_table: Physical Region Descriptor: an array of SG_ALL struct @@ -460,7 +461,7 @@ struct request_desc_header { }; /** - * struct utp_transfer_req_desc - UTRD structure + * struct utp_transfer_req_desc - UTP Transfer Request Descriptor (UTRD) * @header: UTRD header DW-0 to DW-3 * @command_desc_base_addr_lo: UCD base address low DW-4 * @command_desc_base_addr_hi: UCD base address high DW-5 @@ -495,17 +496,21 @@ struct utp_task_req_desc { struct request_desc_header header; /* DW 4-11 - Task request UPIU structure */ - struct utp_upiu_header req_header; - __be32 input_param1; - __be32 input_param2; - __be32 input_param3; - __be32 __reserved1[2]; + struct { + struct utp_upiu_header req_header; + __be32 input_param1; + __be32 input_param2; + __be32 input_param3; + __be32 __reserved1[2]; + } upiu_req; /* DW 12-19 - Task Management Response UPIU structure */ - struct utp_upiu_header rsp_header; - __be32 output_param1; - __be32 output_param2; - __be32 __reserved2[3]; + struct { + struct utp_upiu_header rsp_header; + __be32 output_param1; + __be32 output_param2; + __be32 __reserved2[3]; + } upiu_rsp; }; #endif /* End of Header */ diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c index cb4a34ae5be105db7d9859db5f310a6ede831c0e..63078ce2a70e524851c708aaf946e0978e7ae23e 100644 --- a/drivers/scsi/ufs/ufshpb.c +++ b/drivers/scsi/ufs/ufshpb.c @@ -10,7 +10,6 @@ */ #include -#include #include "ufshcd.h" #include "ufshpb.h" @@ -70,13 +69,13 @@ static int ufshpb_is_valid_srgn(struct ufshpb_region *rgn, static bool ufshpb_is_read_cmd(struct scsi_cmnd *cmd) { - return req_op(cmd->request) == REQ_OP_READ; + return req_op(scsi_cmd_to_rq(cmd)) == REQ_OP_READ; } static bool ufshpb_is_write_or_discard(struct scsi_cmnd *cmd) { - return op_is_write(req_op(cmd->request)) || - op_is_discard(req_op(cmd->request)); + return op_is_write(req_op(scsi_cmd_to_rq(cmd))) || + op_is_discard(req_op(scsi_cmd_to_rq(cmd))); } static bool ufshpb_is_supported_chunk(struct ufshpb_lu *hpb, int transfer_len) @@ -84,16 +83,6 @@ static bool ufshpb_is_supported_chunk(struct ufshpb_lu *hpb, int transfer_len) return transfer_len <= hpb->pre_req_max_tr_len; } -/* - * In this driver, WRITE_BUFFER CMD support 36KB (len=9) ~ 1MB (len=256) as - * default. It is possible to change range of transfer_len through sysfs. - */ -static inline bool ufshpb_is_required_wb(struct ufshpb_lu *hpb, int len) -{ - return len > hpb->pre_req_min_tr_len && - len <= hpb->pre_req_max_tr_len; -} - static bool ufshpb_is_general_lun(int lun) { return lun < UFS_UPIU_MAX_UNIT_NUM_ID; @@ -323,266 +312,24 @@ ufshpb_get_pos_from_lpn(struct ufshpb_lu *hpb, unsigned long lpn, int *rgn_idx, } static void -ufshpb_set_hpb_read_to_upiu(struct ufshpb_lu *hpb, struct ufshcd_lrb *lrbp, - u32 lpn, __be64 ppn, u8 transfer_len, int read_id) +ufshpb_set_hpb_read_to_upiu(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, + __be64 ppn, u8 transfer_len) { unsigned char *cdb = lrbp->cmd->cmnd; - + __be64 ppn_tmp = ppn; cdb[0] = UFSHPB_READ; + if (hba->dev_quirks & UFS_DEVICE_QUIRK_SWAP_L2P_ENTRY_FOR_HPB_READ) + ppn_tmp = (__force __be64)swab64((__force u64)ppn); + /* ppn value is stored as big-endian in the host memory */ - memcpy(&cdb[6], &ppn, sizeof(__be64)); + memcpy(&cdb[6], &ppn_tmp, sizeof(__be64)); cdb[14] = transfer_len; - cdb[15] = read_id; + cdb[15] = 0; lrbp->cmd->cmd_len = UFS_CDB_SIZE; } -static inline void ufshpb_set_write_buf_cmd(unsigned char *cdb, - unsigned long lpn, unsigned int len, - int read_id) -{ - cdb[0] = UFSHPB_WRITE_BUFFER; - cdb[1] = UFSHPB_WRITE_BUFFER_PREFETCH_ID; - - put_unaligned_be32(lpn, &cdb[2]); - cdb[6] = read_id; - put_unaligned_be16(len * HPB_ENTRY_SIZE, &cdb[7]); - - cdb[9] = 0x00; /* Control = 0x00 */ -} - -static struct ufshpb_req *ufshpb_get_pre_req(struct ufshpb_lu *hpb) -{ - struct ufshpb_req *pre_req; - - if (hpb->num_inflight_pre_req >= hpb->throttle_pre_req) { - dev_info(&hpb->sdev_ufs_lu->sdev_dev, - "pre_req throttle. inflight %d throttle %d", - hpb->num_inflight_pre_req, hpb->throttle_pre_req); - return NULL; - } - - pre_req = list_first_entry_or_null(&hpb->lh_pre_req_free, - struct ufshpb_req, list_req); - if (!pre_req) { - dev_info(&hpb->sdev_ufs_lu->sdev_dev, "There is no pre_req"); - return NULL; - } - - list_del_init(&pre_req->list_req); - hpb->num_inflight_pre_req++; - - return pre_req; -} - -static inline void ufshpb_put_pre_req(struct ufshpb_lu *hpb, - struct ufshpb_req *pre_req) -{ - pre_req->req = NULL; - bio_reset(pre_req->bio); - list_add_tail(&pre_req->list_req, &hpb->lh_pre_req_free); - hpb->num_inflight_pre_req--; -} - -static void ufshpb_pre_req_compl_fn(struct request *req, blk_status_t error) -{ - struct ufshpb_req *pre_req = (struct ufshpb_req *)req->end_io_data; - struct ufshpb_lu *hpb = pre_req->hpb; - unsigned long flags; - - if (error) { - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); - struct scsi_sense_hdr sshdr; - - dev_err(&hpb->sdev_ufs_lu->sdev_dev, "block status %d", error); - scsi_command_normalize_sense(cmd, &sshdr); - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "code %x sense_key %x asc %x ascq %x", - sshdr.response_code, - sshdr.sense_key, sshdr.asc, sshdr.ascq); - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "byte4 %x byte5 %x byte6 %x additional_len %x", - sshdr.byte4, sshdr.byte5, - sshdr.byte6, sshdr.additional_length); - } - - blk_mq_free_request(req); - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - ufshpb_put_pre_req(pre_req->hpb, pre_req); - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); -} - -static int ufshpb_prep_entry(struct ufshpb_req *pre_req, struct page *page) -{ - struct ufshpb_lu *hpb = pre_req->hpb; - struct ufshpb_region *rgn; - struct ufshpb_subregion *srgn; - __be64 *addr; - int offset = 0; - int copied; - unsigned long lpn = pre_req->wb.lpn; - int rgn_idx, srgn_idx, srgn_offset; - unsigned long flags; - - addr = page_address(page); - ufshpb_get_pos_from_lpn(hpb, lpn, &rgn_idx, &srgn_idx, &srgn_offset); - - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - -next_offset: - rgn = hpb->rgn_tbl + rgn_idx; - srgn = rgn->srgn_tbl + srgn_idx; - - if (!ufshpb_is_valid_srgn(rgn, srgn)) - goto mctx_error; - - if (!srgn->mctx) - goto mctx_error; - - copied = ufshpb_fill_ppn_from_page(hpb, srgn->mctx, srgn_offset, - pre_req->wb.len - offset, - &addr[offset]); - - if (copied < 0) - goto mctx_error; - - offset += copied; - srgn_offset += copied; - - if (srgn_offset == hpb->entries_per_srgn) { - srgn_offset = 0; - - if (++srgn_idx == hpb->srgns_per_rgn) { - srgn_idx = 0; - rgn_idx++; - } - } - - if (offset < pre_req->wb.len) - goto next_offset; - - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - return 0; -mctx_error: - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - return -ENOMEM; -} - -static int ufshpb_pre_req_add_bio_page(struct ufshpb_lu *hpb, - struct request_queue *q, - struct ufshpb_req *pre_req) -{ - struct page *page = pre_req->wb.m_page; - struct bio *bio = pre_req->bio; - int entries_bytes, ret; - - if (!page) - return -ENOMEM; - - if (ufshpb_prep_entry(pre_req, page)) - return -ENOMEM; - - entries_bytes = pre_req->wb.len * sizeof(__be64); - - ret = bio_add_pc_page(q, bio, page, entries_bytes, 0); - if (ret != entries_bytes) { - dev_err(&hpb->sdev_ufs_lu->sdev_dev, - "bio_add_pc_page fail: %d", ret); - return -ENOMEM; - } - return 0; -} - -static inline int ufshpb_get_read_id(struct ufshpb_lu *hpb) -{ - if (++hpb->cur_read_id >= MAX_HPB_READ_ID) - hpb->cur_read_id = 1; - return hpb->cur_read_id; -} - -static int ufshpb_execute_pre_req(struct ufshpb_lu *hpb, struct scsi_cmnd *cmd, - struct ufshpb_req *pre_req, int read_id) -{ - struct scsi_device *sdev = cmd->device; - struct request_queue *q = sdev->request_queue; - struct request *req; - struct scsi_request *rq; - struct bio *bio = pre_req->bio; - - pre_req->hpb = hpb; - pre_req->wb.lpn = sectors_to_logical(cmd->device, - blk_rq_pos(cmd->request)); - pre_req->wb.len = sectors_to_logical(cmd->device, - blk_rq_sectors(cmd->request)); - if (ufshpb_pre_req_add_bio_page(hpb, q, pre_req)) - return -ENOMEM; - - req = pre_req->req; - - /* 1. request setup */ - blk_rq_append_bio(req, &bio); - req->rq_disk = NULL; - req->end_io_data = (void *)pre_req; - req->end_io = ufshpb_pre_req_compl_fn; - - /* 2. scsi_request setup */ - rq = scsi_req(req); - rq->retries = 1; - - ufshpb_set_write_buf_cmd(rq->cmd, pre_req->wb.lpn, pre_req->wb.len, - read_id); - rq->cmd_len = scsi_command_size(rq->cmd); - - if (blk_insert_cloned_request(q, req) != BLK_STS_OK) - return -EAGAIN; - - hpb->stats.pre_req_cnt++; - - return 0; -} - -static int ufshpb_issue_pre_req(struct ufshpb_lu *hpb, struct scsi_cmnd *cmd, - int *read_id) -{ - struct ufshpb_req *pre_req; - struct request *req = NULL; - unsigned long flags; - int _read_id; - int ret = 0; - - req = blk_get_request(cmd->device->request_queue, - REQ_OP_SCSI_OUT | REQ_SYNC, BLK_MQ_REQ_NOWAIT); - if (IS_ERR(req)) - return -EAGAIN; - - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - pre_req = ufshpb_get_pre_req(hpb); - if (!pre_req) { - ret = -EAGAIN; - goto unlock_out; - } - _read_id = ufshpb_get_read_id(hpb); - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - - pre_req->req = req; - - ret = ufshpb_execute_pre_req(hpb, cmd, pre_req, _read_id); - if (ret) - goto free_pre_req; - - *read_id = _read_id; - - return ret; -free_pre_req: - spin_lock_irqsave(&hpb->rgn_state_lock, flags); - ufshpb_put_pre_req(hpb, pre_req); -unlock_out: - spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); - blk_put_request(req); - return ret; -} - /* * This function will set up HPB read command using host-side L2P map data. */ @@ -596,7 +343,6 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) __be64 ppn; unsigned long flags; int transfer_len, rgn_idx, srgn_idx, srgn_offset; - int read_id = 0; int err = 0; hpb = ufshpb_get_hpb_data(cmd->device); @@ -612,17 +358,17 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) return -ENODEV; } - if (blk_rq_is_scsi(cmd->request) || + if (blk_rq_is_scsi(scsi_cmd_to_rq(cmd)) || (!ufshpb_is_write_or_discard(cmd) && !ufshpb_is_read_cmd(cmd))) return 0; transfer_len = sectors_to_logical(cmd->device, - blk_rq_sectors(cmd->request)); + blk_rq_sectors(scsi_cmd_to_rq(cmd))); if (unlikely(!transfer_len)) return 0; - lpn = sectors_to_logical(cmd->device, blk_rq_pos(cmd->request)); + lpn = sectors_to_logical(cmd->device, blk_rq_pos(scsi_cmd_to_rq(cmd))); ufshpb_get_pos_from_lpn(hpb, lpn, &rgn_idx, &srgn_idx, &srgn_offset); rgn = hpb->rgn_tbl + rgn_idx; srgn = rgn->srgn_tbl + srgn_idx; @@ -637,8 +383,6 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) if (!ufshpb_is_supported_chunk(hpb, transfer_len)) return 0; - WARN_ON_ONCE(transfer_len > HPB_MULTI_CHUNK_HIGH); - if (hpb->is_hcm) { /* * in host control mode, reads are the main source for @@ -673,24 +417,7 @@ int ufshpb_prep(struct ufs_hba *hba, struct ufshcd_lrb *lrbp) return err; } - if (!ufshpb_is_legacy(hba) && - ufshpb_is_required_wb(hpb, transfer_len)) { - err = ufshpb_issue_pre_req(hpb, cmd, &read_id); - if (err) { - unsigned long timeout; - - timeout = cmd->jiffies_at_alloc + msecs_to_jiffies( - hpb->params.requeue_timeout_ms); - - if (time_before(jiffies, timeout)) - return -EAGAIN; - - hpb->stats.miss_cnt++; - return 0; - } - } - - ufshpb_set_hpb_read_to_upiu(hpb, lrbp, lpn, ppn, transfer_len, read_id); + ufshpb_set_hpb_read_to_upiu(hba, lrbp, ppn, transfer_len); hpb->stats.hit_cnt++; return 0; @@ -1818,16 +1545,11 @@ static void ufshpb_lu_parameter_init(struct ufs_hba *hba, u32 entries_per_rgn; u64 rgn_mem_size, tmp; - /* for pre_req */ - hpb->pre_req_min_tr_len = hpb_dev_info->max_hpb_single_cmd + 1; - if (ufshpb_is_legacy(hba)) hpb->pre_req_max_tr_len = HPB_LEGACY_CHUNK_HIGH; else - hpb->pre_req_max_tr_len = HPB_MULTI_CHUNK_HIGH; - + hpb->pre_req_max_tr_len = hpb_dev_info->max_hpb_single_cmd; - hpb->cur_read_id = 0; hpb->lu_pinned_start = hpb_lu_info->pinned_start; hpb->lu_pinned_end = hpb_lu_info->num_pinned ? @@ -2838,7 +2560,7 @@ void ufshpb_get_dev_info(struct ufs_hba *hba, u8 *desc_buf) { struct ufshpb_dev_info *hpb_dev_info = &hba->ufshpb_dev; int version, ret; - u32 max_hpb_single_cmd = HPB_MULTI_CHUNK_LOW; + int max_single_cmd; hpb_dev_info->control_mode = desc_buf[DEVICE_DESC_PARAM_HPB_CONTROL]; @@ -2854,21 +2576,24 @@ void ufshpb_get_dev_info(struct ufs_hba *hba, u8 *desc_buf) if (version == HPB_SUPPORT_LEGACY_VERSION) hpb_dev_info->is_legacy = true; - pm_runtime_get_sync(hba->dev); - ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, - QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD, 0, 0, &max_hpb_single_cmd); - pm_runtime_put_sync(hba->dev); - - if (ret) - dev_err(hba->dev, "%s: idn: read max size of single hpb cmd query request failed", - __func__); - hpb_dev_info->max_hpb_single_cmd = max_hpb_single_cmd; - /* * Get the number of user logical unit to check whether all * scsi_device finish initialization */ hpb_dev_info->num_lu = desc_buf[DEVICE_DESC_PARAM_NUM_LU]; + + if (hpb_dev_info->is_legacy) + return; + + pm_runtime_get_sync(hba->dev); + ret = ufshcd_query_attr_retry(hba, UPIU_QUERY_OPCODE_READ_ATTR, + QUERY_ATTR_IDN_MAX_HPB_SINGLE_CMD, 0, 0, &max_single_cmd); + pm_runtime_put_sync(hba->dev); + + if (ret) + hpb_dev_info->max_hpb_single_cmd = HPB_LEGACY_CHUNK_HIGH; + else + hpb_dev_info->max_hpb_single_cmd = min(max_single_cmd + 1, HPB_MULTI_CHUNK_HIGH); } void ufshpb_init(struct ufs_hba *hba) diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h index c74a6c35a446064e8bff55be96875b7083b4f0cb..af7a9a3c5d759713430949e40d37786398ad25db 100644 --- a/drivers/scsi/ufs/ufshpb.h +++ b/drivers/scsi/ufs/ufshpb.h @@ -31,7 +31,6 @@ /* hpb support chunk size */ #define HPB_LEGACY_CHUNK_HIGH 1 -#define HPB_MULTI_CHUNK_LOW 7 #define HPB_MULTI_CHUNK_HIGH 256 /* hpb vender defined opcode */ @@ -239,8 +238,6 @@ struct ufshpb_lu { int throttle_pre_req; int num_inflight_map_req; struct list_head lh_pre_req_free; - int cur_read_id; - int pre_req_min_tr_len; int pre_req_max_tr_len; /* cached L2P map management worker */ diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index b9c86a7e3b97d6e5adea59063d89e2a2fe2fc014..0afba9454e9b66425c72906293b1edd7587c8a88 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -156,13 +156,11 @@ static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf) WARN_ON(virtio32_to_cpu(vscsi->vdev, resp->sense_len) > VIRTIO_SCSI_SENSE_SIZE); - if (sc->sense_buffer) { + if (resp->sense_len) { memcpy(sc->sense_buffer, resp->sense, min_t(u32, virtio32_to_cpu(vscsi->vdev, resp->sense_len), VIRTIO_SCSI_SENSE_SIZE)); - if (resp->sense_len) - set_driver_byte(sc, DRIVER_SENSE); } sc->scsi_done(sc); @@ -302,7 +300,7 @@ static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi, } break; default: - pr_info("Unsupport virtio scsi event reason %x\n", event->reason); + pr_info("Unsupported virtio scsi event reason %x\n", event->reason); } } @@ -394,7 +392,7 @@ static void virtscsi_handle_event(struct work_struct *work) virtscsi_handle_param_change(vscsi, event); break; default: - pr_err("Unsupport virtio scsi event %x\n", event->event); + pr_err("Unsupported virtio scsi event %x\n", event->event); } virtscsi_kick_event(vscsi, event_node); } diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index 1421b1394d816aec4635ef2db4887e6bc43341aa..7d51ff4672d758f1d6da8b68412ea70e81cec5d7 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -591,9 +591,12 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter, * Commands like INQUIRY may transfer less data than * requested by the initiator via bufflen. Set residual * count to make upper layer aware of the actual amount - * of data returned. + * of data returned. There are cases when controller + * returns zero dataLen with non zero data - do not set + * residual count in that case. */ - scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); + if (e->dataLen && (e->dataLen < scsi_bufflen(cmd))) + scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen); cmd->result = (DID_OK << 16); break; diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 259fc248d06cffa7278592d79cff2b6207987bda..a25c9386fdf785b3061a1c4a7550507c1010e076 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -233,12 +233,11 @@ static void scsifront_gnttab_done(struct vscsifrnt_info *info, return; for (i = 0; i < shadow->nr_grants; i++) { - if (unlikely(gnttab_query_foreign_access(shadow->gref[i]))) { + if (unlikely(!gnttab_try_end_foreign_access(shadow->gref[i]))) { shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME "grant still in use by backend\n"); BUG(); } - gnttab_end_foreign_access(shadow->gref[i], 0, 0UL); } kfree(shadow->sg); diff --git a/drivers/sh/maple/maple.c b/drivers/sh/maple/maple.c index e5d7fb81ad66568d64107c4b592e2b87be33a215..44a931d41a132159d9b058c16b3bf7b86af9c64b 100644 --- a/drivers/sh/maple/maple.c +++ b/drivers/sh/maple/maple.c @@ -835,8 +835,10 @@ static int __init maple_bus_init(void) maple_queue_cache = KMEM_CACHE(maple_buffer, SLAB_HWCACHE_ALIGN); - if (!maple_queue_cache) + if (!maple_queue_cache) { + retval = -ENOMEM; goto cleanup_bothirqs; + } INIT_LIST_HEAD(&maple_waitq); INIT_LIST_HEAD(&maple_sentq); @@ -849,6 +851,7 @@ static int __init maple_bus_init(void) if (!mdev[i]) { while (i-- > 0) maple_free_dev(mdev[i]); + retval = -ENOMEM; goto cleanup_cache; } baseunits[i] = mdev[i]; diff --git a/drivers/soc/aspeed/aspeed-lpc-ctrl.c b/drivers/soc/aspeed/aspeed-lpc-ctrl.c index 040c7dc1d479242d6843e6eb56fa0901b297c82c..71b555c715d2e68d8f08bafa2daddb870fdbc5e5 100644 --- a/drivers/soc/aspeed/aspeed-lpc-ctrl.c +++ b/drivers/soc/aspeed/aspeed-lpc-ctrl.c @@ -251,10 +251,9 @@ static int aspeed_lpc_ctrl_probe(struct platform_device *pdev) } lpc_ctrl->clk = devm_clk_get(dev, NULL); - if (IS_ERR(lpc_ctrl->clk)) { - dev_err(dev, "couldn't get clock\n"); - return PTR_ERR(lpc_ctrl->clk); - } + if (IS_ERR(lpc_ctrl->clk)) + return dev_err_probe(dev, PTR_ERR(lpc_ctrl->clk), + "couldn't get clock\n"); rc = clk_prepare_enable(lpc_ctrl->clk); if (rc) { dev_err(dev, "couldn't enable clock\n"); diff --git a/drivers/soc/fsl/dpaa2-console.c b/drivers/soc/fsl/dpaa2-console.c index 27243f706f3761c932f95eca8ec4fb9e19849bb6..53917410f2bdb9c4f9ea759c77bba79f0a04ef5e 100644 --- a/drivers/soc/fsl/dpaa2-console.c +++ b/drivers/soc/fsl/dpaa2-console.c @@ -231,6 +231,7 @@ static ssize_t dpaa2_console_read(struct file *fp, char __user *buf, cd->cur_ptr += bytes; written += bytes; + kfree(kbuf); return written; err_free_buf: diff --git a/drivers/soc/fsl/dpio/dpio-service.c b/drivers/soc/fsl/dpio/dpio-service.c index 7351f303055063c7a93cd1e1f1e3fb85297c5818..779c319a4b8200109ffc0f65bdd55836ad7fb480 100644 --- a/drivers/soc/fsl/dpio/dpio-service.c +++ b/drivers/soc/fsl/dpio/dpio-service.c @@ -59,7 +59,7 @@ static inline struct dpaa2_io *service_select_by_cpu(struct dpaa2_io *d, * potentially being migrated away. */ if (cpu < 0) - cpu = smp_processor_id(); + cpu = raw_smp_processor_id(); /* If a specific cpu was requested, pick it up immediately */ return dpio_by_cpu[cpu]; diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c index 659b4a570d5b59bbe68facabce9fff131e8b2866..3d69f56d9b9f4cff0cc848a43d77d764af79044f 100644 --- a/drivers/soc/fsl/dpio/qbman-portal.c +++ b/drivers/soc/fsl/dpio/qbman-portal.c @@ -732,8 +732,7 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, int i, num_enqueued = 0; unsigned long irq_flags; - spin_lock(&s->access_spinlock); - local_irq_save(irq_flags); + spin_lock_irqsave(&s->access_spinlock, irq_flags); half_mask = (s->eqcr.pi_ci_mask>>1); full_mask = s->eqcr.pi_ci_mask; @@ -744,8 +743,7 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size, eqcr_ci, s->eqcr.ci); if (!s->eqcr.available) { - local_irq_restore(irq_flags); - spin_unlock(&s->access_spinlock); + spin_unlock_irqrestore(&s->access_spinlock, irq_flags); return 0; } } @@ -784,8 +782,7 @@ int qbman_swp_enqueue_multiple_mem_back(struct qbman_swp *s, dma_wmb(); qbman_write_register(s, QBMAN_CINH_SWP_EQCR_PI, (QB_RT_BIT)|(s->eqcr.pi)|s->eqcr.pi_vb); - local_irq_restore(irq_flags); - spin_unlock(&s->access_spinlock); + spin_unlock_irqrestore(&s->access_spinlock, irq_flags); return num_enqueued; } diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c index 34810f9bb2ee781b3dbf96aa73326b2b5552a57f..091e94c04f3095ceb57a9c48b4c03960d1793850 100644 --- a/drivers/soc/fsl/guts.c +++ b/drivers/soc/fsl/guts.c @@ -28,7 +28,6 @@ struct fsl_soc_die_attr { static struct guts *guts; static struct soc_device_attribute soc_dev_attr; static struct soc_device *soc_dev; -static struct device_node *root; /* SoC die attribute definition for QorIQ platform */ @@ -138,7 +137,7 @@ static u32 fsl_guts_get_svr(void) static int fsl_guts_probe(struct platform_device *pdev) { - struct device_node *np = pdev->dev.of_node; + struct device_node *root, *np = pdev->dev.of_node; struct device *dev = &pdev->dev; struct resource *res; const struct fsl_soc_die_attr *soc_die; @@ -161,8 +160,14 @@ static int fsl_guts_probe(struct platform_device *pdev) root = of_find_node_by_path("/"); if (of_property_read_string(root, "model", &machine)) of_property_read_string_index(root, "compatible", 0, &machine); - if (machine) - soc_dev_attr.machine = machine; + if (machine) { + soc_dev_attr.machine = devm_kstrdup(dev, machine, GFP_KERNEL); + if (!soc_dev_attr.machine) { + of_node_put(root); + return -ENOMEM; + } + } + of_node_put(root); svr = fsl_guts_get_svr(); soc_die = fsl_soc_die_match(svr, fsl_soc_die); @@ -197,7 +202,6 @@ static int fsl_guts_probe(struct platform_device *pdev) static int fsl_guts_remove(struct platform_device *dev) { soc_device_unregister(soc_dev); - of_node_put(root); return 0; } diff --git a/drivers/soc/fsl/qe/qe_io.c b/drivers/soc/fsl/qe/qe_io.c index 11ea08e97db75e0e2d06a5504c661a5ace8bcab0..1bb46d955d52576bf0cfc7cf4ffe72e200df2915 100644 --- a/drivers/soc/fsl/qe/qe_io.c +++ b/drivers/soc/fsl/qe/qe_io.c @@ -35,6 +35,8 @@ int par_io_init(struct device_node *np) if (ret) return ret; par_io = ioremap(res.start, resource_size(&res)); + if (!par_io) + return -ENOMEM; if (!of_property_read_u32(np, "num-ports", &num_ports)) num_par_io_ports = num_ports; diff --git a/drivers/soc/imx/soc-imx.c b/drivers/soc/imx/soc-imx.c index 01bfea1cb64a86c2ccaa8871263754c1f4b2f4a3..1e8780299d5c48308c73745aa7a27be2b1cf9b3f 100644 --- a/drivers/soc/imx/soc-imx.c +++ b/drivers/soc/imx/soc-imx.c @@ -33,6 +33,10 @@ static int __init imx_soc_device_init(void) u32 val; int ret; + /* Return early if this is running on devices with different SoCs */ + if (!__mxc_cpu_type) + return 0; + if (of_machine_is_compatible("fsl,ls1021a")) return 0; diff --git a/drivers/soc/qcom/apr.c b/drivers/soc/qcom/apr.c index 7abfc8c4fdc7219403032631b643b13ad2828483..f736d208362c9a2e0cd6746058b35c8f4a77c047 100644 --- a/drivers/soc/qcom/apr.c +++ b/drivers/soc/qcom/apr.c @@ -323,12 +323,14 @@ static int of_apr_add_pd_lookups(struct device *dev) 1, &service_path); if (ret < 0) { dev_err(dev, "pdr service path missing: %d\n", ret); + of_node_put(node); return ret; } pds = pdr_add_lookup(apr->pdr, service_name, service_path); if (IS_ERR(pds) && PTR_ERR(pds) != -EALREADY) { dev_err(dev, "pdr add lookup failed: %ld\n", PTR_ERR(pds)); + of_node_put(node); return PTR_ERR(pds); } } diff --git a/drivers/soc/qcom/cpr.c b/drivers/soc/qcom/cpr.c index b24cc77d1889fb75a404cd100975ac97164f7750..6298561bc29c9dfd30739416c73e87585ae5d611 100644 --- a/drivers/soc/qcom/cpr.c +++ b/drivers/soc/qcom/cpr.c @@ -1043,7 +1043,7 @@ static int cpr_interpolate(const struct corner *corner, int step_volt, return corner->uV; temp = f_diff * (uV_high - uV_low); - do_div(temp, f_high - f_low); + temp = div64_ul(temp, f_high - f_low); /* * max_volt_scale has units of uV/MHz while freq values diff --git a/drivers/soc/qcom/mdt_loader.c b/drivers/soc/qcom/mdt_loader.c index eba7f76f9d61af329d45381ae7f624c2f5bbd878..6034cd8992b0ea4b5e1206f78a92d8f72bedc042 100644 --- a/drivers/soc/qcom/mdt_loader.c +++ b/drivers/soc/qcom/mdt_loader.c @@ -98,7 +98,7 @@ void *qcom_mdt_read_metadata(const struct firmware *fw, size_t *data_len) if (ehdr->e_phnum < 2) return ERR_PTR(-EINVAL); - if (phdrs[0].p_type == PT_LOAD || phdrs[1].p_type == PT_LOAD) + if (phdrs[0].p_type == PT_LOAD) return ERR_PTR(-EINVAL); if ((phdrs[1].p_flags & QCOM_MDT_TYPE_MASK) != QCOM_MDT_TYPE_HASH) diff --git a/drivers/soc/qcom/rpmhpd.c b/drivers/soc/qcom/rpmhpd.c index c8b584d0c8fb4f546644b5a5b5d7e585fc20e733..436ec79122ed234194236f61112cb4ee55f0508c 100644 --- a/drivers/soc/qcom/rpmhpd.c +++ b/drivers/soc/qcom/rpmhpd.c @@ -24,9 +24,13 @@ * struct rpmhpd - top level RPMh power domain resource data structure * @dev: rpmh power domain controller device * @pd: generic_pm_domain corrresponding to the power domain + * @parent: generic_pm_domain corrresponding to the parent's power domain * @peer: A peer power domain in case Active only Voting is * supported * @active_only: True if it represents an Active only peer + * @corner: current corner + * @active_corner: current active corner + * @enable_corner: lowest non-zero corner * @level: An array of level (vlvl) to corner (hlvl) mappings * derived from cmd-db * @level_count: Number of levels supported by the power domain. max @@ -44,6 +48,7 @@ struct rpmhpd { const bool active_only; unsigned int corner; unsigned int active_corner; + unsigned int enable_corner; u32 level[RPMH_ARC_MAX_LEVELS]; size_t level_count; bool enabled; @@ -292,13 +297,13 @@ static int rpmhpd_aggregate_corner(struct rpmhpd *pd, unsigned int corner) static int rpmhpd_power_on(struct generic_pm_domain *domain) { struct rpmhpd *pd = domain_to_rpmhpd(domain); - int ret = 0; + unsigned int corner; + int ret; mutex_lock(&rpmhpd_lock); - if (pd->corner) - ret = rpmhpd_aggregate_corner(pd, pd->corner); - + corner = max(pd->corner, pd->enable_corner); + ret = rpmhpd_aggregate_corner(pd, corner); if (!ret) pd->enabled = true; @@ -343,6 +348,10 @@ static int rpmhpd_set_performance_state(struct generic_pm_domain *domain, i--; if (pd->enabled) { + /* Ensure that the domain isn't turn off */ + if (i < pd->enable_corner) + i = pd->enable_corner; + ret = rpmhpd_aggregate_corner(pd, i); if (ret) goto out; @@ -379,6 +388,10 @@ static int rpmhpd_update_level_mapping(struct rpmhpd *rpmhpd) for (i = 0; i < rpmhpd->level_count; i++) { rpmhpd->level[i] = buf[i]; + /* Remember the first corner with non-zero level */ + if (!rpmhpd->level[rpmhpd->enable_corner] && rpmhpd->level[i]) + rpmhpd->enable_corner = i; + /* * The AUX data may be zero padded. These 0 valued entries at * the end of the map must be ignored. diff --git a/drivers/soc/qcom/socinfo.c b/drivers/soc/qcom/socinfo.c index e0620416e5743aa15b73307f4c03af304a70a77c..60c82dcaa8d1d50d0e5b5469a76f93350148f9c8 100644 --- a/drivers/soc/qcom/socinfo.c +++ b/drivers/soc/qcom/socinfo.c @@ -521,7 +521,7 @@ static int qcom_socinfo_probe(struct platform_device *pdev) /* Feed the soc specific unique data into entropy pool */ add_device_randomness(info, item_size); - platform_set_drvdata(pdev, qs->soc_dev); + platform_set_drvdata(pdev, qs); return 0; } diff --git a/drivers/soc/tegra/fuse/fuse-tegra.c b/drivers/soc/tegra/fuse/fuse-tegra.c index 94b60a692b515d656ec5134e19ca0334f344ebc2..4388a4a5e0919c69318c92a63b6e350dd7d1bc76 100644 --- a/drivers/soc/tegra/fuse/fuse-tegra.c +++ b/drivers/soc/tegra/fuse/fuse-tegra.c @@ -260,7 +260,7 @@ static struct platform_driver tegra_fuse_driver = { }; builtin_platform_driver(tegra_fuse_driver); -bool __init tegra_fuse_read_spare(unsigned int spare) +u32 __init tegra_fuse_read_spare(unsigned int spare) { unsigned int offset = fuse->soc->info->spare + spare * 4; diff --git a/drivers/soc/tegra/fuse/fuse.h b/drivers/soc/tegra/fuse/fuse.h index e057a58e20603250878c68db46fab7e23fcb25be..21887a57cf2c2f9faa0fa9438819148af109ef7a 100644 --- a/drivers/soc/tegra/fuse/fuse.h +++ b/drivers/soc/tegra/fuse/fuse.h @@ -63,7 +63,7 @@ struct tegra_fuse { void tegra_init_revision(void); void tegra_init_apbmisc(void); -bool __init tegra_fuse_read_spare(unsigned int spare); +u32 __init tegra_fuse_read_spare(unsigned int spare); u32 __init tegra_fuse_read_early(unsigned int offset); u8 tegra_get_major_rev(void); diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index 0118bd986f902f5998987851dd6ed45973211b00..5726c232e61d5b748bff834b16406bb609ef05e9 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -693,7 +693,7 @@ static int tegra_powergate_power_up(struct tegra_powergate *pg, err = tegra_powergate_enable_clocks(pg); if (err) - goto disable_clks; + goto powergate_off; usleep_range(10, 20); @@ -705,7 +705,7 @@ static int tegra_powergate_power_up(struct tegra_powergate *pg, err = reset_control_deassert(pg->reset); if (err) - goto powergate_off; + goto disable_clks; usleep_range(10, 20); diff --git a/drivers/soc/ti/omap_prm.c b/drivers/soc/ti/omap_prm.c index fb067b5e4a977c991202c01595ee3bc02f90bd13..4a782bfd753c35ef112290914bb82dd97feab964 100644 --- a/drivers/soc/ti/omap_prm.c +++ b/drivers/soc/ti/omap_prm.c @@ -509,25 +509,28 @@ static int omap_reset_deassert(struct reset_controller_dev *rcdev, writel_relaxed(v, reset->prm->base + reset->prm->data->rstctrl); spin_unlock_irqrestore(&reset->lock, flags); - if (!has_rstst) - goto exit; + /* wait for the reset bit to clear */ + ret = readl_relaxed_poll_timeout_atomic(reset->prm->base + + reset->prm->data->rstctrl, + v, !(v & BIT(id)), 1, + OMAP_RESET_MAX_WAIT); + if (ret) + pr_err("%s: timedout waiting for %s:%lu\n", __func__, + reset->prm->data->name, id); /* wait for the status to be set */ - ret = readl_relaxed_poll_timeout_atomic(reset->prm->base + + if (has_rstst) { + ret = readl_relaxed_poll_timeout_atomic(reset->prm->base + reset->prm->data->rstst, v, v & BIT(st_bit), 1, OMAP_RESET_MAX_WAIT); - if (ret) - pr_err("%s: timedout waiting for %s:%lu\n", __func__, - reset->prm->data->name, id); + if (ret) + pr_err("%s: timedout waiting for %s:%lu\n", __func__, + reset->prm->data->name, id); + } -exit: - if (reset->clkdm) { - /* At least dra7 iva needs a delay before clkdm idle */ - if (has_rstst) - udelay(1); + if (reset->clkdm) pdata->clkdm_allow_idle(reset->clkdm); - } return ret; } diff --git a/drivers/soc/ti/pruss.c b/drivers/soc/ti/pruss.c index cc0b4ad7a3d34fb06654f0f04c2ef37c12d496a7..30695172a508f610d9571f841aa0461701f905db 100644 --- a/drivers/soc/ti/pruss.c +++ b/drivers/soc/ti/pruss.c @@ -131,7 +131,7 @@ static int pruss_clk_init(struct pruss *pruss, struct device_node *cfg_node) clks_np = of_get_child_by_name(cfg_node, "clocks"); if (!clks_np) { - dev_err(dev, "%pOF is missing its 'clocks' node\n", clks_np); + dev_err(dev, "%pOF is missing its 'clocks' node\n", cfg_node); return -ENODEV; } diff --git a/drivers/soundwire/debugfs.c b/drivers/soundwire/debugfs.c index b6cad0d59b7b9f2562d6a1c275315b10f5c02581..49900cd207bc7633202a24e45cbe191fbc84cbdb 100644 --- a/drivers/soundwire/debugfs.c +++ b/drivers/soundwire/debugfs.c @@ -19,7 +19,7 @@ void sdw_bus_debugfs_init(struct sdw_bus *bus) return; /* create the debugfs master-N */ - snprintf(name, sizeof(name), "master-%d", bus->link_id); + snprintf(name, sizeof(name), "master-%d-%d", bus->id, bus->link_id); bus->debugfs = debugfs_create_dir(name, sdw_debugfs_root); } diff --git a/drivers/spi/spi-armada-3700.c b/drivers/spi/spi-armada-3700.c index 46feafe4e201c9b30ed006eb346fc47b5ac8887f..d8cc4b270644adfb06a0b55262eb8eecf9abf9d7 100644 --- a/drivers/spi/spi-armada-3700.c +++ b/drivers/spi/spi-armada-3700.c @@ -901,7 +901,7 @@ static int a3700_spi_probe(struct platform_device *pdev) return 0; error_clk: - clk_disable_unprepare(spi->clk); + clk_unprepare(spi->clk); error: spi_master_put(master); out: diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index c028446c746045f0f830a291f9ddda7ce0bad460..4a80f043b7b17375e6f66ac92336452e40f9431a 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -551,7 +551,7 @@ static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs) u32 rd = 0; u32 wr = 0; - if (qspi->base[CHIP_SELECT]) { + if (cs >= 0 && qspi->base[CHIP_SELECT]) { rd = bcm_qspi_read(qspi, CHIP_SELECT, 0); wr = (rd & ~0xff) | (1 << cs); if (rd == wr) @@ -1250,10 +1250,14 @@ static void bcm_qspi_hw_init(struct bcm_qspi *qspi) static void bcm_qspi_hw_uninit(struct bcm_qspi *qspi) { + u32 status = bcm_qspi_read(qspi, MSPI, MSPI_MSPI_STATUS); + bcm_qspi_write(qspi, MSPI, MSPI_SPCR2, 0); if (has_bspi(qspi)) bcm_qspi_write(qspi, MSPI, MSPI_WRITE_LOCK, 0); + /* clear interrupt */ + bcm_qspi_write(qspi, MSPI, MSPI_MSPI_STATUS, status & ~1); } static const struct spi_controller_mem_ops bcm_qspi_mem_ops = { @@ -1397,6 +1401,47 @@ int bcm_qspi_probe(struct platform_device *pdev, if (!qspi->dev_ids) return -ENOMEM; + /* + * Some SoCs integrate spi controller (e.g., its interrupt bits) + * in specific ways + */ + if (soc_intc) { + qspi->soc_intc = soc_intc; + soc_intc->bcm_qspi_int_set(soc_intc, MSPI_DONE, true); + } else { + qspi->soc_intc = NULL; + } + + if (qspi->clk) { + ret = clk_prepare_enable(qspi->clk); + if (ret) { + dev_err(dev, "failed to prepare clock\n"); + goto qspi_probe_err; + } + qspi->base_clk = clk_get_rate(qspi->clk); + } else { + qspi->base_clk = MSPI_BASE_FREQ; + } + + if (data->has_mspi_rev) { + rev = bcm_qspi_read(qspi, MSPI, MSPI_REV); + /* some older revs do not have a MSPI_REV register */ + if ((rev & 0xff) == 0xff) + rev = 0; + } + + qspi->mspi_maj_rev = (rev >> 4) & 0xf; + qspi->mspi_min_rev = rev & 0xf; + qspi->mspi_spcr3_sysclk = data->has_spcr3_sysclk; + + qspi->max_speed_hz = qspi->base_clk / (bcm_qspi_spbr_min(qspi) * 2); + + /* + * On SW resets it is possible to have the mask still enabled + * Need to disable the mask and clear the status while we init + */ + bcm_qspi_hw_uninit(qspi); + for (val = 0; val < num_irqs; val++) { irq = -1; name = qspi_irq_tab[val].irq_name; @@ -1415,7 +1460,7 @@ int bcm_qspi_probe(struct platform_device *pdev, &qspi->dev_ids[val]); if (ret < 0) { dev_err(&pdev->dev, "IRQ %s not found\n", name); - goto qspi_probe_err; + goto qspi_unprepare_err; } qspi->dev_ids[val].dev = qspi; @@ -1430,41 +1475,9 @@ int bcm_qspi_probe(struct platform_device *pdev, if (!num_ints) { dev_err(&pdev->dev, "no IRQs registered, cannot init driver\n"); ret = -EINVAL; - goto qspi_probe_err; - } - - /* - * Some SoCs integrate spi controller (e.g., its interrupt bits) - * in specific ways - */ - if (soc_intc) { - qspi->soc_intc = soc_intc; - soc_intc->bcm_qspi_int_set(soc_intc, MSPI_DONE, true); - } else { - qspi->soc_intc = NULL; + goto qspi_unprepare_err; } - ret = clk_prepare_enable(qspi->clk); - if (ret) { - dev_err(dev, "failed to prepare clock\n"); - goto qspi_probe_err; - } - - qspi->base_clk = clk_get_rate(qspi->clk); - - if (data->has_mspi_rev) { - rev = bcm_qspi_read(qspi, MSPI, MSPI_REV); - /* some older revs do not have a MSPI_REV register */ - if ((rev & 0xff) == 0xff) - rev = 0; - } - - qspi->mspi_maj_rev = (rev >> 4) & 0xf; - qspi->mspi_min_rev = rev & 0xf; - qspi->mspi_spcr3_sysclk = data->has_spcr3_sysclk; - - qspi->max_speed_hz = qspi->base_clk / (bcm_qspi_spbr_min(qspi) * 2); - bcm_qspi_hw_init(qspi); init_completion(&qspi->mspi_done); init_completion(&qspi->bspi_done); @@ -1486,6 +1499,7 @@ int bcm_qspi_probe(struct platform_device *pdev, qspi_reg_err: bcm_qspi_hw_uninit(qspi); +qspi_unprepare_err: clk_disable_unprepare(qspi->clk); qspi_probe_err: kfree(qspi->dev_ids); diff --git a/drivers/spi/spi-meson-spicc.c b/drivers/spi/spi-meson-spicc.c index c208efeadd1847a4be807ce4dc886851f031dc35..0bc7daa7afc83d0d20424800845bb16f1c2c473f 100644 --- a/drivers/spi/spi-meson-spicc.c +++ b/drivers/spi/spi-meson-spicc.c @@ -693,6 +693,11 @@ static int meson_spicc_probe(struct platform_device *pdev) writel_relaxed(0, spicc->base + SPICC_INTREG); irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto out_master; + } + ret = devm_request_irq(&pdev->dev, irq, meson_spicc_irq, 0, NULL, spicc); if (ret) { diff --git a/drivers/spi/spi-meson-spifc.c b/drivers/spi/spi-meson-spifc.c index 8eca6f24cb799b461c6e63f3e190b87f2c600c56..c8ed7815c4ba6d84b18da6adc73111d44ce0f5f2 100644 --- a/drivers/spi/spi-meson-spifc.c +++ b/drivers/spi/spi-meson-spifc.c @@ -349,6 +349,7 @@ static int meson_spifc_probe(struct platform_device *pdev) return 0; out_clk: clk_disable_unprepare(spifc->clk); + pm_runtime_disable(spifc->dev); out_err: spi_master_put(master); return ret; diff --git a/drivers/spi/spi-mt65xx.c b/drivers/spi/spi-mt65xx.c index 83e56ee62649d445ea08b05b99a54059e17264f7..92a09dfb99a8e7a124d8be3a0823af4da977cb5f 100644 --- a/drivers/spi/spi-mt65xx.c +++ b/drivers/spi/spi-mt65xx.c @@ -540,7 +540,7 @@ static irqreturn_t mtk_spi_interrupt(int irq, void *dev_id) else mdata->state = MTK_SPI_IDLE; - if (!master->can_dma(master, master->cur_msg->spi, trans)) { + if (!master->can_dma(master, NULL, trans)) { if (trans->rx_buf) { cnt = mdata->xfer_len / 4; ioread32_rep(mdata->base + SPI_RX_DATA_REG, diff --git a/drivers/spi/spi-pl022.c b/drivers/spi/spi-pl022.c index fd74ddfbb686975fef4598d823cdbe37db1864d6..f7603c209e9d5fd89ce96d1006836e3d1d066813 100644 --- a/drivers/spi/spi-pl022.c +++ b/drivers/spi/spi-pl022.c @@ -1723,12 +1723,13 @@ static int verify_controller_parameters(struct pl022 *pl022, return -EINVAL; } } else { - if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) + if (chip_info->duplex != SSP_MICROWIRE_CHANNEL_FULL_DUPLEX) { dev_err(&pl022->adev->dev, "Microwire half duplex mode requested," " but this is only available in the" " ST version of PL022\n"); - return -EINVAL; + return -EINVAL; + } } } return 0; diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c index 0aab37cd64e74b3c564b5246c774c32b9b8e0165..a9f97023d5a00da66064abdfbc2d3abaaf4db0d7 100644 --- a/drivers/spi/spi-rockchip.c +++ b/drivers/spi/spi-rockchip.c @@ -567,6 +567,12 @@ static int rockchip_spi_slave_abort(struct spi_controller *ctlr) { struct rockchip_spi *rs = spi_controller_get_devdata(ctlr); + if (atomic_read(&rs->state) & RXDMA) + dmaengine_terminate_sync(ctlr->dma_rx); + if (atomic_read(&rs->state) & TXDMA) + dmaengine_terminate_sync(ctlr->dma_tx); + atomic_set(&rs->state, 0); + spi_enable_chip(rs, false); rs->slave_abort = true; complete(&ctlr->xfer_completion); @@ -582,6 +588,12 @@ static int rockchip_spi_transfer_one( int ret; bool use_dma; + /* Zero length transfers won't trigger an interrupt on completion */ + if (!xfer->len) { + spi_finalize_current_transfer(ctlr); + return 1; + } + WARN_ON(readl_relaxed(rs->regs + ROCKCHIP_SPI_SSIENR) && (readl_relaxed(rs->regs + ROCKCHIP_SPI_SR) & SR_BUSY)); @@ -630,7 +642,7 @@ static int rockchip_spi_probe(struct platform_device *pdev) struct spi_controller *ctlr; struct resource *mem; struct device_node *np = pdev->dev.of_node; - u32 rsd_nsecs; + u32 rsd_nsecs, num_cs; bool slave_mode; slave_mode = of_property_read_bool(np, "spi-slave"); @@ -738,8 +750,9 @@ static int rockchip_spi_probe(struct platform_device *pdev) * rk spi0 has two native cs, spi1..5 one cs only * if num-cs is missing in the dts, default to 1 */ - if (of_property_read_u16(np, "num-cs", &ctlr->num_chipselect)) - ctlr->num_chipselect = 1; + if (of_property_read_u32(np, "num-cs", &num_cs)) + num_cs = 1; + ctlr->num_chipselect = num_cs; ctlr->use_gpio_descriptors = true; } ctlr->dev.of_node = pdev->dev.of_node; diff --git a/drivers/spi/spi-rpc-if.c b/drivers/spi/spi-rpc-if.c index 3579675485a5ef3ab28d455d9fc00a6d54d4e7e7..727d7cf0a6ad8518090dc4a8c31c096d9859f826 100644 --- a/drivers/spi/spi-rpc-if.c +++ b/drivers/spi/spi-rpc-if.c @@ -139,7 +139,9 @@ static int rpcif_spi_probe(struct platform_device *pdev) return -ENOMEM; rpc = spi_controller_get_devdata(ctlr); - rpcif_sw_init(rpc, parent); + error = rpcif_sw_init(rpc, parent); + if (error) + return error; platform_set_drvdata(pdev, ctlr); diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c index 6a9ef8ee3cc9028f73c3bcf0a1f4ab3a8252373e..ad0088e3947237a8c70c37c9de385b67b8e2febb 100644 --- a/drivers/spi/spi-uniphier.c +++ b/drivers/spi/spi-uniphier.c @@ -726,7 +726,7 @@ static int uniphier_spi_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "failed to get TX DMA capacities: %d\n", ret); - goto out_disable_clk; + goto out_release_dma; } dma_tx_burst = caps.max_burst; } @@ -735,7 +735,7 @@ static int uniphier_spi_probe(struct platform_device *pdev) if (IS_ERR_OR_NULL(master->dma_rx)) { if (PTR_ERR(master->dma_rx) == -EPROBE_DEFER) { ret = -EPROBE_DEFER; - goto out_disable_clk; + goto out_release_dma; } master->dma_rx = NULL; dma_rx_burst = INT_MAX; @@ -744,7 +744,7 @@ static int uniphier_spi_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "failed to get RX DMA capacities: %d\n", ret); - goto out_disable_clk; + goto out_release_dma; } dma_rx_burst = caps.max_burst; } @@ -753,10 +753,20 @@ static int uniphier_spi_probe(struct platform_device *pdev) ret = devm_spi_register_master(&pdev->dev, master); if (ret) - goto out_disable_clk; + goto out_release_dma; return 0; +out_release_dma: + if (!IS_ERR_OR_NULL(master->dma_rx)) { + dma_release_channel(master->dma_rx); + master->dma_rx = NULL; + } + if (!IS_ERR_OR_NULL(master->dma_tx)) { + dma_release_channel(master->dma_tx); + master->dma_tx = NULL; + } + out_disable_clk: clk_disable_unprepare(priv->clk); @@ -767,12 +777,13 @@ out_master_put: static int uniphier_spi_remove(struct platform_device *pdev) { - struct uniphier_spi_priv *priv = platform_get_drvdata(pdev); + struct spi_master *master = platform_get_drvdata(pdev); + struct uniphier_spi_priv *priv = spi_master_get_devdata(master); - if (priv->master->dma_tx) - dma_release_channel(priv->master->dma_tx); - if (priv->master->dma_rx) - dma_release_channel(priv->master->dma_rx); + if (master->dma_tx) + dma_release_channel(master->dma_tx); + if (master->dma_rx) + dma_release_channel(master->dma_rx); clk_disable_unprepare(priv->clk); diff --git a/drivers/spi/spi-zynq-qspi.c b/drivers/spi/spi-zynq-qspi.c index b635835729d66401501b99a31782760ddee54763..13c0b15fe17649490b7075c0d5119e4d012b7413 100644 --- a/drivers/spi/spi-zynq-qspi.c +++ b/drivers/spi/spi-zynq-qspi.c @@ -570,6 +570,9 @@ static int zynq_qspi_exec_mem_op(struct spi_mem *mem, if (op->dummy.nbytes) { tmpbuf = kzalloc(op->dummy.nbytes, GFP_KERNEL); + if (!tmpbuf) + return -ENOMEM; + memset(tmpbuf, 0xff, op->dummy.nbytes); reinit_completion(&xqspi->data_completion); xqspi->txbuf = tmpbuf; diff --git a/drivers/staging/comedi/drivers/dt9812.c b/drivers/staging/comedi/drivers/dt9812.c index 634f57730c1e02208853ae2b698c697c450e497d..704b04d2980d3089d6ade4ee72b8412b9e05aac7 100644 --- a/drivers/staging/comedi/drivers/dt9812.c +++ b/drivers/staging/comedi/drivers/dt9812.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "../comedi_usb.h" @@ -237,22 +238,42 @@ static int dt9812_read_info(struct comedi_device *dev, { struct usb_device *usb = comedi_to_usb_dev(dev); struct dt9812_private *devpriv = dev->private; - struct dt9812_usb_cmd cmd; + struct dt9812_usb_cmd *cmd; + size_t tbuf_size; int count, ret; + void *tbuf; - cmd.cmd = cpu_to_le32(DT9812_R_FLASH_DATA); - cmd.u.flash_data_info.address = + tbuf_size = max(sizeof(*cmd), buf_size); + + tbuf = kzalloc(tbuf_size, GFP_KERNEL); + if (!tbuf) + return -ENOMEM; + + cmd = tbuf; + + cmd->cmd = cpu_to_le32(DT9812_R_FLASH_DATA); + cmd->u.flash_data_info.address = cpu_to_le16(DT9812_DIAGS_BOARD_INFO_ADDR + offset); - cmd.u.flash_data_info.numbytes = cpu_to_le16(buf_size); + cmd->u.flash_data_info.numbytes = cpu_to_le16(buf_size); /* DT9812 only responds to 32 byte writes!! */ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), - &cmd, 32, &count, DT9812_USB_TIMEOUT); + cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT); if (ret) - return ret; + goto out; + + ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr), + tbuf, buf_size, &count, DT9812_USB_TIMEOUT); + if (!ret) { + if (count == buf_size) + memcpy(buf, tbuf, buf_size); + else + ret = -EREMOTEIO; + } +out: + kfree(tbuf); - return usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr), - buf, buf_size, &count, DT9812_USB_TIMEOUT); + return ret; } static int dt9812_read_multiple_registers(struct comedi_device *dev, @@ -261,22 +282,42 @@ static int dt9812_read_multiple_registers(struct comedi_device *dev, { struct usb_device *usb = comedi_to_usb_dev(dev); struct dt9812_private *devpriv = dev->private; - struct dt9812_usb_cmd cmd; + struct dt9812_usb_cmd *cmd; int i, count, ret; + size_t buf_size; + void *buf; - cmd.cmd = cpu_to_le32(DT9812_R_MULTI_BYTE_REG); - cmd.u.read_multi_info.count = reg_count; + buf_size = max_t(size_t, sizeof(*cmd), reg_count); + + buf = kzalloc(buf_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + cmd = buf; + + cmd->cmd = cpu_to_le32(DT9812_R_MULTI_BYTE_REG); + cmd->u.read_multi_info.count = reg_count; for (i = 0; i < reg_count; i++) - cmd.u.read_multi_info.address[i] = address[i]; + cmd->u.read_multi_info.address[i] = address[i]; /* DT9812 only responds to 32 byte writes!! */ ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), - &cmd, 32, &count, DT9812_USB_TIMEOUT); + cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT); if (ret) - return ret; + goto out; + + ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr), + buf, reg_count, &count, DT9812_USB_TIMEOUT); + if (!ret) { + if (count == reg_count) + memcpy(value, buf, reg_count); + else + ret = -EREMOTEIO; + } +out: + kfree(buf); - return usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->cmd_rd.addr), - value, reg_count, &count, DT9812_USB_TIMEOUT); + return ret; } static int dt9812_write_multiple_registers(struct comedi_device *dev, @@ -285,19 +326,27 @@ static int dt9812_write_multiple_registers(struct comedi_device *dev, { struct usb_device *usb = comedi_to_usb_dev(dev); struct dt9812_private *devpriv = dev->private; - struct dt9812_usb_cmd cmd; + struct dt9812_usb_cmd *cmd; int i, count; + int ret; + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; - cmd.cmd = cpu_to_le32(DT9812_W_MULTI_BYTE_REG); - cmd.u.read_multi_info.count = reg_count; + cmd->cmd = cpu_to_le32(DT9812_W_MULTI_BYTE_REG); + cmd->u.read_multi_info.count = reg_count; for (i = 0; i < reg_count; i++) { - cmd.u.write_multi_info.write[i].address = address[i]; - cmd.u.write_multi_info.write[i].value = value[i]; + cmd->u.write_multi_info.write[i].address = address[i]; + cmd->u.write_multi_info.write[i].value = value[i]; } /* DT9812 only responds to 32 byte writes!! */ - return usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), - &cmd, 32, &count, DT9812_USB_TIMEOUT); + ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), + cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT); + kfree(cmd); + + return ret; } static int dt9812_rmw_multiple_registers(struct comedi_device *dev, @@ -306,17 +355,25 @@ static int dt9812_rmw_multiple_registers(struct comedi_device *dev, { struct usb_device *usb = comedi_to_usb_dev(dev); struct dt9812_private *devpriv = dev->private; - struct dt9812_usb_cmd cmd; + struct dt9812_usb_cmd *cmd; int i, count; + int ret; + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return -ENOMEM; - cmd.cmd = cpu_to_le32(DT9812_RMW_MULTI_BYTE_REG); - cmd.u.rmw_multi_info.count = reg_count; + cmd->cmd = cpu_to_le32(DT9812_RMW_MULTI_BYTE_REG); + cmd->u.rmw_multi_info.count = reg_count; for (i = 0; i < reg_count; i++) - cmd.u.rmw_multi_info.rmw[i] = rmw[i]; + cmd->u.rmw_multi_info.rmw[i] = rmw[i]; /* DT9812 only responds to 32 byte writes!! */ - return usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), - &cmd, 32, &count, DT9812_USB_TIMEOUT); + ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->cmd_wr.addr), + cmd, sizeof(*cmd), &count, DT9812_USB_TIMEOUT); + kfree(cmd); + + return ret; } static int dt9812_digital_in(struct comedi_device *dev, u8 *bits) diff --git a/drivers/staging/comedi/drivers/ni_usb6501.c b/drivers/staging/comedi/drivers/ni_usb6501.c index 5b6d9d783b2f74339159a7bb814a78d9d8e77b26..c42987b74b1dcb6d3aec08aac574116c46c5dbca 100644 --- a/drivers/staging/comedi/drivers/ni_usb6501.c +++ b/drivers/staging/comedi/drivers/ni_usb6501.c @@ -144,6 +144,10 @@ static const u8 READ_COUNTER_RESPONSE[] = {0x00, 0x01, 0x00, 0x10, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00}; +/* Largest supported packets */ +static const size_t TX_MAX_SIZE = sizeof(SET_PORT_DIR_REQUEST); +static const size_t RX_MAX_SIZE = sizeof(READ_PORT_RESPONSE); + enum commands { READ_PORT, WRITE_PORT, @@ -501,6 +505,12 @@ static int ni6501_find_endpoints(struct comedi_device *dev) if (!devpriv->ep_rx || !devpriv->ep_tx) return -ENODEV; + if (usb_endpoint_maxp(devpriv->ep_rx) < RX_MAX_SIZE) + return -ENODEV; + + if (usb_endpoint_maxp(devpriv->ep_tx) < TX_MAX_SIZE) + return -ENODEV; + return 0; } diff --git a/drivers/staging/comedi/drivers/vmk80xx.c b/drivers/staging/comedi/drivers/vmk80xx.c index 7956abcbae22bfdd2294b1220d892763bd431b3a..7769eadfaf61d519c058b92eb0f70bbafb0f1159 100644 --- a/drivers/staging/comedi/drivers/vmk80xx.c +++ b/drivers/staging/comedi/drivers/vmk80xx.c @@ -90,6 +90,9 @@ enum { #define IC3_VERSION BIT(0) #define IC6_VERSION BIT(1) +#define MIN_BUF_SIZE 64 +#define PACKET_TIMEOUT 10000 /* ms */ + enum vmk80xx_model { VMK8055_MODEL, VMK8061_MODEL @@ -157,22 +160,21 @@ static void vmk80xx_do_bulk_msg(struct comedi_device *dev) __u8 rx_addr; unsigned int tx_pipe; unsigned int rx_pipe; - size_t size; + size_t tx_size; + size_t rx_size; tx_addr = devpriv->ep_tx->bEndpointAddress; rx_addr = devpriv->ep_rx->bEndpointAddress; tx_pipe = usb_sndbulkpipe(usb, tx_addr); rx_pipe = usb_rcvbulkpipe(usb, rx_addr); + tx_size = usb_endpoint_maxp(devpriv->ep_tx); + rx_size = usb_endpoint_maxp(devpriv->ep_rx); - /* - * The max packet size attributes of the K8061 - * input/output endpoints are identical - */ - size = usb_endpoint_maxp(devpriv->ep_tx); + usb_bulk_msg(usb, tx_pipe, devpriv->usb_tx_buf, tx_size, NULL, + PACKET_TIMEOUT); - usb_bulk_msg(usb, tx_pipe, devpriv->usb_tx_buf, - size, NULL, devpriv->ep_tx->bInterval); - usb_bulk_msg(usb, rx_pipe, devpriv->usb_rx_buf, size, NULL, HZ * 10); + usb_bulk_msg(usb, rx_pipe, devpriv->usb_rx_buf, rx_size, NULL, + PACKET_TIMEOUT); } static int vmk80xx_read_packet(struct comedi_device *dev) @@ -191,7 +193,7 @@ static int vmk80xx_read_packet(struct comedi_device *dev) pipe = usb_rcvintpipe(usb, ep->bEndpointAddress); return usb_interrupt_msg(usb, pipe, devpriv->usb_rx_buf, usb_endpoint_maxp(ep), NULL, - HZ * 10); + PACKET_TIMEOUT); } static int vmk80xx_write_packet(struct comedi_device *dev, int cmd) @@ -212,7 +214,7 @@ static int vmk80xx_write_packet(struct comedi_device *dev, int cmd) pipe = usb_sndintpipe(usb, ep->bEndpointAddress); return usb_interrupt_msg(usb, pipe, devpriv->usb_tx_buf, usb_endpoint_maxp(ep), NULL, - HZ * 10); + PACKET_TIMEOUT); } static int vmk80xx_reset_device(struct comedi_device *dev) @@ -678,12 +680,12 @@ static int vmk80xx_alloc_usb_buffers(struct comedi_device *dev) struct vmk80xx_private *devpriv = dev->private; size_t size; - size = usb_endpoint_maxp(devpriv->ep_rx); + size = max(usb_endpoint_maxp(devpriv->ep_rx), MIN_BUF_SIZE); devpriv->usb_rx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_rx_buf) return -ENOMEM; - size = usb_endpoint_maxp(devpriv->ep_tx); + size = max(usb_endpoint_maxp(devpriv->ep_rx), MIN_BUF_SIZE); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_tx_buf) return -ENOMEM; diff --git a/drivers/staging/fbtft/fb_ssd1351.c b/drivers/staging/fbtft/fb_ssd1351.c index cf263a58a1489fe4dd507dfce6b59718ffd1fb68..6fd549a424d53fc89fb938a070fa2be0442c1e89 100644 --- a/drivers/staging/fbtft/fb_ssd1351.c +++ b/drivers/staging/fbtft/fb_ssd1351.c @@ -187,7 +187,6 @@ static struct fbtft_display display = { }, }; -#ifdef CONFIG_FB_BACKLIGHT static int update_onboard_backlight(struct backlight_device *bd) { struct fbtft_par *par = bl_get_data(bd); @@ -231,9 +230,6 @@ static void register_onboard_backlight(struct fbtft_par *par) if (!par->fbtftops.unregister_backlight) par->fbtftops.unregister_backlight = fbtft_unregister_backlight; } -#else -static void register_onboard_backlight(struct fbtft_par *par) { }; -#endif FBTFT_REGISTER_DRIVER(DRVNAME, "solomon,ssd1351", &display); diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index 3723269890d5ff613470d0e5c067f42897600837..d0c8d85f3db0f566965d586ea432c5ad14eb14bf 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -128,7 +128,6 @@ static int fbtft_request_gpios(struct fbtft_par *par) return 0; } -#ifdef CONFIG_FB_BACKLIGHT static int fbtft_backlight_update_status(struct backlight_device *bd) { struct fbtft_par *par = bl_get_data(bd); @@ -161,6 +160,7 @@ void fbtft_unregister_backlight(struct fbtft_par *par) par->info->bl_dev = NULL; } } +EXPORT_SYMBOL(fbtft_unregister_backlight); static const struct backlight_ops fbtft_bl_ops = { .get_brightness = fbtft_backlight_get_brightness, @@ -198,12 +198,7 @@ void fbtft_register_backlight(struct fbtft_par *par) if (!par->fbtftops.unregister_backlight) par->fbtftops.unregister_backlight = fbtft_unregister_backlight; } -#else -void fbtft_register_backlight(struct fbtft_par *par) { }; -void fbtft_unregister_backlight(struct fbtft_par *par) { }; -#endif EXPORT_SYMBOL(fbtft_register_backlight); -EXPORT_SYMBOL(fbtft_unregister_backlight); static void fbtft_set_addr_win(struct fbtft_par *par, int xs, int ys, int xe, int ye) @@ -853,13 +848,11 @@ int fbtft_register_framebuffer(struct fb_info *fb_info) fb_info->fix.smem_len >> 10, text1, HZ / fb_info->fbdefio->delay, text2); -#ifdef CONFIG_FB_BACKLIGHT /* Turn on backlight if available */ if (fb_info->bl_dev) { fb_info->bl_dev->props.power = FB_BLANK_UNBLANK; fb_info->bl_dev->ops->update_status(fb_info->bl_dev); } -#endif return 0; diff --git a/drivers/staging/fbtft/fbtft.h b/drivers/staging/fbtft/fbtft.h index 76f8c090a83702b56b2399194898d002b1151af1..06afaa9d505bac250b5f3b7a215307dfbaf05977 100644 --- a/drivers/staging/fbtft/fbtft.h +++ b/drivers/staging/fbtft/fbtft.h @@ -332,7 +332,10 @@ static int __init fbtft_driver_module_init(void) \ ret = spi_register_driver(&fbtft_driver_spi_driver); \ if (ret < 0) \ return ret; \ - return platform_driver_register(&fbtft_driver_platform_driver); \ + ret = platform_driver_register(&fbtft_driver_platform_driver); \ + if (ret < 0) \ + spi_unregister_driver(&fbtft_driver_spi_driver); \ + return ret; \ } \ \ static void __exit fbtft_driver_module_exit(void) \ diff --git a/drivers/staging/gdm724x/gdm_lte.c b/drivers/staging/gdm724x/gdm_lte.c index bd5f874334043217014a2c59c5167d82c838be80..de30262c3fae023e2e34d58f21aa6444370fa706 100644 --- a/drivers/staging/gdm724x/gdm_lte.c +++ b/drivers/staging/gdm724x/gdm_lte.c @@ -76,14 +76,15 @@ static void tx_complete(void *arg) static int gdm_lte_rx(struct sk_buff *skb, struct nic *nic, int nic_type) { - int ret; + int ret, len; + len = skb->len + ETH_HLEN; ret = netif_rx_ni(skb); if (ret == NET_RX_DROP) { nic->stats.rx_dropped++; } else { nic->stats.rx_packets++; - nic->stats.rx_bytes += skb->len + ETH_HLEN; + nic->stats.rx_bytes += len; } return 0; diff --git a/drivers/staging/greybus/audio_helper.c b/drivers/staging/greybus/audio_helper.c index 3011b8abce389d765a7e9cbf7a8fb9882a599622..a9576f92efaa46150ac18b8b64ddbef172be7efa 100644 --- a/drivers/staging/greybus/audio_helper.c +++ b/drivers/staging/greybus/audio_helper.c @@ -192,7 +192,11 @@ int gbaudio_remove_component_controls(struct snd_soc_component *component, unsigned int num_controls) { struct snd_card *card = component->card->snd_card; + int err; - return gbaudio_remove_controls(card, component->dev, controls, - num_controls, component->name_prefix); + down_write(&card->controls_rwsem); + err = gbaudio_remove_controls(card, component->dev, controls, + num_controls, component->name_prefix); + up_write(&card->controls_rwsem); + return err; } diff --git a/drivers/staging/greybus/audio_topology.c b/drivers/staging/greybus/audio_topology.c index 662e3e8b4b634d038b4ecc83e7a9829d3dc1d1ee..e1579f356af5c1f49cadc9f2a5790ed9f49de1d7 100644 --- a/drivers/staging/greybus/audio_topology.c +++ b/drivers/staging/greybus/audio_topology.c @@ -147,6 +147,9 @@ static const char **gb_generate_enum_strings(struct gbaudio_module_info *gb, items = le32_to_cpu(gbenum->items); strings = devm_kcalloc(gb->dev, items, sizeof(char *), GFP_KERNEL); + if (!strings) + return NULL; + data = gbenum->names; for (i = 0; i < items; i++) { @@ -655,6 +658,8 @@ static int gbaudio_tplg_create_enum_kctl(struct gbaudio_module_info *gb, /* since count=1, and reg is dummy */ gbe->items = le32_to_cpu(gb_enum->items); gbe->texts = gb_generate_enum_strings(gb, gb_enum); + if (!gbe->texts) + return -ENOMEM; /* debug enum info */ dev_dbg(gb->dev, "Max:%d, name_length:%d\n", gbe->items, @@ -862,6 +867,8 @@ static int gbaudio_tplg_create_enum_ctl(struct gbaudio_module_info *gb, /* since count=1, and reg is dummy */ gbe->items = le32_to_cpu(gb_enum->items); gbe->texts = gb_generate_enum_strings(gb, gb_enum); + if (!gbe->texts) + return -ENOMEM; /* debug enum info */ dev_dbg(gb->dev, "Max:%d, name_length:%d\n", gbe->items, @@ -974,6 +981,44 @@ static int gbaudio_widget_event(struct snd_soc_dapm_widget *w, return ret; } +static const struct snd_soc_dapm_widget gbaudio_widgets[] = { + [snd_soc_dapm_spk] = SND_SOC_DAPM_SPK(NULL, gbcodec_event_spk), + [snd_soc_dapm_hp] = SND_SOC_DAPM_HP(NULL, gbcodec_event_hp), + [snd_soc_dapm_mic] = SND_SOC_DAPM_MIC(NULL, gbcodec_event_int_mic), + [snd_soc_dapm_output] = SND_SOC_DAPM_OUTPUT(NULL), + [snd_soc_dapm_input] = SND_SOC_DAPM_INPUT(NULL), + [snd_soc_dapm_switch] = SND_SOC_DAPM_SWITCH_E(NULL, SND_SOC_NOPM, + 0, 0, NULL, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + [snd_soc_dapm_pga] = SND_SOC_DAPM_PGA_E(NULL, SND_SOC_NOPM, + 0, 0, NULL, 0, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + [snd_soc_dapm_mixer] = SND_SOC_DAPM_MIXER_E(NULL, SND_SOC_NOPM, + 0, 0, NULL, 0, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + [snd_soc_dapm_mux] = SND_SOC_DAPM_MUX_E(NULL, SND_SOC_NOPM, + 0, 0, NULL, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + [snd_soc_dapm_aif_in] = SND_SOC_DAPM_AIF_IN_E(NULL, NULL, 0, + SND_SOC_NOPM, 0, 0, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), + [snd_soc_dapm_aif_out] = SND_SOC_DAPM_AIF_OUT_E(NULL, NULL, 0, + SND_SOC_NOPM, 0, 0, + gbaudio_widget_event, + SND_SOC_DAPM_PRE_PMU | + SND_SOC_DAPM_POST_PMD), +}; + static int gbaudio_tplg_create_widget(struct gbaudio_module_info *module, struct snd_soc_dapm_widget *dw, struct gb_audio_widget *w, int *w_size) @@ -1034,6 +1079,10 @@ static int gbaudio_tplg_create_widget(struct gbaudio_module_info *module, csize += le16_to_cpu(gbenum->names_length); control->texts = (const char * const *) gb_generate_enum_strings(module, gbenum); + if (!control->texts) { + ret = -ENOMEM; + goto error; + } control->items = le32_to_cpu(gbenum->items); } else { csize = sizeof(struct gb_audio_control); @@ -1052,77 +1101,37 @@ static int gbaudio_tplg_create_widget(struct gbaudio_module_info *module, switch (w->type) { case snd_soc_dapm_spk: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_SPK(w->name, gbcodec_event_spk); + *dw = gbaudio_widgets[w->type]; module->op_devices |= GBAUDIO_DEVICE_OUT_SPEAKER; break; case snd_soc_dapm_hp: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_HP(w->name, gbcodec_event_hp); + *dw = gbaudio_widgets[w->type]; module->op_devices |= (GBAUDIO_DEVICE_OUT_WIRED_HEADSET | GBAUDIO_DEVICE_OUT_WIRED_HEADPHONE); module->ip_devices |= GBAUDIO_DEVICE_IN_WIRED_HEADSET; break; case snd_soc_dapm_mic: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_MIC(w->name, gbcodec_event_int_mic); + *dw = gbaudio_widgets[w->type]; module->ip_devices |= GBAUDIO_DEVICE_IN_BUILTIN_MIC; break; case snd_soc_dapm_output: - *dw = (struct snd_soc_dapm_widget)SND_SOC_DAPM_OUTPUT(w->name); - break; case snd_soc_dapm_input: - *dw = (struct snd_soc_dapm_widget)SND_SOC_DAPM_INPUT(w->name); - break; case snd_soc_dapm_switch: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_SWITCH_E(w->name, SND_SOC_NOPM, 0, 0, - widget_kctls, - gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); - break; case snd_soc_dapm_pga: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_PGA_E(w->name, SND_SOC_NOPM, 0, 0, NULL, 0, - gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); - break; case snd_soc_dapm_mixer: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_MIXER_E(w->name, SND_SOC_NOPM, 0, 0, NULL, - 0, gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); - break; case snd_soc_dapm_mux: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_MUX_E(w->name, SND_SOC_NOPM, 0, 0, - widget_kctls, gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); + *dw = gbaudio_widgets[w->type]; break; case snd_soc_dapm_aif_in: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_AIF_IN_E(w->name, w->sname, 0, - SND_SOC_NOPM, - 0, 0, gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); - break; case snd_soc_dapm_aif_out: - *dw = (struct snd_soc_dapm_widget) - SND_SOC_DAPM_AIF_OUT_E(w->name, w->sname, 0, - SND_SOC_NOPM, - 0, 0, gbaudio_widget_event, - SND_SOC_DAPM_PRE_PMU | - SND_SOC_DAPM_POST_PMD); + *dw = gbaudio_widgets[w->type]; + dw->sname = w->sname; break; default: ret = -EINVAL; goto error; } + dw->name = w->name; dev_dbg(module->dev, "%s: widget of type %d created\n", dw->name, dw->id); @@ -1183,6 +1192,10 @@ static int gbaudio_tplg_process_kcontrols(struct gbaudio_module_info *module, csize += le16_to_cpu(gbenum->names_length); control->texts = (const char * const *) gb_generate_enum_strings(module, gbenum); + if (!control->texts) { + ret = -ENOMEM; + goto error; + } control->items = le32_to_cpu(gbenum->items); } else { csize = sizeof(struct gb_audio_control); diff --git a/drivers/staging/ks7010/Kconfig b/drivers/staging/ks7010/Kconfig index 0987fdc2f70db85af9c35e5a47479d63d3e6ac9d..8ea6c0928679807077ace5bf4751083a4b9fae67 100644 --- a/drivers/staging/ks7010/Kconfig +++ b/drivers/staging/ks7010/Kconfig @@ -5,6 +5,9 @@ config KS7010 select WIRELESS_EXT select WEXT_PRIV select FW_LOADER + select CRYPTO + select CRYPTO_HASH + select CRYPTO_MICHAEL_MIC help This is a driver for KeyStream KS7010 based SDIO WIFI cards. It is found on at least later Spectec SDW-821 (FCC-ID "S2Y-WLAN-11G-K" only, diff --git a/drivers/staging/media/allegro-dvt/allegro-core.c b/drivers/staging/media/allegro-dvt/allegro-core.c index 640451134072bcd5f292a3fae5a2698b0e1fc91a..28b6ba895ccd5663c5547fcb7d2f557b1065136a 100644 --- a/drivers/staging/media/allegro-dvt/allegro-core.c +++ b/drivers/staging/media/allegro-dvt/allegro-core.c @@ -1802,6 +1802,15 @@ static irqreturn_t allegro_irq_thread(int irq, void *data) { struct allegro_dev *dev = data; + /* + * The firmware is initialized after the mailbox is setup. We further + * check the AL5_ITC_CPU_IRQ_STA register, if the firmware actually + * triggered the interrupt. Although this should not happen, make sure + * that we ignore interrupts, if the mailbox is not initialized. + */ + if (!dev->mbox_status) + return IRQ_NONE; + allegro_mbox_notify(dev->mbox_status); return IRQ_HANDLED; diff --git a/drivers/staging/media/atomisp/i2c/atomisp-lm3554.c b/drivers/staging/media/atomisp/i2c/atomisp-lm3554.c index 0ab67b2aec6718543af3f4f79d8da88340163f72..8739f0874103e8fc56fa10681f84af6fe733a12b 100644 --- a/drivers/staging/media/atomisp/i2c/atomisp-lm3554.c +++ b/drivers/staging/media/atomisp/i2c/atomisp-lm3554.c @@ -836,7 +836,6 @@ static int lm3554_probe(struct i2c_client *client) int err = 0; struct lm3554 *flash; unsigned int i; - int ret; flash = kzalloc(sizeof(*flash), GFP_KERNEL); if (!flash) @@ -845,7 +844,7 @@ static int lm3554_probe(struct i2c_client *client) flash->pdata = lm3554_platform_data_func(client); if (IS_ERR(flash->pdata)) { err = PTR_ERR(flash->pdata); - goto fail1; + goto free_flash; } v4l2_i2c_subdev_init(&flash->sd, client, &lm3554_ops); @@ -853,12 +852,12 @@ static int lm3554_probe(struct i2c_client *client) flash->sd.flags |= V4L2_SUBDEV_FL_HAS_DEVNODE; flash->mode = ATOMISP_FLASH_MODE_OFF; flash->timeout = LM3554_MAX_TIMEOUT / LM3554_TIMEOUT_STEPSIZE - 1; - ret = + err = v4l2_ctrl_handler_init(&flash->ctrl_handler, ARRAY_SIZE(lm3554_controls)); - if (ret) { + if (err) { dev_err(&client->dev, "error initialize a ctrl_handler.\n"); - goto fail3; + goto unregister_subdev; } for (i = 0; i < ARRAY_SIZE(lm3554_controls); i++) @@ -867,14 +866,15 @@ static int lm3554_probe(struct i2c_client *client) if (flash->ctrl_handler.error) { dev_err(&client->dev, "ctrl_handler error.\n"); - goto fail3; + err = flash->ctrl_handler.error; + goto free_handler; } flash->sd.ctrl_handler = &flash->ctrl_handler; err = media_entity_pads_init(&flash->sd.entity, 0, NULL); if (err) { dev_err(&client->dev, "error initialize a media entity.\n"); - goto fail2; + goto free_handler; } flash->sd.entity.function = MEDIA_ENT_F_FLASH; @@ -885,16 +885,27 @@ static int lm3554_probe(struct i2c_client *client) err = lm3554_gpio_init(client); if (err) { - dev_err(&client->dev, "gpio request/direction_output fail"); - goto fail3; + dev_err(&client->dev, "gpio request/direction_output fail.\n"); + goto cleanup_media; + } + + err = atomisp_register_i2c_module(&flash->sd, NULL, LED_FLASH); + if (err) { + dev_err(&client->dev, "fail to register atomisp i2c module.\n"); + goto uninit_gpio; } - return atomisp_register_i2c_module(&flash->sd, NULL, LED_FLASH); -fail3: + + return 0; + +uninit_gpio: + lm3554_gpio_uninit(client); +cleanup_media: media_entity_cleanup(&flash->sd.entity); +free_handler: v4l2_ctrl_handler_free(&flash->ctrl_handler); -fail2: +unregister_subdev: v4l2_device_unregister_subdev(&flash->sd); -fail1: +free_flash: kfree(flash); return err; diff --git a/drivers/staging/media/atomisp/i2c/ov2680.h b/drivers/staging/media/atomisp/i2c/ov2680.h index 49920245e0647ba231fafc9206720c6441c8fede..cafb798a71abeb76d784bb93c9d09a4f0c0cca08 100644 --- a/drivers/staging/media/atomisp/i2c/ov2680.h +++ b/drivers/staging/media/atomisp/i2c/ov2680.h @@ -289,8 +289,6 @@ static struct ov2680_reg const ov2680_global_setting[] = { */ static struct ov2680_reg const ov2680_QCIF_30fps[] = { {0x3086, 0x01}, - {0x3501, 0x24}, - {0x3502, 0x40}, {0x370a, 0x23}, {0x3801, 0xa0}, {0x3802, 0x00}, @@ -334,8 +332,6 @@ static struct ov2680_reg const ov2680_QCIF_30fps[] = { */ static struct ov2680_reg const ov2680_CIF_30fps[] = { {0x3086, 0x01}, - {0x3501, 0x24}, - {0x3502, 0x40}, {0x370a, 0x23}, {0x3801, 0xa0}, {0x3802, 0x00}, @@ -377,8 +373,6 @@ static struct ov2680_reg const ov2680_CIF_30fps[] = { */ static struct ov2680_reg const ov2680_QVGA_30fps[] = { {0x3086, 0x01}, - {0x3501, 0x24}, - {0x3502, 0x40}, {0x370a, 0x23}, {0x3801, 0xa0}, {0x3802, 0x00}, @@ -420,8 +414,6 @@ static struct ov2680_reg const ov2680_QVGA_30fps[] = { */ static struct ov2680_reg const ov2680_656x496_30fps[] = { {0x3086, 0x01}, - {0x3501, 0x24}, - {0x3502, 0x40}, {0x370a, 0x23}, {0x3801, 0xa0}, {0x3802, 0x00}, @@ -463,8 +455,6 @@ static struct ov2680_reg const ov2680_656x496_30fps[] = { */ static struct ov2680_reg const ov2680_720x592_30fps[] = { {0x3086, 0x01}, - {0x3501, 0x26}, - {0x3502, 0x40}, {0x370a, 0x23}, {0x3801, 0x00}, // X_ADDR_START; {0x3802, 0x00}, @@ -508,8 +498,6 @@ static struct ov2680_reg const ov2680_720x592_30fps[] = { */ static struct ov2680_reg const ov2680_800x600_30fps[] = { {0x3086, 0x01}, - {0x3501, 0x26}, - {0x3502, 0x40}, {0x370a, 0x23}, {0x3801, 0x00}, {0x3802, 0x00}, @@ -551,8 +539,6 @@ static struct ov2680_reg const ov2680_800x600_30fps[] = { */ static struct ov2680_reg const ov2680_720p_30fps[] = { {0x3086, 0x00}, - {0x3501, 0x48}, - {0x3502, 0xe0}, {0x370a, 0x21}, {0x3801, 0xa0}, {0x3802, 0x00}, @@ -594,8 +580,6 @@ static struct ov2680_reg const ov2680_720p_30fps[] = { */ static struct ov2680_reg const ov2680_1296x976_30fps[] = { {0x3086, 0x00}, - {0x3501, 0x48}, - {0x3502, 0xe0}, {0x370a, 0x21}, {0x3801, 0xa0}, {0x3802, 0x00}, @@ -637,8 +621,6 @@ static struct ov2680_reg const ov2680_1296x976_30fps[] = { */ static struct ov2680_reg const ov2680_1456x1096_30fps[] = { {0x3086, 0x00}, - {0x3501, 0x48}, - {0x3502, 0xe0}, {0x370a, 0x21}, {0x3801, 0x90}, {0x3802, 0x00}, @@ -682,8 +664,6 @@ static struct ov2680_reg const ov2680_1456x1096_30fps[] = { static struct ov2680_reg const ov2680_1616x916_30fps[] = { {0x3086, 0x00}, - {0x3501, 0x48}, - {0x3502, 0xe0}, {0x370a, 0x21}, {0x3801, 0x00}, {0x3802, 0x00}, @@ -726,8 +706,6 @@ static struct ov2680_reg const ov2680_1616x916_30fps[] = { #if 0 static struct ov2680_reg const ov2680_1616x1082_30fps[] = { {0x3086, 0x00}, - {0x3501, 0x48}, - {0x3502, 0xe0}, {0x370a, 0x21}, {0x3801, 0x00}, {0x3802, 0x00}, @@ -769,8 +747,6 @@ static struct ov2680_reg const ov2680_1616x1082_30fps[] = { */ static struct ov2680_reg const ov2680_1616x1216_30fps[] = { {0x3086, 0x00}, - {0x3501, 0x48}, - {0x3502, 0xe0}, {0x370a, 0x21}, {0x3801, 0x00}, {0x3802, 0x00}, diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c index 592ea990d4ca4014b72a7666d5be077e7b888280..90d50a693ce57243784d3c9f1354473d3978945e 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c +++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c @@ -1138,9 +1138,10 @@ void atomisp_buf_done(struct atomisp_sub_device *asd, int error, asd->frame_status[vb->i] = ATOMISP_FRAME_STATUS_OK; } - } else + } else { asd->frame_status[vb->i] = ATOMISP_FRAME_STATUS_OK; + } } else { asd->frame_status[vb->i] = ATOMISP_FRAME_STATUS_OK; } @@ -1714,6 +1715,12 @@ void atomisp_wdt_refresh_pipe(struct atomisp_video_pipe *pipe, { unsigned long next; + if (!pipe->asd) { + dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, pipe->vdev.name); + return; + } + if (delay != ATOMISP_WDT_KEEP_CURRENT_DELAY) pipe->wdt_duration = delay; @@ -1776,6 +1783,12 @@ void atomisp_wdt_refresh(struct atomisp_sub_device *asd, unsigned int delay) /* ISP2401 */ void atomisp_wdt_stop_pipe(struct atomisp_video_pipe *pipe, bool sync) { + if (!pipe->asd) { + dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, pipe->vdev.name); + return; + } + if (!atomisp_is_wdt_running(pipe)) return; @@ -4108,6 +4121,12 @@ void atomisp_handle_parameter_and_buffer(struct atomisp_video_pipe *pipe) unsigned long irqflags; bool need_to_enqueue_buffer = false; + if (!asd) { + dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, pipe->vdev.name); + return; + } + if (atomisp_is_vf_pipe(pipe)) return; @@ -4195,6 +4214,12 @@ int atomisp_set_parameters(struct video_device *vdev, struct atomisp_css_params *css_param = &asd->params.css_param; int ret; + if (!asd) { + dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + if (!asd->stream_env[ATOMISP_INPUT_STREAM_GENERAL].stream) { dev_err(asd->isp->dev, "%s: internal error!\n", __func__); return -EINVAL; @@ -4855,6 +4880,12 @@ int atomisp_try_fmt(struct video_device *vdev, struct v4l2_format *f, int source_pad = atomisp_subdev_source_pad(vdev); int ret; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + if (!isp->inputs[asd->input_curr].camera) return -EINVAL; @@ -4945,9 +4976,9 @@ atomisp_try_fmt_file(struct atomisp_device *isp, struct v4l2_format *f) depth = get_pixel_depth(pixelformat); - if (field == V4L2_FIELD_ANY) + if (field == V4L2_FIELD_ANY) { field = V4L2_FIELD_NONE; - else if (field != V4L2_FIELD_NONE) { + } else if (field != V4L2_FIELD_NONE) { dev_err(isp->dev, "Wrong output field\n"); return -EINVAL; } @@ -5201,6 +5232,12 @@ static int atomisp_set_fmt_to_isp(struct video_device *vdev, const struct atomisp_in_fmt_conv *fc; int ret, i; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + v4l2_fh_init(&fh.vfh, vdev); isp_sink_crop = atomisp_subdev_get_rect( @@ -5512,6 +5549,7 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, unsigned int dvs_env_w, unsigned int dvs_env_h) { struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd; + struct atomisp_video_pipe *pipe = atomisp_to_video_pipe(vdev); const struct atomisp_format_bridge *format; struct v4l2_subdev_pad_config pad_cfg; struct v4l2_subdev_format vformat = { @@ -5527,6 +5565,12 @@ static int atomisp_set_fmt_to_snr(struct video_device *vdev, struct v4l2_subdev_fh fh; int ret; + if (!asd) { + dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + v4l2_fh_init(&fh.vfh, vdev); stream_index = atomisp_source_pad_to_stream_id(asd, source_pad); @@ -5617,6 +5661,12 @@ int atomisp_set_fmt(struct video_device *vdev, struct v4l2_format *f) struct v4l2_subdev_fh fh; int ret; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + if (source_pad >= ATOMISP_SUBDEV_PADS_NUM) return -EINVAL; @@ -6050,6 +6100,12 @@ int atomisp_set_fmt_file(struct video_device *vdev, struct v4l2_format *f) struct v4l2_subdev_fh fh; int ret; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + v4l2_fh_init(&fh.vfh, vdev); dev_dbg(isp->dev, "setting fmt %ux%u 0x%x for file inject\n", @@ -6374,6 +6430,12 @@ bool atomisp_is_vf_pipe(struct atomisp_video_pipe *pipe) { struct atomisp_sub_device *asd = pipe->asd; + if (!asd) { + dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, pipe->vdev.name); + return false; + } + if (pipe == &asd->video_out_vf) return true; @@ -6587,17 +6649,23 @@ static int atomisp_get_pipe_id(struct atomisp_video_pipe *pipe) { struct atomisp_sub_device *asd = pipe->asd; - if (ATOMISP_USE_YUVPP(asd)) + if (!asd) { + dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, pipe->vdev.name); + return -EINVAL; + } + + if (ATOMISP_USE_YUVPP(asd)) { return IA_CSS_PIPE_ID_YUVPP; - else if (asd->vfpp->val == ATOMISP_VFPP_DISABLE_SCALER) + } else if (asd->vfpp->val == ATOMISP_VFPP_DISABLE_SCALER) { return IA_CSS_PIPE_ID_VIDEO; - else if (asd->vfpp->val == ATOMISP_VFPP_DISABLE_LOWLAT) + } else if (asd->vfpp->val == ATOMISP_VFPP_DISABLE_LOWLAT) { return IA_CSS_PIPE_ID_CAPTURE; - else if (pipe == &asd->video_out_video_capture) + } else if (pipe == &asd->video_out_video_capture) { return IA_CSS_PIPE_ID_VIDEO; - else if (pipe == &asd->video_out_vf) + } else if (pipe == &asd->video_out_vf) { return IA_CSS_PIPE_ID_CAPTURE; - else if (pipe == &asd->video_out_preview) { + } else if (pipe == &asd->video_out_preview) { if (asd->run_mode->val == ATOMISP_RUN_MODE_VIDEO) return IA_CSS_PIPE_ID_VIDEO; else @@ -6624,6 +6692,12 @@ int atomisp_get_invalid_frame_num(struct video_device *vdev, struct ia_css_pipe_info p_info; int ret; + if (!asd) { + dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + if (asd->isp->inputs[asd->input_curr].camera_caps-> sensor[asd->sensor_curr].stream_num > 1) { /* External ISP */ diff --git a/drivers/staging/media/atomisp/pci/atomisp_fops.c b/drivers/staging/media/atomisp/pci/atomisp_fops.c index f1e6b25978534143b450da34c817f5c6bb866bac..b751df31cc24cd4181f0750ccbe97155779c5ef7 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_fops.c +++ b/drivers/staging/media/atomisp/pci/atomisp_fops.c @@ -877,6 +877,11 @@ done: else pipe->users++; rt_mutex_unlock(&isp->mutex); + + /* Ensure that a mode is set */ + if (asd) + v4l2_ctrl_s_ctrl(asd->run_mode, pipe->default_run_mode); + return 0; css_error: @@ -1171,6 +1176,12 @@ static int atomisp_mmap(struct file *file, struct vm_area_struct *vma) u32 origin_size, new_size; int ret; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + if (!(vma->vm_flags & (VM_WRITE | VM_READ))) return -EACCES; diff --git a/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c b/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c index 135994d44802c70b3b6415cf38cd787261888f5e..34480ca164746fe58d1f141ba1dbc7ff4fbcd223 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c +++ b/drivers/staging/media/atomisp/pci/atomisp_gmin_platform.c @@ -481,7 +481,7 @@ fail: static u8 gmin_get_pmic_id_and_addr(struct device *dev) { - struct i2c_client *power; + struct i2c_client *power = NULL; static u8 pmic_i2c_addr; if (pmic_id) diff --git a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c index 9da82855552dec0c65b80c1aeb35e12940b5c4a9..8a0648fd7c8135bbb371dc7f1c3b236548ce3e03 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_ioctl.c +++ b/drivers/staging/media/atomisp/pci/atomisp_ioctl.c @@ -646,6 +646,12 @@ static int atomisp_g_input(struct file *file, void *fh, unsigned int *input) struct atomisp_device *isp = video_get_drvdata(vdev); struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + rt_mutex_lock(&isp->mutex); *input = asd->input_curr; rt_mutex_unlock(&isp->mutex); @@ -665,6 +671,12 @@ static int atomisp_s_input(struct file *file, void *fh, unsigned int input) struct v4l2_subdev *motor; int ret; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + rt_mutex_lock(&isp->mutex); if (input >= ATOM_ISP_MAX_INPUTS || input >= isp->input_cnt) { dev_dbg(isp->dev, "input_cnt: %d\n", isp->input_cnt); @@ -761,18 +773,33 @@ static int atomisp_enum_fmt_cap(struct file *file, void *fh, struct video_device *vdev = video_devdata(file); struct atomisp_device *isp = video_get_drvdata(vdev); struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd; - struct v4l2_subdev_mbus_code_enum code = { 0 }; + struct v4l2_subdev_mbus_code_enum code = { + .which = V4L2_SUBDEV_FORMAT_ACTIVE, + }; + struct v4l2_subdev *camera; unsigned int i, fi = 0; int rval; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + + camera = isp->inputs[asd->input_curr].camera; + if(!camera) { + dev_err(isp->dev, "%s(): camera is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + rt_mutex_lock(&isp->mutex); - rval = v4l2_subdev_call(isp->inputs[asd->input_curr].camera, pad, - enum_mbus_code, NULL, &code); + + rval = v4l2_subdev_call(camera, pad, enum_mbus_code, NULL, &code); if (rval == -ENOIOCTLCMD) { dev_warn(isp->dev, - "enum_mbus_code pad op not supported. Please fix your sensor driver!\n"); - // rval = v4l2_subdev_call(isp->inputs[asd->input_curr].camera, - // video, enum_mbus_fmt, 0, &code.code); + "enum_mbus_code pad op not supported by %s. Please fix your sensor driver!\n", + camera->name); } rt_mutex_unlock(&isp->mutex); @@ -802,6 +829,8 @@ static int atomisp_enum_fmt_cap(struct file *file, void *fh, f->pixelformat = format->pixelformat; return 0; } + dev_err(isp->dev, "%s(): format for code %x not found.\n", + __func__, code.code); return -EINVAL; } @@ -834,6 +863,72 @@ static int atomisp_g_fmt_file(struct file *file, void *fh, return 0; } +static int atomisp_adjust_fmt(struct v4l2_format *f) +{ + const struct atomisp_format_bridge *format_bridge; + u32 padded_width; + + format_bridge = atomisp_get_format_bridge(f->fmt.pix.pixelformat); + + padded_width = f->fmt.pix.width + pad_w; + + if (format_bridge->planar) { + f->fmt.pix.bytesperline = padded_width; + f->fmt.pix.sizeimage = PAGE_ALIGN(f->fmt.pix.height * + DIV_ROUND_UP(format_bridge->depth * + padded_width, 8)); + } else { + f->fmt.pix.bytesperline = DIV_ROUND_UP(format_bridge->depth * + padded_width, 8); + f->fmt.pix.sizeimage = PAGE_ALIGN(f->fmt.pix.height * f->fmt.pix.bytesperline); + } + + if (f->fmt.pix.field == V4L2_FIELD_ANY) + f->fmt.pix.field = V4L2_FIELD_NONE; + + format_bridge = atomisp_get_format_bridge(f->fmt.pix.pixelformat); + if (!format_bridge) + return -EINVAL; + + /* Currently, raw formats are broken!!! */ + if (format_bridge->sh_fmt == IA_CSS_FRAME_FORMAT_RAW) { + f->fmt.pix.pixelformat = V4L2_PIX_FMT_YUV420; + + format_bridge = atomisp_get_format_bridge(f->fmt.pix.pixelformat); + if (!format_bridge) + return -EINVAL; + } + + padded_width = f->fmt.pix.width + pad_w; + + if (format_bridge->planar) { + f->fmt.pix.bytesperline = padded_width; + f->fmt.pix.sizeimage = PAGE_ALIGN(f->fmt.pix.height * + DIV_ROUND_UP(format_bridge->depth * + padded_width, 8)); + } else { + f->fmt.pix.bytesperline = DIV_ROUND_UP(format_bridge->depth * + padded_width, 8); + f->fmt.pix.sizeimage = PAGE_ALIGN(f->fmt.pix.height * f->fmt.pix.bytesperline); + } + + if (f->fmt.pix.field == V4L2_FIELD_ANY) + f->fmt.pix.field = V4L2_FIELD_NONE; + + /* + * FIXME: do we need to setup this differently, depending on the + * sensor or the pipeline? + */ + f->fmt.pix.colorspace = V4L2_COLORSPACE_REC709; + f->fmt.pix.ycbcr_enc = V4L2_YCBCR_ENC_709; + f->fmt.pix.xfer_func = V4L2_XFER_FUNC_709; + + f->fmt.pix.width -= pad_w; + f->fmt.pix.height -= pad_h; + + return 0; +} + /* This function looks up the closest available resolution. */ static int atomisp_try_fmt_cap(struct file *file, void *fh, struct v4l2_format *f) @@ -845,7 +940,11 @@ static int atomisp_try_fmt_cap(struct file *file, void *fh, rt_mutex_lock(&isp->mutex); ret = atomisp_try_fmt(vdev, f, NULL); rt_mutex_unlock(&isp->mutex); - return ret; + + if (ret) + return ret; + + return atomisp_adjust_fmt(f); } static int atomisp_s_fmt_cap(struct file *file, void *fh, @@ -1027,6 +1126,12 @@ int __atomisp_reqbufs(struct file *file, void *fh, u16 stream_id = atomisp_source_pad_to_stream_id(asd, source_pad); int ret = 0, i = 0; + if (!asd) { + dev_err(pipe->isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + if (req->count == 0) { mutex_lock(&pipe->capq.vb_lock); if (!list_empty(&pipe->capq.stream)) @@ -1154,6 +1259,12 @@ static int atomisp_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) u32 pgnr; int ret = 0; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + rt_mutex_lock(&isp->mutex); if (isp->isp_fatal_error) { ret = -EIO; @@ -1389,6 +1500,12 @@ static int atomisp_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) struct atomisp_device *isp = video_get_drvdata(vdev); int ret = 0; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + rt_mutex_lock(&isp->mutex); if (isp->isp_fatal_error) { @@ -1640,6 +1757,12 @@ static int atomisp_streamon(struct file *file, void *fh, int ret = 0; unsigned long irqflags; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + dev_dbg(isp->dev, "Start stream on pad %d for asd%d\n", atomisp_subdev_source_pad(vdev), asd->index); @@ -1901,6 +2024,12 @@ int __atomisp_streamoff(struct file *file, void *fh, enum v4l2_buf_type type) unsigned long flags; bool first_streamoff = false; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + dev_dbg(isp->dev, "Stop stream on pad %d for asd%d\n", atomisp_subdev_source_pad(vdev), asd->index); @@ -2150,6 +2279,12 @@ static int atomisp_g_ctrl(struct file *file, void *fh, struct atomisp_device *isp = video_get_drvdata(vdev); int i, ret = -EINVAL; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + for (i = 0; i < ctrls_num; i++) { if (ci_v4l2_controls[i].id == control->id) { ret = 0; @@ -2229,6 +2364,12 @@ static int atomisp_s_ctrl(struct file *file, void *fh, struct atomisp_device *isp = video_get_drvdata(vdev); int i, ret = -EINVAL; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + for (i = 0; i < ctrls_num; i++) { if (ci_v4l2_controls[i].id == control->id) { ret = 0; @@ -2310,6 +2451,12 @@ static int atomisp_queryctl(struct file *file, void *fh, struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd; struct atomisp_device *isp = video_get_drvdata(vdev); + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + switch (qc->id) { case V4L2_CID_FOCUS_ABSOLUTE: case V4L2_CID_FOCUS_RELATIVE: @@ -2355,6 +2502,12 @@ static int atomisp_camera_g_ext_ctrls(struct file *file, void *fh, int i; int ret = 0; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + if (!IS_ISP2401) motor = isp->inputs[asd->input_curr].motor; else @@ -2466,6 +2619,12 @@ static int atomisp_camera_s_ext_ctrls(struct file *file, void *fh, int i; int ret = 0; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + if (!IS_ISP2401) motor = isp->inputs[asd->input_curr].motor; else @@ -2591,6 +2750,12 @@ static int atomisp_g_parm(struct file *file, void *fh, struct atomisp_sub_device *asd = atomisp_to_video_pipe(vdev)->asd; struct atomisp_device *isp = video_get_drvdata(vdev); + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) { dev_err(isp->dev, "unsupported v4l2 buf type\n"); return -EINVAL; @@ -2613,6 +2778,12 @@ static int atomisp_s_parm(struct file *file, void *fh, int rval; int fps; + if (!asd) { + dev_err(isp->dev, "%s(): asd is NULL, device is %s\n", + __func__, vdev->name); + return -EINVAL; + } + if (parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) { dev_err(isp->dev, "unsupported v4l2 buf type\n"); return -EINVAL; diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp_subdev.c index dcc2dd981ca6077c5fcbafaa41d0e8944c8711f1..628e85799274d6cf50c5787e92c00a543cbe384a 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c +++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c @@ -1178,23 +1178,28 @@ static int isp_subdev_init_entities(struct atomisp_sub_device *asd) atomisp_init_acc_pipe(asd, &asd->video_acc); - ret = atomisp_video_init(&asd->video_in, "MEMORY"); + ret = atomisp_video_init(&asd->video_in, "MEMORY", + ATOMISP_RUN_MODE_SDV); if (ret < 0) return ret; - ret = atomisp_video_init(&asd->video_out_capture, "CAPTURE"); + ret = atomisp_video_init(&asd->video_out_capture, "CAPTURE", + ATOMISP_RUN_MODE_STILL_CAPTURE); if (ret < 0) return ret; - ret = atomisp_video_init(&asd->video_out_vf, "VIEWFINDER"); + ret = atomisp_video_init(&asd->video_out_vf, "VIEWFINDER", + ATOMISP_RUN_MODE_CONTINUOUS_CAPTURE); if (ret < 0) return ret; - ret = atomisp_video_init(&asd->video_out_preview, "PREVIEW"); + ret = atomisp_video_init(&asd->video_out_preview, "PREVIEW", + ATOMISP_RUN_MODE_PREVIEW); if (ret < 0) return ret; - ret = atomisp_video_init(&asd->video_out_video_capture, "VIDEO"); + ret = atomisp_video_init(&asd->video_out_video_capture, "VIDEO", + ATOMISP_RUN_MODE_VIDEO); if (ret < 0) return ret; diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.h b/drivers/staging/media/atomisp/pci/atomisp_subdev.h index 330a77eed8aa65501491c2bd63e6720a4ac2b66a..12215d7406169066e3d3b232dd4752f4216142f1 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_subdev.h +++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.h @@ -81,6 +81,9 @@ struct atomisp_video_pipe { /* the link list to store per_frame parameters */ struct list_head per_frame_params; + /* Store here the initial run mode */ + unsigned int default_run_mode; + unsigned int buffers_in_css; /* irq_lock is used to protect video buffer state change operations and diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c index fa1bd99cd6f17758ac3626378aecc6ac9a54f4f2..8aeea74cfd06bb0244c45ae9c726a79bb35024d5 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.c +++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.c @@ -447,7 +447,8 @@ const struct atomisp_dfs_config dfs_config_cht_soc = { .dfs_table_size = ARRAY_SIZE(dfs_rules_cht_soc), }; -int atomisp_video_init(struct atomisp_video_pipe *video, const char *name) +int atomisp_video_init(struct atomisp_video_pipe *video, const char *name, + unsigned int run_mode) { int ret; const char *direction; @@ -478,6 +479,7 @@ int atomisp_video_init(struct atomisp_video_pipe *video, const char *name) "ATOMISP ISP %s %s", name, direction); video->vdev.release = video_device_release_empty; video_set_drvdata(&video->vdev, video->isp); + video->default_run_mode = run_mode; return 0; } @@ -711,15 +713,15 @@ static int atomisp_mrfld_power(struct atomisp_device *isp, bool enable) dev_dbg(isp->dev, "IUNIT power-%s.\n", enable ? "on" : "off"); - /*WA:Enable DVFS*/ + /* WA for P-Unit, if DVFS enabled, ISP timeout observed */ if (IS_CHT && enable) - punit_ddr_dvfs_enable(true); + punit_ddr_dvfs_enable(false); /* * FIXME:WA for ECS28A, with this sleep, CTS * android.hardware.camera2.cts.CameraDeviceTest#testCameraDeviceAbort * PASS, no impact on other platforms - */ + */ if (IS_BYT && enable) msleep(10); @@ -727,7 +729,7 @@ static int atomisp_mrfld_power(struct atomisp_device *isp, bool enable) iosf_mbi_modify(BT_MBI_UNIT_PMC, MBI_REG_READ, MRFLD_ISPSSPM0, val, MRFLD_ISPSSPM0_ISPSSC_MASK); - /*WA:Enable DVFS*/ + /* WA:Enable DVFS */ if (IS_CHT && !enable) punit_ddr_dvfs_enable(true); @@ -1182,6 +1184,7 @@ static void atomisp_unregister_entities(struct atomisp_device *isp) v4l2_device_unregister(&isp->v4l2_dev); media_device_unregister(&isp->media_dev); + media_device_cleanup(&isp->media_dev); } static int atomisp_register_entities(struct atomisp_device *isp) diff --git a/drivers/staging/media/atomisp/pci/atomisp_v4l2.h b/drivers/staging/media/atomisp/pci/atomisp_v4l2.h index 81bb356b81720a359c8996acfce0f42a89b1abc1..72611b8286a4aec66728b175447bb7673be57cb3 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_v4l2.h +++ b/drivers/staging/media/atomisp/pci/atomisp_v4l2.h @@ -27,7 +27,8 @@ struct v4l2_device; struct atomisp_device; struct firmware; -int atomisp_video_init(struct atomisp_video_pipe *video, const char *name); +int atomisp_video_init(struct atomisp_video_pipe *video, const char *name, + unsigned int run_mode); void atomisp_acc_init(struct atomisp_acc_pipe *video, const char *name); void atomisp_video_unregister(struct atomisp_video_pipe *video); void atomisp_acc_unregister(struct atomisp_acc_pipe *video); diff --git a/drivers/staging/media/atomisp/pci/sh_css.c b/drivers/staging/media/atomisp/pci/sh_css.c index ddee04c8248d0433552b0c41cdce24b1cb84393c..54a18921fbd15d8dd0a6ae41c0d09f2cf81c0e4a 100644 --- a/drivers/staging/media/atomisp/pci/sh_css.c +++ b/drivers/staging/media/atomisp/pci/sh_css.c @@ -527,6 +527,7 @@ ia_css_stream_input_format_bits_per_pixel(struct ia_css_stream *stream) return bpp; } +/* TODO: move define to proper file in tools */ #define GP_ISEL_TPG_MODE 0x90058 #if !defined(ISP2401) @@ -579,12 +580,8 @@ sh_css_config_input_network(struct ia_css_stream *stream) { vblank_cycles = vblank_lines * (width + hblank_cycles); sh_css_sp_configure_sync_gen(width, height, hblank_cycles, vblank_cycles); - if (!IS_ISP2401) { - if (pipe->stream->config.mode == IA_CSS_INPUT_MODE_TPG) { - /* TODO: move define to proper file in tools */ - ia_css_device_store_uint32(GP_ISEL_TPG_MODE, 0); - } - } + if (pipe->stream->config.mode == IA_CSS_INPUT_MODE_TPG) + ia_css_device_store_uint32(GP_ISEL_TPG_MODE, 0); } ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE, "sh_css_config_input_network() leave:\n"); @@ -1019,16 +1016,14 @@ static bool sh_css_translate_stream_cfg_to_isys_stream_descr( * ia_css_isys_stream_capture_indication() instead of * ia_css_pipeline_sp_wait_for_isys_stream_N() as isp processing of * capture takes longer than getting an ISYS frame - * - * Only 2401 relevant ?? */ -#if 0 // FIXME: NOT USED on Yocto Aero - isys_stream_descr->polling_mode - = early_polling ? INPUT_SYSTEM_POLL_ON_CAPTURE_REQUEST - : INPUT_SYSTEM_POLL_ON_WAIT_FOR_FRAME; - ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE, - "sh_css_translate_stream_cfg_to_isys_stream_descr() leave:\n"); -#endif + if (IS_ISP2401) { + isys_stream_descr->polling_mode + = early_polling ? INPUT_SYSTEM_POLL_ON_CAPTURE_REQUEST + : INPUT_SYSTEM_POLL_ON_WAIT_FOR_FRAME; + ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE, + "sh_css_translate_stream_cfg_to_isys_stream_descr() leave:\n"); + } return rc; } @@ -1451,7 +1446,7 @@ static void start_pipe( assert(me); /* all callers are in this file and call with non null argument */ - if (!IS_ISP2401) { + if (IS_ISP2401) { coord = &me->config.internal_frame_origin_bqs_on_sctbl; params = me->stream->isp_params_configs; } diff --git a/drivers/staging/media/atomisp/pci/sh_css_mipi.c b/drivers/staging/media/atomisp/pci/sh_css_mipi.c index d5ae7f0b5864bb7acde86e6b8adb62c660960f5f..651eda0469b23eed8d090752f819d7c7657e19dc 100644 --- a/drivers/staging/media/atomisp/pci/sh_css_mipi.c +++ b/drivers/staging/media/atomisp/pci/sh_css_mipi.c @@ -389,17 +389,17 @@ static bool buffers_needed(struct ia_css_pipe *pipe) { if (!IS_ISP2401) { if (pipe->stream->config.mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR) - return false; - else return true; + else + return false; } if (pipe->stream->config.mode == IA_CSS_INPUT_MODE_BUFFERED_SENSOR || pipe->stream->config.mode == IA_CSS_INPUT_MODE_TPG || pipe->stream->config.mode == IA_CSS_INPUT_MODE_PRBS) - return false; + return true; - return true; + return false; } int @@ -439,14 +439,17 @@ allocate_mipi_frames(struct ia_css_pipe *pipe, return 0; /* AM TODO: Check */ } - if (!IS_ISP2401) + if (!IS_ISP2401) { port = (unsigned int)pipe->stream->config.source.port.port; - else - err = ia_css_mipi_is_source_port_valid(pipe, &port); + } else { + /* Returns true if port is valid. So, invert it */ + err = !ia_css_mipi_is_source_port_valid(pipe, &port); + } assert(port < N_CSI_PORTS); - if (port >= N_CSI_PORTS || err) { + if ((!IS_ISP2401 && port >= N_CSI_PORTS) || + (IS_ISP2401 && err)) { ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE, "allocate_mipi_frames(%p) exit: error: port is not correct (port=%d).\n", pipe, port); @@ -571,14 +574,17 @@ free_mipi_frames(struct ia_css_pipe *pipe) { return err; } - if (!IS_ISP2401) + if (!IS_ISP2401) { port = (unsigned int)pipe->stream->config.source.port.port; - else - err = ia_css_mipi_is_source_port_valid(pipe, &port); + } else { + /* Returns true if port is valid. So, invert it */ + err = !ia_css_mipi_is_source_port_valid(pipe, &port); + } assert(port < N_CSI_PORTS); - if (port >= N_CSI_PORTS || err) { + if ((!IS_ISP2401 && port >= N_CSI_PORTS) || + (IS_ISP2401 && err)) { ia_css_debug_dtrace(IA_CSS_DEBUG_TRACE_PRIVATE, "free_mipi_frames(%p, %d) exit: error: pipe port is not correct.\n", pipe, port); @@ -683,14 +689,17 @@ send_mipi_frames(struct ia_css_pipe *pipe) { /* TODO: AM: maybe this should be returning an error. */ } - if (!IS_ISP2401) + if (!IS_ISP2401) { port = (unsigned int)pipe->stream->config.source.port.port; - else - err = ia_css_mipi_is_source_port_valid(pipe, &port); + } else { + /* Returns true if port is valid. So, invert it */ + err = !ia_css_mipi_is_source_port_valid(pipe, &port); + } assert(port < N_CSI_PORTS); - if (port >= N_CSI_PORTS || err) { + if ((!IS_ISP2401 && port >= N_CSI_PORTS) || + (IS_ISP2401 && err)) { IA_CSS_ERROR("send_mipi_frames(%p) exit: invalid port specified (port=%d).\n", pipe, port); return err; diff --git a/drivers/staging/media/atomisp/pci/sh_css_params.c b/drivers/staging/media/atomisp/pci/sh_css_params.c index 24fc497bd4915eb2e138f839dcb4d25bfa348c02..8d6514c45eeb63ea8e5cfea0e9dd0234a9462a09 100644 --- a/drivers/staging/media/atomisp/pci/sh_css_params.c +++ b/drivers/staging/media/atomisp/pci/sh_css_params.c @@ -2437,7 +2437,7 @@ sh_css_create_isp_params(struct ia_css_stream *stream, unsigned int i; struct sh_css_ddr_address_map *ddr_ptrs; struct sh_css_ddr_address_map_size *ddr_ptrs_size; - int err = 0; + int err; size_t params_size; struct ia_css_isp_parameters *params = kvmalloc(sizeof(struct ia_css_isp_parameters), GFP_KERNEL); @@ -2482,7 +2482,11 @@ sh_css_create_isp_params(struct ia_css_stream *stream, succ &= (ddr_ptrs->macc_tbl != mmgr_NULL); *isp_params_out = params; - return err; + + if (!succ) + return -ENOMEM; + + return 0; } static bool diff --git a/drivers/staging/media/hantro/hantro_drv.c b/drivers/staging/media/hantro/hantro_drv.c index 7749ca9a8ebbfe7a432bdf4a9b4976b0eebf4a80..bc97ec0a7e4af193a22e4fd5b0855552e79b8666 100644 --- a/drivers/staging/media/hantro/hantro_drv.c +++ b/drivers/staging/media/hantro/hantro_drv.c @@ -829,7 +829,7 @@ static int hantro_probe(struct platform_device *pdev) ret = clk_bulk_prepare(vpu->variant->num_clocks, vpu->clocks); if (ret) { dev_err(&pdev->dev, "Failed to prepare clocks\n"); - return ret; + goto err_pm_disable; } ret = v4l2_device_register(&pdev->dev, &vpu->v4l2_dev); @@ -885,6 +885,7 @@ err_v4l2_unreg: v4l2_device_unregister(&vpu->v4l2_dev); err_clk_unprepare: clk_bulk_unprepare(vpu->variant->num_clocks, vpu->clocks); +err_pm_disable: pm_runtime_dont_use_autosuspend(vpu->dev); pm_runtime_disable(vpu->dev); return ret; diff --git a/drivers/staging/media/imx/imx-media-dev-common.c b/drivers/staging/media/imx/imx-media-dev-common.c index 5fe4b22ab847324efaa37177824b5a9689f823f0..7e0d769566bdd67fb1e7b2271ae99f9bc6389030 100644 --- a/drivers/staging/media/imx/imx-media-dev-common.c +++ b/drivers/staging/media/imx/imx-media-dev-common.c @@ -363,6 +363,8 @@ struct imx_media_dev *imx_media_dev_init(struct device *dev, imxmd->v4l2_dev.notify = imx_media_notify; strscpy(imxmd->v4l2_dev.name, "imx-media", sizeof(imxmd->v4l2_dev.name)); + snprintf(imxmd->md.bus_info, sizeof(imxmd->md.bus_info), + "platform:%s", dev_name(imxmd->md.dev)); media_device_init(&imxmd->md); diff --git a/drivers/staging/media/ipu3/ipu3-css-fw.c b/drivers/staging/media/ipu3/ipu3-css-fw.c index 45aff76198e2c5f0c4b1a46807b92105293ff538..981693eed81551f0246eeb80caf14739ddc631e4 100644 --- a/drivers/staging/media/ipu3/ipu3-css-fw.c +++ b/drivers/staging/media/ipu3/ipu3-css-fw.c @@ -124,12 +124,11 @@ int imgu_css_fw_init(struct imgu_css *css) /* Check and display fw header info */ css->fwp = (struct imgu_fw_header *)css->fw->data; - if (css->fw->size < sizeof(struct imgu_fw_header *) || + if (css->fw->size < struct_size(css->fwp, binary_header, 1) || css->fwp->file_header.h_size != sizeof(struct imgu_fw_bi_file_h)) goto bad_fw; - if (sizeof(struct imgu_fw_bi_file_h) + - css->fwp->file_header.binary_nr * sizeof(struct imgu_fw_info) > - css->fw->size) + if (struct_size(css->fwp, binary_header, + css->fwp->file_header.binary_nr) > css->fw->size) goto bad_fw; dev_info(dev, "loaded firmware version %.64s, %u binaries, %zu bytes\n", diff --git a/drivers/staging/media/ipu3/ipu3-css-fw.h b/drivers/staging/media/ipu3/ipu3-css-fw.h index 79ffa704513904969ca8af7051dbb7576b695d5c..650fd25fc79ee5f648ea436464ee7a3462576920 100644 --- a/drivers/staging/media/ipu3/ipu3-css-fw.h +++ b/drivers/staging/media/ipu3/ipu3-css-fw.h @@ -170,7 +170,7 @@ struct imgu_fw_bi_file_h { struct imgu_fw_header { struct imgu_fw_bi_file_h file_header; - struct imgu_fw_info binary_header[1]; /* binary_nr items */ + struct imgu_fw_info binary_header[]; /* binary_nr items */ }; /******************* Firmware functions *******************/ diff --git a/drivers/staging/media/ipu3/ipu3-v4l2.c b/drivers/staging/media/ipu3/ipu3-v4l2.c index e0179616a29cf6e3f26deb0674bd71a6bbca0084..103f84466f6fc9ee1ddeaaa1b1c56a76da196837 100644 --- a/drivers/staging/media/ipu3/ipu3-v4l2.c +++ b/drivers/staging/media/ipu3/ipu3-v4l2.c @@ -592,11 +592,12 @@ static const struct imgu_fmt *find_format(struct v4l2_format *f, u32 type) static int imgu_vidioc_querycap(struct file *file, void *fh, struct v4l2_capability *cap) { - struct imgu_video_device *node = file_to_intel_imgu_node(file); + struct imgu_device *imgu = video_drvdata(file); strscpy(cap->driver, IMGU_NAME, sizeof(cap->driver)); strscpy(cap->card, IMGU_NAME, sizeof(cap->card)); - snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s", node->name); + snprintf(cap->bus_info, sizeof(cap->bus_info), "PCI:%s", + pci_name(imgu->pci_dev)); return 0; } @@ -696,7 +697,7 @@ static int imgu_fmt(struct imgu_device *imgu, unsigned int pipe, int node, /* CSS expects some format on OUT queue */ if (i != IPU3_CSS_QUEUE_OUT && - !imgu_pipe->nodes[inode].enabled) { + !imgu_pipe->nodes[inode].enabled && !try) { fmts[i] = NULL; continue; } diff --git a/drivers/staging/media/rkvdec/rkvdec-h264.c b/drivers/staging/media/rkvdec/rkvdec-h264.c index 7cc3b478a5f4940419b888d9e9c82bad22f9e1ea..5487f6d0bcb63542fb6b6decb0677bea852e4758 100644 --- a/drivers/staging/media/rkvdec/rkvdec-h264.c +++ b/drivers/staging/media/rkvdec/rkvdec-h264.c @@ -1015,8 +1015,9 @@ static int rkvdec_h264_adjust_fmt(struct rkvdec_ctx *ctx, struct v4l2_pix_format_mplane *fmt = &f->fmt.pix_mp; fmt->num_planes = 1; - fmt->plane_fmt[0].sizeimage = fmt->width * fmt->height * - RKVDEC_H264_MAX_DEPTH_IN_BYTES; + if (!fmt->plane_fmt[0].sizeimage) + fmt->plane_fmt[0].sizeimage = fmt->width * fmt->height * + RKVDEC_H264_MAX_DEPTH_IN_BYTES; return 0; } diff --git a/drivers/staging/media/rkvdec/rkvdec.c b/drivers/staging/media/rkvdec/rkvdec.c index e68303e2b3907c87a95af982c923827afafcd4b7..a7788e7a9542a341880deb5196be7e652d952ab7 100644 --- a/drivers/staging/media/rkvdec/rkvdec.c +++ b/drivers/staging/media/rkvdec/rkvdec.c @@ -270,31 +270,20 @@ static int rkvdec_try_output_fmt(struct file *file, void *priv, return 0; } -static int rkvdec_s_fmt(struct file *file, void *priv, - struct v4l2_format *f, - int (*try_fmt)(struct file *, void *, - struct v4l2_format *)) +static int rkvdec_s_capture_fmt(struct file *file, void *priv, + struct v4l2_format *f) { struct rkvdec_ctx *ctx = fh_to_rkvdec_ctx(priv); struct vb2_queue *vq; + int ret; - if (!try_fmt) - return -EINVAL; - - vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, f->type); + /* Change not allowed if queue is busy */ + vq = v4l2_m2m_get_vq(ctx->fh.m2m_ctx, + V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE); if (vb2_is_busy(vq)) return -EBUSY; - return try_fmt(file, priv, f); -} - -static int rkvdec_s_capture_fmt(struct file *file, void *priv, - struct v4l2_format *f) -{ - struct rkvdec_ctx *ctx = fh_to_rkvdec_ctx(priv); - int ret; - - ret = rkvdec_s_fmt(file, priv, f, rkvdec_try_capture_fmt); + ret = rkvdec_try_capture_fmt(file, priv, f); if (ret) return ret; @@ -309,9 +298,20 @@ static int rkvdec_s_output_fmt(struct file *file, void *priv, struct v4l2_m2m_ctx *m2m_ctx = ctx->fh.m2m_ctx; const struct rkvdec_coded_fmt_desc *desc; struct v4l2_format *cap_fmt; - struct vb2_queue *peer_vq; + struct vb2_queue *peer_vq, *vq; int ret; + /* + * In order to support dynamic resolution change, the decoder admits + * a resolution change, as long as the pixelformat remains. Can't be + * done if streaming. + */ + vq = v4l2_m2m_get_vq(m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE); + if (vb2_is_streaming(vq) || + (vb2_is_busy(vq) && + f->fmt.pix_mp.pixelformat != ctx->coded_fmt.fmt.pix_mp.pixelformat)) + return -EBUSY; + /* * Since format change on the OUTPUT queue will reset the CAPTURE * queue, we can't allow doing so when the CAPTURE queue has buffers @@ -321,7 +321,7 @@ static int rkvdec_s_output_fmt(struct file *file, void *priv, if (vb2_is_busy(peer_vq)) return -EBUSY; - ret = rkvdec_s_fmt(file, priv, f, rkvdec_try_output_fmt); + ret = rkvdec_try_output_fmt(file, priv, f); if (ret) return ret; diff --git a/drivers/staging/most/dim2/Makefile b/drivers/staging/most/dim2/Makefile index 861adacf6c7293728902164b29eedc9e9d93d803..5f9612af3fa3c45bfe207a40597284cb22bba520 100644 --- a/drivers/staging/most/dim2/Makefile +++ b/drivers/staging/most/dim2/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MOST_DIM2) += most_dim2.o -most_dim2-objs := dim2.o hal.o sysfs.o +most_dim2-objs := dim2.o hal.o diff --git a/drivers/staging/most/dim2/dim2.c b/drivers/staging/most/dim2/dim2.c index b34e3c130f53f2b11baa3740e2e96dcc0eb80db9..2fd6886f7728c8cea3059e232d4dd9a52ba683e2 100644 --- a/drivers/staging/most/dim2/dim2.c +++ b/drivers/staging/most/dim2/dim2.c @@ -115,7 +115,8 @@ struct dim2_platform_data { (((p)[1] == 0x18) && ((p)[2] == 0x05) && ((p)[3] == 0x0C) && \ ((p)[13] == 0x3C) && ((p)[14] == 0x00) && ((p)[15] == 0x0A)) -bool dim2_sysfs_get_state_cb(void) +static ssize_t state_show(struct device *dev, struct device_attribute *attr, + char *buf) { bool state; unsigned long flags; @@ -124,9 +125,18 @@ bool dim2_sysfs_get_state_cb(void) state = dim_get_lock_state(); spin_unlock_irqrestore(&dim_lock, flags); - return state; + return sysfs_emit(buf, "%s\n", state ? "locked" : ""); } +static DEVICE_ATTR_RO(state); + +static struct attribute *dim2_attrs[] = { + &dev_attr_state.attr, + NULL, +}; + +ATTRIBUTE_GROUPS(dim2); + /** * dimcb_on_error - callback from HAL to report miscommunication between * HDM and HAL @@ -713,6 +723,23 @@ static int get_dim2_clk_speed(const char *clock_speed, u8 *val) return -EINVAL; } +static void dim2_release(struct device *d) +{ + struct dim2_hdm *dev = container_of(d, struct dim2_hdm, dev); + unsigned long flags; + + kthread_stop(dev->netinfo_task); + + spin_lock_irqsave(&dim_lock, flags); + dim_shutdown(); + spin_unlock_irqrestore(&dim_lock, flags); + + if (dev->disable_platform) + dev->disable_platform(to_platform_device(d->parent)); + + kfree(dev); +} + /* * dim2_probe - dim2 probe handler * @pdev: platform device structure @@ -733,7 +760,7 @@ static int dim2_probe(struct platform_device *pdev) enum { MLB_INT_IDX, AHB0_INT_IDX }; - dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; @@ -745,25 +772,27 @@ static int dim2_probe(struct platform_device *pdev) "microchip,clock-speed", &clock_speed); if (ret) { dev_err(&pdev->dev, "missing dt property clock-speed\n"); - return ret; + goto err_free_dev; } ret = get_dim2_clk_speed(clock_speed, &dev->clk_speed); if (ret) { dev_err(&pdev->dev, "bad dt property clock-speed\n"); - return ret; + goto err_free_dev; } res = platform_get_resource(pdev, IORESOURCE_MEM, 0); dev->io_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dev->io_base)) - return PTR_ERR(dev->io_base); + if (IS_ERR(dev->io_base)) { + ret = PTR_ERR(dev->io_base); + goto err_free_dev; + } of_id = of_match_node(dim2_of_match, pdev->dev.of_node); pdata = of_id->data; ret = pdata && pdata->enable ? pdata->enable(pdev) : 0; if (ret) - return ret; + goto err_free_dev; dev->disable_platform = pdata ? pdata->disable : NULL; @@ -854,32 +883,19 @@ static int dim2_probe(struct platform_device *pdev) dev->most_iface.request_netinfo = request_netinfo; dev->most_iface.driver_dev = &pdev->dev; dev->most_iface.dev = &dev->dev; - dev->dev.init_name = "dim2_state"; + dev->dev.init_name = dev->name; dev->dev.parent = &pdev->dev; + dev->dev.release = dim2_release; - ret = most_register_interface(&dev->most_iface); - if (ret) { - dev_err(&pdev->dev, "failed to register MOST interface\n"); - goto err_stop_thread; - } - - ret = dim2_sysfs_probe(&dev->dev); - if (ret) { - dev_err(&pdev->dev, "failed to create sysfs attribute\n"); - goto err_unreg_iface; - } - - return 0; + return most_register_interface(&dev->most_iface); -err_unreg_iface: - most_deregister_interface(&dev->most_iface); -err_stop_thread: - kthread_stop(dev->netinfo_task); err_shutdown_dim: dim_shutdown(); err_disable_platform: if (dev->disable_platform) dev->disable_platform(pdev); +err_free_dev: + kfree(dev); return ret; } @@ -893,18 +909,8 @@ err_disable_platform: static int dim2_remove(struct platform_device *pdev) { struct dim2_hdm *dev = platform_get_drvdata(pdev); - unsigned long flags; - dim2_sysfs_destroy(&dev->dev); most_deregister_interface(&dev->most_iface); - kthread_stop(dev->netinfo_task); - - spin_lock_irqsave(&dim_lock, flags); - dim_shutdown(); - spin_unlock_irqrestore(&dim_lock, flags); - - if (dev->disable_platform) - dev->disable_platform(pdev); return 0; } @@ -1079,6 +1085,7 @@ static struct platform_driver dim2_driver = { .driver = { .name = "hdm_dim2", .of_match_table = dim2_of_match, + .dev_groups = dim2_groups, }, }; diff --git a/drivers/staging/most/dim2/sysfs.c b/drivers/staging/most/dim2/sysfs.c deleted file mode 100644 index c85b2cdcdca3d0749771ff4a7e0c1eee96b2c850..0000000000000000000000000000000000000000 --- a/drivers/staging/most/dim2/sysfs.c +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * sysfs.c - MediaLB sysfs information - * - * Copyright (C) 2015, Microchip Technology Germany II GmbH & Co. KG - */ - -/* Author: Andrey Shvetsov */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include "sysfs.h" -#include - -static ssize_t state_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - bool state = dim2_sysfs_get_state_cb(); - - return sprintf(buf, "%s\n", state ? "locked" : ""); -} - -static DEVICE_ATTR_RO(state); - -static struct attribute *dev_attrs[] = { - &dev_attr_state.attr, - NULL, -}; - -static struct attribute_group dev_attr_group = { - .attrs = dev_attrs, -}; - -static const struct attribute_group *dev_attr_groups[] = { - &dev_attr_group, - NULL, -}; - -int dim2_sysfs_probe(struct device *dev) -{ - dev->groups = dev_attr_groups; - return device_register(dev); -} - -void dim2_sysfs_destroy(struct device *dev) -{ - device_unregister(dev); -} diff --git a/drivers/staging/most/dim2/sysfs.h b/drivers/staging/most/dim2/sysfs.h index 24277a17cff3dd01c19168ea6914a4bb615d9675..09115cf4ed00e2faa877f52e830461bfdb70cfd1 100644 --- a/drivers/staging/most/dim2/sysfs.h +++ b/drivers/staging/most/dim2/sysfs.h @@ -16,15 +16,4 @@ struct medialb_bus { struct kobject kobj_group; }; -struct device; - -int dim2_sysfs_probe(struct device *dev); -void dim2_sysfs_destroy(struct device *dev); - -/* - * callback, - * must deliver MediaLB state as true if locked or false if unlocked - */ -bool dim2_sysfs_get_state_cb(void); - #endif /* DIM2_SYSFS_H */ diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c index 663675efcfe4c0d2b2529bbd886a66da9de19682..99c27d6b423332a652c861dd4a26d7dd6af75f15 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c @@ -2551,13 +2551,14 @@ static void _rtl92e_pci_disconnect(struct pci_dev *pdev) free_irq(dev->irq, dev); priv->irq = 0; } - free_rtllib(dev); if (dev->mem_start != 0) { iounmap((void __iomem *)dev->mem_start); release_mem_region(pci_resource_start(pdev, 1), pci_resource_len(pdev, 1)); } + + free_rtllib(dev); } else { priv = rtllib_priv(dev); } diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h index 4cabaf21c1ca051b28296b27f1aff8322d0955cc..367db4acc78523ae8b198346a9ff626cee0ead5c 100644 --- a/drivers/staging/rtl8192e/rtllib.h +++ b/drivers/staging/rtl8192e/rtllib.h @@ -1982,7 +1982,7 @@ void rtllib_softmac_xmit(struct rtllib_txb *txb, struct rtllib_device *ieee); void rtllib_stop_send_beacons(struct rtllib_device *ieee); void notify_wx_assoc_event(struct rtllib_device *ieee); void rtllib_start_ibss(struct rtllib_device *ieee); -void rtllib_softmac_init(struct rtllib_device *ieee); +int rtllib_softmac_init(struct rtllib_device *ieee); void rtllib_softmac_free(struct rtllib_device *ieee); void rtllib_disassociate(struct rtllib_device *ieee); void rtllib_stop_scan(struct rtllib_device *ieee); diff --git a/drivers/staging/rtl8192e/rtllib_module.c b/drivers/staging/rtl8192e/rtllib_module.c index 64d9feee1f392d33fb575336289570e565caa300..f00ac94b2639b4ca467c693cfe5be87771c5813b 100644 --- a/drivers/staging/rtl8192e/rtllib_module.c +++ b/drivers/staging/rtl8192e/rtllib_module.c @@ -88,7 +88,7 @@ struct net_device *alloc_rtllib(int sizeof_priv) err = rtllib_networks_allocate(ieee); if (err) { pr_err("Unable to allocate beacon storage: %d\n", err); - goto failed; + goto free_netdev; } rtllib_networks_initialize(ieee); @@ -121,11 +121,13 @@ struct net_device *alloc_rtllib(int sizeof_priv) ieee->hwsec_active = 0; memset(ieee->swcamtable, 0, sizeof(struct sw_cam_table) * 32); - rtllib_softmac_init(ieee); + err = rtllib_softmac_init(ieee); + if (err) + goto free_crypt_info; ieee->pHTInfo = kzalloc(sizeof(struct rt_hi_throughput), GFP_KERNEL); if (!ieee->pHTInfo) - return NULL; + goto free_softmac; HTUpdateDefaultSetting(ieee); HTInitializeHTInfo(ieee); @@ -141,8 +143,14 @@ struct net_device *alloc_rtllib(int sizeof_priv) return dev; - failed: +free_softmac: + rtllib_softmac_free(ieee); +free_crypt_info: + lib80211_crypt_info_free(&ieee->crypt_info); + rtllib_networks_free(ieee); +free_netdev: free_netdev(dev); + return NULL; } EXPORT_SYMBOL(alloc_rtllib); diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c index 2c752ba5a802a275fe36e1be366eb9974ac86a18..e8e72f79ca00790671ef024b59ab359f1db6620b 100644 --- a/drivers/staging/rtl8192e/rtllib_softmac.c +++ b/drivers/staging/rtl8192e/rtllib_softmac.c @@ -2953,7 +2953,7 @@ void rtllib_start_protocol(struct rtllib_device *ieee) } } -void rtllib_softmac_init(struct rtllib_device *ieee) +int rtllib_softmac_init(struct rtllib_device *ieee) { int i; @@ -2964,7 +2964,8 @@ void rtllib_softmac_init(struct rtllib_device *ieee) ieee->seq_ctrl[i] = 0; ieee->dot11d_info = kzalloc(sizeof(struct rt_dot11d_info), GFP_ATOMIC); if (!ieee->dot11d_info) - netdev_err(ieee->dev, "Can't alloc memory for DOT11D\n"); + return -ENOMEM; + ieee->LinkDetectInfo.SlotIndex = 0; ieee->LinkDetectInfo.SlotNum = 2; ieee->LinkDetectInfo.NumRecvBcnInPeriod = 0; @@ -3030,6 +3031,7 @@ void rtllib_softmac_init(struct rtllib_device *ieee) tasklet_setup(&ieee->ps_task, rtllib_sta_ps); + return 0; } void rtllib_softmac_free(struct rtllib_device *ieee) diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c index 4523e825a61a8a3f98bfc1171f6aac7c3680287a..7f90af8a7c7c9bcd2afca758019ea6c3f2d034a9 100644 --- a/drivers/staging/rtl8192u/r8192U_core.c +++ b/drivers/staging/rtl8192u/r8192U_core.c @@ -229,7 +229,7 @@ int write_nic_byte_E(struct net_device *dev, int indx, u8 data) status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE, - indx | 0xfe00, 0, usbdata, 1, HZ / 2); + indx | 0xfe00, 0, usbdata, 1, 500); kfree(usbdata); if (status < 0) { @@ -251,7 +251,7 @@ int read_nic_byte_E(struct net_device *dev, int indx, u8 *data) status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8187_REQ_GET_REGS, RTL8187_REQT_READ, - indx | 0xfe00, 0, usbdata, 1, HZ / 2); + indx | 0xfe00, 0, usbdata, 1, 500); *data = *usbdata; kfree(usbdata); @@ -279,7 +279,7 @@ int write_nic_byte(struct net_device *dev, int indx, u8 data) status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 1, HZ / 2); + usbdata, 1, 500); kfree(usbdata); if (status < 0) { @@ -305,7 +305,7 @@ int write_nic_word(struct net_device *dev, int indx, u16 data) status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 2, HZ / 2); + usbdata, 2, 500); kfree(usbdata); if (status < 0) { @@ -331,7 +331,7 @@ int write_nic_dword(struct net_device *dev, int indx, u32 data) status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), RTL8187_REQ_SET_REGS, RTL8187_REQT_WRITE, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 4, HZ / 2); + usbdata, 4, 500); kfree(usbdata); if (status < 0) { @@ -355,7 +355,7 @@ int read_nic_byte(struct net_device *dev, int indx, u8 *data) status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8187_REQ_GET_REGS, RTL8187_REQT_READ, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 1, HZ / 2); + usbdata, 1, 500); *data = *usbdata; kfree(usbdata); @@ -380,7 +380,7 @@ int read_nic_word(struct net_device *dev, int indx, u16 *data) status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8187_REQ_GET_REGS, RTL8187_REQT_READ, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 2, HZ / 2); + usbdata, 2, 500); *data = *usbdata; kfree(usbdata); @@ -404,7 +404,7 @@ static int read_nic_word_E(struct net_device *dev, int indx, u16 *data) status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8187_REQ_GET_REGS, RTL8187_REQT_READ, - indx | 0xfe00, 0, usbdata, 2, HZ / 2); + indx | 0xfe00, 0, usbdata, 2, 500); *data = *usbdata; kfree(usbdata); @@ -430,7 +430,7 @@ int read_nic_dword(struct net_device *dev, int indx, u32 *data) status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), RTL8187_REQ_GET_REGS, RTL8187_REQT_READ, (indx & 0xff) | 0xff00, (indx >> 8) & 0x0f, - usbdata, 4, HZ / 2); + usbdata, 4, 500); *data = *usbdata; kfree(usbdata); diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index 17d28af0d0867c9abeb3e85316914bd890d9239e..fed96d4251bfaec4fca78994534c8eb6011b58e4 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -598,12 +598,12 @@ static void r871xu_dev_remove(struct usb_interface *pusb_intf) /* never exit with a firmware callback pending */ wait_for_completion(&padapter->rtl8712_fw_ready); + if (pnetdev->reg_state != NETREG_UNINITIALIZED) + unregister_netdev(pnetdev); /* will call netdev_close() */ usb_set_intfdata(pusb_intf, NULL); release_firmware(padapter->fw); if (drvpriv.drv_registered) padapter->surprise_removed = true; - if (pnetdev->reg_state != NETREG_UNINITIALIZED) - unregister_netdev(pnetdev); /* will call netdev_close() */ r8712_flush_rwctrl_works(padapter); r8712_flush_led_works(padapter); udelay(1); diff --git a/drivers/staging/rtl8712/usb_ops_linux.c b/drivers/staging/rtl8712/usb_ops_linux.c index 655497cead12231ebe81636e0bd6baab5b297a41..f984a5ab2c6ff713cc40696ef9e6a8fd5d02509b 100644 --- a/drivers/staging/rtl8712/usb_ops_linux.c +++ b/drivers/staging/rtl8712/usb_ops_linux.c @@ -494,7 +494,7 @@ int r8712_usbctrl_vendorreq(struct intf_priv *pintfpriv, u8 request, u16 value, memcpy(pIo_buf, pdata, len); } status = usb_control_msg(udev, pipe, request, reqtype, value, index, - pIo_buf, len, HZ / 2); + pIo_buf, len, 500); if (status > 0) { /* Success this control transfer. */ if (requesttype == 0x01) { /* For Control read transfer, we have to copy the read diff --git a/drivers/staging/wfx/bus_sdio.c b/drivers/staging/wfx/bus_sdio.c index e06d7e1ebe9c318dac085d03627c7cc9a410b1d8..61b8cc05f293584407c4e9b5304d750f4240b5b3 100644 --- a/drivers/staging/wfx/bus_sdio.c +++ b/drivers/staging/wfx/bus_sdio.c @@ -120,19 +120,22 @@ static int wfx_sdio_irq_subscribe(void *priv) return ret; } + flags = irq_get_trigger_type(bus->of_irq); + if (!flags) + flags = IRQF_TRIGGER_HIGH; + flags |= IRQF_ONESHOT; + ret = devm_request_threaded_irq(&bus->func->dev, bus->of_irq, NULL, + wfx_sdio_irq_handler_ext, flags, + "wfx", bus); + if (ret) + return ret; sdio_claim_host(bus->func); cccr = sdio_f0_readb(bus->func, SDIO_CCCR_IENx, NULL); cccr |= BIT(0); cccr |= BIT(bus->func->num); sdio_f0_writeb(bus->func, cccr, SDIO_CCCR_IENx, NULL); sdio_release_host(bus->func); - flags = irq_get_trigger_type(bus->of_irq); - if (!flags) - flags = IRQF_TRIGGER_HIGH; - flags |= IRQF_ONESHOT; - return devm_request_threaded_irq(&bus->func->dev, bus->of_irq, NULL, - wfx_sdio_irq_handler_ext, flags, - "wfx", bus); + return 0; } static int wfx_sdio_irq_unsubscribe(void *priv) diff --git a/drivers/staging/wlan-ng/hfa384x_usb.c b/drivers/staging/wlan-ng/hfa384x_usb.c index f2a0e16b0318c2ae7dc4275e07147b993208def2..fac3f34d4a1f595d8be8b8100f664861a02526a1 100644 --- a/drivers/staging/wlan-ng/hfa384x_usb.c +++ b/drivers/staging/wlan-ng/hfa384x_usb.c @@ -3779,18 +3779,18 @@ static void hfa384x_usb_throttlefn(struct timer_list *t) spin_lock_irqsave(&hw->ctlxq.lock, flags); - /* - * We need to check BOTH the RX and the TX throttle controls, - * so we use the bitwise OR instead of the logical OR. - */ pr_debug("flags=0x%lx\n", hw->usb_flags); - if (!hw->wlandev->hwremoved && - ((test_and_clear_bit(THROTTLE_RX, &hw->usb_flags) && - !test_and_set_bit(WORK_RX_RESUME, &hw->usb_flags)) | - (test_and_clear_bit(THROTTLE_TX, &hw->usb_flags) && - !test_and_set_bit(WORK_TX_RESUME, &hw->usb_flags)) - )) { - schedule_work(&hw->usb_work); + if (!hw->wlandev->hwremoved) { + bool rx_throttle = test_and_clear_bit(THROTTLE_RX, &hw->usb_flags) && + !test_and_set_bit(WORK_RX_RESUME, &hw->usb_flags); + bool tx_throttle = test_and_clear_bit(THROTTLE_TX, &hw->usb_flags) && + !test_and_set_bit(WORK_TX_RESUME, &hw->usb_flags); + /* + * We need to check BOTH the RX and the TX throttle controls, + * so we use the bitwise OR instead of the logical OR. + */ + if (rx_throttle | tx_throttle) + schedule_work(&hw->usb_work); } spin_unlock_irqrestore(&hw->ctlxq.lock, flags); diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 8075f60fd02c337a5fcb0efc8ad74a066737cfb3..2d5cf1714ae05de28eb17ba66db9b5be12338e98 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -443,6 +443,9 @@ static bool iscsit_tpg_check_network_portal( break; } spin_unlock(&tpg->tpg_np_lock); + + if (match) + break; } spin_unlock(&tiqn->tiqn_tpg_lock); diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 6b72afee2f8b7e14b53310e7d9206e56362b0cc0..b240bd1ccb71d8bdabeaa7f3b591b21aa926ae24 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -1702,7 +1702,6 @@ int core_alua_set_tg_pt_gp_id( pr_err("Maximum ALUA alua_tg_pt_gps_count:" " 0x0000ffff reached\n"); spin_unlock(&dev->t10_alua.tg_pt_gps_lock); - kmem_cache_free(t10_alua_tg_pt_gp_cache, tg_pt_gp); return -ENOSPC; } again: diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 56ae882fb7b39c930f71ce689165e0f0f2bf5fb5..1411f0cb40c51c4cb150726b18d13902883ccc45 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3652,6 +3652,7 @@ static void __exit target_core_exit_configfs(void) MODULE_DESCRIPTION("Target_Core_Mod/ConfigFS"); MODULE_AUTHOR("nab@Linux-iSCSI.org"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); module_init(target_core_init_configfs); module_exit(target_core_exit_configfs); diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 405d82d4471768beff16669bb1b3865958adeabb..109f019d214809a8398ebf94b46de0e38c10985b 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -758,6 +758,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&dev->t10_alua.lba_map_list); spin_lock_init(&dev->t10_alua.lba_map_lock); + INIT_WORK(&dev->delayed_cmd_work, target_do_delayed_work); + dev->t10_wwn.t10_dev = dev; dev->t10_alua.t10_dev = dev; diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 7143d03f0e027e916e179cd2b44301609816fb6e..cfa1bbef32e2ec2023748d6f663b6de8e14df7f8 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -956,6 +956,7 @@ static void __exit fileio_module_exit(void) MODULE_DESCRIPTION("TCM FILEIO subsystem plugin"); MODULE_AUTHOR("nab@Linux-iSCSI.org"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); module_init(fileio_module_init); module_exit(fileio_module_exit); diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index f2bd2e207e0b362e0d1667abde17db07f2896489..b09878674df5e3e1cbe1a5d87ee636f78d23ac2d 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -314,10 +314,8 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num, int op, * Only allocate as many vector entries as the bio code allows us to, * we'll loop later on until we have handled the whole request. */ - if (sg_num > BIO_MAX_PAGES) - sg_num = BIO_MAX_PAGES; - - bio = bio_alloc_bioset(GFP_NOIO, sg_num, &ib_dev->ibd_bio_set); + bio = bio_alloc_bioset(GFP_NOIO, bio_max_segs(sg_num), + &ib_dev->ibd_bio_set); if (!bio) { pr_err("Unable to allocate memory for bio\n"); return NULL; @@ -637,8 +635,7 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio, return -ENODEV; } - bip = bio_integrity_alloc(bio, GFP_NOIO, - min_t(unsigned int, cmd->t_prot_nents, BIO_MAX_PAGES)); + bip = bio_integrity_alloc(bio, GFP_NOIO, bio_max_segs(cmd->t_prot_nents)); if (IS_ERR(bip)) { pr_err("Unable to allocate bio_integrity_payload\n"); return PTR_ERR(bip); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index e7b3c6e5d5744ca75cff1d17ac000f2e2125213d..e4f072a680d41266ec77f5f7d2fa14b542975257 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -150,6 +150,7 @@ int transport_dump_vpd_ident(struct t10_vpd *, unsigned char *, int); void transport_clear_lun_ref(struct se_lun *); sense_reason_t target_cmd_size_check(struct se_cmd *cmd, unsigned int size); void target_qf_do_work(struct work_struct *work); +void target_do_delayed_work(struct work_struct *work); bool target_check_wce(struct se_device *dev); bool target_check_fua(struct se_device *dev); void __target_execute_cmd(struct se_cmd *, bool); diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index f10f0aa6cd3744f1ca4ccf19ab5e42f2f646efe4..34d932e649c7cbda677e2c5765a2b518d356b57c 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -882,7 +882,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, if (!bio) { new_bio: - nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages); + nr_vecs = bio_max_segs(nr_pages); nr_pages -= nr_vecs; /* * Calls bio_kmalloc() and sets bio->bi_end_io() diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 61b79804d462c589d6c40b461015dd5eb61dd7ef..bca3a32a4bfb70674dd2083da659c51054b3b23e 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2065,32 +2065,35 @@ static bool target_handle_task_attr(struct se_cmd *cmd) */ switch (cmd->sam_task_attr) { case TCM_HEAD_TAG: + atomic_inc_mb(&dev->non_ordered); pr_debug("Added HEAD_OF_QUEUE for CDB: 0x%02x\n", cmd->t_task_cdb[0]); return false; case TCM_ORDERED_TAG: - atomic_inc_mb(&dev->dev_ordered_sync); + atomic_inc_mb(&dev->delayed_cmd_count); pr_debug("Added ORDERED for CDB: 0x%02x to ordered list\n", cmd->t_task_cdb[0]); - - /* - * Execute an ORDERED command if no other older commands - * exist that need to be completed first. - */ - if (!atomic_read(&dev->simple_cmds)) - return false; break; default: /* * For SIMPLE and UNTAGGED Task Attribute commands */ - atomic_inc_mb(&dev->simple_cmds); + atomic_inc_mb(&dev->non_ordered); + + if (atomic_read(&dev->delayed_cmd_count) == 0) + return false; break; } - if (atomic_read(&dev->dev_ordered_sync) == 0) - return false; + if (cmd->sam_task_attr != TCM_ORDERED_TAG) { + atomic_inc_mb(&dev->delayed_cmd_count); + /* + * We will account for this when we dequeue from the delayed + * list. + */ + atomic_dec_mb(&dev->non_ordered); + } spin_lock(&dev->delayed_cmd_lock); list_add_tail(&cmd->se_delayed_node, &dev->delayed_cmd_list); @@ -2098,6 +2101,12 @@ static bool target_handle_task_attr(struct se_cmd *cmd) pr_debug("Added CDB: 0x%02x Task Attr: 0x%02x to delayed CMD listn", cmd->t_task_cdb[0], cmd->sam_task_attr); + /* + * We may have no non ordered cmds when this function started or we + * could have raced with the last simple/head cmd completing, so kick + * the delayed handler here. + */ + schedule_work(&dev->delayed_cmd_work); return true; } @@ -2135,29 +2144,48 @@ EXPORT_SYMBOL(target_execute_cmd); * Process all commands up to the last received ORDERED task attribute which * requires another blocking boundary */ -static void target_restart_delayed_cmds(struct se_device *dev) +void target_do_delayed_work(struct work_struct *work) { - for (;;) { + struct se_device *dev = container_of(work, struct se_device, + delayed_cmd_work); + + spin_lock(&dev->delayed_cmd_lock); + while (!dev->ordered_sync_in_progress) { struct se_cmd *cmd; - spin_lock(&dev->delayed_cmd_lock); - if (list_empty(&dev->delayed_cmd_list)) { - spin_unlock(&dev->delayed_cmd_lock); + if (list_empty(&dev->delayed_cmd_list)) break; - } cmd = list_entry(dev->delayed_cmd_list.next, struct se_cmd, se_delayed_node); + + if (cmd->sam_task_attr == TCM_ORDERED_TAG) { + /* + * Check if we started with: + * [ordered] [simple] [ordered] + * and we are now at the last ordered so we have to wait + * for the simple cmd. + */ + if (atomic_read(&dev->non_ordered) > 0) + break; + + dev->ordered_sync_in_progress = true; + } + list_del(&cmd->se_delayed_node); + atomic_dec_mb(&dev->delayed_cmd_count); spin_unlock(&dev->delayed_cmd_lock); + if (cmd->sam_task_attr != TCM_ORDERED_TAG) + atomic_inc_mb(&dev->non_ordered); + cmd->transport_state |= CMD_T_SENT; __target_execute_cmd(cmd, true); - if (cmd->sam_task_attr == TCM_ORDERED_TAG) - break; + spin_lock(&dev->delayed_cmd_lock); } + spin_unlock(&dev->delayed_cmd_lock); } /* @@ -2175,14 +2203,17 @@ static void transport_complete_task_attr(struct se_cmd *cmd) goto restart; if (cmd->sam_task_attr == TCM_SIMPLE_TAG) { - atomic_dec_mb(&dev->simple_cmds); + atomic_dec_mb(&dev->non_ordered); dev->dev_cur_ordered_id++; } else if (cmd->sam_task_attr == TCM_HEAD_TAG) { + atomic_dec_mb(&dev->non_ordered); dev->dev_cur_ordered_id++; pr_debug("Incremented dev_cur_ordered_id: %u for HEAD_OF_QUEUE\n", dev->dev_cur_ordered_id); } else if (cmd->sam_task_attr == TCM_ORDERED_TAG) { - atomic_dec_mb(&dev->dev_ordered_sync); + spin_lock(&dev->delayed_cmd_lock); + dev->ordered_sync_in_progress = false; + spin_unlock(&dev->delayed_cmd_lock); dev->dev_cur_ordered_id++; pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n", @@ -2191,7 +2222,8 @@ static void transport_complete_task_attr(struct se_cmd *cmd) cmd->se_cmd_flags &= ~SCF_TASK_ATTR_SET; restart: - target_restart_delayed_cmds(dev); + if (atomic_read(&dev->delayed_cmd_count) > 0) + schedule_work(&dev->delayed_cmd_work); } static void transport_complete_qf(struct se_cmd *cmd) diff --git a/drivers/tee/amdtee/core.c b/drivers/tee/amdtee/core.c index da6b88e80dc07e0afd1fc1d7eb9235c27a53a60b..297dc62bca2986f014c4b4060c95d112a807fe7d 100644 --- a/drivers/tee/amdtee/core.c +++ b/drivers/tee/amdtee/core.c @@ -203,9 +203,8 @@ static int copy_ta_binary(struct tee_context *ctx, void *ptr, void **ta, *ta_size = roundup(fw->size, PAGE_SIZE); *ta = (void *)__get_free_pages(GFP_KERNEL, get_order(*ta_size)); - if (IS_ERR(*ta)) { - pr_err("%s: get_free_pages failed 0x%llx\n", __func__, - (u64)*ta); + if (!*ta) { + pr_err("%s: get_free_pages failed\n", __func__); rc = -ENOMEM; goto rel_fw; } diff --git a/drivers/tee/optee/core.c b/drivers/tee/optee/core.c index 823a81d8ff0ed49187296d4e408793155d6ac451..6ea80add7378fce0de69adecad0b611d38ce0247 100644 --- a/drivers/tee/optee/core.c +++ b/drivers/tee/optee/core.c @@ -585,6 +585,10 @@ static int optee_remove(struct platform_device *pdev) { struct optee *optee = platform_get_drvdata(pdev); + /* Unregister OP-TEE specific client devices on TEE bus */ + optee_unregister_devices(); + + teedev_close_context(optee->ctx); /* * Ask OP-TEE to free all cached shared memory objects to decrease * reference counters and also avoid wild pointers in secure world @@ -630,6 +634,7 @@ static int optee_probe(struct platform_device *pdev) struct optee *optee = NULL; void *memremaped_shm = NULL; struct tee_device *teedev; + struct tee_context *ctx; u32 sec_caps; int rc; @@ -716,6 +721,12 @@ static int optee_probe(struct platform_device *pdev) optee_supp_init(&optee->supp); optee->memremaped_shm = memremaped_shm; optee->pool = pool; + ctx = teedev_open(optee->teedev); + if (IS_ERR(ctx)) { + rc = PTR_ERR(ctx); + goto err; + } + optee->ctx = ctx; /* * Ensure that there are no pre-existing shm objects before enabling diff --git a/drivers/tee/optee/device.c b/drivers/tee/optee/device.c index 7a897d51969fc8c54298b31af4fe068dc7c300e0..031806468af48c1635ea5f316fcf01e335a20ed5 100644 --- a/drivers/tee/optee/device.c +++ b/drivers/tee/optee/device.c @@ -53,6 +53,13 @@ static int get_devices(struct tee_context *ctx, u32 session, return 0; } +static void optee_release_device(struct device *dev) +{ + struct tee_client_device *optee_device = to_tee_client_device(dev); + + kfree(optee_device); +} + static int optee_register_device(const uuid_t *device_uuid) { struct tee_client_device *optee_device = NULL; @@ -63,6 +70,7 @@ static int optee_register_device(const uuid_t *device_uuid) return -ENOMEM; optee_device->dev.bus = &tee_bus_type; + optee_device->dev.release = optee_release_device; if (dev_set_name(&optee_device->dev, "optee-ta-%pUb", device_uuid)) { kfree(optee_device); return -ENOMEM; @@ -154,3 +162,17 @@ int optee_enumerate_devices(u32 func) { return __optee_enumerate_devices(func); } + +static int __optee_unregister_device(struct device *dev, void *data) +{ + if (!strncmp(dev_name(dev), "optee-ta", strlen("optee-ta"))) + device_unregister(dev); + + return 0; +} + +void optee_unregister_devices(void) +{ + bus_for_each_dev(&tee_bus_type, NULL, NULL, + __optee_unregister_device); +} diff --git a/drivers/tee/optee/optee_private.h b/drivers/tee/optee/optee_private.h index dbdd367be1568330aa5481f9924932196282d5d4..ea09533e30cdef5b214043e801241618084cc38f 100644 --- a/drivers/tee/optee/optee_private.h +++ b/drivers/tee/optee/optee_private.h @@ -70,6 +70,7 @@ struct optee_supp { * struct optee - main service struct * @supp_teedev: supplicant device * @teedev: client device + * @ctx: driver internal TEE context * @invoke_fn: function to issue smc or hvc * @call_queue: queue of threads waiting to call @invoke_fn * @wait_queue: queue of threads from secure world waiting for a @@ -87,6 +88,7 @@ struct optee { struct tee_device *supp_teedev; struct tee_device *teedev; optee_invoke_fn *invoke_fn; + struct tee_context *ctx; struct optee_call_queue call_queue; struct optee_wait_queue wait_queue; struct optee_supp supp; @@ -184,6 +186,7 @@ void optee_fill_pages_list(u64 *dst, struct page **pages, int num_pages, #define PTA_CMD_GET_DEVICES 0x0 #define PTA_CMD_GET_DEVICES_SUPP 0x1 int optee_enumerate_devices(u32 func); +void optee_unregister_devices(void); /* * Small helpers diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index 9dbdd783d6f2d3b15c4bff071bc153f147ebcd58..f1e0332b0f6e8609d024f535fffb19252b8a2777 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -284,6 +284,7 @@ static struct tee_shm *cmd_alloc_suppl(struct tee_context *ctx, size_t sz) } static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, + struct optee *optee, struct optee_msg_arg *arg, struct optee_call_ctx *call_ctx) { @@ -313,7 +314,8 @@ static void handle_rpc_func_cmd_shm_alloc(struct tee_context *ctx, shm = cmd_alloc_suppl(ctx, sz); break; case OPTEE_MSG_RPC_SHM_TYPE_KERNEL: - shm = tee_shm_alloc(ctx, sz, TEE_SHM_MAPPED | TEE_SHM_PRIV); + shm = tee_shm_alloc(optee->ctx, sz, + TEE_SHM_MAPPED | TEE_SHM_PRIV); break; default: arg->ret = TEEC_ERROR_BAD_PARAMETERS; @@ -470,7 +472,7 @@ static void handle_rpc_func_cmd(struct tee_context *ctx, struct optee *optee, break; case OPTEE_MSG_RPC_CMD_SHM_ALLOC: free_pages_list(call_ctx); - handle_rpc_func_cmd_shm_alloc(ctx, arg, call_ctx); + handle_rpc_func_cmd_shm_alloc(ctx, optee, arg, call_ctx); break; case OPTEE_MSG_RPC_CMD_SHM_FREE: handle_rpc_func_cmd_shm_free(ctx, arg); @@ -501,7 +503,7 @@ void optee_handle_rpc(struct tee_context *ctx, struct optee_rpc_param *param, switch (OPTEE_SMC_RETURN_GET_RPC_FUNC(param->a0)) { case OPTEE_SMC_RPC_FUNC_ALLOC: - shm = tee_shm_alloc(ctx, param->a1, + shm = tee_shm_alloc(optee->ctx, param->a1, TEE_SHM_MAPPED | TEE_SHM_PRIV); if (!IS_ERR(shm) && !tee_shm_get_pa(shm, 0, &pa)) { reg_pair_from_64(¶m->a1, ¶m->a2, pa); diff --git a/drivers/tee/optee/shm_pool.c b/drivers/tee/optee/shm_pool.c index c41a9a501a6e9d8fcf5efce43259aca6466368b6..fa75024f16f7f1876c0d588794b6c59f698e10ac 100644 --- a/drivers/tee/optee/shm_pool.c +++ b/drivers/tee/optee/shm_pool.c @@ -41,10 +41,8 @@ static int pool_op_alloc(struct tee_shm_pool_mgr *poolm, goto err; } - for (i = 0; i < nr_pages; i++) { - pages[i] = page; - page++; - } + for (i = 0; i < nr_pages; i++) + pages[i] = page + i; shm->flags |= TEE_SHM_REGISTER; rc = optee_shm_register(shm->ctx, shm, pages, nr_pages, diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index 6ade4a5c4840789829fbd28b7825aad3742d9dc5..e07f997cf8dd3b5132be4d20a21245e092301376 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -43,7 +43,7 @@ static DEFINE_SPINLOCK(driver_lock); static struct class *tee_class; static dev_t tee_devt; -static struct tee_context *teedev_open(struct tee_device *teedev) +struct tee_context *teedev_open(struct tee_device *teedev) { int rc; struct tee_context *ctx; @@ -70,6 +70,7 @@ err: return ERR_PTR(rc); } +EXPORT_SYMBOL_GPL(teedev_open); void teedev_ctx_get(struct tee_context *ctx) { @@ -96,11 +97,14 @@ void teedev_ctx_put(struct tee_context *ctx) kref_put(&ctx->refcount, teedev_ctx_release); } -static void teedev_close_context(struct tee_context *ctx) +void teedev_close_context(struct tee_context *ctx) { - tee_device_put(ctx->teedev); + struct tee_device *teedev = ctx->teedev; + teedev_ctx_put(ctx); + tee_device_put(teedev); } +EXPORT_SYMBOL_GPL(teedev_close_context); static int tee_open(struct inode *inode, struct file *filp) { diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 8a9384a64f3e2914336209914c9026acd9fea3de..499fccba3d74bd5471eea8aa609811c00ea17029 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2015-2016, Linaro Limited + * Copyright (c) 2015-2017, 2019-2021 Linaro Limited */ +#include #include -#include -#include #include +#include #include #include #include @@ -28,16 +28,8 @@ static void release_registered_pages(struct tee_shm *shm) } } -static void tee_shm_release(struct tee_shm *shm) +static void tee_shm_release(struct tee_device *teedev, struct tee_shm *shm) { - struct tee_device *teedev = shm->ctx->teedev; - - if (shm->flags & TEE_SHM_DMA_BUF) { - mutex_lock(&teedev->mutex); - idr_remove(&teedev->idr, shm->id); - mutex_unlock(&teedev->mutex); - } - if (shm->flags & TEE_SHM_POOL) { struct tee_shm_pool_mgr *poolm; @@ -64,45 +56,6 @@ static void tee_shm_release(struct tee_shm *shm) tee_device_put(teedev); } -static struct sg_table *tee_shm_op_map_dma_buf(struct dma_buf_attachment - *attach, enum dma_data_direction dir) -{ - return NULL; -} - -static void tee_shm_op_unmap_dma_buf(struct dma_buf_attachment *attach, - struct sg_table *table, - enum dma_data_direction dir) -{ -} - -static void tee_shm_op_release(struct dma_buf *dmabuf) -{ - struct tee_shm *shm = dmabuf->priv; - - tee_shm_release(shm); -} - -static int tee_shm_op_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma) -{ - struct tee_shm *shm = dmabuf->priv; - size_t size = vma->vm_end - vma->vm_start; - - /* Refuse sharing shared memory provided by application */ - if (shm->flags & TEE_SHM_USER_MAPPED) - return -EINVAL; - - return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, - size, vma->vm_page_prot); -} - -static const struct dma_buf_ops tee_shm_dma_buf_ops = { - .map_dma_buf = tee_shm_op_map_dma_buf, - .unmap_dma_buf = tee_shm_op_unmap_dma_buf, - .release = tee_shm_op_release, - .mmap = tee_shm_op_mmap, -}; - struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) { struct tee_device *teedev = ctx->teedev; @@ -137,6 +90,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) goto err_dev_put; } + refcount_set(&shm->refcount, 1); shm->flags = flags | TEE_SHM_POOL; shm->ctx = ctx; if (flags & TEE_SHM_DMA_BUF) @@ -150,10 +104,7 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) goto err_kfree; } - if (flags & TEE_SHM_DMA_BUF) { - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - mutex_lock(&teedev->mutex); shm->id = idr_alloc(&teedev->idr, shm, 1, 0, GFP_KERNEL); mutex_unlock(&teedev->mutex); @@ -161,28 +112,11 @@ struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags) ret = ERR_PTR(shm->id); goto err_pool_free; } - - exp_info.ops = &tee_shm_dma_buf_ops; - exp_info.size = shm->size; - exp_info.flags = O_RDWR; - exp_info.priv = shm; - - shm->dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(shm->dmabuf)) { - ret = ERR_CAST(shm->dmabuf); - goto err_rem; - } } teedev_ctx_get(ctx); return shm; -err_rem: - if (flags & TEE_SHM_DMA_BUF) { - mutex_lock(&teedev->mutex); - idr_remove(&teedev->idr, shm->id); - mutex_unlock(&teedev->mutex); - } err_pool_free: poolm->ops->free(poolm, shm); err_kfree: @@ -243,6 +177,7 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, goto err; } + refcount_set(&shm->refcount, 1); shm->flags = flags | TEE_SHM_REGISTER; shm->ctx = ctx; shm->id = -1; @@ -303,22 +238,6 @@ struct tee_shm *tee_shm_register(struct tee_context *ctx, unsigned long addr, goto err; } - if (flags & TEE_SHM_DMA_BUF) { - DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - - exp_info.ops = &tee_shm_dma_buf_ops; - exp_info.size = shm->size; - exp_info.flags = O_RDWR; - exp_info.priv = shm; - - shm->dmabuf = dma_buf_export(&exp_info); - if (IS_ERR(shm->dmabuf)) { - ret = ERR_CAST(shm->dmabuf); - teedev->desc->ops->shm_unregister(ctx, shm); - goto err; - } - } - return shm; err: if (shm) { @@ -336,6 +255,35 @@ err: } EXPORT_SYMBOL_GPL(tee_shm_register); +static int tee_shm_fop_release(struct inode *inode, struct file *filp) +{ + tee_shm_put(filp->private_data); + return 0; +} + +static int tee_shm_fop_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct tee_shm *shm = filp->private_data; + size_t size = vma->vm_end - vma->vm_start; + + /* Refuse sharing shared memory provided by application */ + if (shm->flags & TEE_SHM_USER_MAPPED) + return -EINVAL; + + /* check for overflowing the buffer's size */ + if (vma->vm_pgoff + vma_pages(vma) > shm->size >> PAGE_SHIFT) + return -EINVAL; + + return remap_pfn_range(vma, vma->vm_start, shm->paddr >> PAGE_SHIFT, + size, vma->vm_page_prot); +} + +static const struct file_operations tee_shm_fops = { + .owner = THIS_MODULE, + .release = tee_shm_fop_release, + .mmap = tee_shm_fop_mmap, +}; + /** * tee_shm_get_fd() - Increase reference count and return file descriptor * @shm: Shared memory handle @@ -348,10 +296,11 @@ int tee_shm_get_fd(struct tee_shm *shm) if (!(shm->flags & TEE_SHM_DMA_BUF)) return -EINVAL; - get_dma_buf(shm->dmabuf); - fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC); + /* matched by tee_shm_put() in tee_shm_op_release() */ + refcount_inc(&shm->refcount); + fd = anon_inode_getfd("tee_shm", &tee_shm_fops, shm, O_RDWR); if (fd < 0) - dma_buf_put(shm->dmabuf); + tee_shm_put(shm); return fd; } @@ -361,17 +310,7 @@ int tee_shm_get_fd(struct tee_shm *shm) */ void tee_shm_free(struct tee_shm *shm) { - /* - * dma_buf_put() decreases the dmabuf reference counter and will - * call tee_shm_release() when the last reference is gone. - * - * In the case of driver private memory we call tee_shm_release - * directly instead as it doesn't have a reference counter. - */ - if (shm->flags & TEE_SHM_DMA_BUF) - dma_buf_put(shm->dmabuf); - else - tee_shm_release(shm); + tee_shm_put(shm); } EXPORT_SYMBOL_GPL(tee_shm_free); @@ -478,10 +417,15 @@ struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id) teedev = ctx->teedev; mutex_lock(&teedev->mutex); shm = idr_find(&teedev->idr, id); + /* + * If the tee_shm was found in the IDR it must have a refcount + * larger than 0 due to the guarantee in tee_shm_put() below. So + * it's safe to use refcount_inc(). + */ if (!shm || shm->ctx != ctx) shm = ERR_PTR(-EINVAL); - else if (shm->flags & TEE_SHM_DMA_BUF) - get_dma_buf(shm->dmabuf); + else + refcount_inc(&shm->refcount); mutex_unlock(&teedev->mutex); return shm; } @@ -493,7 +437,24 @@ EXPORT_SYMBOL_GPL(tee_shm_get_from_id); */ void tee_shm_put(struct tee_shm *shm) { - if (shm->flags & TEE_SHM_DMA_BUF) - dma_buf_put(shm->dmabuf); + struct tee_device *teedev = shm->ctx->teedev; + bool do_release = false; + + mutex_lock(&teedev->mutex); + if (refcount_dec_and_test(&shm->refcount)) { + /* + * refcount has reached 0, we must now remove it from the + * IDR before releasing the mutex. This will guarantee that + * the refcount_inc() in tee_shm_get_from_id() never starts + * from 0. + */ + if (shm->flags & TEE_SHM_DMA_BUF) + idr_remove(&teedev->idr, shm->id); + do_release = true; + } + mutex_unlock(&teedev->mutex); + + if (do_release) + tee_shm_release(teedev, shm); } EXPORT_SYMBOL_GPL(tee_shm_put); diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index 6e1d6a31ee4fbde6c5bf59de1e9396547c2aa102..aee7ad4af654156c68f47452a11f5dd990bf724a 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -23,6 +23,7 @@ #include #include +#include /* * Cooling state <-> CPUFreq frequency @@ -213,6 +214,8 @@ static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, freq = cpufreq_quick_get(policy->cpu); + trace_android_vh_modify_thermal_request_freq(policy, &freq); + if (trace_thermal_power_cpu_get_power_enabled()) { u32 ncpus = cpumask_weight(policy->related_cpus); @@ -312,6 +315,8 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, normalised_power = (power * 100) / last_load; target_freq = cpu_power_to_freq(cpufreq_cdev, normalised_power); + trace_android_vh_modify_thermal_target_freq(policy, &target_freq); + *state = get_level(cpufreq_cdev, target_freq); trace_thermal_power_cpu_limit(policy->related_cpus, target_freq, *state, power); diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index ab0be26f0816960aff154d33494cc7d0ebd1b0f0..e9504d20b93033a99a897662ef15df44d259d96b 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -14,6 +14,8 @@ #define CREATE_TRACE_POINTS #include +#undef CREATE_TRACE_POINTS +#include #include "thermal_core.h" @@ -409,6 +411,7 @@ static int allocate_power(struct thermal_zone_device *tz, } power_range = pid_controller(tz, control_temp, max_allocatable_power); + trace_android_vh_thermal_power_cap(&power_range); divvy_up_power(weighted_req_power, max_power, num_actors, total_weighted_req_power, power_range, granted_power, @@ -607,6 +610,7 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip) int ret; int switch_on_temp, control_temp; struct power_allocator_params *params = tz->governor_data; + int enable = 1, override = 0; /* * We get called for every trip point but we only need to do @@ -615,9 +619,12 @@ static int power_allocator_throttle(struct thermal_zone_device *tz, int trip) if (trip != params->trip_max_desired_temperature) return 0; - ret = tz->ops->get_trip_temp(tz, params->trip_switch_on, - &switch_on_temp); - if (!ret && (tz->temperature < switch_on_temp)) { + trace_android_vh_enable_thermal_power_throttle(&enable, &override); + if (enable) + ret = tz->ops->get_trip_temp(tz, params->trip_switch_on, + &switch_on_temp); + if (!enable || (!ret && (tz->temperature < switch_on_temp) && + !override)) { tz->passive = 0; reset_pid_controller(params); allow_maximum_power(tz); diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c index a1e4f9bb4cb01e7eb17538d1597759c8ec1ae557..0f4cabd2a8c6247f6ccd152cbec2f306d13efcb7 100644 --- a/drivers/thermal/imx8mm_thermal.c +++ b/drivers/thermal/imx8mm_thermal.c @@ -21,6 +21,7 @@ #define TPS 0x4 #define TRITSR 0x20 /* TMU immediate temp */ +#define TER_ADC_PD BIT(30) #define TER_EN BIT(31) #define TRITSR_TEMP0_VAL_MASK 0xff #define TRITSR_TEMP1_VAL_MASK 0xff0000 @@ -113,6 +114,8 @@ static void imx8mm_tmu_enable(struct imx8mm_tmu *tmu, bool enable) val = readl_relaxed(tmu->base + TER); val = enable ? (val | TER_EN) : (val & ~TER_EN); + if (tmu->socdata->version == TMU_VER2) + val = enable ? (val & ~TER_ADC_PD) : (val | TER_ADC_PD); writel_relaxed(val, tmu->base + TER); } diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index 2c7473d86a59b013b43872777dfa5d7011152e6c..16663373b6829999971f5210f27193131fbf3a37 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -15,6 +15,7 @@ #include #include #include +#include #define REG_SET 0x4 #define REG_CLR 0x8 @@ -194,6 +195,7 @@ static struct thermal_soc_data thermal_imx7d_data = { }; struct imx_thermal_data { + struct device *dev; struct cpufreq_policy *policy; struct thermal_zone_device *tz; struct thermal_cooling_device *cdev; @@ -252,44 +254,15 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp) const struct thermal_soc_data *soc_data = data->socdata; struct regmap *map = data->tempmon; unsigned int n_meas; - bool wait, run_measurement; u32 val; + int ret; - run_measurement = !data->irq_enabled; - if (!run_measurement) { - /* Check if a measurement is currently in progress */ - regmap_read(map, soc_data->temp_data, &val); - wait = !(val & soc_data->temp_valid_mask); - } else { - /* - * Every time we measure the temperature, we will power on the - * temperature sensor, enable measurements, take a reading, - * disable measurements, power off the temperature sensor. - */ - regmap_write(map, soc_data->sensor_ctrl + REG_CLR, - soc_data->power_down_mask); - regmap_write(map, soc_data->sensor_ctrl + REG_SET, - soc_data->measure_temp_mask); - - wait = true; - } - - /* - * According to the temp sensor designers, it may require up to ~17us - * to complete a measurement. - */ - if (wait) - usleep_range(20, 50); + ret = pm_runtime_resume_and_get(data->dev); + if (ret < 0) + return ret; regmap_read(map, soc_data->temp_data, &val); - if (run_measurement) { - regmap_write(map, soc_data->sensor_ctrl + REG_CLR, - soc_data->measure_temp_mask); - regmap_write(map, soc_data->sensor_ctrl + REG_SET, - soc_data->power_down_mask); - } - if ((val & soc_data->temp_valid_mask) == 0) { dev_dbg(&tz->device, "temp measurement never finished\n"); return -EAGAIN; @@ -328,6 +301,8 @@ static int imx_get_temp(struct thermal_zone_device *tz, int *temp) enable_irq(data->irq); } + pm_runtime_put(data->dev); + return 0; } @@ -335,24 +310,16 @@ static int imx_change_mode(struct thermal_zone_device *tz, enum thermal_device_mode mode) { struct imx_thermal_data *data = tz->devdata; - struct regmap *map = data->tempmon; - const struct thermal_soc_data *soc_data = data->socdata; if (mode == THERMAL_DEVICE_ENABLED) { - regmap_write(map, soc_data->sensor_ctrl + REG_CLR, - soc_data->power_down_mask); - regmap_write(map, soc_data->sensor_ctrl + REG_SET, - soc_data->measure_temp_mask); + pm_runtime_get(data->dev); if (!data->irq_enabled) { data->irq_enabled = true; enable_irq(data->irq); } } else { - regmap_write(map, soc_data->sensor_ctrl + REG_CLR, - soc_data->measure_temp_mask); - regmap_write(map, soc_data->sensor_ctrl + REG_SET, - soc_data->power_down_mask); + pm_runtime_put(data->dev); if (data->irq_enabled) { disable_irq(data->irq); @@ -393,6 +360,11 @@ static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip, int temp) { struct imx_thermal_data *data = tz->devdata; + int ret; + + ret = pm_runtime_resume_and_get(data->dev); + if (ret < 0) + return ret; /* do not allow changing critical threshold */ if (trip == IMX_TRIP_CRITICAL) @@ -406,6 +378,8 @@ static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip, imx_set_alarm_temp(data, temp); + pm_runtime_put(data->dev); + return 0; } @@ -681,6 +655,8 @@ static int imx_thermal_probe(struct platform_device *pdev) if (!data) return -ENOMEM; + data->dev = &pdev->dev; + map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "fsl,tempmon"); if (IS_ERR(map)) { ret = PTR_ERR(map); @@ -800,6 +776,16 @@ static int imx_thermal_probe(struct platform_device *pdev) data->socdata->power_down_mask); regmap_write(map, data->socdata->sensor_ctrl + REG_SET, data->socdata->measure_temp_mask); + /* After power up, we need a delay before first access can be done. */ + usleep_range(20, 50); + + /* the core was configured and enabled just before */ + pm_runtime_set_active(&pdev->dev); + pm_runtime_enable(data->dev); + + ret = pm_runtime_resume_and_get(data->dev); + if (ret < 0) + goto disable_runtime_pm; data->irq_enabled = true; ret = thermal_zone_device_enable(data->tz); @@ -814,10 +800,15 @@ static int imx_thermal_probe(struct platform_device *pdev) goto thermal_zone_unregister; } + pm_runtime_put(data->dev); + return 0; thermal_zone_unregister: thermal_zone_device_unregister(data->tz); +disable_runtime_pm: + pm_runtime_put_noidle(data->dev); + pm_runtime_disable(data->dev); clk_disable: clk_disable_unprepare(data->thermal_clk); legacy_cleanup: @@ -829,13 +820,9 @@ legacy_cleanup: static int imx_thermal_remove(struct platform_device *pdev) { struct imx_thermal_data *data = platform_get_drvdata(pdev); - struct regmap *map = data->tempmon; - /* Disable measurements */ - regmap_write(map, data->socdata->sensor_ctrl + REG_SET, - data->socdata->power_down_mask); - if (!IS_ERR(data->thermal_clk)) - clk_disable_unprepare(data->thermal_clk); + pm_runtime_put_noidle(data->dev); + pm_runtime_disable(data->dev); thermal_zone_device_unregister(data->tz); imx_thermal_unregister_legacy_cooling(data); @@ -858,29 +845,79 @@ static int __maybe_unused imx_thermal_suspend(struct device *dev) ret = thermal_zone_device_disable(data->tz); if (ret) return ret; + + return pm_runtime_force_suspend(data->dev); +} + +static int __maybe_unused imx_thermal_resume(struct device *dev) +{ + struct imx_thermal_data *data = dev_get_drvdata(dev); + int ret; + + ret = pm_runtime_force_resume(data->dev); + if (ret) + return ret; + /* Enabled thermal sensor after resume */ + return thermal_zone_device_enable(data->tz); +} + +static int __maybe_unused imx_thermal_runtime_suspend(struct device *dev) +{ + struct imx_thermal_data *data = dev_get_drvdata(dev); + const struct thermal_soc_data *socdata = data->socdata; + struct regmap *map = data->tempmon; + int ret; + + ret = regmap_write(map, socdata->sensor_ctrl + REG_CLR, + socdata->measure_temp_mask); + if (ret) + return ret; + + ret = regmap_write(map, socdata->sensor_ctrl + REG_SET, + socdata->power_down_mask); + if (ret) + return ret; + clk_disable_unprepare(data->thermal_clk); return 0; } -static int __maybe_unused imx_thermal_resume(struct device *dev) +static int __maybe_unused imx_thermal_runtime_resume(struct device *dev) { struct imx_thermal_data *data = dev_get_drvdata(dev); + const struct thermal_soc_data *socdata = data->socdata; + struct regmap *map = data->tempmon; int ret; ret = clk_prepare_enable(data->thermal_clk); if (ret) return ret; - /* Enabled thermal sensor after resume */ - ret = thermal_zone_device_enable(data->tz); + + ret = regmap_write(map, socdata->sensor_ctrl + REG_CLR, + socdata->power_down_mask); + if (ret) + return ret; + + ret = regmap_write(map, socdata->sensor_ctrl + REG_SET, + socdata->measure_temp_mask); if (ret) return ret; + /* + * According to the temp sensor designers, it may require up to ~17us + * to complete a measurement. + */ + usleep_range(20, 50); + return 0; } -static SIMPLE_DEV_PM_OPS(imx_thermal_pm_ops, - imx_thermal_suspend, imx_thermal_resume); +static const struct dev_pm_ops imx_thermal_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(imx_thermal_suspend, imx_thermal_resume) + SET_RUNTIME_PM_OPS(imx_thermal_runtime_suspend, + imx_thermal_runtime_resume, NULL) +}; static struct platform_driver imx_thermal = { .driver = { diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 0966551cbaaa0aad9259c39ee6a91b745a7af406..793d7b58fc6501b80b77bb5b42ee6439f9955e69 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -402,6 +402,10 @@ static void int3400_notify(acpi_handle handle, thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", therm_event); thermal_prop[4] = NULL; kobject_uevent_env(&priv->thermal->device.kobj, KOBJ_CHANGE, thermal_prop); + kfree(thermal_prop[0]); + kfree(thermal_prop[1]); + kfree(thermal_prop[2]); + kfree(thermal_prop[3]); } static int int3400_thermal_get_temp(struct thermal_zone_device *thermal, diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c index 3c4c0516e58ab58c86071c85f8c751582873d448..cb4f4b52244603026b98a2fb982f58cf6dc48d6c 100644 --- a/drivers/thermal/qcom/tsens.c +++ b/drivers/thermal/qcom/tsens.c @@ -415,7 +415,7 @@ static irqreturn_t tsens_critical_irq_thread(int irq, void *data) const struct tsens_sensor *s = &priv->sensor[i]; u32 hw_id = s->hw_id; - if (IS_ERR(s->tzd)) + if (!s->tzd) continue; if (!tsens_threshold_violated(priv, hw_id, &d)) continue; @@ -465,7 +465,7 @@ static irqreturn_t tsens_irq_thread(int irq, void *data) const struct tsens_sensor *s = &priv->sensor[i]; u32 hw_id = s->hw_id; - if (IS_ERR(s->tzd)) + if (!s->tzd) continue; if (!tsens_threshold_violated(priv, hw_id, &d)) continue; diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index c02dadc0f0059c3699dc9640eb13e20a37250b03..070cbb34fdfa95881673464a047058b78f97ea27 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -476,6 +476,8 @@ static void thermal_zone_device_init(struct thermal_zone_device *tz) { struct thermal_instance *pos; tz->temperature = THERMAL_TEMP_INVALID; + tz->prev_low_trip = -INT_MAX; + tz->prev_high_trip = INT_MAX; list_for_each_entry(pos, &tz->thermal_instances, tz_node) pos->initialized = false; } @@ -566,6 +568,8 @@ void thermal_zone_device_update(struct thermal_zone_device *tz, for (count = 0; count < tz->trips; count++) handle_thermal_trip(tz, count); + + trace_android_vh_get_thermal_zone_device(tz); } EXPORT_SYMBOL_GPL(thermal_zone_device_update); diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c index c82dba5ee23150b6fc115071fa5e341ab787dab3..ca1054bdba10baee136c22855bae7a9c1fd34bf7 100644 --- a/drivers/thermal/thermal_netlink.c +++ b/drivers/thermal/thermal_netlink.c @@ -424,11 +424,12 @@ static int thermal_genl_cmd_tz_get_trip(struct param *p) for (i = 0; i < tz->trips; i++) { enum thermal_trip_type type; - int temp, hyst; + int temp, hyst = 0; tz->ops->get_trip_type(tz, i, &type); tz->ops->get_trip_temp(tz, i, &temp); - tz->ops->get_trip_hyst(tz, i, &hyst); + if (tz->ops->get_trip_hyst) + tz->ops->get_trip_hyst(tz, i, &hyst); if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_ID, i) || nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, type) || diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index 6379f26a335f64c1e854cfd0cb45d58c3a2b6d3e..9233f7e744544641a8361bdc8bb6da6f071ded4e 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -89,7 +89,7 @@ static int of_thermal_get_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; - if (!data->ops->get_temp) + if (!data->ops || !data->ops->get_temp) return -EINVAL; return data->ops->get_temp(data->sensor_data, temp); @@ -186,6 +186,9 @@ static int of_thermal_set_emul_temp(struct thermal_zone_device *tz, { struct __thermal_zone *data = tz->devdata; + if (!data->ops || !data->ops->set_emul_temp) + return -EINVAL; + return data->ops->set_emul_temp(data->sensor_data, temp); } @@ -194,7 +197,7 @@ static int of_thermal_get_trend(struct thermal_zone_device *tz, int trip, { struct __thermal_zone *data = tz->devdata; - if (!data->ops->get_trend) + if (!data->ops || !data->ops->get_trend) return -EINVAL; return data->ops->get_trend(data->sensor_data, trip, trend); @@ -301,7 +304,7 @@ static int of_thermal_set_trip_temp(struct thermal_zone_device *tz, int trip, if (trip >= data->ntrips || trip < 0) return -EDOM; - if (data->ops->set_trip_temp) { + if (data->ops && data->ops->set_trip_temp) { int ret; ret = data->ops->set_trip_temp(data->sensor_data, trip, temp); diff --git a/drivers/thunderbolt/acpi.c b/drivers/thunderbolt/acpi.c index b5442f979b4d0170950d9ded3d3dbe9e3066d6da..6355fdf7d71a3eef91b5422401bbe176bfb8e4e0 100644 --- a/drivers/thunderbolt/acpi.c +++ b/drivers/thunderbolt/acpi.c @@ -7,6 +7,7 @@ */ #include +#include #include "tb.h" @@ -74,8 +75,18 @@ static acpi_status tb_acpi_add_link(acpi_handle handle, u32 level, void *data, pci_pcie_type(pdev) == PCI_EXP_TYPE_DOWNSTREAM))) { const struct device_link *link; + /* + * Make them both active first to make sure the NHI does + * not runtime suspend before the consumer. The + * pm_runtime_put() below then allows the consumer to + * runtime suspend again (which then allows NHI runtime + * suspend too now that the device link is established). + */ + pm_runtime_get_sync(&pdev->dev); + link = device_link_add(&pdev->dev, &nhi->pdev->dev, DL_FLAG_AUTOREMOVE_SUPPLIER | + DL_FLAG_RPM_ACTIVE | DL_FLAG_PM_RUNTIME); if (link) { dev_dbg(&nhi->pdev->dev, "created link from %s\n", @@ -84,6 +95,8 @@ static acpi_status tb_acpi_add_link(acpi_handle handle, u32 level, void *data, dev_warn(&nhi->pdev->dev, "device link creation from %s failed\n", dev_name(&pdev->dev)); } + + pm_runtime_put(&pdev->dev); } out_put: diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c index 92c9a476defc998f5cf8dfd6185127cf68b479b5..7948660e042fdf376d780398d839b2dbe129f145 100644 --- a/drivers/tty/hvc/hvc_xen.c +++ b/drivers/tty/hvc/hvc_xen.c @@ -37,6 +37,8 @@ struct xencons_info { struct xenbus_device *xbdev; struct xencons_interface *intf; unsigned int evtchn; + XENCONS_RING_IDX out_cons; + unsigned int out_cons_same; struct hvc_struct *hvc; int irq; int vtermno; @@ -86,7 +88,11 @@ static int __write_console(struct xencons_info *xencons, cons = intf->out_cons; prod = intf->out_prod; mb(); /* update queue values before going on */ - BUG_ON((prod - cons) > sizeof(intf->out)); + + if ((prod - cons) > sizeof(intf->out)) { + pr_err_once("xencons: Illegal ring page indices"); + return -EINVAL; + } while ((sent < len) && ((prod - cons) < sizeof(intf->out))) intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; @@ -114,7 +120,10 @@ static int domU_write_console(uint32_t vtermno, const char *data, int len) */ while (len) { int sent = __write_console(cons, data, len); - + + if (sent < 0) + return sent; + data += sent; len -= sent; @@ -131,6 +140,8 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) XENCONS_RING_IDX cons, prod; int recv = 0; struct xencons_info *xencons = vtermno_to_xencons(vtermno); + unsigned int eoiflag = 0; + if (xencons == NULL) return -EINVAL; intf = xencons->intf; @@ -138,7 +149,11 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) cons = intf->in_cons; prod = intf->in_prod; mb(); /* get pointers before reading ring */ - BUG_ON((prod - cons) > sizeof(intf->in)); + + if ((prod - cons) > sizeof(intf->in)) { + pr_err_once("xencons: Illegal ring page indices"); + return -EINVAL; + } while (cons != prod && recv < len) buf[recv++] = intf->in[MASK_XENCONS_IDX(cons++, intf->in)]; @@ -146,7 +161,27 @@ static int domU_read_console(uint32_t vtermno, char *buf, int len) mb(); /* read ring before consuming */ intf->in_cons = cons; - notify_daemon(xencons); + /* + * When to mark interrupt having been spurious: + * - there was no new data to be read, and + * - the backend did not consume some output bytes, and + * - the previous round with no read data didn't see consumed bytes + * (we might have a race with an interrupt being in flight while + * updating xencons->out_cons, so account for that by allowing one + * round without any visible reason) + */ + if (intf->out_cons != xencons->out_cons) { + xencons->out_cons = intf->out_cons; + xencons->out_cons_same = 0; + } + if (recv) { + notify_daemon(xencons); + } else if (xencons->out_cons_same++ > 1) { + eoiflag = XEN_EOI_FLAG_SPURIOUS; + } + + xen_irq_lateeoi(xencons->irq, eoiflag); + return recv; } @@ -375,7 +410,7 @@ static int xencons_connect_backend(struct xenbus_device *dev, if (ret) return ret; info->evtchn = evtchn; - irq = bind_evtchn_to_irq(evtchn); + irq = bind_interdomain_evtchn_to_irq_lateeoi(dev->otherend_id, evtchn); if (irq < 0) return irq; info->irq = irq; @@ -539,7 +574,7 @@ static int __init xen_hvc_init(void) return r; info = vtermno_to_xencons(HVC_COOKIE); - info->irq = bind_evtchn_to_irq(info->evtchn); + info->irq = bind_evtchn_to_irq_lateeoi(info->evtchn); } if (info->irq < 0) info->irq = 0; /* NO_IRQ */ diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index d76880ae68c83be740ee77277955ed43986df9af..05562b3cca451a9bf5549a9edb6c3e9530653797 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -317,6 +317,7 @@ static struct tty_driver *gsm_tty_driver; #define GSM1_ESCAPE_BITS 0x20 #define XON 0x11 #define XOFF 0x13 +#define ISO_IEC_646_MASK 0x7F static const struct tty_port_operations gsm_port_ops; @@ -433,7 +434,7 @@ static u8 gsm_encode_modem(const struct gsm_dlci *dlci) modembits |= MDM_RTR; if (dlci->modem_tx & TIOCM_RI) modembits |= MDM_IC; - if (dlci->modem_tx & TIOCM_CD) + if (dlci->modem_tx & TIOCM_CD || dlci->gsm->initiator) modembits |= MDM_DV; return modembits; } @@ -526,7 +527,8 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) int olen = 0; while (len--) { if (*input == GSM1_SOF || *input == GSM1_ESCAPE - || *input == XON || *input == XOFF) { + || (*input & ISO_IEC_646_MASK) == XON + || (*input & ISO_IEC_646_MASK) == XOFF) { *output++ = GSM1_ESCAPE; *output++ = *input++ ^ GSM1_ESCAPE_BITS; olen++; @@ -1424,6 +1426,9 @@ static void gsm_dlci_close(struct gsm_dlci *dlci) if (dlci->addr != 0) { tty_port_tty_hangup(&dlci->port, false); kfifo_reset(&dlci->fifo); + /* Ensure that gsmtty_open() can return. */ + tty_port_set_initialized(&dlci->port, 0); + wake_up_interruptible(&dlci->port.open_wait); } else dlci->gsm->dead = true; wake_up(&dlci->gsm->event); @@ -1483,7 +1488,7 @@ static void gsm_dlci_t1(struct timer_list *t) dlci->mode = DLCI_MODE_ADM; gsm_dlci_open(dlci); } else { - gsm_dlci_close(dlci); + gsm_dlci_begin_close(dlci); /* prevent half open link */ } break; @@ -1717,7 +1722,12 @@ static void gsm_dlci_release(struct gsm_dlci *dlci) gsm_destroy_network(dlci); mutex_unlock(&dlci->mutex); - tty_hangup(tty); + /* We cannot use tty_hangup() because in tty_kref_put() the tty + * driver assumes that the hangup queue is free and reuses it to + * queue release_one_tty() -> NULL pointer panic in + * process_one_work(). + */ + tty_vhangup(tty); tty_port_tty_set(&dlci->port, NULL); tty_kref_put(tty); @@ -3171,9 +3181,9 @@ static void gsmtty_throttle(struct tty_struct *tty) if (dlci->state == DLCI_CLOSED) return; if (C_CRTSCTS(tty)) - dlci->modem_tx &= ~TIOCM_DTR; + dlci->modem_tx &= ~TIOCM_RTS; dlci->throttled = true; - /* Send an MSC with DTR cleared */ + /* Send an MSC with RTS cleared */ gsmtty_modem_update(dlci, 0); } @@ -3183,9 +3193,9 @@ static void gsmtty_unthrottle(struct tty_struct *tty) if (dlci->state == DLCI_CLOSED) return; if (C_CRTSCTS(tty)) - dlci->modem_tx |= TIOCM_DTR; + dlci->modem_tx |= TIOCM_RTS; dlci->throttled = false; - /* Send an MSC with DTR set */ + /* Send an MSC with RTS set */ gsmtty_modem_update(dlci, 0); } diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 1363e659dc1dbce8f08983e0f1c439555640006b..48c64e68017cdafcb1859ce595413d4583379af7 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -139,6 +139,8 @@ struct n_hdlc { struct n_hdlc_buf_list rx_buf_list; struct n_hdlc_buf_list tx_free_buf_list; struct n_hdlc_buf_list rx_free_buf_list; + struct work_struct write_work; + struct tty_struct *tty_for_write_work; }; /* @@ -153,6 +155,7 @@ static struct n_hdlc_buf *n_hdlc_buf_get(struct n_hdlc_buf_list *list); /* Local functions */ static struct n_hdlc *n_hdlc_alloc(void); +static void n_hdlc_tty_write_work(struct work_struct *work); /* max frame size for memory allocations */ static int maxframe = 4096; @@ -209,6 +212,8 @@ static void n_hdlc_tty_close(struct tty_struct *tty) wake_up_interruptible(&tty->read_wait); wake_up_interruptible(&tty->write_wait); + cancel_work_sync(&n_hdlc->write_work); + n_hdlc_free_buf_list(&n_hdlc->rx_free_buf_list); n_hdlc_free_buf_list(&n_hdlc->tx_free_buf_list); n_hdlc_free_buf_list(&n_hdlc->rx_buf_list); @@ -240,6 +245,8 @@ static int n_hdlc_tty_open(struct tty_struct *tty) return -ENFILE; } + INIT_WORK(&n_hdlc->write_work, n_hdlc_tty_write_work); + n_hdlc->tty_for_write_work = tty; tty->disc_data = n_hdlc; tty->receive_room = 65536; @@ -333,6 +340,20 @@ check_again: goto check_again; } /* end of n_hdlc_send_frames() */ +/** + * n_hdlc_tty_write_work - Asynchronous callback for transmit wakeup + * @work: pointer to work_struct + * + * Called when low level device driver can accept more send data. + */ +static void n_hdlc_tty_write_work(struct work_struct *work) +{ + struct n_hdlc *n_hdlc = container_of(work, struct n_hdlc, write_work); + struct tty_struct *tty = n_hdlc->tty_for_write_work; + + n_hdlc_send_frames(n_hdlc, tty); +} /* end of n_hdlc_tty_write_work() */ + /** * n_hdlc_tty_wakeup - Callback for transmit wakeup * @tty: pointer to associated tty instance data @@ -343,7 +364,7 @@ static void n_hdlc_tty_wakeup(struct tty_struct *tty) { struct n_hdlc *n_hdlc = tty->disc_data; - n_hdlc_send_frames(n_hdlc, tty); + schedule_work(&n_hdlc->write_work); } /* end of n_hdlc_tty_wakeup() */ /** diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index e4f4b2186bcecce1467d37c1221ac1353610a882..58190135efb7dd3088f9895cdceb1e01a646d7c8 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1372,7 +1372,7 @@ handle_newline: put_tty_queue(c, ldata); smp_store_release(&ldata->canon_head, ldata->read_head); kill_fasync(&tty->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tty->read_wait, EPOLLIN); + wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM); return 0; } } @@ -1653,7 +1653,7 @@ static void __receive_buf(struct tty_struct *tty, const unsigned char *cp, if (read_cnt(ldata)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tty->read_wait, EPOLLIN); + wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM); } } @@ -2024,7 +2024,7 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty, return false; canon_head = smp_load_acquire(&ldata->canon_head); - n = min(*nr + 1, canon_head - ldata->read_tail); + n = min(*nr, canon_head - ldata->read_tail); tail = ldata->read_tail & (N_TTY_BUF_SIZE - 1); size = min_t(size_t, tail + n, N_TTY_BUF_SIZE); @@ -2046,10 +2046,8 @@ static bool canon_copy_from_read_buf(struct tty_struct *tty, n += N_TTY_BUF_SIZE; c = n + found; - if (!found || read_buf(ldata, eol) != __DISABLED_CHAR) { - c = min(*nr, c); + if (!found || read_buf(ldata, eol) != __DISABLED_CHAR) n = c; - } n_tty_trace("%s: eol:%zu found:%d n:%zu c:%zu tail:%zu more:%zu\n", __func__, eol, found, n, c, tail, more); diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index a3a0154da567da4a37fb9cc7a8fb1ba631b64bc8..49559731bbcf1cf595fef5b787d528afa513889c 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -726,7 +726,7 @@ static struct platform_driver dw8250_platform_driver = { .name = "dw-apb-uart", .pm = &dw8250_pm_ops, .of_match_table = dw8250_of_match, - .acpi_match_table = ACPI_PTR(dw8250_acpi_match), + .acpi_match_table = dw8250_acpi_match, }, .probe = dw8250_probe, .remove = dw8250_remove, diff --git a/drivers/tty/serial/8250/8250_fintek.c b/drivers/tty/serial/8250/8250_fintek.c index 31c9e83ea3cb2c5bcf62aea19e8957d7fff284a4..251f0018ae8cad4874aab6e0e9a58dd6f257ba1d 100644 --- a/drivers/tty/serial/8250/8250_fintek.c +++ b/drivers/tty/serial/8250/8250_fintek.c @@ -290,25 +290,6 @@ static void fintek_8250_set_max_fifo(struct fintek_8250 *pdata) } } -static void fintek_8250_goto_highspeed(struct uart_8250_port *uart, - struct fintek_8250 *pdata) -{ - sio_write_reg(pdata, LDN, pdata->index); - - switch (pdata->pid) { - case CHIP_ID_F81966: - case CHIP_ID_F81866: /* set uart clock for high speed serial mode */ - sio_write_mask_reg(pdata, F81866_UART_CLK, - F81866_UART_CLK_MASK, - F81866_UART_CLK_14_769MHZ); - - uart->port.uartclk = 921600 * 16; - break; - default: /* leave clock speed untouched */ - break; - } -} - static void fintek_8250_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old) @@ -430,7 +411,6 @@ static int probe_setup_port(struct fintek_8250 *pdata, fintek_8250_set_irq_mode(pdata, level_mode); fintek_8250_set_max_fifo(pdata); - fintek_8250_goto_highspeed(uart, pdata); fintek_8250_exit_key(addr[i]); diff --git a/drivers/tty/serial/8250/8250_gsc.c b/drivers/tty/serial/8250/8250_gsc.c index 673cda3d011d0c64c0c71a19dfa3052447d21c4f..948d0a1c6ae8edc91eefaadb5037f4a311d12509 100644 --- a/drivers/tty/serial/8250/8250_gsc.c +++ b/drivers/tty/serial/8250/8250_gsc.c @@ -26,7 +26,7 @@ static int __init serial_init_chip(struct parisc_device *dev) unsigned long address; int err; -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && defined(CONFIG_IOSAPIC) if (!dev->irq && (dev->id.sversion == 0xad)) dev->irq = iosapic_serial_irq(dev); #endif diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 65e9045dafe6d16a38260a4ee4dc23dc1026ab28..5595c63c46eaf33bc4fd4d0ef0360a8f0b3b6ea7 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -83,8 +83,17 @@ static int of_platform_serial_setup(struct platform_device *ofdev, port->mapsize = resource_size(&resource); /* Check for shifted address mapping */ - if (of_property_read_u32(np, "reg-offset", &prop) == 0) + if (of_property_read_u32(np, "reg-offset", &prop) == 0) { + if (prop >= port->mapsize) { + dev_warn(&ofdev->dev, "reg-offset %u exceeds region size %pa\n", + prop, &port->mapsize); + ret = -EINVAL; + goto err_unprepare; + } + port->mapbase += prop; + port->mapsize -= prop; + } port->iotype = UPIO_MEM; if (of_property_read_u32(np, "reg-io-width", &prop) == 0) { diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 58f718ed1bb98396665da50c3e1f41959a3e8587..3a985e953b8e904f69da97f163af86010e85c544 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1349,29 +1349,33 @@ pericom_do_set_divisor(struct uart_port *port, unsigned int baud, { int scr; int lcr; - int actual_baud; - int tolerance; - for (scr = 5 ; scr <= 15 ; scr++) { - actual_baud = 921600 * 16 / scr; - tolerance = actual_baud / 50; + for (scr = 16; scr > 4; scr--) { + unsigned int maxrate = port->uartclk / scr; + unsigned int divisor = max(maxrate / baud, 1U); + int delta = maxrate / divisor - baud; - if ((baud < actual_baud + tolerance) && - (baud > actual_baud - tolerance)) { + if (baud > maxrate + baud / 50) + continue; + if (delta > baud / 50) + divisor++; + + if (divisor > 0xffff) + continue; + + /* Update delta due to possible divisor change */ + delta = maxrate / divisor - baud; + if (abs(delta) < baud / 50) { lcr = serial_port_in(port, UART_LCR); serial_port_out(port, UART_LCR, lcr | 0x80); - - serial_port_out(port, UART_DLL, 1); - serial_port_out(port, UART_DLM, 0); + serial_port_out(port, UART_DLL, divisor & 0xff); + serial_port_out(port, UART_DLM, divisor >> 8 & 0xff); serial_port_out(port, 2, 16 - scr); serial_port_out(port, UART_LCR, lcr); return; - } else if (baud > actual_baud) { - break; } } - serial8250_do_set_divisor(port, baud, quot, quot_frac); } static int pci_pericom_setup(struct serial_private *priv, const struct pciserial_board *board, @@ -2317,12 +2321,19 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .setup = pci_pericom_setup_four_at_eight, }, { - .vendor = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, + .vendor = PCI_VENDOR_ID_ACCESIO, .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM232_4, .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, .setup = pci_pericom_setup_four_at_eight, }, + { + .vendor = PCI_VENDOR_ID_ACCESIO, + .device = PCI_DEVICE_ID_ACCESIO_PCIE_ICM_4S, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_pericom_setup_four_at_eight, + }, { .vendor = PCI_VENDOR_ID_ACCESIO, .device = PCI_DEVICE_ID_ACCESIO_MPCIE_ICM232_4, @@ -5160,8 +5171,30 @@ static const struct pci_device_id serial_pci_tbl[] = { { PCI_VENDOR_ID_INTASHIELD, PCI_DEVICE_ID_INTASHIELD_IS400, PCI_ANY_ID, PCI_ANY_ID, 0, 0, /* 135a.0dc0 */ pbn_b2_4_115200 }, + /* Brainboxes Devices */ + /* + * Brainboxes UC-101 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0BA1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-235/246 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0AA1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_1_115200 }, + /* + * Brainboxes UC-257 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0861, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, /* - * BrainBoxes UC-260 + * Brainboxes UC-260/271/701/756 */ { PCI_VENDOR_ID_INTASHIELD, 0x0D21, PCI_ANY_ID, PCI_ANY_ID, @@ -5169,7 +5202,81 @@ static const struct pci_device_id serial_pci_tbl[] = { pbn_b2_4_115200 }, { PCI_VENDOR_ID_INTASHIELD, 0x0E34, PCI_ANY_ID, PCI_ANY_ID, - PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + PCI_CLASS_COMMUNICATION_MULTISERIAL << 8, 0xffff00, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-268 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0841, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-275/279 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0881, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_8_115200 }, + /* + * Brainboxes UC-302 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08E1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-310 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08C1, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-313 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x08A3, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-320/324 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0A61, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_1_115200 }, + /* + * Brainboxes UC-346 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0B02, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-357 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0A81, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x0A83, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_2_115200 }, + /* + * Brainboxes UC-368 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0C41, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b2_4_115200 }, + /* + * Brainboxes UC-420/431 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x0921, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, pbn_b2_4_115200 }, /* * Perle PCI-RAS cards diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 5d40f1010fbfd47adcd0a48297e01b51d2e3532c..7c07ebb37b1b9c07ff15a6f5f303ac493f7421dc 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2029,13 +2029,6 @@ void serial8250_do_set_mctrl(struct uart_port *port, unsigned int mctrl) struct uart_8250_port *up = up_to_u8250p(port); unsigned char mcr; - if (port->rs485.flags & SER_RS485_ENABLED) { - if (serial8250_in_MCR(up) & UART_MCR_RTS) - mctrl |= TIOCM_RTS; - else - mctrl &= ~TIOCM_RTS; - } - mcr = serial8250_TIOCM_to_MCR(mctrl); mcr = (mcr & up->mcr_mask) | up->mcr_force | up->mcr; @@ -2675,21 +2668,32 @@ static unsigned int serial8250_get_baud_rate(struct uart_port *port, void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk) { struct uart_8250_port *up = up_to_u8250p(port); + struct tty_port *tport = &port->state->port; unsigned int baud, quot, frac = 0; struct ktermios *termios; + struct tty_struct *tty; unsigned long flags; - mutex_lock(&port->state->port.mutex); + tty = tty_port_tty_get(tport); + if (!tty) { + mutex_lock(&tport->mutex); + port->uartclk = uartclk; + mutex_unlock(&tport->mutex); + return; + } + + down_write(&tty->termios_rwsem); + mutex_lock(&tport->mutex); if (port->uartclk == uartclk) goto out_lock; port->uartclk = uartclk; - if (!tty_port_initialized(&port->state->port)) + if (!tty_port_initialized(tport)) goto out_lock; - termios = &port->state->port.tty->termios; + termios = &tty->termios; baud = serial8250_get_baud_rate(port, termios, NULL); quot = serial8250_get_divisor(port, baud, &frac); @@ -2706,7 +2710,9 @@ void serial8250_update_uartclk(struct uart_port *port, unsigned int uartclk) serial8250_rpm_put(up); out_lock: - mutex_unlock(&port->state->port.mutex); + mutex_unlock(&tport->mutex); + up_write(&tty->termios_rwsem); + tty_kref_put(tty); } EXPORT_SYMBOL_GPL(serial8250_update_uartclk); diff --git a/drivers/tty/serial/amba-pl010.c b/drivers/tty/serial/amba-pl010.c index 3f96edfe569c5d1045f53fd4ebb830b52d6fc4d5..e538d6d75155e4840c420f5bf2a2007dcd8e8891 100644 --- a/drivers/tty/serial/amba-pl010.c +++ b/drivers/tty/serial/amba-pl010.c @@ -448,14 +448,11 @@ pl010_set_termios(struct uart_port *port, struct ktermios *termios, if ((termios->c_cflag & CREAD) == 0) uap->port.ignore_status_mask |= UART_DUMMY_RSR_RX; - /* first, disable everything */ old_cr = readb(uap->port.membase + UART010_CR) & ~UART010_CR_MSIE; if (UART_ENABLE_MS(port, termios->c_cflag)) old_cr |= UART010_CR_MSIE; - writel(0, uap->port.membase + UART010_CR); - /* Set baud rate */ quot -= 1; writel((quot & 0xf00) >> 8, uap->port.membase + UART010_LCRM); diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index a95fc8bf4c69d879530e33a52961e37c1c2f2a6b..07b19e97f850d3663f0c4c869b8ecdad47e2a929 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2083,32 +2083,13 @@ static const char *pl011_type(struct uart_port *port) return uap->port.type == PORT_AMBA ? uap->type : NULL; } -/* - * Release the memory region(s) being used by 'port' - */ -static void pl011_release_port(struct uart_port *port) -{ - release_mem_region(port->mapbase, SZ_4K); -} - -/* - * Request the memory region(s) being used by 'port' - */ -static int pl011_request_port(struct uart_port *port) -{ - return request_mem_region(port->mapbase, SZ_4K, "uart-pl011") - != NULL ? 0 : -EBUSY; -} - /* * Configure/autoconfigure the port. */ static void pl011_config_port(struct uart_port *port, int flags) { - if (flags & UART_CONFIG_TYPE) { + if (flags & UART_CONFIG_TYPE) port->type = PORT_AMBA; - pl011_request_port(port); - } } /* @@ -2123,6 +2104,8 @@ static int pl011_verify_port(struct uart_port *port, struct serial_struct *ser) ret = -EINVAL; if (ser->baud_base < 9600) ret = -EINVAL; + if (port->mapbase != (unsigned long) ser->iomem_base) + ret = -EINVAL; return ret; } @@ -2140,8 +2123,6 @@ static const struct uart_ops amba_pl011_pops = { .flush_buffer = pl011_dma_flush_buffer, .set_termios = pl011_set_termios, .type = pl011_type, - .release_port = pl011_release_port, - .request_port = pl011_request_port, .config_port = pl011_config_port, .verify_port = pl011_verify_port, #ifdef CONFIG_CONSOLE_POLL @@ -2171,8 +2152,6 @@ static const struct uart_ops sbsa_uart_pops = { .shutdown = sbsa_uart_shutdown, .set_termios = sbsa_uart_set_termios, .type = pl011_type, - .release_port = pl011_release_port, - .request_port = pl011_request_port, .config_port = pl011_config_port, .verify_port = pl011_verify_port, #ifdef CONFIG_CONSOLE_POLL @@ -2790,6 +2769,7 @@ MODULE_DEVICE_TABLE(of, sbsa_uart_of_match); static const struct acpi_device_id sbsa_uart_acpi_match[] = { { "ARMH0011", 0 }, + { "ARMHB000", 0 }, {}, }; MODULE_DEVICE_TABLE(acpi, sbsa_uart_acpi_match); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index a24e5c2b30bc956ab1f828b8fb702125e3259cd5..602065bfc9bb8766e9c16038dd10fabc90f5ae89 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1004,6 +1004,13 @@ static void atmel_tx_dma(struct uart_port *port) desc->callback = atmel_complete_tx_dma; desc->callback_param = atmel_port; atmel_port->cookie_tx = dmaengine_submit(desc); + if (dma_submit_error(atmel_port->cookie_tx)) { + dev_err(port->dev, "dma_submit_error %d\n", + atmel_port->cookie_tx); + return; + } + + dma_async_issue_pending(chan); } if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) @@ -1264,6 +1271,13 @@ static int atmel_prepare_rx_dma(struct uart_port *port) desc->callback_param = port; atmel_port->desc_rx = desc; atmel_port->cookie_rx = dmaengine_submit(desc); + if (dma_submit_error(atmel_port->cookie_rx)) { + dev_err(port->dev, "dma_submit_error %d\n", + atmel_port->cookie_rx); + goto chan_err; + } + + dma_async_issue_pending(atmel_port->chan_rx); return 0; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index a70911a227a84be875ee641208bcf949a3c28d3f..b9f8add284e33cf122b3f21e9aafeda8d6d3c9ed 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2559,6 +2559,7 @@ OF_EARLYCON_DECLARE(lpuart, "fsl,vf610-lpuart", lpuart_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1021a-lpuart", lpuart32_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,ls1028a-lpuart", ls1028a_early_console_setup); OF_EARLYCON_DECLARE(lpuart32, "fsl,imx7ulp-lpuart", lpuart32_imx_early_console_setup); +OF_EARLYCON_DECLARE(lpuart32, "fsl,imx8qxp-lpuart", lpuart32_imx_early_console_setup); EARLYCON_DECLARE(lpuart, lpuart_early_console_setup); EARLYCON_DECLARE(lpuart32, lpuart32_early_console_setup); diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c index cacf7266a262d200948d2c5c10b80a0dc3323d6b..93cd8ad57f385c595e0d842b4f003e3cf9c55d87 100644 --- a/drivers/tty/serial/imx.c +++ b/drivers/tty/serial/imx.c @@ -508,18 +508,21 @@ static void imx_uart_stop_tx(struct uart_port *port) static void imx_uart_stop_rx(struct uart_port *port) { struct imx_port *sport = (struct imx_port *)port; - u32 ucr1, ucr2; + u32 ucr1, ucr2, ucr4; ucr1 = imx_uart_readl(sport, UCR1); ucr2 = imx_uart_readl(sport, UCR2); + ucr4 = imx_uart_readl(sport, UCR4); if (sport->dma_is_enabled) { ucr1 &= ~(UCR1_RXDMAEN | UCR1_ATDMAEN); } else { ucr1 &= ~UCR1_RRDYEN; ucr2 &= ~UCR2_ATEN; + ucr4 &= ~UCR4_OREN; } imx_uart_writel(sport, ucr1, UCR1); + imx_uart_writel(sport, ucr4, UCR4); ucr2 &= ~UCR2_RXEN; imx_uart_writel(sport, ucr2, UCR2); @@ -1576,7 +1579,7 @@ static void imx_uart_shutdown(struct uart_port *port) imx_uart_writel(sport, ucr1, UCR1); ucr4 = imx_uart_readl(sport, UCR4); - ucr4 &= ~(UCR4_OREN | UCR4_TCEN); + ucr4 &= ~UCR4_TCEN; imx_uart_writel(sport, ucr4, UCR4); spin_unlock_irqrestore(&sport->port.lock, flags); @@ -2049,7 +2052,7 @@ imx_uart_console_write(struct console *co, const char *s, unsigned int count) * If the port was already initialised (eg, by a boot loader), * try to determine the current setup. */ -static void __init +static void imx_uart_console_get_options(struct imx_port *sport, int *baud, int *parity, int *bits) { @@ -2108,7 +2111,7 @@ imx_uart_console_get_options(struct imx_port *sport, int *baud, } } -static int __init +static int imx_uart_console_setup(struct console *co, char *options) { struct imx_port *sport; diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c index 87f005e5d2affddec960654ebc39efb93c601bcc..26bcbec5422e2ff73d97858701635b1332532e57 100644 --- a/drivers/tty/serial/msm_serial.c +++ b/drivers/tty/serial/msm_serial.c @@ -599,6 +599,9 @@ static void msm_start_rx_dma(struct msm_port *msm_port) u32 val; int ret; + if (IS_ENABLED(CONFIG_CONSOLE_POLL)) + return; + if (!dma->chan) return; diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 9adb8362578c5d8c54a5c1a34061e5688eaf727d..04b4ed5d06341f76e96c6f6a7cf7ac2b4f4130ed 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -734,12 +734,15 @@ static irqreturn_t sc16is7xx_irq(int irq, void *dev_id) static void sc16is7xx_tx_proc(struct kthread_work *ws) { struct uart_port *port = &(to_sc16is7xx_one(ws, tx_work)->port); + struct sc16is7xx_port *s = dev_get_drvdata(port->dev); if ((port->rs485.flags & SER_RS485_ENABLED) && (port->rs485.delay_rts_before_send > 0)) msleep(port->rs485.delay_rts_before_send); + mutex_lock(&s->efr_lock); sc16is7xx_handle_tx(port); + mutex_unlock(&s->efr_lock); } static void sc16is7xx_reconf_rs485(struct uart_port *port) diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c index 26fa69609ee5b64f7000eb82b3808c0e0863dafe..c2be22c3b7d1bd20f10916fca7128dc8c1c7187e 100644 --- a/drivers/tty/serial/serial-tegra.c +++ b/drivers/tty/serial/serial-tegra.c @@ -1501,7 +1501,7 @@ static struct tegra_uart_chip_data tegra20_uart_chip_data = { .fifo_mode_enable_status = false, .uart_max_port = 5, .max_dma_burst_bytes = 4, - .error_tolerance_low_range = 0, + .error_tolerance_low_range = -4, .error_tolerance_high_range = 4, }; @@ -1512,7 +1512,7 @@ static struct tegra_uart_chip_data tegra30_uart_chip_data = { .fifo_mode_enable_status = false, .uart_max_port = 5, .max_dma_burst_bytes = 4, - .error_tolerance_low_range = 0, + .error_tolerance_low_range = -4, .error_tolerance_high_range = 4, }; diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 68a0ff6054761a989001609196a2e5e4cd04bee6..be0d9922e320e04a0c609cbece0e66a8b698b1a0 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -162,7 +162,7 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise) int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND); if (raise) { - if (rs485_on && !RTS_after_send) { + if (rs485_on && RTS_after_send) { uart_set_mctrl(uport, TIOCM_DTR); uart_clear_mctrl(uport, TIOCM_RTS); } else { @@ -171,7 +171,7 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise) } else { unsigned int clear = TIOCM_DTR; - clear |= (!rs485_on || !RTS_after_send) ? TIOCM_RTS : 0; + clear |= (!rs485_on || RTS_after_send) ? TIOCM_RTS : 0; uart_clear_mctrl(uport, clear); } } @@ -222,7 +222,11 @@ static int uart_port_startup(struct tty_struct *tty, struct uart_state *state, if (retval == 0) { if (uart_console(uport) && uport->cons->cflag) { tty->termios.c_cflag = uport->cons->cflag; + tty->termios.c_ispeed = uport->cons->ispeed; + tty->termios.c_ospeed = uport->cons->ospeed; uport->cons->cflag = 0; + uport->cons->ispeed = 0; + uport->cons->ospeed = 0; } /* * Initialise the hardware port settings. @@ -290,8 +294,11 @@ static void uart_shutdown(struct tty_struct *tty, struct uart_state *state) /* * Turn off DTR and RTS early. */ - if (uport && uart_console(uport) && tty) + if (uport && uart_console(uport) && tty) { uport->cons->cflag = tty->termios.c_cflag; + uport->cons->ispeed = tty->termios.c_ispeed; + uport->cons->ospeed = tty->termios.c_ospeed; + } if (!tty || C_HUPCL(tty)) uart_port_dtr_rts(uport, 0); @@ -1095,6 +1102,11 @@ uart_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) goto out; if (!tty_io_error(tty)) { + if (uport->rs485.flags & SER_RS485_ENABLED) { + set &= ~TIOCM_RTS; + clear &= ~TIOCM_RTS; + } + uart_update_mctrl(uport, set, clear); ret = 0; } @@ -1569,6 +1581,7 @@ static void uart_tty_port_shutdown(struct tty_port *port) { struct uart_state *state = container_of(port, struct uart_state, port); struct uart_port *uport = uart_port_check(state); + char *buf; /* * At this point, we stop accepting input. To do this, we @@ -1590,8 +1603,18 @@ static void uart_tty_port_shutdown(struct tty_port *port) */ tty_port_set_suspended(port, 0); - uart_change_pm(state, UART_PM_STATE_OFF); + /* + * Free the transmit buffer. + */ + spin_lock_irq(&uport->lock); + buf = state->xmit.buf; + state->xmit.buf = NULL; + spin_unlock_irq(&uport->lock); + + if (buf) + free_page((unsigned long)buf); + uart_change_pm(state, UART_PM_STATE_OFF); } static void uart_wait_until_sent(struct tty_struct *tty, int timeout) @@ -2123,8 +2146,11 @@ uart_set_options(struct uart_port *port, struct console *co, * Allow the setting of the UART parameters with a NULL console * too: */ - if (co) + if (co) { co->cflag = termios.c_cflag; + co->ispeed = termios.c_ispeed; + co->ospeed = termios.c_ospeed; + } return 0; } @@ -2258,6 +2284,8 @@ int uart_resume_port(struct uart_driver *drv, struct uart_port *uport) */ memset(&termios, 0, sizeof(struct ktermios)); termios.c_cflag = uport->cons->cflag; + termios.c_ispeed = uport->cons->ispeed; + termios.c_ospeed = uport->cons->ospeed; /* * If that's unset, use the tty termios setting. @@ -2386,7 +2414,8 @@ uart_configure_port(struct uart_driver *drv, struct uart_state *state, * We probably don't need a spinlock around this, but */ spin_lock_irqsave(&port->lock, flags); - port->ops->set_mctrl(port, port->mctrl & TIOCM_DTR); + port->mctrl &= TIOCM_DTR; + port->ops->set_mctrl(port, port->mctrl); spin_unlock_irqrestore(&port->lock, flags); /* diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index 844059861f9e18057001610d9b16df591189ac52..6afae051ba8d117cc5170c03b556db514d60a4a2 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -420,10 +420,22 @@ static void stm32_usart_transmit_chars(struct uart_port *port) struct stm32_port *stm32_port = to_stm32_port(port); const struct stm32_usart_offsets *ofs = &stm32_port->info->ofs; struct circ_buf *xmit = &port->state->xmit; + u32 isr; + int ret; if (port->x_char) { if (stm32_port->tx_dma_busy) stm32_usart_clr_bits(port, ofs->cr3, USART_CR3_DMAT); + + /* Check that TDR is empty before filling FIFO */ + ret = + readl_relaxed_poll_timeout_atomic(port->membase + ofs->isr, + isr, + (isr & USART_SR_TXE), + 10, 1000); + if (ret) + dev_warn(port->dev, "1 character may be erased\n"); + writel_relaxed(port->x_char, port->membase + ofs->tdr); port->x_char = 0; port->icount.tx++; @@ -574,7 +586,7 @@ static void stm32_usart_start_tx(struct uart_port *port) struct serial_rs485 *rs485conf = &port->rs485; struct circ_buf *xmit = &port->state->xmit; - if (uart_circ_empty(xmit)) + if (uart_circ_empty(xmit) && !port->x_char) return; if (rs485conf->flags & SER_RS485_ENABLED) { diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index 7081ab322b4028e1284d11391c88741f30470650..48923cd8c07d14d9de69e1ff88476423ea0607f8 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -615,7 +615,7 @@ static struct uart_driver ulite_uart_driver = { * * Returns: 0 on success, <0 otherwise */ -static int ulite_assign(struct device *dev, int id, u32 base, int irq, +static int ulite_assign(struct device *dev, int id, phys_addr_t base, int irq, struct uartlite_data *pdata) { struct uart_port *port; diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index a9b1ee27183a7105537c90140f9e7ce5969246c7..b5a8afbc452ba6d4b4b3313684c780ba21d95d81 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -601,9 +601,10 @@ static void cdns_uart_start_tx(struct uart_port *port) if (uart_circ_empty(&port->state->xmit)) return; + writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_ISR); + cdns_uart_handle_tx(port); - writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_ISR); /* Enable the TX Empty interrupt */ writel(CDNS_UART_IXR_TXEMPTY, port->membase + CDNS_UART_IER); } diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index bd2d91546e32716c68522da2545cb74bc30117c1..0fc473321d3e3b8b1f8ce61938d46db1d44aaa3c 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -534,6 +534,9 @@ static void flush_to_ldisc(struct work_struct *work) if (!count) break; head->read += count; + + if (need_resched()) + cond_resched(); } mutex_unlock(&buf->lock); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index cea40ef090b77e8eda7371aa19ac045551e08ed7..a7ee1171eeb3e2bb56508979dbe9a0bf67d38b36 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1220,8 +1220,25 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, new_row_size = new_cols << 1; new_screen_size = new_row_size * new_rows; - if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) - return 0; + if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) { + /* + * This function is being called here to cover the case + * where the userspace calls the FBIOPUT_VSCREENINFO twice, + * passing the same fb_var_screeninfo containing the fields + * yres/xres equal to a number non-multiple of vc_font.height + * and yres_virtual/xres_virtual equal to number lesser than the + * vc_font.height and yres/xres. + * In the second call, the struct fb_var_screeninfo isn't + * being modified by the underlying driver because of the + * if above, and this causes the fbcon_display->vrows to become + * negative and it eventually leads to out-of-bound + * access by the imageblit function. + * To give the correct values to the struct and to not have + * to deal with possible errors from the code below, we call + * the resize_screen here as well. + */ + return resize_screen(vc, new_cols, new_rows, user); + } if (new_screen_size > KMALLOC_MAX_SIZE || !new_screen_size) return -EINVAL; diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 90e4fcd3dc39a932888fa18497b184c2fdc6c0d1..a9c6ea8986af08dcc004011321b67135df86cd62 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -699,8 +699,8 @@ static int vt_setactivate(struct vt_setactivate __user *sa) if (vsa.console == 0 || vsa.console > MAX_NR_CONSOLES) return -ENXIO; - vsa.console = array_index_nospec(vsa.console, MAX_NR_CONSOLES + 1); vsa.console--; + vsa.console = array_index_nospec(vsa.console, MAX_NR_CONSOLES); console_lock(); ret = vc_allocate(vsa.console); if (ret) { @@ -945,6 +945,7 @@ int vt_ioctl(struct tty_struct *tty, return -ENXIO; arg--; + arg = array_index_nospec(arg, MAX_NR_CONSOLES); console_lock(); ret = vc_allocate(arg); console_unlock(); diff --git a/drivers/usb/cdns3/gadget.c b/drivers/usb/cdns3/gadget.c index 9d38f864cb68c54af3ef672de3b28812b11c478d..e111622944139c991cae819d996df6cd4999e2ee 100644 --- a/drivers/usb/cdns3/gadget.c +++ b/drivers/usb/cdns3/gadget.c @@ -1101,6 +1101,19 @@ static int cdns3_ep_run_stream_transfer(struct cdns3_endpoint *priv_ep, return 0; } +static void cdns3_rearm_drdy_if_needed(struct cdns3_endpoint *priv_ep) +{ + struct cdns3_device *priv_dev = priv_ep->cdns3_dev; + + if (priv_dev->dev_ver < DEV_VER_V3) + return; + + if (readl(&priv_dev->regs->ep_sts) & EP_STS_TRBERR) { + writel(EP_STS_TRBERR, &priv_dev->regs->ep_sts); + writel(EP_CMD_DRDY, &priv_dev->regs->ep_cmd); + } +} + /** * cdns3_ep_run_transfer - start transfer on no-default endpoint hardware * @priv_ep: endpoint object @@ -1352,6 +1365,7 @@ static int cdns3_ep_run_transfer(struct cdns3_endpoint *priv_ep, /*clearing TRBERR and EP_STS_DESCMIS before seting DRDY*/ writel(EP_STS_TRBERR | EP_STS_DESCMIS, &priv_dev->regs->ep_sts); writel(EP_CMD_DRDY, &priv_dev->regs->ep_cmd); + cdns3_rearm_drdy_if_needed(priv_ep); trace_cdns3_doorbell_epx(priv_ep->name, readl(&priv_dev->regs->ep_traddr)); } diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index ee565bdb44d65166869a9d814886ee910b3c13f3..f7984559428442296e68dadd258f55043fa8286c 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -425,11 +425,16 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) data->phy = devm_usb_get_phy_by_phandle(dev, "fsl,usbphy", 0); if (IS_ERR(data->phy)) { ret = PTR_ERR(data->phy); - /* Return -EINVAL if no usbphy is available */ - if (ret == -ENODEV) - data->phy = NULL; - else + if (ret != -ENODEV) goto err_clk; + data->phy = devm_usb_get_phy_by_phandle(dev, "phys", 0); + if (IS_ERR(data->phy)) { + ret = PTR_ERR(data->phy); + if (ret == -ENODEV) + data->phy = NULL; + else + goto err_clk; + } } pdata.usb_phy = data->phy; diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c index aa40e510b806ecb5b8ed3b30065b0c7cf3a162d1..127b1a62b1bf43b0f46359cfd4b177cea641b9fc 100644 --- a/drivers/usb/chipidea/core.c +++ b/drivers/usb/chipidea/core.c @@ -509,7 +509,7 @@ int hw_device_reset(struct ci_hdrc *ci) return 0; } -static irqreturn_t ci_irq(int irq, void *data) +static irqreturn_t ci_irq_handler(int irq, void *data) { struct ci_hdrc *ci = data; irqreturn_t ret = IRQ_NONE; @@ -562,6 +562,15 @@ static irqreturn_t ci_irq(int irq, void *data) return ret; } +static void ci_irq(struct ci_hdrc *ci) +{ + unsigned long flags; + + local_irq_save(flags); + ci_irq_handler(ci->irq, ci); + local_irq_restore(flags); +} + static int ci_cable_notifier(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -571,7 +580,7 @@ static int ci_cable_notifier(struct notifier_block *nb, unsigned long event, cbl->connected = event; cbl->changed = true; - ci_irq(ci->irq, ci); + ci_irq(ci); return NOTIFY_DONE; } @@ -612,7 +621,7 @@ static int ci_usb_role_switch_set(struct usb_role_switch *sw, if (cable) { cable->changed = true; cable->connected = false; - ci_irq(ci->irq, ci); + ci_irq(ci); spin_unlock_irqrestore(&ci->lock, flags); if (ci->wq && role != USB_ROLE_NONE) flush_workqueue(ci->wq); @@ -630,7 +639,7 @@ static int ci_usb_role_switch_set(struct usb_role_switch *sw, if (cable) { cable->changed = true; cable->connected = true; - ci_irq(ci->irq, ci); + ci_irq(ci); } spin_unlock_irqrestore(&ci->lock, flags); pm_runtime_put_sync(ci->dev); @@ -1166,7 +1175,7 @@ static int ci_hdrc_probe(struct platform_device *pdev) } } - ret = devm_request_irq(dev, ci->irq, ci_irq, IRQF_SHARED, + ret = devm_request_irq(dev, ci->irq, ci_irq_handler, IRQF_SHARED, ci->platdata->name, ci); if (ret) goto stop; @@ -1287,11 +1296,11 @@ static void ci_extcon_wakeup_int(struct ci_hdrc *ci) if (!IS_ERR(cable_id->edev) && ci->is_otg && (otgsc & OTGSC_IDIE) && (otgsc & OTGSC_IDIS)) - ci_irq(ci->irq, ci); + ci_irq(ci); if (!IS_ERR(cable_vbus->edev) && ci->is_otg && (otgsc & OTGSC_BSVIE) && (otgsc & OTGSC_BSVIS)) - ci_irq(ci->irq, ci); + ci_irq(ci); } static int ci_controller_resume(struct device *dev) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 7748b1335558e214ff403405d00cf315f0d3dc69..7950d5b3af4293fa01c6df442d8734ac111f917d 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -340,6 +340,9 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf) acm->iocount.overrun++; spin_unlock_irqrestore(&acm->read_lock, flags); + if (newctrl & ACM_CTRL_BRK) + tty_flip_buffer_push(&acm->port); + if (difference) wake_up_all(&acm->wioctl); @@ -475,11 +478,16 @@ static int acm_submit_read_urbs(struct acm *acm, gfp_t mem_flags) static void acm_process_read_urb(struct acm *acm, struct urb *urb) { + unsigned long flags; + if (!urb->actual_length) return; + spin_lock_irqsave(&acm->read_lock, flags); tty_insert_flip_string(&acm->port, urb->transfer_buffer, urb->actual_length); + spin_unlock_irqrestore(&acm->read_lock, flags); + tty_flip_buffer_push(&acm->port); } diff --git a/drivers/usb/common/Kconfig b/drivers/usb/common/Kconfig index 5e8a04e3dd3c896ed3a9a4d309d8963202247e74..b856622431a73ef32286c581ab1137dc8bfa50dc 100644 --- a/drivers/usb/common/Kconfig +++ b/drivers/usb/common/Kconfig @@ -6,8 +6,7 @@ config USB_COMMON config USB_LED_TRIG bool "USB LED Triggers" - depends on LEDS_CLASS && LEDS_TRIGGERS - select USB_COMMON + depends on LEDS_CLASS && USB_COMMON && LEDS_TRIGGERS help This option adds LED triggers for USB host and/or gadget activity. diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c index a18d7c4222ddfedb62c3e1605190239601c1a1d5..3c705f1bead8c17acec460896f48f6cf13011274 100644 --- a/drivers/usb/common/ulpi.c +++ b/drivers/usb/common/ulpi.c @@ -39,8 +39,11 @@ static int ulpi_match(struct device *dev, struct device_driver *driver) struct ulpi *ulpi = to_ulpi_dev(dev); const struct ulpi_device_id *id; - /* Some ULPI devices don't have a vendor id so rely on OF match */ - if (ulpi->id.vendor == 0) + /* + * Some ULPI devices don't have a vendor id + * or provide an id_table so rely on OF match. + */ + if (ulpi->id.vendor == 0 || !drv->id_table) return of_driver_match_device(dev, driver); for (id = drv->id_table; id->vendor; id++) @@ -129,6 +132,7 @@ static const struct attribute_group *ulpi_dev_attr_groups[] = { static void ulpi_dev_release(struct device *dev) { + of_node_put(dev->of_node); kfree(to_ulpi_dev(dev)); } @@ -246,12 +250,16 @@ static int ulpi_register(struct device *dev, struct ulpi *ulpi) return ret; ret = ulpi_read_id(ulpi); - if (ret) + if (ret) { + of_node_put(ulpi->dev.of_node); return ret; + } ret = device_register(&ulpi->dev); - if (ret) + if (ret) { + put_device(&ulpi->dev); return ret; + } dev_dbg(&ulpi->dev, "registered ULPI PHY: vendor %04x, product %04x\n", ulpi->id.vendor, ulpi->id.product); @@ -298,7 +306,6 @@ EXPORT_SYMBOL_GPL(ulpi_register_interface); */ void ulpi_unregister_interface(struct ulpi *ulpi) { - of_node_put(ulpi->dev.of_node); device_unregister(&ulpi->dev); } EXPORT_SYMBOL_GPL(ulpi_unregister_interface); diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 562a730befdac64d8049c9b30bd6ce0e291f2af2..39f1eca60a7146f172b68063b037f20e4d118d7e 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -406,7 +406,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, * the USB-2 spec requires such endpoints to have wMaxPacketSize = 0 * (see the end of section 5.6.3), so don't warn about them. */ - maxp = usb_endpoint_maxp(&endpoint->desc); + maxp = le16_to_cpu(endpoint->desc.wMaxPacketSize); if (maxp == 0 && !(usb_endpoint_xfer_isoc(d) && asnum == 0)) { dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n", cfgno, inum, asnum, d->bEndpointAddress); @@ -422,9 +422,9 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, maxpacket_maxes = full_speed_maxpacket_maxes; break; case USB_SPEED_HIGH: - /* Bits 12..11 are allowed only for HS periodic endpoints */ + /* Multiple-transactions bits are allowed only for HS periodic endpoints */ if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) { - i = maxp & (BIT(12) | BIT(11)); + i = maxp & USB_EP_MAXP_MULT_MASK; maxp &= ~i; } fallthrough; diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 4bbf3316a9a53164f276ba112533bd866f7bf1de..ddd1d3eef912b89f8f803c2f94a9a0df74f0f1de 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -754,6 +754,7 @@ void usb_hcd_poll_rh_status(struct usb_hcd *hcd) { struct urb *urb; int length; + int status; unsigned long flags; char buffer[6]; /* Any root hubs with > 31 ports? */ @@ -771,11 +772,17 @@ void usb_hcd_poll_rh_status(struct usb_hcd *hcd) if (urb) { clear_bit(HCD_FLAG_POLL_PENDING, &hcd->flags); hcd->status_urb = NULL; + if (urb->transfer_buffer_length >= length) { + status = 0; + } else { + status = -EOVERFLOW; + length = urb->transfer_buffer_length; + } urb->actual_length = length; memcpy(urb->transfer_buffer, buffer, length); usb_hcd_unlink_urb_from_ep(hcd, urb); - usb_hcd_giveback_urb(hcd, urb, 0); + usb_hcd_giveback_urb(hcd, urb, status); } else { length = 0; set_bit(HCD_FLAG_POLL_PENDING, &hcd->flags); @@ -1555,6 +1562,13 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags) urb->hcpriv = NULL; INIT_LIST_HEAD(&urb->urb_list); atomic_dec(&urb->use_count); + /* + * Order the write of urb->use_count above before the read + * of urb->reject below. Pairs with the memory barriers in + * usb_kill_urb() and usb_poison_urb(). + */ + smp_mb__after_atomic(); + atomic_dec(&urb->dev->urbnum); if (atomic_read(&urb->reject)) wake_up(&usb_kill_urb_queue); @@ -1659,6 +1673,13 @@ static void __usb_hcd_giveback_urb(struct urb *urb) usb_anchor_resume_wakeups(anchor); atomic_dec(&urb->use_count); + /* + * Order the write of urb->use_count above before the read + * of urb->reject below. Pairs with the memory barriers in + * usb_kill_urb() and usb_poison_urb(). + */ + smp_mb__after_atomic(); + if (unlikely(atomic_read(&urb->reject))) wake_up(&usb_kill_urb_queue); usb_put_urb(urb); @@ -2640,7 +2661,6 @@ int usb_add_hcd(struct usb_hcd *hcd, { int retval; struct usb_device *rhdev; - struct usb_hcd *shared_hcd; if (!hcd->skip_phy_initialization && usb_hcd_is_primary_hcd(hcd)) { hcd->phy_roothub = usb_phy_roothub_alloc(hcd->self.sysdev); @@ -2797,26 +2817,13 @@ int usb_add_hcd(struct usb_hcd *hcd, goto err_hcd_driver_start; } - /* starting here, usbcore will pay attention to the shared HCD roothub */ - shared_hcd = hcd->shared_hcd; - if (!usb_hcd_is_primary_hcd(hcd) && shared_hcd && HCD_DEFER_RH_REGISTER(shared_hcd)) { - retval = register_root_hub(shared_hcd); - if (retval != 0) - goto err_register_root_hub; - - if (shared_hcd->uses_new_polling && HCD_POLL_RH(shared_hcd)) - usb_hcd_poll_rh_status(shared_hcd); - } - /* starting here, usbcore will pay attention to this root hub */ - if (!HCD_DEFER_RH_REGISTER(hcd)) { - retval = register_root_hub(hcd); - if (retval != 0) - goto err_register_root_hub; + retval = register_root_hub(hcd); + if (retval != 0) + goto err_register_root_hub; - if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) - usb_hcd_poll_rh_status(hcd); - } + if (hcd->uses_new_polling && HCD_POLL_RH(hcd)) + usb_hcd_poll_rh_status(hcd); return retval; @@ -2859,7 +2866,6 @@ EXPORT_SYMBOL_GPL(usb_add_hcd); void usb_remove_hcd(struct usb_hcd *hcd) { struct usb_device *rhdev = hcd->self.root_hub; - bool rh_registered; dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); @@ -2870,7 +2876,6 @@ void usb_remove_hcd(struct usb_hcd *hcd) dev_dbg(hcd->self.controller, "roothub graceful disconnect\n"); spin_lock_irq (&hcd_root_hub_lock); - rh_registered = hcd->rh_registered; hcd->rh_registered = 0; spin_unlock_irq (&hcd_root_hub_lock); @@ -2880,8 +2885,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) cancel_work_sync(&hcd->died_work); mutex_lock(&usb_bus_idr_lock); - if (rh_registered) - usb_disconnect(&rhdev); /* Sets rhdev to NULL */ + usb_disconnect(&rhdev); /* Sets rhdev to NULL */ mutex_unlock(&usb_bus_idr_lock); /* diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 95a9bae72f135acee08f858cbb1d36499dd6c94b..18ee3914b46866690ea25bf6e9d1c79f1f8bab25 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1109,7 +1109,10 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) } else { hub_power_on(hub, true); } - } + /* Give some time on remote wakeup to let links to transit to U0 */ + } else if (hub_is_superspeed(hub->hdev)) + msleep(20); + init2: /* @@ -1224,7 +1227,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) */ if (portchange || (hub_is_superspeed(hub->hdev) && port_resumed)) - set_bit(port1, hub->change_bits); + set_bit(port1, hub->event_bits); } else if (udev->persist_enabled) { #ifdef CONFIG_PM @@ -4628,8 +4631,6 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1, if (oldspeed == USB_SPEED_LOW) delay = HUB_LONG_RESET_TIME; - mutex_lock(hcd->address0_mutex); - /* Reset the device; full speed may morph to high speed */ /* FIXME a USB 2.0 device may morph into SuperSpeed on reset. */ retval = hub_port_reset(hub, port1, udev, delay, false); @@ -4940,7 +4941,6 @@ fail: hub_port_disable(hub, port1, 0); update_devnum(udev, devnum); /* for disconnect processing */ } - mutex_unlock(hcd->address0_mutex); return retval; } @@ -5115,6 +5115,7 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, struct usb_port *port_dev = hub->ports[port1 - 1]; struct usb_device *udev = port_dev->child; static int unreliable_port = -1; + bool retry_locked; /* Disconnect any existing devices under this port */ if (udev) { @@ -5170,8 +5171,11 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, unit_load = 100; status = 0; - for (i = 0; i < PORT_INIT_TRIES; i++) { + for (i = 0; i < PORT_INIT_TRIES; i++) { + usb_lock_port(port_dev); + mutex_lock(hcd->address0_mutex); + retry_locked = true; /* reallocate for each attempt, since references * to the previous one can escape in various ways */ @@ -5179,6 +5183,8 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, if (!udev) { dev_err(&port_dev->dev, "couldn't allocate usb_device\n"); + mutex_unlock(hcd->address0_mutex); + usb_unlock_port(port_dev); goto done; } @@ -5200,12 +5206,14 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus, } /* reset (non-USB 3.0 devices) and get descriptor */ - usb_lock_port(port_dev); status = hub_port_init(hub, udev, port1, i); - usb_unlock_port(port_dev); if (status < 0) goto loop; + mutex_unlock(hcd->address0_mutex); + usb_unlock_port(port_dev); + retry_locked = false; + if (udev->quirks & USB_QUIRK_DELAY_INIT) msleep(2000); @@ -5298,6 +5306,10 @@ loop: usb_ep0_reinit(udev); release_devnum(udev); hub_free_dev(udev); + if (retry_locked) { + mutex_unlock(hcd->address0_mutex); + usb_unlock_port(port_dev); + } usb_put_dev(udev); if ((status == -ENOTCONN) || (status == -ENOTSUPP)) break; @@ -5839,6 +5851,8 @@ static int usb_reset_and_verify_device(struct usb_device *udev) bos = udev->bos; udev->bos = NULL; + mutex_lock(hcd->address0_mutex); + for (i = 0; i < PORT_INIT_TRIES; ++i) { /* ep0 maxpacket size may change; let the HCD know about it. @@ -5848,6 +5862,7 @@ static int usb_reset_and_verify_device(struct usb_device *udev) if (ret >= 0 || ret == -ENOTCONN || ret == -ENODEV) break; } + mutex_unlock(hcd->address0_mutex); if (ret < 0) goto re_enumerate; diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index a54a735b638433caf163441335f39550b7bcf0ac..baf80e2ac7d8e30a094ea9bda8b22ccc757c902b 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -435,6 +435,12 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x1532, 0x0116), .driver_info = USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL }, + /* Lenovo USB-C to Ethernet Adapter RTL8153-04 */ + { USB_DEVICE(0x17ef, 0x720c), .driver_info = USB_QUIRK_NO_LPM }, + + /* Lenovo Powered USB-C Travel Hub (4X90S92381, RTL8153 GigE) */ + { USB_DEVICE(0x17ef, 0x721e), .driver_info = USB_QUIRK_NO_LPM }, + /* Lenovo ThinkCenter A630Z TI024Gen3 usb-audio */ { USB_DEVICE(0x17ef, 0xa012), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index 357b149b20d3a1cafdbb1d36aba1a157984166f0..9c285026f827675d099e7f3c38a39b1d6b07e47e 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -706,6 +706,12 @@ void usb_kill_urb(struct urb *urb) if (!(urb && urb->dev && urb->ep)) return; atomic_inc(&urb->reject); + /* + * Order the write of urb->reject above before the read + * of urb->use_count below. Pairs with the barriers in + * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). + */ + smp_mb__after_atomic(); usb_hcd_unlink_urb(urb, -ENOENT); wait_event(usb_kill_urb_queue, atomic_read(&urb->use_count) == 0); @@ -747,6 +753,12 @@ void usb_poison_urb(struct urb *urb) if (!urb) return; atomic_inc(&urb->reject); + /* + * Order the write of urb->reject above before the read + * of urb->use_count below. Pairs with the barriers in + * __usb_hcd_giveback_urb() and usb_hcd_submit_urb(). + */ + smp_mb__after_atomic(); if (!urb->dev || !urb->ep) return; diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h index 641e4251cb7f10d501e059532092918686eaf108..03d16a08261d88e9f1ad91660cb363ed11155b65 100644 --- a/drivers/usb/dwc2/core.h +++ b/drivers/usb/dwc2/core.h @@ -1406,6 +1406,7 @@ void dwc2_hsotg_core_connect(struct dwc2_hsotg *hsotg); void dwc2_hsotg_disconnect(struct dwc2_hsotg *dwc2); int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode); #define dwc2_is_device_connected(hsotg) (hsotg->connected) +#define dwc2_is_device_enabled(hsotg) (hsotg->enabled) int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg); int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, int remote_wakeup); int dwc2_gadget_enter_hibernation(struct dwc2_hsotg *hsotg); @@ -1434,6 +1435,7 @@ static inline int dwc2_hsotg_set_test_mode(struct dwc2_hsotg *hsotg, int testmode) { return 0; } #define dwc2_is_device_connected(hsotg) (0) +#define dwc2_is_device_enabled(hsotg) (0) static inline int dwc2_backup_device_registers(struct dwc2_hsotg *hsotg) { return 0; } static inline int dwc2_restore_device_registers(struct dwc2_hsotg *hsotg, diff --git a/drivers/usb/dwc2/drd.c b/drivers/usb/dwc2/drd.c index 2d4176f5788eb22e87f519cf08c3294c8fb734fa..36f2c38416e5ec4c2b942c0a49db3571ee9d04fd 100644 --- a/drivers/usb/dwc2/drd.c +++ b/drivers/usb/dwc2/drd.c @@ -7,6 +7,7 @@ * Author(s): Amelie Delaunay */ +#include #include #include #include @@ -25,9 +26,9 @@ static void dwc2_ovr_init(struct dwc2_hsotg *hsotg) gotgctl &= ~(GOTGCTL_BVALOVAL | GOTGCTL_AVALOVAL | GOTGCTL_VBVALOVAL); dwc2_writel(hsotg, gotgctl, GOTGCTL); - dwc2_force_mode(hsotg, false); - spin_unlock_irqrestore(&hsotg->lock, flags); + + dwc2_force_mode(hsotg, (hsotg->dr_mode == USB_DR_MODE_HOST)); } static int dwc2_ovr_avalid(struct dwc2_hsotg *hsotg, bool valid) @@ -39,6 +40,7 @@ static int dwc2_ovr_avalid(struct dwc2_hsotg *hsotg, bool valid) (!valid && !(gotgctl & GOTGCTL_ASESVLD))) return -EALREADY; + gotgctl &= ~GOTGCTL_BVALOVAL; if (valid) gotgctl |= GOTGCTL_AVALOVAL | GOTGCTL_VBVALOVAL; else @@ -57,6 +59,7 @@ static int dwc2_ovr_bvalid(struct dwc2_hsotg *hsotg, bool valid) (!valid && !(gotgctl & GOTGCTL_BSESVLD))) return -EALREADY; + gotgctl &= ~GOTGCTL_AVALOVAL; if (valid) gotgctl |= GOTGCTL_BVALOVAL | GOTGCTL_VBVALOVAL; else @@ -86,14 +89,30 @@ static int dwc2_drd_role_sw_set(struct usb_role_switch *sw, enum usb_role role) } #endif + /* + * In case of USB_DR_MODE_PERIPHERAL, clock is disabled at the end of + * the probe and enabled on udc_start. + * If role-switch set is called before the udc_start, we need to enable + * the clock to read/write GOTGCTL and GUSBCFG registers to override + * mode and sessions. It is the case if cable is plugged at boot. + */ + if (!hsotg->ll_hw_enabled && hsotg->clk) { + int ret = clk_prepare_enable(hsotg->clk); + + if (ret) + return ret; + } + spin_lock_irqsave(&hsotg->lock, flags); if (role == USB_ROLE_HOST) { already = dwc2_ovr_avalid(hsotg, true); } else if (role == USB_ROLE_DEVICE) { already = dwc2_ovr_bvalid(hsotg, true); - /* This clear DCTL.SFTDISCON bit */ - dwc2_hsotg_core_connect(hsotg); + if (dwc2_is_device_enabled(hsotg)) { + /* This clear DCTL.SFTDISCON bit */ + dwc2_hsotg_core_connect(hsotg); + } } else { if (dwc2_is_device_mode(hsotg)) { if (!dwc2_ovr_bvalid(hsotg, false)) @@ -110,6 +129,9 @@ static int dwc2_drd_role_sw_set(struct usb_role_switch *sw, enum usb_role role) /* This will raise a Connector ID Status Change Interrupt */ dwc2_force_mode(hsotg, role == USB_ROLE_HOST); + if (!hsotg->ll_hw_enabled && hsotg->clk) + clk_disable_unprepare(hsotg->clk); + dev_dbg(hsotg->dev, "%s-session valid\n", role == USB_ROLE_NONE ? "No" : role == USB_ROLE_HOST ? "A" : "B"); diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c index 7207a36c6e26b47ce32c7dd83c46b4b336a8a55e..ec54971063f8f499d77fe5e1c842b268a68d6f0a 100644 --- a/drivers/usb/dwc2/gadget.c +++ b/drivers/usb/dwc2/gadget.c @@ -1198,6 +1198,8 @@ static void dwc2_hsotg_start_req(struct dwc2_hsotg *hsotg, } ctrl |= DXEPCTL_CNAK; } else { + hs_req->req.frame_number = hs_ep->target_frame; + hs_req->req.actual = 0; dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA); return; } @@ -2856,9 +2858,12 @@ static void dwc2_gadget_handle_ep_disabled(struct dwc2_hsotg_ep *hs_ep) do { hs_req = get_ep_head(hs_ep); - if (hs_req) + if (hs_req) { + hs_req->req.frame_number = hs_ep->target_frame; + hs_req->req.actual = 0; dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA); + } dwc2_gadget_incr_frame_num(hs_ep); /* Update current frame number value. */ hsotg->frame_number = dwc2_hsotg_read_frameno(hsotg); @@ -2911,8 +2916,11 @@ static void dwc2_gadget_handle_out_token_ep_disabled(struct dwc2_hsotg_ep *ep) while (dwc2_gadget_target_frame_elapsed(ep)) { hs_req = get_ep_head(ep); - if (hs_req) + if (hs_req) { + hs_req->req.frame_number = ep->target_frame; + hs_req->req.actual = 0; dwc2_hsotg_complete_request(hsotg, ep, hs_req, -ENODATA); + } dwc2_gadget_incr_frame_num(ep); /* Update current frame number value. */ @@ -3001,8 +3009,11 @@ static void dwc2_gadget_handle_nak(struct dwc2_hsotg_ep *hs_ep) while (dwc2_gadget_target_frame_elapsed(hs_ep)) { hs_req = get_ep_head(hs_ep); - if (hs_req) + if (hs_req) { + hs_req->req.frame_number = hs_ep->target_frame; + hs_req->req.actual = 0; dwc2_hsotg_complete_request(hsotg, hs_ep, hs_req, -ENODATA); + } dwc2_gadget_incr_frame_num(hs_ep); /* Update current frame number value. */ @@ -5021,7 +5032,7 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg) hsotg->gadget.speed = USB_SPEED_UNKNOWN; spin_unlock_irqrestore(&hsotg->lock, flags); - for (ep = 0; ep < hsotg->num_of_eps; ep++) { + for (ep = 1; ep < hsotg->num_of_eps; ep++) { if (hsotg->eps_in[ep]) dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep); if (hsotg->eps_out[ep]) diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index 6af1dcbc36564c603847c483f500218b4ef86502..30919f741b7fdfe6ce6279b525f67dbb6a31cf45 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -5074,6 +5074,10 @@ int dwc2_hcd_init(struct dwc2_hsotg *hsotg) hcd->has_tt = 1; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + retval = -EINVAL; + goto error1; + } hcd->rsrc_start = res->start; hcd->rsrc_len = resource_size(res); diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c index 68bbac64b7536aec43574d543be8ff758b718fa9..94af71e9856f2e8cce5f89735c4c1c0e9f429ffa 100644 --- a/drivers/usb/dwc2/hcd_queue.c +++ b/drivers/usb/dwc2/hcd_queue.c @@ -59,7 +59,7 @@ #define DWC2_UNRESERVE_DELAY (msecs_to_jiffies(5)) /* If we get a NAK, wait this long before retrying */ -#define DWC2_RETRY_WAIT_DELAY 1*1E6L +#define DWC2_RETRY_WAIT_DELAY (1 * NSEC_PER_MSEC) /** * dwc2_periodic_channel_available() - Checks that a channel is available for a diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c index 5f18acac7406885927b4d473045aaae94d474470..49d333f02af4e76fe9d355f046b8184c9a2e985f 100644 --- a/drivers/usb/dwc2/platform.c +++ b/drivers/usb/dwc2/platform.c @@ -542,6 +542,9 @@ static int dwc2_driver_probe(struct platform_device *dev) ggpio |= GGPIO_STM32_OTG_GCCFG_IDEN; ggpio |= GGPIO_STM32_OTG_GCCFG_VBDEN; dwc2_writel(hsotg, ggpio, GGPIO); + + /* ID/VBUS detection startup time */ + usleep_range(5000, 7000); } retval = dwc2_drd_init(hsotg); diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index be4690bcf1302c1819392acd4b9cf589222a641b..0bd2f41f13ba078c57870c7d0497b02cb6ae8af2 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -1566,6 +1566,12 @@ static int dwc3_probe(struct platform_device *pdev) dwc3_get_properties(dwc); + if (!dwc->sysdev_is_parent) { + ret = dma_set_mask_and_coherent(dwc->sysdev, DMA_BIT_MASK(64)); + if (ret) + return ret; + } + dwc->reset = devm_reset_control_array_get_optional_shared(dev); if (IS_ERR(dwc->reset)) return PTR_ERR(dwc->reset); diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index d938f85c58471825aacb2580f7dac64d24792aee..8ac17e4cf5a7b81447dda75b56b784822f44e8aa 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -144,7 +144,7 @@ #define DWC3_GHWPARAMS8 0xc600 #define DWC3_GUCTL3 0xc60c #define DWC3_GFLADJ 0xc630 -#define DWC3_GHWPARAMS9 0xc680 +#define DWC3_GHWPARAMS9 0xc6e0 /* Device Registers */ #define DWC3_DCFG 0xc700 @@ -728,6 +728,7 @@ struct dwc3_ep { #define DWC3_EP_FORCE_RESTART_STREAM BIT(9) #define DWC3_EP_FIRST_STREAM_PRIMED BIT(10) #define DWC3_EP_PENDING_CLEAR_STALL BIT(11) +#define DWC3_EP_TXFIFO_RESIZED BIT(12) /* This last one is specific to EP0 */ #define DWC3_EP0_DIR_IN BIT(31) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index e5d7eed2e60cff70432a6f1bb22fc2b0ef14d86a..492977514d953eb6f70432e601ee1a4147744bc0 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -83,8 +83,8 @@ static const struct acpi_gpio_mapping acpi_dwc3_byt_gpios[] = { static struct gpiod_lookup_table platform_bytcr_gpios = { .dev_id = "0000:00:16.0", .table = { - GPIO_LOOKUP("INT33FC:00", 54, "reset", GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("INT33FC:02", 14, "cs", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("INT33FC:00", 54, "cs", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("INT33FC:02", 14, "reset", GPIO_ACTIVE_HIGH), {} }, }; diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index f985bc3fedf902ba51882cc91b6e763e56527a7c..504f8af4d0f80ba319276d9c8347a6c0d5b2f38c 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -644,7 +644,6 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) struct dwc3_qcom *qcom = platform_get_drvdata(pdev); struct device_node *np = pdev->dev.of_node, *dwc3_np; struct device *dev = &pdev->dev; - struct property *prop; int ret; dwc3_np = of_get_child_by_name(np, "dwc3"); @@ -653,20 +652,6 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) return -ENODEV; } - prop = devm_kzalloc(dev, sizeof(*prop), GFP_KERNEL); - if (!prop) { - ret = -ENOMEM; - dev_err(dev, "unable to allocate memory for property\n"); - goto node_put; - } - - prop->name = "tx-fifo-resize"; - ret = of_add_property(dwc3_np, prop); - if (ret) { - dev_err(dev, "unable to add property\n"); - goto node_put; - } - ret = of_platform_populate(np, NULL, NULL, dev); if (ret) { dev_err(dev, "failed to register dwc3 core - %d\n", ret); @@ -779,9 +764,12 @@ static int dwc3_qcom_probe(struct platform_device *pdev) if (qcom->acpi_pdata->is_urs) { qcom->urs_usb = dwc3_qcom_create_urs_usb_platdev(dev); - if (!qcom->urs_usb) { + if (IS_ERR_OR_NULL(qcom->urs_usb)) { dev_err(dev, "failed to create URS USB platdev\n"); - return -ENODEV; + if (!qcom->urs_usb) + return -ENODEV; + else + return PTR_ERR(qcom->urs_usb); } } } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 2867c923a8053f9e38ed1b5e21fef122431a9a8b..2f8004d5c0dc2531dd3e984f42f47377ada2e8a0 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -310,13 +310,24 @@ int dwc3_send_gadget_ep_cmd(struct dwc3_ep *dep, unsigned int cmd, if (DWC3_DEPCMD_CMD(cmd) == DWC3_DEPCMD_STARTTRANSFER) { int link_state; + /* + * Initiate remote wakeup if the link state is in U3 when + * operating in SS/SSP or L1/L2 when operating in HS/FS. If the + * link state is in U1/U2, no remote wakeup is needed. The Start + * Transfer command will initiate the link recovery. + */ link_state = dwc3_gadget_get_link_state(dwc); - if (link_state == DWC3_LINK_STATE_U1 || - link_state == DWC3_LINK_STATE_U2 || - link_state == DWC3_LINK_STATE_U3) { + switch (link_state) { + case DWC3_LINK_STATE_U2: + if (dwc->gadget->speed >= USB_SPEED_SUPER) + break; + + fallthrough; + case DWC3_LINK_STATE_U3: ret = __dwc3_gadget_wakeup(dwc); dev_WARN_ONCE(dwc->dev, ret, "wakeup failed --> %d\n", ret); + break; } } @@ -700,6 +711,7 @@ void dwc3_gadget_clear_tx_fifos(struct dwc3 *dwc) DWC31_GTXFIFOSIZ_TXFRAMNUM; dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(num >> 1), size); + dep->flags &= ~DWC3_EP_TXFIFO_RESIZED; } dwc->num_ep_resized = 0; } @@ -745,6 +757,10 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) if (!usb_endpoint_dir_in(dep->endpoint.desc) || dep->number <= 1) return 0; + /* bail if already resized */ + if (dep->flags & DWC3_EP_TXFIFO_RESIZED) + return 0; + ram1_depth = DWC3_RAM1_DEPTH(dwc->hwparams.hwparams7); if ((dep->endpoint.maxburst > 1 && @@ -805,6 +821,7 @@ static int dwc3_gadget_resize_tx_fifos(struct dwc3_ep *dep) } dwc3_writel(dwc->regs, DWC3_GTXFIFOSIZ(dep->number >> 1), fifo_size); + dep->flags |= DWC3_EP_TXFIFO_RESIZED; dwc->num_ep_resized++; return 0; @@ -993,7 +1010,7 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) dep->stream_capable = false; dep->type = 0; - dep->flags = 0; + dep->flags &= DWC3_EP_TXFIFO_RESIZED; return 0; } @@ -1252,6 +1269,19 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, if (usb_endpoint_xfer_bulk(dep->endpoint.desc) && dep->stream_capable) trb->ctrl |= DWC3_TRB_CTRL_SID_SOFN(stream_id); + /* + * As per data book 4.2.3.2TRB Control Bit Rules section + * + * The controller autonomously checks the HWO field of a TRB to determine if the + * entire TRB is valid. Therefore, software must ensure that the rest of the TRB + * is valid before setting the HWO field to '1'. In most systems, this means that + * software must update the fourth DWORD of a TRB last. + * + * However there is a possibility of CPU re-ordering here which can cause + * controller to observe the HWO bit set prematurely. + * Add a write memory barrier to prevent CPU re-ordering. + */ + wmb(); trb->ctrl |= DWC3_TRB_CTRL_HWO; dwc3_ep_inc_enq(dep); @@ -3256,6 +3286,9 @@ static bool dwc3_gadget_endpoint_trbs_complete(struct dwc3_ep *dep, struct dwc3 *dwc = dep->dwc; bool no_started_trb = true; + if (!dep->endpoint.desc) + return no_started_trb; + dwc3_gadget_ep_cleanup_completed_requests(dep, event, status); if (dep->flags & DWC3_EP_END_TRANSFER_PENDING) @@ -3303,6 +3336,9 @@ static void dwc3_gadget_endpoint_transfer_in_progress(struct dwc3_ep *dep, { int status = 0; + if (!dep->endpoint.desc) + return; + if (usb_endpoint_xfer_isoc(dep->endpoint.desc)) dwc3_gadget_endpoint_frame_from_event(dep, event); @@ -3356,6 +3392,14 @@ static void dwc3_gadget_endpoint_command_complete(struct dwc3_ep *dep, if (cmd != DWC3_DEPCMD_ENDTRANSFER) return; + /* + * The END_TRANSFER command will cause the controller to generate a + * NoStream Event, and it's not due to the host DP NoStream rejection. + * Ignore the next NoStream event. + */ + if (dep->stream_capable) + dep->flags |= DWC3_EP_IGNORE_NEXT_NOSTREAM; + dep->flags &= ~DWC3_EP_END_TRANSFER_PENDING; dep->flags &= ~DWC3_EP_TRANSFER_STARTED; dwc3_gadget_ep_cleanup_cancelled_requests(dep); @@ -3578,14 +3622,6 @@ void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, WARN_ON_ONCE(ret); dep->resource_index = 0; - /* - * The END_TRANSFER command will cause the controller to generate a - * NoStream Event, and it's not due to the host DP NoStream rejection. - * Ignore the next NoStream event. - */ - if (dep->stream_capable) - dep->flags |= DWC3_EP_IGNORE_NEXT_NOSTREAM; - if (!interrupt) dep->flags &= ~DWC3_EP_TRANSFER_STARTED; else @@ -4111,9 +4147,11 @@ static irqreturn_t dwc3_thread_interrupt(int irq, void *_evt) unsigned long flags; irqreturn_t ret = IRQ_NONE; + local_bh_disable(); spin_lock_irqsave(&dwc->lock, flags); ret = dwc3_process_event_buf(evt); spin_unlock_irqrestore(&dwc->lock, flags); + local_bh_enable(); return ret; } diff --git a/drivers/usb/early/xhci-dbc.c b/drivers/usb/early/xhci-dbc.c index be4ecbabdd58694aa7fee6d395bd31fe377b6fc9..6c0434100e38cfa6e8a1dff9e224a17d2c745070 100644 --- a/drivers/usb/early/xhci-dbc.c +++ b/drivers/usb/early/xhci-dbc.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -136,9 +135,17 @@ static int handshake(void __iomem *ptr, u32 mask, u32 done, int wait, int delay) { u32 result; - return readl_poll_timeout_atomic(ptr, result, - ((result & mask) == done), - delay, wait); + /* Can not use readl_poll_timeout_atomic() for early boot things */ + do { + result = readl(ptr); + result &= mask; + if (result == done) + return 0; + udelay(delay); + wait -= delay; + } while (wait > 0); + + return -ETIMEDOUT; } static void __init xdbc_bios_handoff(void) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index 504c1cbc255d14a9ba7fefbc1d4f1e10a67f264c..553382ce38378fac3af220a10e61cf4fcfdb91f6 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1679,6 +1679,18 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) struct usb_function *f = NULL; u8 endp; + if (w_length > USB_COMP_EP0_BUFSIZ) { + if (ctrl->bRequestType & USB_DIR_IN) { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(USB_COMP_EP0_BUFSIZ); + w_length = USB_COMP_EP0_BUFSIZ; + } else { + goto done; + } + } + /* partial re-init of the response message; the function or the * gadget might need to intercept e.g. a control-OUT completion * when we delegate to it. @@ -1963,6 +1975,9 @@ unknown: if (w_index != 0x5 || (w_value >> 8)) break; interface = w_value & 0xFF; + if (interface >= MAX_CONFIG_INTERFACES || + !os_desc_cfg->interface[interface]) + break; buf[6] = w_index; count = count_ext_prop(os_desc_cfg, interface); @@ -2209,7 +2224,7 @@ int composite_dev_prepare(struct usb_composite_driver *composite, if (!cdev->req) return -ENOMEM; - cdev->req->buf = kmalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL); + cdev->req->buf = kzalloc(USB_COMP_EP0_BUFSIZ, GFP_KERNEL); if (!cdev->req->buf) goto fail; diff --git a/drivers/usb/gadget/function/f_accessory.c b/drivers/usb/gadget/function/f_accessory.c index f41214c152850c1468646fcbf514ea84069dfbd5..3510f6d39f0c3be9275a73efcc084c7cb2363945 100644 --- a/drivers/usb/gadget/function/f_accessory.c +++ b/drivers/usb/gadget/function/f_accessory.c @@ -678,8 +678,11 @@ fail: pr_err("acc_bind() could not allocate requests\n"); while ((req = req_get(dev, &dev->tx_idle))) acc_request_free(req, dev->ep_in); - for (i = 0; i < RX_REQ_MAX; i++) + for (i = 0; i < RX_REQ_MAX; i++) { acc_request_free(dev->rx_req[i], dev->ep_out); + dev->rx_req[i] = NULL; + } + return -1; } @@ -711,6 +714,12 @@ static ssize_t acc_read(struct file *fp, char __user *buf, goto done; } + if (!dev->rx_req[0]) { + pr_warn("acc_read: USB request already handled/freed"); + r = -EINVAL; + goto done; + } + /* * Calculate the data length by considering termination character. * Then compansite the difference of rounding up to @@ -922,6 +931,7 @@ static const struct file_operations acc_fops = { .read = acc_read, .write = acc_write, .unlocked_ioctl = acc_ioctl, + .compat_ioctl = acc_ioctl, .open = acc_open, .release = acc_release, }; @@ -1187,8 +1197,10 @@ acc_function_unbind(struct usb_configuration *c, struct usb_function *f) while ((req = req_get(dev, &dev->tx_idle))) acc_request_free(req, dev->ep_in); - for (i = 0; i < RX_REQ_MAX; i++) + for (i = 0; i < RX_REQ_MAX; i++) { acc_request_free(dev->rx_req[i], dev->ep_out); + dev->rx_req[i] = NULL; + } acc_hid_unbind(dev); } diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 725e35167837eb4f87f570b20970812d2cd47928..bb0d92837f6770f804310b25faaa2bd7d5453c4a 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -614,7 +614,7 @@ static int ffs_ep0_open(struct inode *inode, struct file *file) file->private_data = ffs; ffs_data_opened(ffs); - return 0; + return stream_open(inode, file); } static int ffs_ep0_release(struct inode *inode, struct file *file) @@ -1152,7 +1152,7 @@ ffs_epfile_open(struct inode *inode, struct file *file) file->private_data = epfile; ffs_data_opened(epfile->ffs); - return 0; + return stream_open(inode, file); } static int ffs_aio_cancel(struct kiocb *kiocb) @@ -1710,16 +1710,24 @@ static void ffs_data_put(struct ffs_data *ffs) static void ffs_data_closed(struct ffs_data *ffs) { + struct ffs_epfile *epfiles; + unsigned long flags; + ENTER(); if (atomic_dec_and_test(&ffs->opened)) { if (ffs->no_disconnect) { ffs->state = FFS_DEACTIVATED; - if (ffs->epfiles) { - ffs_epfiles_destroy(ffs->epfiles, - ffs->eps_count); - ffs->epfiles = NULL; - } + spin_lock_irqsave(&ffs->eps_lock, flags); + epfiles = ffs->epfiles; + ffs->epfiles = NULL; + spin_unlock_irqrestore(&ffs->eps_lock, + flags); + + if (epfiles) + ffs_epfiles_destroy(epfiles, + ffs->eps_count); + if (ffs->setup_state == FFS_SETUP_PENDING) __ffs_ep0_stall(ffs); } else { @@ -1766,17 +1774,34 @@ static struct ffs_data *ffs_data_new(const char *dev_name) static void ffs_data_clear(struct ffs_data *ffs) { + struct ffs_epfile *epfiles; + unsigned long flags; + ENTER(); ffs_closed(ffs); BUG_ON(ffs->gadget); - if (ffs->epfiles) - ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count); + spin_lock_irqsave(&ffs->eps_lock, flags); + epfiles = ffs->epfiles; + ffs->epfiles = NULL; + spin_unlock_irqrestore(&ffs->eps_lock, flags); - if (ffs->ffs_eventfd) + /* + * potential race possible between ffs_func_eps_disable + * & ffs_epfile_release therefore maintaining a local + * copy of epfile will save us from use-after-free. + */ + if (epfiles) { + ffs_epfiles_destroy(epfiles, ffs->eps_count); + ffs->epfiles = NULL; + } + + if (ffs->ffs_eventfd) { eventfd_ctx_put(ffs->ffs_eventfd); + ffs->ffs_eventfd = NULL; + } kfree(ffs->raw_descs_data); kfree(ffs->raw_strings); @@ -1789,7 +1814,6 @@ static void ffs_data_reset(struct ffs_data *ffs) ffs_data_clear(ffs); - ffs->epfiles = NULL; ffs->raw_descs_data = NULL; ffs->raw_descs = NULL; ffs->raw_strings = NULL; @@ -1918,12 +1942,15 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count) static void ffs_func_eps_disable(struct ffs_function *func) { - struct ffs_ep *ep = func->eps; - struct ffs_epfile *epfile = func->ffs->epfiles; - unsigned count = func->ffs->eps_count; + struct ffs_ep *ep; + struct ffs_epfile *epfile; + unsigned short count; unsigned long flags; spin_lock_irqsave(&func->ffs->eps_lock, flags); + count = func->ffs->eps_count; + epfile = func->ffs->epfiles; + ep = func->eps; while (count--) { /* pending requests get nuked */ if (likely(ep->ep)) @@ -1941,14 +1968,18 @@ static void ffs_func_eps_disable(struct ffs_function *func) static int ffs_func_eps_enable(struct ffs_function *func) { - struct ffs_data *ffs = func->ffs; - struct ffs_ep *ep = func->eps; - struct ffs_epfile *epfile = ffs->epfiles; - unsigned count = ffs->eps_count; + struct ffs_data *ffs; + struct ffs_ep *ep; + struct ffs_epfile *epfile; + unsigned short count; unsigned long flags; int ret = 0; spin_lock_irqsave(&func->ffs->eps_lock, flags); + ffs = func->ffs; + ep = func->eps; + epfile = ffs->epfiles; + count = ffs->eps_count; while(count--) { ep->ep->driver_data = ep; diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 950c9435beec376d7ca81c5e110af83d47ada626..03db2d6b2dbae45834c8e9edfad75d022c93407a 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -2301,6 +2301,16 @@ static void fsg_disable(struct usb_function *f) { struct fsg_dev *fsg = fsg_from_func(f); + /* Disable the endpoints */ + if (fsg->bulk_in_enabled) { + usb_ep_disable(fsg->bulk_in); + fsg->bulk_in_enabled = 0; + } + if (fsg->bulk_out_enabled) { + usb_ep_disable(fsg->bulk_out); + fsg->bulk_out_enabled = 0; + } + __raise_exception(fsg->common, FSG_STATE_CONFIG_CHANGE, NULL); } @@ -3436,6 +3446,7 @@ static struct usb_function *fsg_alloc(struct usb_function_instance *fi) DECLARE_USB_FUNCTION_INIT(mass_storage, fsg_alloc_inst, fsg_alloc); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); MODULE_AUTHOR("Michal Nazarewicz"); /************************* Module parameters *************************/ diff --git a/drivers/usb/gadget/function/f_serial.c b/drivers/usb/gadget/function/f_serial.c index 1ed8ff0ac2d310002e1a8c33eabcdab1dca756e8..a9480b9e312e32ab9c8325bd90e2b686a02c134e 100644 --- a/drivers/usb/gadget/function/f_serial.c +++ b/drivers/usb/gadget/function/f_serial.c @@ -345,6 +345,10 @@ static void gser_free(struct usb_function *f) static void gser_unbind(struct usb_configuration *c, struct usb_function *f) { + struct f_gser *gser = func_to_gser(f); + + /* Ensure port is disconnected before unbinding */ + gserial_disconnect(&gser->port); usb_free_all_descriptors(f); } diff --git a/drivers/usb/gadget/function/f_sourcesink.c b/drivers/usb/gadget/function/f_sourcesink.c index 282737e4609ce85cc3ee16b491de22f99f5b65dd..2c65a9bb3c81bc52a5cd073b758b90deba5c714f 100644 --- a/drivers/usb/gadget/function/f_sourcesink.c +++ b/drivers/usb/gadget/function/f_sourcesink.c @@ -583,6 +583,7 @@ static int source_sink_start_ep(struct f_sourcesink *ss, bool is_in, if (is_iso) { switch (speed) { + case USB_SPEED_SUPER_PLUS: case USB_SPEED_SUPER: size = ss->isoc_maxpacket * (ss->isoc_mult + 1) * diff --git a/drivers/usb/gadget/function/f_uac1_legacy.c b/drivers/usb/gadget/function/f_uac1_legacy.c index e2d7f69128a0e85b36088325dba484697280e9bf..8ffd477e79e11bca72b11e0540fbc9fe9c22610c 100644 --- a/drivers/usb/gadget/function/f_uac1_legacy.c +++ b/drivers/usb/gadget/function/f_uac1_legacy.c @@ -1015,4 +1015,5 @@ static struct usb_function *f_audio_alloc(struct usb_function_instance *fi) DECLARE_USB_FUNCTION_INIT(uac1_legacy, f_audio_alloc_inst, f_audio_alloc); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); MODULE_AUTHOR("Bryan Wu"); diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 37c94031af1edd69c66130af1f89db3eb0b6784a..f1ba000ac3f7d645f7073507ca204e37eeea4779 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -177,7 +177,7 @@ static struct uac2_input_terminal_descriptor io_in_it_desc = { .bDescriptorSubtype = UAC_INPUT_TERMINAL, /* .bTerminalID = DYNAMIC */ - .wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_UNDEFINED), + .wTerminalType = cpu_to_le16(UAC_INPUT_TERMINAL_MICROPHONE), .bAssocTerminal = 0, /* .bCSourceID = DYNAMIC */ .iChannelNames = 0, @@ -205,7 +205,7 @@ static struct uac2_output_terminal_descriptor io_out_ot_desc = { .bDescriptorSubtype = UAC_OUTPUT_TERMINAL, /* .bTerminalID = DYNAMIC */ - .wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_UNDEFINED), + .wTerminalType = cpu_to_le16(UAC_OUTPUT_TERMINAL_SPEAKER), .bAssocTerminal = 0, /* .bSourceID = DYNAMIC */ /* .bCSourceID = DYNAMIC */ @@ -593,11 +593,17 @@ static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, ssize = uac2_opts->c_ssize; } - if (!is_playback && (uac2_opts->c_sync == USB_ENDPOINT_SYNC_ASYNC)) + if (!is_playback && (uac2_opts->c_sync == USB_ENDPOINT_SYNC_ASYNC)) { + // Win10 requires max packet size + 1 frame srate = srate * (1000 + uac2_opts->fb_max) / 1000; - - max_size_bw = num_channels(chmask) * ssize * - DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))); + // updated srate is always bigger, therefore DIV_ROUND_UP always yields +1 + max_size_bw = num_channels(chmask) * ssize * + (DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1)))); + } else { + // adding 1 frame provision for Win10 + max_size_bw = num_channels(chmask) * ssize * + (DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))) + 1); + } ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw, max_size_ep)); diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 64de9f1b874c55a3fdb4dddb49ecbdc4eec0aa1b..0f14c5291af0742af2b33695711912210297b490 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -637,14 +637,17 @@ static int rndis_set_response(struct rndis_params *params, rndis_set_cmplt_type *resp; rndis_resp_t *r; + BufLength = le32_to_cpu(buf->InformationBufferLength); + BufOffset = le32_to_cpu(buf->InformationBufferOffset); + if ((BufLength > RNDIS_MAX_TOTAL_SIZE) || + (BufOffset + 8 >= RNDIS_MAX_TOTAL_SIZE)) + return -EINVAL; + r = rndis_add_response(params, sizeof(rndis_set_cmplt_type)); if (!r) return -ENOMEM; resp = (rndis_set_cmplt_type *)r->buf; - BufLength = le32_to_cpu(buf->InformationBufferLength); - BufOffset = le32_to_cpu(buf->InformationBufferOffset); - #ifdef VERBOSE_DEBUG pr_debug("%s: Length: %d\n", __func__, BufLength); pr_debug("%s: Offset: %d\n", __func__, BufOffset); @@ -919,6 +922,7 @@ struct rndis_params *rndis_register(void (*resp_avail)(void *v), void *v) params->resp_avail = resp_avail; params->v = v; INIT_LIST_HEAD(¶ms->resp_queue); + spin_lock_init(¶ms->resp_lock); pr_debug("%s: configNr = %d\n", __func__, i); return params; @@ -1012,12 +1016,14 @@ void rndis_free_response(struct rndis_params *params, u8 *buf) { rndis_resp_t *r, *n; + spin_lock(¶ms->resp_lock); list_for_each_entry_safe(r, n, ¶ms->resp_queue, list) { if (r->buf == buf) { list_del(&r->list); kfree(r); } } + spin_unlock(¶ms->resp_lock); } EXPORT_SYMBOL_GPL(rndis_free_response); @@ -1027,14 +1033,17 @@ u8 *rndis_get_next_response(struct rndis_params *params, u32 *length) if (!length) return NULL; + spin_lock(¶ms->resp_lock); list_for_each_entry_safe(r, n, ¶ms->resp_queue, list) { if (!r->send) { r->send = 1; *length = r->length; + spin_unlock(¶ms->resp_lock); return r->buf; } } + spin_unlock(¶ms->resp_lock); return NULL; } EXPORT_SYMBOL_GPL(rndis_get_next_response); @@ -1051,7 +1060,9 @@ static rndis_resp_t *rndis_add_response(struct rndis_params *params, u32 length) r->length = length; r->send = 0; + spin_lock(¶ms->resp_lock); list_add_tail(&r->list, ¶ms->resp_queue); + spin_unlock(¶ms->resp_lock); return r; } diff --git a/drivers/usb/gadget/function/rndis.h b/drivers/usb/gadget/function/rndis.h index f6167f7fea82b59da4b0eb40c0fd7effc06bec5c..6206b8b7490f64b0e11dcc71aa2d99691d5d1ac3 100644 --- a/drivers/usb/gadget/function/rndis.h +++ b/drivers/usb/gadget/function/rndis.h @@ -174,6 +174,7 @@ typedef struct rndis_params { void (*resp_avail)(void *v); void *v; struct list_head resp_queue; + spinlock_t resp_lock; } rndis_params; /* RNDIS Message parser and other useless functions */ diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c index f7e6c42558eb768e58ffcf37a9996e25802cdc36..2451e45ada6ede2753c995f747d87e99d093f145 100644 --- a/drivers/usb/gadget/function/storage_common.c +++ b/drivers/usb/gadget/function/storage_common.c @@ -520,3 +520,4 @@ ssize_t fsg_store_inquiry_string(struct fsg_lun *curlun, const char *buf, EXPORT_SYMBOL_GPL(fsg_store_inquiry_string); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 5223bc09c1a5b5f21e9d622d32fa60955f62bcbc..e35a66aec941b307a2f76fcab4bcc39369197eb9 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -76,11 +76,13 @@ static const struct snd_pcm_hardware uac_pcm_hardware = { }; static void u_audio_set_fback_frequency(enum usb_device_speed speed, + struct usb_ep *out_ep, unsigned long long freq, unsigned int pitch, void *buf) { u32 ff = 0; + const struct usb_endpoint_descriptor *ep_desc; /* * Because the pitch base is 1000000, the final divider here @@ -108,8 +110,13 @@ static void u_audio_set_fback_frequency(enum usb_device_speed speed, * byte fromat (that is Q16.16) * * ff = (freq << 16) / 8000 + * + * Win10 and OSX UAC2 drivers require number of samples per packet + * in order to honor the feedback value. + * Linux snd-usb-audio detects the applied bit-shift automatically. */ - freq <<= 4; + ep_desc = out_ep->desc; + freq <<= 4 + (ep_desc->bInterval - 1); } ff = DIV_ROUND_CLOSEST_ULL((freq * pitch), 1953125); @@ -247,7 +254,7 @@ static void u_audio_iso_fback_complete(struct usb_ep *ep, pr_debug("%s: iso_complete status(%d) %d/%d\n", __func__, status, req->actual, req->length); - u_audio_set_fback_frequency(audio_dev->gadget->speed, + u_audio_set_fback_frequency(audio_dev->gadget->speed, audio_dev->out_ep, params->c_srate, prm->pitch, req->buf); @@ -506,7 +513,7 @@ int u_audio_start_capture(struct g_audio *audio_dev) * be meauserd at start of playback */ prm->pitch = 1000000; - u_audio_set_fback_frequency(audio_dev->gadget->speed, + u_audio_set_fback_frequency(audio_dev->gadget->speed, ep, params->c_srate, prm->pitch, req_fback->buf); diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index 85a3f6d4b5af34725c30151c0f3c40a8f6f0417e..e1c7826f33e061be07a90836dc2442036eb4feea 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -861,19 +861,23 @@ int gether_register_netdev(struct net_device *net) { struct eth_dev *dev; struct usb_gadget *g; - struct sockaddr sa; int status; if (!net->dev.parent) return -EINVAL; dev = netdev_priv(net); g = dev->gadget; + + memcpy(net->dev_addr, dev->dev_mac, ETH_ALEN); + net->addr_assign_type = NET_ADDR_RANDOM; + status = register_netdev(net); if (status < 0) { dev_dbg(&g->dev, "register_netdev failed, %d\n", status); return status; } else { INFO(dev, "HOST MAC %pM\n", dev->host_mac); + INFO(dev, "MAC %pM\n", dev->dev_mac); /* two kinds of host-initiated state changes: * - iff DATA transfer is active, carrier is "on" @@ -881,15 +885,6 @@ int gether_register_netdev(struct net_device *net) */ netif_carrier_off(net); } - sa.sa_family = net->type; - memcpy(sa.sa_data, dev->dev_mac, ETH_ALEN); - rtnl_lock(); - status = dev_set_mac_address(net, &sa, NULL); - rtnl_unlock(); - if (status) - pr_warn("cannot set self ethernet address: %d\n", status); - else - INFO(dev, "MAC %pM\n", dev->dev_mac); return status; } diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h index 23ee25383c1f708b6af06aec1041f80062bb69cd..893aaa70f81a43633b5ca6b22bf37d6ee1f0dc49 100644 --- a/drivers/usb/gadget/function/uvc.h +++ b/drivers/usb/gadget/function/uvc.h @@ -117,6 +117,7 @@ struct uvc_device { enum uvc_state state; struct usb_function func; struct uvc_video video; + bool func_connected; /* Descriptors */ struct { @@ -147,6 +148,7 @@ static inline struct uvc_device *to_uvc(struct usb_function *f) struct uvc_file_handle { struct v4l2_fh vfh; struct uvc_video *device; + bool is_uvc_app_handle; }; #define to_uvc_file_handle(handle) \ diff --git a/drivers/usb/gadget/function/uvc_v4l2.c b/drivers/usb/gadget/function/uvc_v4l2.c index 4ca89eab61590d2722fb963c0d75cab5eea90308..197c26f7aec6382cf3019bf32d8254e09c1d4d1c 100644 --- a/drivers/usb/gadget/function/uvc_v4l2.c +++ b/drivers/usb/gadget/function/uvc_v4l2.c @@ -227,17 +227,55 @@ static int uvc_v4l2_subscribe_event(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub) { + struct uvc_device *uvc = video_get_drvdata(fh->vdev); + struct uvc_file_handle *handle = to_uvc_file_handle(fh); + int ret; + if (sub->type < UVC_EVENT_FIRST || sub->type > UVC_EVENT_LAST) return -EINVAL; - return v4l2_event_subscribe(fh, sub, 2, NULL); + if (sub->type == UVC_EVENT_SETUP && uvc->func_connected) + return -EBUSY; + + ret = v4l2_event_subscribe(fh, sub, 2, NULL); + if (ret < 0) + return ret; + + if (sub->type == UVC_EVENT_SETUP) { + uvc->func_connected = true; + handle->is_uvc_app_handle = true; + uvc_function_connect(uvc); + } + + return 0; +} + +static void uvc_v4l2_disable(struct uvc_device *uvc) +{ + uvc->func_connected = false; + uvc_function_disconnect(uvc); + uvcg_video_enable(&uvc->video, 0); + uvcg_free_buffers(&uvc->video.queue); } static int uvc_v4l2_unsubscribe_event(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub) { - return v4l2_event_unsubscribe(fh, sub); + struct uvc_device *uvc = video_get_drvdata(fh->vdev); + struct uvc_file_handle *handle = to_uvc_file_handle(fh); + int ret; + + ret = v4l2_event_unsubscribe(fh, sub); + if (ret < 0) + return ret; + + if (sub->type == UVC_EVENT_SETUP && handle->is_uvc_app_handle) { + uvc_v4l2_disable(uvc); + handle->is_uvc_app_handle = false; + } + + return 0; } static long @@ -292,7 +330,6 @@ uvc_v4l2_open(struct file *file) handle->device = &uvc->video; file->private_data = &handle->vfh; - uvc_function_connect(uvc); return 0; } @@ -304,11 +341,9 @@ uvc_v4l2_release(struct file *file) struct uvc_file_handle *handle = to_uvc_file_handle(file->private_data); struct uvc_video *video = handle->device; - uvc_function_disconnect(uvc); - mutex_lock(&video->mutex); - uvcg_video_enable(video, 0); - uvcg_free_buffers(&video->queue); + if (handle->is_uvc_app_handle) + uvc_v4l2_disable(uvc); mutex_unlock(&video->mutex); file->private_data = NULL; diff --git a/drivers/usb/gadget/legacy/dbgp.c b/drivers/usb/gadget/legacy/dbgp.c index e1d566c9918ae576d55fc322f317d2992193b7f1..6bcbad382580203e0aafbe012c202c3587297a3a 100644 --- a/drivers/usb/gadget/legacy/dbgp.c +++ b/drivers/usb/gadget/legacy/dbgp.c @@ -137,7 +137,7 @@ static int dbgp_enable_ep_req(struct usb_ep *ep) goto fail_1; } - req->buf = kmalloc(DBGP_REQ_LEN, GFP_KERNEL); + req->buf = kzalloc(DBGP_REQ_LEN, GFP_KERNEL); if (!req->buf) { err = -ENOMEM; stp = 2; @@ -345,6 +345,19 @@ static int dbgp_setup(struct usb_gadget *gadget, void *data = NULL; u16 len = 0; + if (length > DBGP_REQ_LEN) { + if (ctrl->bRequestType & USB_DIR_IN) { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(DBGP_REQ_LEN); + length = DBGP_REQ_LEN; + } else { + return err; + } + } + + if (request == USB_REQ_GET_DESCRIPTOR) { switch (value>>8) { case USB_DT_DEVICE: diff --git a/drivers/usb/gadget/legacy/hid.c b/drivers/usb/gadget/legacy/hid.c index 5b27d289443fe2af1feb815243a7b69add588341..3912cc805f3af08cdf12af22396fa86f1a998a25 100644 --- a/drivers/usb/gadget/legacy/hid.c +++ b/drivers/usb/gadget/legacy/hid.c @@ -99,8 +99,10 @@ static int do_config(struct usb_configuration *c) list_for_each_entry(e, &hidg_func_list, node) { e->f = usb_get_function(e->fi); - if (IS_ERR(e->f)) + if (IS_ERR(e->f)) { + status = PTR_ERR(e->f); goto put; + } status = usb_add_function(c, e->f); if (status < 0) { usb_put_function(e->f); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 71e7d10dd76b90b458582d69334662a9446005ff..454860d52ce77f96b3ea047d37e90da6dc289fe0 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -110,6 +110,8 @@ enum ep0_state { /* enough for the whole queue: most events invalidate others */ #define N_EVENT 5 +#define RBUF_SIZE 256 + struct dev_data { spinlock_t lock; refcount_t count; @@ -144,7 +146,7 @@ struct dev_data { struct dentry *dentry; /* except this scratch i/o buffer for ep0 */ - u8 rbuf [256]; + u8 rbuf[RBUF_SIZE]; }; static inline void get_dev (struct dev_data *data) @@ -1333,6 +1335,18 @@ gadgetfs_setup (struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) u16 w_value = le16_to_cpu(ctrl->wValue); u16 w_length = le16_to_cpu(ctrl->wLength); + if (w_length > RBUF_SIZE) { + if (ctrl->bRequestType & USB_DIR_IN) { + /* Cast away the const, we are going to overwrite on purpose. */ + __le16 *temp = (__le16 *)&ctrl->wLength; + + *temp = cpu_to_le16(RBUF_SIZE); + w_length = RBUF_SIZE; + } else { + return value; + } + } + spin_lock (&dev->lock); dev->setup_abort = 0; if (dev->state == STATE_DEV_UNCONNECTED) { @@ -1814,8 +1828,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) spin_lock_irq (&dev->lock); value = -EINVAL; if (dev->buf) { + spin_unlock_irq(&dev->lock); kfree(kbuf); - goto fail; + return value; } dev->buf = kbuf; @@ -1862,8 +1877,8 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) value = usb_gadget_probe_driver(&gadgetfs_driver); if (value != 0) { - kfree (dev->buf); - dev->buf = NULL; + spin_lock_irq(&dev->lock); + goto fail; } else { /* at this point "good" hardware has for the first time * let the USB the host see us. alternatively, if users @@ -1880,6 +1895,9 @@ dev_config (struct file *fd, const char __user *buf, size_t len, loff_t *ptr) return value; fail: + dev->config = NULL; + dev->hs_config = NULL; + dev->dev = NULL; spin_unlock_irq (&dev->lock); pr_debug ("%s: %s fail %zd, %p\n", shortname, __func__, value, dev); kfree (dev->buf); diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c index 062dfac3039968d3f09868141aed177f91d8e5d4..33efa6915b91dc0745f1e0d12816c935c2f72822 100644 --- a/drivers/usb/gadget/legacy/raw_gadget.c +++ b/drivers/usb/gadget/legacy/raw_gadget.c @@ -1003,7 +1003,7 @@ static int raw_process_ep_io(struct raw_dev *dev, struct usb_raw_ep_io *io, ret = -EBUSY; goto out_unlock; } - if ((in && !ep->ep->caps.dir_in) || (!in && ep->ep->caps.dir_in)) { + if (in != usb_endpoint_dir_in(ep->ep->desc)) { dev_dbg(&dev->gadget->dev, "fail, wrong direction\n"); ret = -EINVAL; goto out_unlock; diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 1a12aab208b465d32cbf2b4d80de61c1a54cc14a..933e80d5053ac80b456e88e2aa23e42729af8afa 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -330,6 +330,7 @@ config USB_AMD5536UDC config USB_FSL_QE tristate "Freescale QE/CPM USB Device Controller" depends on FSL_SOC && (QUICC_ENGINE || CPM) + depends on !64BIT || BROKEN help Some of Freescale PowerPC processors have a Full Speed QE/CPM2 USB controller, which support device mode with 4 diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index edb83c8825de9e7be862881bc647b06a07cf4b41..24d0ace3ed3c2ba81925beecae60f5e26d6571f2 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -89,7 +89,7 @@ EXPORT_SYMBOL_GPL(usb_ep_set_maxpacket_limit); * configurable, with more generic names like "ep-a". (remember that for * USB, "in" means "towards the USB host".) * - * This routine must be called in process context. + * This routine may be called in an atomic (interrupt) context. * * returns zero, or a negative error code. */ @@ -134,7 +134,7 @@ EXPORT_SYMBOL_GPL(usb_ep_enable); * gadget drivers must call usb_ep_enable() again before queueing * requests to the endpoint. * - * This routine must be called in process context. + * This routine may be called in an atomic (interrupt) context. * * returns zero, or a negative error code. */ diff --git a/drivers/usb/gadget/udc/renesas_usb3.c b/drivers/usb/gadget/udc/renesas_usb3.c index 57d417a7c3e0a687e9fbc11b604b7bdb93aedc8f..601829a6b4badd7f0fd98805edcb0659f28f4a0f 100644 --- a/drivers/usb/gadget/udc/renesas_usb3.c +++ b/drivers/usb/gadget/udc/renesas_usb3.c @@ -2378,6 +2378,8 @@ static void handle_ext_role_switch_states(struct device *dev, switch (role) { case USB_ROLE_NONE: usb3->connection_state = USB_ROLE_NONE; + if (cur_role == USB_ROLE_HOST) + device_release_driver(host); if (usb3->driver) usb3_disconnect(usb3); usb3_vbus_out(usb3, false); diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c index 77610b5f7db5b35ffd0f005c640a4b80ad62c28e..78561630a9d7dc6cbcf3ad03f1ca0ddd4bd710b8 100644 --- a/drivers/usb/gadget/udc/udc-xilinx.c +++ b/drivers/usb/gadget/udc/udc-xilinx.c @@ -1612,6 +1612,8 @@ static void xudc_getstatus(struct xusb_udc *udc) break; case USB_RECIP_ENDPOINT: epnum = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK; + if (epnum >= XUSB_MAX_ENDPOINTS) + goto stall; target_ep = &udc->ep[epnum]; epcfgreg = udc->read_fn(udc->addr + target_ep->offset); halt = epcfgreg & XUSB_EP_CFG_STALL_MASK; @@ -1679,6 +1681,10 @@ static void xudc_set_clear_feature(struct xusb_udc *udc) case USB_RECIP_ENDPOINT: if (!udc->setup.wValue) { endpoint = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK; + if (endpoint >= XUSB_MAX_ENDPOINTS) { + xudc_ep0_stall(udc); + return; + } target_ep = &udc->ep[endpoint]; outinbit = udc->setup.wIndex & USB_ENDPOINT_DIR_MASK; outinbit = outinbit >> 7; diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index 6793fd99c1cb42be982e4f14d9aecf013984f078..8aff19ff8e8f0e0c55d6d047f6f6e7b502f1a3c7 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -634,7 +634,16 @@ static int ehci_run (struct usb_hcd *hcd) /* Wait until HC become operational */ ehci_readl(ehci, &ehci->regs->command); /* unblock posted writes */ msleep(5); - rc = ehci_handshake(ehci, &ehci->regs->status, STS_HALT, 0, 100 * 1000); + + /* For Aspeed, STS_HALT also depends on ASS/PSS status. + * Check CMD_RUN instead. + */ + if (ehci->is_aspeed) + rc = ehci_handshake(ehci, &ehci->regs->command, CMD_RUN, + 1, 100 * 1000); + else + rc = ehci_handshake(ehci, &ehci->regs->status, STS_HALT, + 0, 100 * 1000); up_write(&ehci_cf_port_reset_rwsem); diff --git a/drivers/usb/host/ehci-platform.c b/drivers/usb/host/ehci-platform.c index a48dd3fac15373e6287242bc43c26a7fade3f7cf..2dcfc67f2ba814aa8dd2bb04ddf9904af37a3dcc 100644 --- a/drivers/usb/host/ehci-platform.c +++ b/drivers/usb/host/ehci-platform.c @@ -294,6 +294,12 @@ static int ehci_platform_probe(struct platform_device *dev) "has-transaction-translator")) hcd->has_tt = 1; + if (of_device_is_compatible(dev->dev.of_node, + "aspeed,ast2500-ehci") || + of_device_is_compatible(dev->dev.of_node, + "aspeed,ast2600-ehci")) + ehci->is_aspeed = 1; + if (soc_device_match(quirk_poll_match)) priv->quirk_poll = true; diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h index eabf22a78eae037ca51d7041d7d8ea42b2e07e42..59fd523c55f305becfa10f230b865ca789ebfdcb 100644 --- a/drivers/usb/host/ehci.h +++ b/drivers/usb/host/ehci.h @@ -218,6 +218,7 @@ struct ehci_hcd { /* one per controller */ unsigned frame_index_bug:1; /* MosChip (AKA NetMos) */ unsigned need_oc_pp_cycle:1; /* MPC834X port power */ unsigned imx28_write_fix:1; /* For Freescale i.MX28 */ + unsigned is_aspeed:1; /* required for usb32 quirk */ #define OHCI_CTRL_HCFS (3 << 6) diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index c86d413226eb9ec053fe10c3e4160f3b61674c52..b875da01c530907c6a26a89bb937b69b8fbbdfc3 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -125,8 +125,6 @@ struct max3421_hcd { struct task_struct *spi_thread; - struct max3421_hcd *next; - enum max3421_rh_state rh_state; /* lower 16 bits contain port status, upper 16 bits the change mask: */ u32 port_status; @@ -174,8 +172,6 @@ struct max3421_ep { u8 retransmit; /* packet needs retransmission */ }; -static struct max3421_hcd *max3421_hcd_list; - #define MAX3421_FIFO_SIZE 64 #define MAX3421_SPI_DIR_RD 0 /* read register from MAX3421 */ @@ -1882,9 +1878,8 @@ max3421_probe(struct spi_device *spi) } set_bit(HCD_FLAG_POLL_RH, &hcd->flags); max3421_hcd = hcd_to_max3421(hcd); - max3421_hcd->next = max3421_hcd_list; - max3421_hcd_list = max3421_hcd; INIT_LIST_HEAD(&max3421_hcd->ep_list); + spi_set_drvdata(spi, max3421_hcd); max3421_hcd->tx = kmalloc(sizeof(*max3421_hcd->tx), GFP_KERNEL); if (!max3421_hcd->tx) @@ -1934,28 +1929,18 @@ error: static int max3421_remove(struct spi_device *spi) { - struct max3421_hcd *max3421_hcd = NULL, **prev; - struct usb_hcd *hcd = NULL; + struct max3421_hcd *max3421_hcd; + struct usb_hcd *hcd; unsigned long flags; - for (prev = &max3421_hcd_list; *prev; prev = &(*prev)->next) { - max3421_hcd = *prev; - hcd = max3421_to_hcd(max3421_hcd); - if (hcd->self.controller == &spi->dev) - break; - } - if (!max3421_hcd) { - dev_err(&spi->dev, "no MAX3421 HCD found for SPI device %p\n", - spi); - return -ENODEV; - } + max3421_hcd = spi_get_drvdata(spi); + hcd = max3421_to_hcd(max3421_hcd); usb_remove_hcd(hcd); spin_lock_irqsave(&max3421_hcd->lock, flags); kthread_stop(max3421_hcd->spi_thread); - *prev = max3421_hcd->next; spin_unlock_irqrestore(&max3421_hcd->lock, flags); diff --git a/drivers/usb/host/ohci-tmio.c b/drivers/usb/host/ohci-tmio.c index 08ec2ab0d95a58901436aec69ecec36fce537b19..3f3d62dc067469279f26084bd665ec9e13c165da 100644 --- a/drivers/usb/host/ohci-tmio.c +++ b/drivers/usb/host/ohci-tmio.c @@ -199,7 +199,7 @@ static int ohci_hcd_tmio_drv_probe(struct platform_device *dev) if (usb_disabled()) return -ENODEV; - if (!cell) + if (!cell || !regs || !config || !sram) return -EINVAL; if (irq < 0) diff --git a/drivers/usb/host/uhci-platform.c b/drivers/usb/host/uhci-platform.c index 70dbd95c3f063a43db2035df59b152e66ea45984..be9e9db7cad1048f75aab2c01eb994a5171a6577 100644 --- a/drivers/usb/host/uhci-platform.c +++ b/drivers/usb/host/uhci-platform.c @@ -113,7 +113,8 @@ static int uhci_hcd_platform_probe(struct platform_device *pdev) num_ports); } if (of_device_is_compatible(np, "aspeed,ast2400-uhci") || - of_device_is_compatible(np, "aspeed,ast2500-uhci")) { + of_device_is_compatible(np, "aspeed,ast2500-uhci") || + of_device_is_compatible(np, "aspeed,ast2600-uhci")) { uhci->is_aspeed = 1; dev_info(&pdev->dev, "Enabled Aspeed implementation workarounds\n"); diff --git a/drivers/usb/host/xhci-dbgtty.c b/drivers/usb/host/xhci-dbgtty.c index ae4e4ab638b5596851a0f250366a66dee57bfa81..2a53b283199998d205f5bcbc637df0772eb41a0a 100644 --- a/drivers/usb/host/xhci-dbgtty.c +++ b/drivers/usb/host/xhci-dbgtty.c @@ -408,40 +408,38 @@ static int xhci_dbc_tty_register_device(struct xhci_dbc *dbc) return -EBUSY; xhci_dbc_tty_init_port(dbc, port); - tty_dev = tty_port_register_device(&port->port, - dbc_tty_driver, 0, NULL); - if (IS_ERR(tty_dev)) { - ret = PTR_ERR(tty_dev); - goto register_fail; - } ret = kfifo_alloc(&port->write_fifo, DBC_WRITE_BUF_SIZE, GFP_KERNEL); if (ret) - goto buf_alloc_fail; + goto err_exit_port; ret = xhci_dbc_alloc_requests(dbc, BULK_IN, &port->read_pool, dbc_read_complete); if (ret) - goto request_fail; + goto err_free_fifo; ret = xhci_dbc_alloc_requests(dbc, BULK_OUT, &port->write_pool, dbc_write_complete); if (ret) - goto request_fail; + goto err_free_requests; + + tty_dev = tty_port_register_device(&port->port, + dbc_tty_driver, 0, NULL); + if (IS_ERR(tty_dev)) { + ret = PTR_ERR(tty_dev); + goto err_free_requests; + } port->registered = true; return 0; -request_fail: +err_free_requests: xhci_dbc_free_requests(&port->read_pool); xhci_dbc_free_requests(&port->write_pool); +err_free_fifo: kfifo_free(&port->write_fifo); - -buf_alloc_fail: - tty_unregister_device(dbc_tty_driver, 0); - -register_fail: +err_exit_port: xhci_dbc_tty_exit_port(port); dev_err(dbc->dev, "can't register tty port, err %d\n", ret); diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 15d543f1d93d19d7515e2cba15df9c2da4f18a5f..e0cd2825fcdcec662194248dfd42f1409007b775 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -171,7 +171,6 @@ static void xhci_common_hub_descriptor(struct xhci_hcd *xhci, { u16 temp; - desc->bPwrOn2PwrGood = 10; /* xhci section 5.4.9 says 20ms max */ desc->bHubContrCurrent = 0; desc->bNbrPorts = ports; @@ -206,6 +205,7 @@ static void xhci_usb2_hub_descriptor(struct usb_hcd *hcd, struct xhci_hcd *xhci, desc->bDescriptorType = USB_DT_HUB; temp = 1 + (ports / 8); desc->bDescLength = USB_DT_HUB_NONVAR_SIZE + 2 * temp; + desc->bPwrOn2PwrGood = 10; /* xhci section 5.4.8 says 20ms */ /* The Device Removable bits are reported on a byte granularity. * If the port doesn't exist within that byte, the bit is set to 0. @@ -258,6 +258,7 @@ static void xhci_usb3_hub_descriptor(struct usb_hcd *hcd, struct xhci_hcd *xhci, xhci_common_hub_descriptor(xhci, desc, ports); desc->bDescriptorType = USB_DT_SS_HUB; desc->bDescLength = USB_DT_SS_HUB_SIZE; + desc->bPwrOn2PwrGood = 50; /* usb 3.1 may fail if less than 100ms */ /* header decode latency should be zero for roothubs, * see section 4.23.5.2. @@ -635,6 +636,7 @@ static int xhci_enter_test_mode(struct xhci_hcd *xhci, continue; retval = xhci_disable_slot(xhci, i); + xhci_free_virt_device(xhci, i); if (retval) xhci_err(xhci, "Failed to disable slot %d, %d. Enter test mode anyway\n", i, retval); diff --git a/drivers/usb/host/xhci-mtk-sch.c b/drivers/usb/host/xhci-mtk-sch.c index 8b90da5a6ed1958cde3d6fc521ed9cc32eb7f47d..5e003baabac91f0564bf7a25632e6fc503af2383 100644 --- a/drivers/usb/host/xhci-mtk-sch.c +++ b/drivers/usb/host/xhci-mtk-sch.c @@ -590,10 +590,12 @@ static u32 get_esit_boundary(struct mu3h_sch_ep_info *sch_ep) u32 boundary = sch_ep->esit; if (sch_ep->sch_tt) { /* LS/FS with TT */ - /* tune for CS */ - if (sch_ep->ep_type != ISOC_OUT_EP) - boundary++; - else if (boundary > 1) /* normally esit >= 8 for FS/LS */ + /* + * tune for CS, normally esit >= 8 for FS/LS, + * not add one for other types to avoid access array + * out of boundary + */ + if (sch_ep->ep_type == ISOC_OUT_EP && boundary > 1) boundary--; } diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 119d1a8fbb1940c42724c5190282e765a247208c..dafb58f05c9fbf95e8d3eba1db3662f4145b21f4 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -30,6 +30,7 @@ #define PCI_VENDOR_ID_FRESCO_LOGIC 0x1b73 #define PCI_DEVICE_ID_FRESCO_LOGIC_PDK 0x1000 #define PCI_DEVICE_ID_FRESCO_LOGIC_FL1009 0x1009 +#define PCI_DEVICE_ID_FRESCO_LOGIC_FL1100 0x1100 #define PCI_DEVICE_ID_FRESCO_LOGIC_FL1400 0x1400 #define PCI_VENDOR_ID_ETRON 0x1b6f @@ -63,6 +64,15 @@ #define PCI_DEVICE_ID_AMD_PROMONTORYA_3 0x43ba #define PCI_DEVICE_ID_AMD_PROMONTORYA_2 0x43bb #define PCI_DEVICE_ID_AMD_PROMONTORYA_1 0x43bc +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_1 0x161a +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_2 0x161b +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 0x161d +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 0x161e +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 0x15d6 +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 0x15d7 +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 0x161c +#define PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8 0x161f + #define PCI_DEVICE_ID_ASMEDIA_1042_XHCI 0x1042 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242 @@ -146,6 +156,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009) xhci->quirks |= XHCI_BROKEN_STREAMS; + if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC && + pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1100) + xhci->quirks |= XHCI_TRUST_TX_LENGTH; + if (pdev->vendor == PCI_VENDOR_ID_NEC) xhci->quirks |= XHCI_NEC_HOST; @@ -274,8 +288,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x3432) xhci->quirks |= XHCI_BROKEN_STREAMS; - if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483) + if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483) { xhci->quirks |= XHCI_LPM_SUPPORT; + xhci->quirks |= XHCI_EP_CTX_BROKEN_DCS; + } if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) @@ -308,6 +324,17 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4)) xhci->quirks |= XHCI_NO_SOFT_RETRY; + if (pdev->vendor == PCI_VENDOR_ID_AMD && + (pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_1 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_2 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_3 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_4 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_5 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_6 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_7 || + pdev->device == PCI_DEVICE_ID_AMD_YELLOW_CARP_XHCI_8)) + xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW; + if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "QUIRK: Resetting on resume"); diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c index 21280a6d589e947d9e5c00641df59d6a5477a04d..01dcfd77494f948069e115c89c7850b6e092a80a 100644 --- a/drivers/usb/host/xhci-plat.c +++ b/drivers/usb/host/xhci-plat.c @@ -478,6 +478,9 @@ static int __maybe_unused xhci_plat_suspend(struct device *dev) struct xhci_hcd *xhci = hcd_to_xhci(hcd); int ret; + if (pm_runtime_suspended(dev)) + pm_runtime_resume(dev); + ret = xhci_priv_suspend_quirk(hcd); if (ret) return ret; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index ee8a0232e731743dc193c684429cb1f991140a68..c85f36ed67d6a867fdb5902ed95d108104764adf 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -368,16 +368,29 @@ static void xhci_handle_stopped_cmd_ring(struct xhci_hcd *xhci, /* Must be called with xhci->lock held, releases and aquires lock back */ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci, unsigned long flags) { - u64 temp_64; + struct xhci_segment *new_seg = xhci->cmd_ring->deq_seg; + union xhci_trb *new_deq = xhci->cmd_ring->dequeue; + u64 crcr; int ret; xhci_dbg(xhci, "Abort command ring\n"); reinit_completion(&xhci->cmd_ring_stop_completion); - temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring); - xhci_write_64(xhci, temp_64 | CMD_RING_ABORT, - &xhci->op_regs->cmd_ring); + /* + * The control bits like command stop, abort are located in lower + * dword of the command ring control register. + * Some controllers require all 64 bits to be written to abort the ring. + * Make sure the upper dword is valid, pointing to the next command, + * avoiding corrupting the command ring pointer in case the command ring + * is stopped by the time the upper dword is written. + */ + next_trb(xhci, NULL, &new_seg, &new_deq); + if (trb_is_link(new_deq)) + next_trb(xhci, NULL, &new_seg, &new_deq); + + crcr = xhci_trb_virt_to_dma(new_seg, new_deq); + xhci_write_64(xhci, crcr | CMD_RING_ABORT, &xhci->op_regs->cmd_ring); /* Section 4.6.1.2 of xHCI 1.0 spec says software should also time the * completion of the Command Abort operation. If CRR is not negated in 5 @@ -561,8 +574,11 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, struct xhci_ring *ep_ring; struct xhci_command *cmd; struct xhci_segment *new_seg; + struct xhci_segment *halted_seg = NULL; union xhci_trb *new_deq; int new_cycle; + union xhci_trb *halted_trb; + int index = 0; dma_addr_t addr; u64 hw_dequeue; bool cycle_found = false; @@ -600,7 +616,27 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci, hw_dequeue = xhci_get_hw_deq(xhci, dev, ep_index, stream_id); new_seg = ep_ring->deq_seg; new_deq = ep_ring->dequeue; - new_cycle = hw_dequeue & 0x1; + + /* + * Quirk: xHC write-back of the DCS field in the hardware dequeue + * pointer is wrong - use the cycle state of the TRB pointed to by + * the dequeue pointer. + */ + if (xhci->quirks & XHCI_EP_CTX_BROKEN_DCS && + !(ep->ep_state & EP_HAS_STREAMS)) + halted_seg = trb_in_td(xhci, td->start_seg, + td->first_trb, td->last_trb, + hw_dequeue & ~0xf, false); + if (halted_seg) { + index = ((dma_addr_t)(hw_dequeue & ~0xf) - halted_seg->dma) / + sizeof(*halted_trb); + halted_trb = &halted_seg->trbs[index]; + new_cycle = halted_trb->generic.field[3] & 0x1; + xhci_dbg(xhci, "Endpoint DCS = %d TRB index = %d cycle = %d\n", + (u8)(hw_dequeue & 0x1), index, new_cycle); + } else { + new_cycle = hw_dequeue & 0x1; + } /* * We want to find the pointer, segment and cycle state of the new trb @@ -944,17 +980,21 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) td->urb->stream_id); hw_deq &= ~0xf; - if (td->cancel_status == TD_HALTED) { - cached_td = td; - } else if (trb_in_td(xhci, td->start_seg, td->first_trb, - td->last_trb, hw_deq, false)) { + if (td->cancel_status == TD_HALTED || + trb_in_td(xhci, td->start_seg, td->first_trb, td->last_trb, hw_deq, false)) { switch (td->cancel_status) { case TD_CLEARED: /* TD is already no-op */ case TD_CLEARING_CACHE: /* set TR deq command already queued */ break; case TD_DIRTY: /* TD is cached, clear it */ case TD_HALTED: - /* FIXME stream case, several stopped rings */ + td->cancel_status = TD_CLEARING_CACHE; + if (cached_td) + /* FIXME stream case, several stopped rings */ + xhci_dbg(xhci, + "Move dq past stream %u URB %p instead of stream %u URB %p\n", + td->urb->stream_id, td->urb, + cached_td->urb->stream_id, cached_td->urb); cached_td = td; break; } @@ -963,18 +1003,24 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) td->cancel_status = TD_CLEARED; } } - if (cached_td) { - cached_td->cancel_status = TD_CLEARING_CACHE; - err = xhci_move_dequeue_past_td(xhci, slot_id, ep->ep_index, - cached_td->urb->stream_id, - cached_td); - /* Failed to move past cached td, try just setting it noop */ - if (err) { - td_to_noop(xhci, ring, cached_td, false); - cached_td->cancel_status = TD_CLEARED; + /* If there's no need to move the dequeue pointer then we're done */ + if (!cached_td) + return 0; + + err = xhci_move_dequeue_past_td(xhci, slot_id, ep->ep_index, + cached_td->urb->stream_id, + cached_td); + if (err) { + /* Failed to move past cached td, just set cached TDs to no-op */ + list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list, cancelled_td_list) { + if (td->cancel_status != TD_CLEARING_CACHE) + continue; + xhci_dbg(xhci, "Failed to clear cancelled cached URB %p, mark clear anyway\n", + td->urb); + td_to_noop(xhci, ring, td, false); + td->cancel_status = TD_CLEARED; } - cached_td = NULL; } return 0; } @@ -1463,7 +1509,6 @@ static void xhci_handle_cmd_disable_slot(struct xhci_hcd *xhci, int slot_id) if (xhci->quirks & XHCI_EP_LIMIT_QUIRK) /* Delete default control endpoint resources */ xhci_free_device_endpoint_resources(xhci, virt_dev, true); - xhci_free_virt_device(xhci, slot_id); } static void xhci_handle_cmd_config_ep(struct xhci_hcd *xhci, int slot_id, diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 8532381908842916e8ebb00d023a39f21c05a165..f39db28ef040ab1575bc9cc262bab9ee86091b4a 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -692,7 +692,6 @@ int xhci_run(struct usb_hcd *hcd) if (ret) xhci_free_command(xhci, command); } - set_bit(HCD_FLAG_DEFER_RH_REGISTER, &hcd->flags); xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Finished xhci_run for USB2 roothub"); @@ -1091,6 +1090,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) int retval = 0; bool comp_timer_running = false; bool pending_portevent = false; + bool reinit_xhc = false; if (!hcd->state) return 0; @@ -1107,10 +1107,11 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) set_bit(HCD_FLAG_HW_ACCESSIBLE, &xhci->shared_hcd->flags); spin_lock_irq(&xhci->lock); - if ((xhci->quirks & XHCI_RESET_ON_RESUME) || xhci->broken_suspend) - hibernated = true; - if (!hibernated) { + if (hibernated || xhci->quirks & XHCI_RESET_ON_RESUME || xhci->broken_suspend) + reinit_xhc = true; + + if (!reinit_xhc) { /* * Some controllers might lose power during suspend, so wait * for controller not ready bit to clear, just as in xHC init. @@ -1143,12 +1144,17 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) spin_unlock_irq(&xhci->lock); return -ETIMEDOUT; } - temp = readl(&xhci->op_regs->status); } - /* If restore operation fails, re-initialize the HC during resume */ - if ((temp & STS_SRE) || hibernated) { + temp = readl(&xhci->op_regs->status); + /* re-initialize the HC on Restore Error, or Host Controller Error */ + if (temp & (STS_SRE | STS_HCE)) { + reinit_xhc = true; + xhci_warn(xhci, "xHC error in resume, USBSTS 0x%x, Reinit\n", temp); + } + + if (reinit_xhc) { if ((xhci->quirks & XHCI_COMP_MODE_QUIRK) && !(xhci_all_ports_seen_u0(xhci))) { del_timer_sync(&xhci->comp_mode_recovery_timer); @@ -1473,9 +1479,12 @@ static int xhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag struct urb_priv *urb_priv; int num_tds; - if (!urb || xhci_check_args(hcd, urb->dev, urb->ep, - true, true, __func__) <= 0) + if (!urb) return -EINVAL; + ret = xhci_check_args(hcd, urb->dev, urb->ep, + true, true, __func__); + if (ret <= 0) + return ret ? ret : -EINVAL; slot_id = urb->dev->slot_id; ep_index = xhci_get_endpoint_index(&urb->ep->desc); @@ -3101,10 +3110,13 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, return; /* Bail out if toggle is already being cleared by a endpoint reset */ + spin_lock_irqsave(&xhci->lock, flags); if (ep->ep_state & EP_HARD_CLEAR_TOGGLE) { ep->ep_state &= ~EP_HARD_CLEAR_TOGGLE; + spin_unlock_irqrestore(&xhci->lock, flags); return; } + spin_unlock_irqrestore(&xhci->lock, flags); /* Only interrupt and bulk ep's use data toggle, USB2 spec 5.5.4-> */ if (usb_endpoint_xfer_control(&host_ep->desc) || usb_endpoint_xfer_isoc(&host_ep->desc)) @@ -3202,8 +3214,10 @@ static void xhci_endpoint_reset(struct usb_hcd *hcd, xhci_free_command(xhci, cfg_cmd); cleanup: xhci_free_command(xhci, stop_cmd); + spin_lock_irqsave(&xhci->lock, flags); if (ep->ep_state & EP_SOFT_CLEAR_TOGGLE) ep->ep_state &= ~EP_SOFT_CLEAR_TOGGLE; + spin_unlock_irqrestore(&xhci->lock, flags); } static int xhci_check_streams_endpoint(struct xhci_hcd *xhci, @@ -3218,7 +3232,7 @@ static int xhci_check_streams_endpoint(struct xhci_hcd *xhci, return -EINVAL; ret = xhci_check_args(xhci_to_hcd(xhci), udev, ep, 1, true, __func__); if (ret <= 0) - return -EINVAL; + return ret ? ret : -EINVAL; if (usb_ss_max_streams(&ep->ss_ep_comp) == 0) { xhci_warn(xhci, "WARN: SuperSpeed Endpoint Companion" " descriptor for ep 0x%x does not support streams\n", @@ -3836,7 +3850,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) struct xhci_slot_ctx *slot_ctx; int i, ret; -#ifndef CONFIG_USB_DEFAULT_PERSIST /* * We called pm_runtime_get_noresume when the device was attached. * Decrement the counter here to allow controller to runtime suspend @@ -3844,7 +3857,6 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) */ if (xhci->quirks & XHCI_RESET_ON_RESUME) pm_runtime_put_noidle(hcd->self.controller); -#endif ret = xhci_check_args(hcd, udev, NULL, 0, true, __func__); /* If the host is halted due to driver unload, we still need to free the @@ -3863,9 +3875,8 @@ static void xhci_free_dev(struct usb_hcd *hcd, struct usb_device *udev) del_timer_sync(&virt_dev->eps[i].stop_cmd_timer); } virt_dev->udev = NULL; - ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) - xhci_free_virt_device(xhci, udev->slot_id); + xhci_disable_slot(xhci, udev->slot_id); + xhci_free_virt_device(xhci, udev->slot_id); } int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) @@ -3875,7 +3886,7 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) u32 state; int ret = 0; - command = xhci_alloc_command(xhci, false, GFP_KERNEL); + command = xhci_alloc_command(xhci, true, GFP_KERNEL); if (!command) return -ENOMEM; @@ -3900,6 +3911,15 @@ int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id) } xhci_ring_cmd_db(xhci); spin_unlock_irqrestore(&xhci->lock, flags); + + wait_for_completion(command->completion); + + if (command->status != COMP_SUCCESS) + xhci_warn(xhci, "Unsuccessful disable slot %u command, status %d\n", + slot_id, command->status); + + xhci_free_command(xhci, command); + return ret; } @@ -4004,23 +4024,20 @@ int xhci_alloc_dev(struct usb_hcd *hcd, struct usb_device *udev) xhci_debugfs_create_slot(xhci, slot_id); -#ifndef CONFIG_USB_DEFAULT_PERSIST /* * If resetting upon resume, we can't put the controller into runtime * suspend if there is a device attached. */ if (xhci->quirks & XHCI_RESET_ON_RESUME) pm_runtime_get_noresume(hcd->self.controller); -#endif /* Is this a LS or FS device under a HS hub? */ /* Hub or peripherial? */ return 1; disable_slot: - ret = xhci_disable_slot(xhci, udev->slot_id); - if (ret) - xhci_free_virt_device(xhci, udev->slot_id); + xhci_disable_slot(xhci, udev->slot_id); + xhci_free_virt_device(xhci, udev->slot_id); return 0; } @@ -4157,6 +4174,7 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); + xhci_free_virt_device(xhci, udev->slot_id); if (!ret) xhci_alloc_dev(hcd, udev); kfree(command->completion); diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 71c51dee3a5f889b6522d6d822985a9f47d83568..2bba700a780594046be92e0488c0a28e0a52ac26 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1902,6 +1902,7 @@ struct xhci_hcd { #define XHCI_DISABLE_SPARSE BIT_ULL(38) #define XHCI_SG_TRB_CACHE_SIZE_QUIRK BIT_ULL(39) #define XHCI_NO_SOFT_RETRY BIT_ULL(40) +#define XHCI_EP_CTX_BROKEN_DCS BIT_ULL(42) unsigned int num_active_eps; unsigned int limit_active_eps; diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index 8a3d9c0c8d8bc54304b5d46dc801ea1476af64f1..157b31d354ac23b14576f7f00cdb07d5d8827671 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -202,6 +202,7 @@ static void ftdi_elan_delete(struct kref *kref) mutex_unlock(&ftdi_module_lock); kfree(ftdi->bulk_in_buffer); ftdi->bulk_in_buffer = NULL; + kfree(ftdi); } static void ftdi_elan_put_kref(struct usb_ftdi *ftdi) diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c index 70ec296815268cd2d8e9f984fc93727fd454a3fb..72a06af25081286baa73def41807186a6cf4031c 100644 --- a/drivers/usb/misc/iowarrior.c +++ b/drivers/usb/misc/iowarrior.c @@ -99,10 +99,6 @@ struct iowarrior { /* globals */ /*--------------*/ -/* - * USB spec identifies 5 second timeouts. - */ -#define GET_TIMEOUT 5 #define USB_REQ_GET_REPORT 0x01 //#if 0 static int usb_get_report(struct usb_device *dev, @@ -114,7 +110,7 @@ static int usb_get_report(struct usb_device *dev, USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE, (type << 8) + id, inter->desc.bInterfaceNumber, buf, size, - GET_TIMEOUT*HZ); + USB_CTRL_GET_TIMEOUT); } //#endif @@ -129,7 +125,7 @@ static int usb_set_report(struct usb_interface *intf, unsigned char type, USB_TYPE_CLASS | USB_RECIP_INTERFACE, (type << 8) + id, intf->cur_altsetting->desc.bInterfaceNumber, buf, - size, HZ); + size, 1000); } /*---------------------*/ diff --git a/drivers/usb/mtu3/mtu3_gadget.c b/drivers/usb/mtu3/mtu3_gadget.c index 0b3aa7c65857a52fe8a81e5f179a40d840a97761..b7a6363f387aa7ff644113269bd866700f06d88f 100644 --- a/drivers/usb/mtu3/mtu3_gadget.c +++ b/drivers/usb/mtu3/mtu3_gadget.c @@ -77,7 +77,7 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) if (usb_endpoint_xfer_int(desc) || usb_endpoint_xfer_isoc(desc)) { interval = desc->bInterval; - interval = clamp_val(interval, 1, 16) - 1; + interval = clamp_val(interval, 1, 16); if (usb_endpoint_xfer_isoc(desc) && comp_desc) mult = comp_desc->bmAttributes; } @@ -89,9 +89,16 @@ static int mtu3_ep_enable(struct mtu3_ep *mep) if (usb_endpoint_xfer_isoc(desc) || usb_endpoint_xfer_int(desc)) { interval = desc->bInterval; - interval = clamp_val(interval, 1, 16) - 1; + interval = clamp_val(interval, 1, 16); mult = usb_endpoint_maxp_mult(desc) - 1; } + break; + case USB_SPEED_FULL: + if (usb_endpoint_xfer_isoc(desc)) + interval = clamp_val(desc->bInterval, 1, 16); + else if (usb_endpoint_xfer_int(desc)) + interval = clamp_val(desc->bInterval, 1, 255); + break; default: break; /*others are ignored */ @@ -235,6 +242,7 @@ struct usb_request *mtu3_alloc_request(struct usb_ep *ep, gfp_t gfp_flags) mreq->request.dma = DMA_ADDR_INVALID; mreq->epnum = mep->epnum; mreq->mep = mep; + INIT_LIST_HEAD(&mreq->list); trace_mtu3_alloc_request(mreq); return &mreq->request; diff --git a/drivers/usb/mtu3/mtu3_qmu.c b/drivers/usb/mtu3/mtu3_qmu.c index 3f414f91b5899b7c936d5d127250c32cd858e96d..2ea3157ddb6e25b0301bf57a5af4a9128b7ccf8d 100644 --- a/drivers/usb/mtu3/mtu3_qmu.c +++ b/drivers/usb/mtu3/mtu3_qmu.c @@ -273,6 +273,8 @@ static int mtu3_prepare_tx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq) gpd->dw3_info |= cpu_to_le32(GPD_EXT_FLAG_ZLP); } + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO); mreq->gpd = gpd; @@ -306,6 +308,8 @@ static int mtu3_prepare_rx_gpd(struct mtu3_ep *mep, struct mtu3_request *mreq) gpd->next_gpd = cpu_to_le32(lower_32_bits(enq_dma)); ext_addr |= GPD_EXT_NGP(mtu, upper_32_bits(enq_dma)); gpd->dw3_info = cpu_to_le32(ext_addr); + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); gpd->dw0_info |= cpu_to_le32(GPD_FLAGS_IOC | GPD_FLAGS_HWO); mreq->gpd = gpd; @@ -445,7 +449,8 @@ static void qmu_tx_zlp_error_handler(struct mtu3 *mtu, u8 epnum) return; } mtu3_setbits(mbase, MU3D_EP_TXCR0(mep->epnum), TX_TXPKTRDY); - + /* prevent reorder, make sure GPD's HWO is set last */ + mb(); /* by pass the current GDP */ gpd_current->dw0_info |= cpu_to_le32(GPD_FLAGS_BPS | GPD_FLAGS_HWO); diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig index 8de143807c1aec12692923da04c455f3e99d953a..4d61df6a9b5c88cff7a8d581964f201ec5888b4c 100644 --- a/drivers/usb/musb/Kconfig +++ b/drivers/usb/musb/Kconfig @@ -120,7 +120,7 @@ config USB_MUSB_MEDIATEK tristate "MediaTek platforms" depends on ARCH_MEDIATEK || COMPILE_TEST depends on NOP_USB_XCEIV - depends on GENERIC_PHY + select GENERIC_PHY select USB_ROLE_SWITCH comment "MUSB DMA mode" diff --git a/drivers/usb/musb/musb_dsps.c b/drivers/usb/musb/musb_dsps.c index ce9fc46c92661cb259dc701ddcbe78e8c730efb0..b5935834f9d2414709d4b50bc8e9465fcb8e9f8b 100644 --- a/drivers/usb/musb/musb_dsps.c +++ b/drivers/usb/musb/musb_dsps.c @@ -899,11 +899,13 @@ static int dsps_probe(struct platform_device *pdev) if (usb_get_dr_mode(&pdev->dev) == USB_DR_MODE_PERIPHERAL) { ret = dsps_setup_optional_vbus_irq(pdev, glue); if (ret) - goto err; + goto unregister_pdev; } return 0; +unregister_pdev: + platform_device_unregister(glue->musb); err: pm_runtime_disable(&pdev->dev); iounmap(glue->usbss_base); diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c index f62ffaede1abba48cc120445c036200922273669..fb806b33178a03c5020bb3c026686f739103cf5e 100644 --- a/drivers/usb/musb/musb_gadget.c +++ b/drivers/usb/musb/musb_gadget.c @@ -1247,9 +1247,11 @@ static int musb_gadget_queue(struct usb_ep *ep, struct usb_request *req, status = musb_queue_resume_work(musb, musb_ep_restart_resume_work, request); - if (status < 0) + if (status < 0) { dev_err(musb->controller, "%s resume work: %i\n", __func__, status); + list_del(&request->list); + } } unlock: diff --git a/drivers/usb/musb/tusb6010.c b/drivers/usb/musb/tusb6010.c index 0c2afed4131bc19d7cf253cf8d5cb4218de2a3e4..038307f66198542740724923ad2d86702ae4db64 100644 --- a/drivers/usb/musb/tusb6010.c +++ b/drivers/usb/musb/tusb6010.c @@ -1103,6 +1103,11 @@ static int tusb_musb_init(struct musb *musb) /* dma address for async dma */ mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!mem) { + pr_debug("no async dma resource?\n"); + ret = -ENODEV; + goto done; + } musb->async = mem->start; /* dma address for sync dma */ diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index f26861246f653073448b9ccf311834d2f0611fe7..a2a38fc76ca53c602b94bba123fd9776d03a7a57 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -81,10 +81,10 @@ #define CH341_QUIRK_SIMULATE_BREAK BIT(1) static const struct usb_device_id id_table[] = { - { USB_DEVICE(0x1a86, 0x5512) }, { USB_DEVICE(0x1a86, 0x5523) }, { USB_DEVICE(0x1a86, 0x7522) }, { USB_DEVICE(0x1a86, 0x7523) }, + { USB_DEVICE(0x2184, 0x0057) }, { USB_DEVICE(0x4348, 0x5523) }, { USB_DEVICE(0x9986, 0x7523) }, { }, diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 6d858bdaf33ce0e3814cd0cf0b34381a5c84fd74..7ac668023da872f844a2e6764aee50f4187c9865 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -55,6 +55,7 @@ static void cp210x_enable_event_mode(struct usb_serial_port *port); static void cp210x_disable_event_mode(struct usb_serial_port *port); static const struct usb_device_id id_table[] = { + { USB_DEVICE(0x0404, 0x034C) }, /* NCR Retail IO Box */ { USB_DEVICE(0x045B, 0x0053) }, /* Renesas RX610 RX-Stick */ { USB_DEVICE(0x0471, 0x066A) }, /* AKTAKOM ACE-1001 cable */ { USB_DEVICE(0x0489, 0xE000) }, /* Pirelli Broadband S.p.A, DP-L10 SIP/GSM Mobile */ @@ -72,6 +73,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x0FCF, 0x1004) }, /* Dynastream ANT2USB */ { USB_DEVICE(0x0FCF, 0x1006) }, /* Dynastream ANT development board */ { USB_DEVICE(0x0FDE, 0xCA05) }, /* OWL Wireless Electricity Monitor CM-160 */ + { USB_DEVICE(0x106F, 0x0003) }, /* CPI / Money Controls Bulk Coin Recycler */ { USB_DEVICE(0x10A6, 0xAA26) }, /* Knock-off DCU-11 cable */ { USB_DEVICE(0x10AB, 0x10C5) }, /* Siemens MC60 Cable */ { USB_DEVICE(0x10B5, 0xAC70) }, /* Nokia CA-42 USB */ @@ -1750,6 +1752,8 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) /* 2 banks of GPIO - One for the pins taken from each serial port */ if (intf_num == 0) { + priv->gc.ngpio = 2; + if (mode.eci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; @@ -1760,8 +1764,9 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_ECI_GPIO_MODE_MASK) >> CP210X_ECI_GPIO_MODE_OFFSET); - priv->gc.ngpio = 2; } else if (intf_num == 1) { + priv->gc.ngpio = 3; + if (mode.sci == CP210X_PIN_MODE_MODEM) { /* mark all GPIOs of this interface as reserved */ priv->gpio_altfunc = 0xff; @@ -1772,7 +1777,6 @@ static int cp2105_gpioconf_init(struct usb_serial *serial) priv->gpio_pushpull = (u8)((le16_to_cpu(config.gpio_mode) & CP210X_SCI_GPIO_MODE_MASK) >> CP210X_SCI_GPIO_MODE_OFFSET); - priv->gc.ngpio = 3; } else { return -ENODEV; } diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index dfcf79bdfddce2effb15b979cbfd5102a270ccba..b74621dc2a65888024a3502515baae552109e59c 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -969,6 +969,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_023_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_VX_034_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_101_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_159_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_3_PID) }, @@ -977,12 +978,14 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_6_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_7_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_160_8_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_235_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_257_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_2_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_3_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_279_4_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_313_PID) }, + { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_320_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_324_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_1_PID) }, { USB_DEVICE(BRAINBOXES_VID, BRAINBOXES_US_346_2_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 755858ca20bacf7702efc2d42c68b135cbc984f3..d1a9564697a4becf6f32b52461eef0fe36c1bbc0 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -1506,6 +1506,9 @@ #define BRAINBOXES_VX_023_PID 0x1003 /* VX-023 ExpressCard 1 Port RS422/485 */ #define BRAINBOXES_VX_034_PID 0x1004 /* VX-034 ExpressCard 2 Port RS422/485 */ #define BRAINBOXES_US_101_PID 0x1011 /* US-101 1xRS232 */ +#define BRAINBOXES_US_159_PID 0x1021 /* US-159 1xRS232 */ +#define BRAINBOXES_US_235_PID 0x1017 /* US-235 1xRS232 */ +#define BRAINBOXES_US_320_PID 0x1019 /* US-320 1xRS422/485 */ #define BRAINBOXES_US_324_PID 0x1013 /* US-324 1xRS422/485 1Mbaud */ #define BRAINBOXES_US_606_1_PID 0x2001 /* US-606 6 Port RS232 Serial Port 1 and 2 */ #define BRAINBOXES_US_606_2_PID 0x2002 /* US-606 6 Port RS232 Serial Port 3 and 4 */ diff --git a/drivers/usb/serial/keyspan.c b/drivers/usb/serial/keyspan.c index aa3dbce22cfbe857af446804f28440c691729cb7..451759f38b57341da051d5faca57653ccab47ea2 100644 --- a/drivers/usb/serial/keyspan.c +++ b/drivers/usb/serial/keyspan.c @@ -2910,22 +2910,22 @@ static int keyspan_port_probe(struct usb_serial_port *port) for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i) { p_priv->in_buffer[i] = kzalloc(IN_BUFLEN, GFP_KERNEL); if (!p_priv->in_buffer[i]) - goto err_in_buffer; + goto err_free_in_buffer; } for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i) { p_priv->out_buffer[i] = kzalloc(OUT_BUFLEN, GFP_KERNEL); if (!p_priv->out_buffer[i]) - goto err_out_buffer; + goto err_free_out_buffer; } p_priv->inack_buffer = kzalloc(INACK_BUFLEN, GFP_KERNEL); if (!p_priv->inack_buffer) - goto err_inack_buffer; + goto err_free_out_buffer; p_priv->outcont_buffer = kzalloc(OUTCONT_BUFLEN, GFP_KERNEL); if (!p_priv->outcont_buffer) - goto err_outcont_buffer; + goto err_free_inack_buffer; p_priv->device_details = d_details; @@ -2971,15 +2971,14 @@ static int keyspan_port_probe(struct usb_serial_port *port) return 0; -err_outcont_buffer: +err_free_inack_buffer: kfree(p_priv->inack_buffer); -err_inack_buffer: +err_free_out_buffer: for (i = 0; i < ARRAY_SIZE(p_priv->out_buffer); ++i) kfree(p_priv->out_buffer[i]); -err_out_buffer: +err_free_in_buffer: for (i = 0; i < ARRAY_SIZE(p_priv->in_buffer); ++i) kfree(p_priv->in_buffer[i]); -err_in_buffer: kfree(p_priv); return -ENOMEM; diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 1e990a8264a53b1681ad3d03308d6e6d9e1d5018..b878f4c87fee8e5d1eafc3cf5e46ce785a71e753 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -198,6 +198,8 @@ static void option_instat_callback(struct urb *urb); #define DELL_PRODUCT_5821E 0x81d7 #define DELL_PRODUCT_5821E_ESIM 0x81e0 +#define DELL_PRODUCT_5829E_ESIM 0x81e4 +#define DELL_PRODUCT_5829E 0x81e6 #define KYOCERA_VENDOR_ID 0x0c88 #define KYOCERA_PRODUCT_KPC650 0x17da @@ -246,11 +248,13 @@ static void option_instat_callback(struct urb *urb); /* These Quectel products use Quectel's vendor ID */ #define QUECTEL_PRODUCT_EC21 0x0121 #define QUECTEL_PRODUCT_EC25 0x0125 +#define QUECTEL_PRODUCT_EG91 0x0191 #define QUECTEL_PRODUCT_EG95 0x0195 #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 #define QUECTEL_PRODUCT_EM12 0x0512 #define QUECTEL_PRODUCT_RM500Q 0x0800 +#define QUECTEL_PRODUCT_EC200S_CN 0x6002 #define QUECTEL_PRODUCT_EC200T 0x6026 #define CMOTECH_VENDOR_ID 0x16d8 @@ -1061,6 +1065,10 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5821E_ESIM), .driver_info = RSVD(0) | RSVD(1) | RSVD(6) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E), + .driver_info = RSVD(0) | RSVD(6) }, + { USB_DEVICE(DELL_VENDOR_ID, DELL_PRODUCT_5829E_ESIM), + .driver_info = RSVD(0) | RSVD(6) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_E100A) }, /* ADU-E100, ADU-310 */ { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_500A) }, { USB_DEVICE(ANYDATA_VENDOR_ID, ANYDATA_PRODUCT_ADU_620UW) }, @@ -1111,6 +1119,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC25, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG91, 0xff, 0xff, 0xff), + .driver_info = NUMEP2 }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG91, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0xff, 0xff), .driver_info = NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EG95, 0xff, 0, 0) }, @@ -1128,6 +1139,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500Q, 0xff, 0xff, 0x10), .driver_info = ZLP }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, @@ -1213,6 +1225,14 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) | RSVD(3) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1063, 0xff), /* Telit LN920 (ECM) */ .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1070, 0xff), /* Telit FN990 (rmnet) */ + .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1071, 0xff), /* Telit FN990 (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1072, 0xff), /* Telit FN990 (RNDIS) */ + .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1073, 0xff), /* Telit FN990 (ECM) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910), .driver_info = NCTRL(0) | RSVD(1) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_ME910_DUAL_MODEM), @@ -1227,6 +1247,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1203, 0xff), /* Telit LE910Cx (RNDIS) */ .driver_info = NCTRL(2) | RSVD(3) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1204, 0xff), /* Telit LE910Cx (MBIM) */ + .driver_info = NCTRL(0) | RSVD(1) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE910_USBCFG4), .driver_info = NCTRL(0) | RSVD(1) | RSVD(2) | RSVD(3) }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), @@ -1257,8 +1279,16 @@ static const struct usb_device_id option_ids[] = { .driver_info = NCTRL(2) }, { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x7011, 0xff), /* Telit LE910-S1 (ECM) */ .driver_info = NCTRL(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701a, 0xff), /* Telit LE910R1 (RNDIS) */ + .driver_info = NCTRL(2) }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x701b, 0xff), /* Telit LE910R1 (ECM) */ + .driver_info = NCTRL(2) }, { USB_DEVICE(TELIT_VENDOR_ID, 0x9010), /* Telit SBL FN980 flashing device */ .driver_info = NCTRL(0) | ZLP }, + { USB_DEVICE(TELIT_VENDOR_ID, 0x9200), /* Telit LE910S1 flashing device */ + .driver_info = NCTRL(0) | ZLP }, + { USB_DEVICE(TELIT_VENDOR_ID, 0x9201), /* Telit LE910R1 flashing device */ + .driver_info = NCTRL(0) | ZLP }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = RSVD(1) }, @@ -1631,6 +1661,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(2) }, { USB_DEVICE_INTERFACE_CLASS(ZTE_VENDOR_ID, 0x1476, 0xff) }, /* GosunCn ZTE WeLink ME3630 (ECM/NCM mode) */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1481, 0xff, 0x00, 0x00) }, /* ZTE MF871A */ + { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1485, 0xff, 0xff, 0xff), /* ZTE MF286D */ + .driver_info = RSVD(5) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1533, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1534, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1535, 0xff, 0xff, 0xff) }, @@ -2086,6 +2118,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0xff, 0x30) }, /* Fibocom FG150 Diag */ { USB_DEVICE_AND_INTERFACE_INFO(0x2cb7, 0x010b, 0xff, 0, 0) }, /* Fibocom FG150 AT */ { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a0, 0xff) }, /* Fibocom NL668-AM/NL652-EU (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a2, 0xff) }, /* Fibocom FM101-GL (laptop MBIM) */ + { USB_DEVICE_INTERFACE_CLASS(0x2cb7, 0x01a4, 0xff), /* Fibocom FM101-GL (laptop MBIM) */ + .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2df3, 0x9d03, 0xff) }, /* LongSung M5710 */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1404, 0xff) }, /* GosunCn GM500 RNDIS */ { USB_DEVICE_INTERFACE_CLASS(0x305a, 0x1405, 0xff) }, /* GosunCn GM500 MBIM */ diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 83da8236e3c8bc5bfce602eaabea50169a44827e..c18bf8164bc2e9676247f36e672f2c9cea489e30 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -165,6 +165,7 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x907b)}, /* Sierra Wireless EM74xx */ {DEVICE_SWI(0x1199, 0x9090)}, /* Sierra Wireless EM7565 QDL */ {DEVICE_SWI(0x1199, 0x9091)}, /* Sierra Wireless EM7565 */ + {DEVICE_SWI(0x1199, 0x90d2)}, /* Sierra Wireless EM9191 QDL */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index c6b3fcf9018052a4e09ed4b71d7cac32725a4646..1a05e3dcfec8a10652ec437bc0d80ce38c401d50 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -406,6 +406,16 @@ UNUSUAL_DEV( 0x04b8, 0x0602, 0x0110, 0x0110, "785EPX Storage", USB_SC_SCSI, USB_PR_BULK, NULL, US_FL_SINGLE_LUN), +/* + * Reported by James Buren + * Virtual ISOs cannot be remounted if ejected while the device is locked + * Disable locking to mimic Windows behavior that bypasses the issue + */ +UNUSUAL_DEV( 0x04c5, 0x2028, 0x0001, 0x0001, + "iODD", + "2531/2541", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NOT_LOCKABLE), + /* * Not sure who reported this originally but * Pavel Machek reported that the extra US_FL_SINGLE_LUN @@ -2291,6 +2301,16 @@ UNUSUAL_DEV( 0x2027, 0xa001, 0x0000, 0x9999, USB_SC_DEVICE, USB_PR_DEVICE, usb_stor_euscsi_init, US_FL_SCM_MULT_TARG ), +/* + * Reported by DocMAX + * and Thomas Weißschuh + */ +UNUSUAL_DEV( 0x2109, 0x0715, 0x9999, 0x9999, + "VIA Labs, Inc.", + "VL817 SATA Bridge", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_IGNORE_UAS), + UNUSUAL_DEV( 0x2116, 0x0320, 0x0001, 0x0001, "ST", "2A", diff --git a/drivers/usb/typec/Kconfig b/drivers/usb/typec/Kconfig index e7f120874c48379669195699f65729e28a05686d..0d953c6805f0fc610c3a3655d7c28546512b5f18 100644 --- a/drivers/usb/typec/Kconfig +++ b/drivers/usb/typec/Kconfig @@ -75,9 +75,9 @@ config TYPEC_TPS6598X config TYPEC_STUSB160X tristate "STMicroelectronics STUSB160x Type-C controller driver" - depends on I2C - depends on REGMAP_I2C depends on USB_ROLE_SWITCH || !USB_ROLE_SWITCH + depends on I2C + select REGMAP_I2C help Say Y or M here if your system has STMicroelectronics STUSB160x Type-C port controller. diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c index ebc46b9f776cdb02952e22a4509ff4343255cdbf..0df739403150165ed750e6faf10f5ebcd57abb1a 100644 --- a/drivers/usb/typec/tcpm/fusb302.c +++ b/drivers/usb/typec/tcpm/fusb302.c @@ -668,25 +668,27 @@ static int tcpm_set_cc(struct tcpc_dev *dev, enum typec_cc_status cc) ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK, FUSB_REG_MASK_BC_LVL | FUSB_REG_MASK_COMP_CHNG, - FUSB_REG_MASK_COMP_CHNG); + FUSB_REG_MASK_BC_LVL); if (ret < 0) { fusb302_log(chip, "cannot set SRC interrupt, ret=%d", ret); goto done; } chip->intr_comp_chng = true; + chip->intr_bc_lvl = false; break; case TYPEC_CC_RD: ret = fusb302_i2c_mask_write(chip, FUSB_REG_MASK, FUSB_REG_MASK_BC_LVL | FUSB_REG_MASK_COMP_CHNG, - FUSB_REG_MASK_BC_LVL); + FUSB_REG_MASK_COMP_CHNG); if (ret < 0) { fusb302_log(chip, "cannot set SRC interrupt, ret=%d", ret); goto done; } chip->intr_bc_lvl = true; + chip->intr_comp_chng = false; break; default: break; diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index 05aa15aaf26ab0dc8920b81c79300c894a33071e..f7141b135270dcbb0810f90c35006b81d70a61f1 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -722,7 +722,7 @@ irqreturn_t tcpci_irq(struct tcpci *tcpci) tcpm_pd_receive(tcpci->port, &msg); } - if (status & TCPC_ALERT_EXTENDED_STATUS) { + if (tcpci->data->vbus_vsafe0v && (status & TCPC_ALERT_EXTENDED_STATUS)) { ret = regmap_read(tcpci->regmap, TCPC_EXTENDED_STATUS, &raw); if (!ret && (raw & TCPC_EXTENDED_STATUS_VSAFE0V)) tcpm_vbus_change(tcpci->port); diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 9d64dc34f5271bed7bfdff5f1033fbbdf3e8e5ee..fed8153ea7f0909ab26a866e917b9220118aefaf 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4183,12 +4183,9 @@ static void run_state_machine(struct tcpm_port *port) 0); port->debouncing = false; } else { - /* Wait for VBUS, but not forever */ - tcpm_set_state(port, PORT_RESET, PD_T_PS_SOURCE_ON); port->debouncing = false; } break; - case SRC_TRY: port->try_src_count++; tcpm_set_cc(port, tcpm_rp_cc(port)); @@ -4990,6 +4987,7 @@ static void _tcpm_cc_change(struct tcpm_port *port, enum typec_cc_status cc1, tcpm_set_state(port, SRC_ATTACH_WAIT, 0); break; case SRC_ATTACHED: + case SRC_STARTUP: case SRC_SEND_CAPABILITIES: case SRC_READY: if (tcpm_port_is_disconnected(port) || @@ -5272,8 +5270,9 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port) case SNK_TRYWAIT_DEBOUNCE: break; case SNK_ATTACH_WAIT: + case SNK_DEBOUNCED: port->debouncing = false; - tcpm_set_state(port, SNK_UNATTACHED, 0); + /* Do nothing, as TCPM is still waiting for vbus to reaach VSAFE5V to connect */ break; case SNK_NEGOTIATE_CAPABILITIES: @@ -5447,9 +5446,38 @@ static void tcpm_pd_event_handler(struct kthread_work *work) } if (events & TCPM_CC_EVENT) { enum typec_cc_status cc1, cc2; + bool modified = false; if (port->tcpc->get_cc(port->tcpc, &cc1, &cc2) == 0) _tcpm_cc_change(port, cc1, cc2); + + trace_android_vh_typec_tcpm_modify_src_caps(&port->nr_src_pdo, + &port->src_pdo, &modified); + if (modified) { + int ret; + + switch (port->state) { + case SRC_UNATTACHED: + case SRC_ATTACH_WAIT: + case SRC_TRYWAIT: + tcpm_set_cc(port, tcpm_rp_cc(port)); + break; + case SRC_SEND_CAPABILITIES: + case SRC_SEND_CAPABILITIES_TIMEOUT: + case SRC_NEGOTIATE_CAPABILITIES: + case SRC_READY: + case SRC_WAIT_NEW_CAPABILITIES: + port->caps_count = 0; + port->upcoming_state = SRC_SEND_CAPABILITIES; + ret = tcpm_ams_start(port, POWER_NEGOTIATION); + if (ret == -EAGAIN) + port->upcoming_state = INVALID_STATE; + break; + default: + break; + } + } + } if (events & TCPM_FRS_EVENT) { if (port->state == SNK_READY) { diff --git a/drivers/usb/typec/tps6598x.c b/drivers/usb/typec/tps6598x.c index 30bfc314b743cf71c8f56b3110f072f394ab94e7..6cb5c8e2c8535a6f1422d2209419fcfea0f1f26d 100644 --- a/drivers/usb/typec/tps6598x.c +++ b/drivers/usb/typec/tps6598x.c @@ -109,7 +109,7 @@ tps6598x_block_read(struct tps6598x *tps, u8 reg, void *val, size_t len) u8 data[TPS_MAX_LEN + 1]; int ret; - if (WARN_ON(len + 1 > sizeof(data))) + if (len + 1 > sizeof(data)) return -EINVAL; if (!tps->i2c_protocol) diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c index bff96d64dddffee6857125ad6040f5d67a873847..6db7c8ddd51cd0aac16d89b86e83963fe39af70b 100644 --- a/drivers/usb/typec/ucsi/ucsi_ccg.c +++ b/drivers/usb/typec/ucsi/ucsi_ccg.c @@ -325,7 +325,7 @@ static int ucsi_ccg_init(struct ucsi_ccg *uc) if (status < 0) return status; - if (!data) + if (!(data & DEV_INT)) return 0; status = ccg_write(uc, CCGX_RAB_INTR_REG, &data, sizeof(data)); diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index fbdc9468818d32a5a44783e4f57804ece5e9be32..65d6f8fd81e706a25e12937d5d81bfb78bdd2535 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -812,8 +812,6 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id); MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size); MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn); - if (MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, eth_frame_offload_type)) - MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0, 1); err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out)); if (err) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index c4d53ff06bf858abcb2f904720bd802a43cc3d45..e4d60009d90832c5b6ffb1f278cf2af3af84c8b4 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -196,7 +196,7 @@ static int vhost_vdpa_config_validate(struct vhost_vdpa *v, break; } - if (c->len == 0) + if (c->len == 0 || c->off > size) return -EINVAL; if (c->len > size - c->off) @@ -325,7 +325,7 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) struct eventfd_ctx *ctx; cb.callback = vhost_vdpa_config_cb; - cb.private = v->vdpa; + cb.private = v; if (copy_from_user(&fd, argp, sizeof(fd))) return -EFAULT; diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c index a483cec31d5cbd2f9d69f5caa869b28a98d3fd6c..c282fc0d04bd1b96e76c2cb95929bb3ec7675bef 100644 --- a/drivers/vhost/vsock.c +++ b/drivers/vhost/vsock.c @@ -494,7 +494,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work) virtio_transport_free_pkt(pkt); len += sizeof(pkt->hdr); - vhost_add_used(vq, head, len); + vhost_add_used(vq, head, 0); total_len += len; added = true; } while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len))); @@ -573,16 +573,18 @@ err: return ret; } -static int vhost_vsock_stop(struct vhost_vsock *vsock) +static int vhost_vsock_stop(struct vhost_vsock *vsock, bool check_owner) { size_t i; - int ret; + int ret = 0; mutex_lock(&vsock->dev.mutex); - ret = vhost_dev_check_owner(&vsock->dev); - if (ret) - goto err; + if (check_owner) { + ret = vhost_dev_check_owner(&vsock->dev); + if (ret) + goto err; + } for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) { struct vhost_virtqueue *vq = &vsock->vqs[i]; @@ -697,7 +699,12 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file) * inefficient. Room for improvement here. */ vsock_for_each_connected_socket(vhost_vsock_reset_orphans); - vhost_vsock_stop(vsock); + /* Don't check the owner, because we are in the release path, so we + * need to stop the vsock device in any case. + * vhost_vsock_stop() can not fail in this case, so we don't need to + * check the return code. + */ + vhost_vsock_stop(vsock, false); vhost_vsock_flush(vsock); vhost_dev_stop(&vsock->dev); @@ -801,7 +808,7 @@ static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl, if (start) return vhost_vsock_start(vsock); else - return vhost_vsock_stop(vsock); + return vhost_vsock_stop(vsock, true); case VHOST_GET_FEATURES: features = VHOST_VSOCK_FEATURES; if (copy_to_user(argp, &features, sizeof(features))) diff --git a/drivers/video/backlight/backlight.c b/drivers/video/backlight/backlight.c index 537fe1b376ad73d3a304809a90ab835bd02fab4a..fc990e576340be6f36ccf4e63903b3ffc46db576 100644 --- a/drivers/video/backlight/backlight.c +++ b/drivers/video/backlight/backlight.c @@ -688,12 +688,6 @@ static struct backlight_device *of_find_backlight(struct device *dev) of_node_put(np); if (!bd) return ERR_PTR(-EPROBE_DEFER); - /* - * Note: gpio_backlight uses brightness as - * power state during probe - */ - if (!bd->props.brightness) - bd->props.brightness = bd->props.max_brightness; } } diff --git a/drivers/video/backlight/qcom-wled.c b/drivers/video/backlight/qcom-wled.c index cd11c5776438184c6bc466b1a0632d623940acc4..486d35da0150724e124d477b31153da2911c2fd9 100644 --- a/drivers/video/backlight/qcom-wled.c +++ b/drivers/video/backlight/qcom-wled.c @@ -231,14 +231,14 @@ struct wled { static int wled3_set_brightness(struct wled *wled, u16 brightness) { int rc, i; - u8 v[2]; + __le16 v; - v[0] = brightness & 0xff; - v[1] = (brightness >> 8) & 0xf; + v = cpu_to_le16(brightness & WLED3_SINK_REG_BRIGHT_MAX); for (i = 0; i < wled->cfg.num_strings; ++i) { rc = regmap_bulk_write(wled->regmap, wled->ctrl_addr + - WLED3_SINK_REG_BRIGHT(i), v, 2); + WLED3_SINK_REG_BRIGHT(wled->cfg.enabled_strings[i]), + &v, sizeof(v)); if (rc < 0) return rc; } @@ -250,18 +250,18 @@ static int wled4_set_brightness(struct wled *wled, u16 brightness) { int rc, i; u16 low_limit = wled->max_brightness * 4 / 1000; - u8 v[2]; + __le16 v; /* WLED4's lower limit of operation is 0.4% */ if (brightness > 0 && brightness < low_limit) brightness = low_limit; - v[0] = brightness & 0xff; - v[1] = (brightness >> 8) & 0xf; + v = cpu_to_le16(brightness & WLED3_SINK_REG_BRIGHT_MAX); for (i = 0; i < wled->cfg.num_strings; ++i) { rc = regmap_bulk_write(wled->regmap, wled->sink_addr + - WLED4_SINK_REG_BRIGHT(i), v, 2); + WLED4_SINK_REG_BRIGHT(wled->cfg.enabled_strings[i]), + &v, sizeof(v)); if (rc < 0) return rc; } @@ -273,21 +273,20 @@ static int wled5_set_brightness(struct wled *wled, u16 brightness) { int rc, offset; u16 low_limit = wled->max_brightness * 1 / 1000; - u8 v[2]; + __le16 v; /* WLED5's lower limit is 0.1% */ if (brightness < low_limit) brightness = low_limit; - v[0] = brightness & 0xff; - v[1] = (brightness >> 8) & 0x7f; + v = cpu_to_le16(brightness & WLED5_SINK_REG_BRIGHT_MAX_15B); offset = (wled->cfg.mod_sel == MOD_A) ? WLED5_SINK_REG_MOD_A_BRIGHTNESS_LSB : WLED5_SINK_REG_MOD_B_BRIGHTNESS_LSB; rc = regmap_bulk_write(wled->regmap, wled->sink_addr + offset, - v, 2); + &v, sizeof(v)); return rc; } @@ -572,7 +571,7 @@ unlock_mutex: static void wled_auto_string_detection(struct wled *wled) { - int rc = 0, i, delay_time_us; + int rc = 0, i, j, delay_time_us; u32 sink_config = 0; u8 sink_test = 0, sink_valid = 0, val; bool fault_set; @@ -619,14 +618,15 @@ static void wled_auto_string_detection(struct wled *wled) /* Iterate through the strings one by one */ for (i = 0; i < wled->cfg.num_strings; i++) { - sink_test = BIT((WLED4_SINK_REG_CURR_SINK_SHFT + i)); + j = wled->cfg.enabled_strings[i]; + sink_test = BIT((WLED4_SINK_REG_CURR_SINK_SHFT + j)); /* Enable feedback control */ rc = regmap_write(wled->regmap, wled->ctrl_addr + - WLED3_CTRL_REG_FEEDBACK_CONTROL, i + 1); + WLED3_CTRL_REG_FEEDBACK_CONTROL, j + 1); if (rc < 0) { dev_err(wled->dev, "Failed to enable feedback for SINK %d rc = %d\n", - i + 1, rc); + j + 1, rc); goto failed_detect; } @@ -635,7 +635,7 @@ static void wled_auto_string_detection(struct wled *wled) WLED4_SINK_REG_CURR_SINK, sink_test); if (rc < 0) { dev_err(wled->dev, "Failed to configure SINK %d rc=%d\n", - i + 1, rc); + j + 1, rc); goto failed_detect; } @@ -662,7 +662,7 @@ static void wled_auto_string_detection(struct wled *wled) if (fault_set) dev_dbg(wled->dev, "WLED OVP fault detected with SINK %d\n", - i + 1); + j + 1); else sink_valid |= sink_test; @@ -702,15 +702,16 @@ static void wled_auto_string_detection(struct wled *wled) /* Enable valid sinks */ if (wled->version == 4) { for (i = 0; i < wled->cfg.num_strings; i++) { + j = wled->cfg.enabled_strings[i]; if (sink_config & - BIT(WLED4_SINK_REG_CURR_SINK_SHFT + i)) + BIT(WLED4_SINK_REG_CURR_SINK_SHFT + j)) val = WLED4_SINK_REG_STR_MOD_MASK; else /* Disable modulator_en for unused sink */ val = 0; rc = regmap_write(wled->regmap, wled->sink_addr + - WLED4_SINK_REG_STR_MOD_EN(i), val); + WLED4_SINK_REG_STR_MOD_EN(j), val); if (rc < 0) { dev_err(wled->dev, "Failed to configure MODULATOR_EN rc=%d\n", rc); @@ -1256,21 +1257,6 @@ static const struct wled_var_cfg wled5_ovp_cfg = { .size = 16, }; -static u32 wled3_num_strings_values_fn(u32 idx) -{ - return idx + 1; -} - -static const struct wled_var_cfg wled3_num_strings_cfg = { - .fn = wled3_num_strings_values_fn, - .size = 3, -}; - -static const struct wled_var_cfg wled4_num_strings_cfg = { - .fn = wled3_num_strings_values_fn, - .size = 4, -}; - static u32 wled3_switch_freq_values_fn(u32 idx) { return 19200 / (2 * (1 + idx)); @@ -1344,11 +1330,6 @@ static int wled_configure(struct wled *wled) .val_ptr = &cfg->switch_freq, .cfg = &wled3_switch_freq_cfg, }, - { - .name = "qcom,num-strings", - .val_ptr = &cfg->num_strings, - .cfg = &wled3_num_strings_cfg, - }, }; const struct wled_u32_opts wled4_opts[] = { @@ -1372,11 +1353,6 @@ static int wled_configure(struct wled *wled) .val_ptr = &cfg->switch_freq, .cfg = &wled3_switch_freq_cfg, }, - { - .name = "qcom,num-strings", - .val_ptr = &cfg->num_strings, - .cfg = &wled4_num_strings_cfg, - }, }; const struct wled_u32_opts wled5_opts[] = { @@ -1400,11 +1376,6 @@ static int wled_configure(struct wled *wled) .val_ptr = &cfg->switch_freq, .cfg = &wled3_switch_freq_cfg, }, - { - .name = "qcom,num-strings", - .val_ptr = &cfg->num_strings, - .cfg = &wled4_num_strings_cfg, - }, { .name = "qcom,modulator-sel", .val_ptr = &cfg->mod_sel, @@ -1523,16 +1494,57 @@ static int wled_configure(struct wled *wled) *bool_opts[i].val_ptr = true; } - cfg->num_strings = cfg->num_strings + 1; - string_len = of_property_count_elems_of_size(dev->of_node, "qcom,enabled-strings", sizeof(u32)); - if (string_len > 0) - of_property_read_u32_array(dev->of_node, + if (string_len > 0) { + if (string_len > wled->max_string_count) { + dev_err(dev, "Cannot have more than %d strings\n", + wled->max_string_count); + return -EINVAL; + } + + rc = of_property_read_u32_array(dev->of_node, "qcom,enabled-strings", wled->cfg.enabled_strings, - sizeof(u32)); + string_len); + if (rc) { + dev_err(dev, "Failed to read %d elements from qcom,enabled-strings: %d\n", + string_len, rc); + return rc; + } + + for (i = 0; i < string_len; ++i) { + if (wled->cfg.enabled_strings[i] >= wled->max_string_count) { + dev_err(dev, + "qcom,enabled-strings index %d at %d is out of bounds\n", + wled->cfg.enabled_strings[i], i); + return -EINVAL; + } + } + + cfg->num_strings = string_len; + } + + rc = of_property_read_u32(dev->of_node, "qcom,num-strings", &val); + if (!rc) { + if (val < 1 || val > wled->max_string_count) { + dev_err(dev, "qcom,num-strings must be between 1 and %d\n", + wled->max_string_count); + return -EINVAL; + } + + if (string_len > 0) { + dev_warn(dev, "Only one of qcom,num-strings or qcom,enabled-strings" + " should be set\n"); + if (val > string_len) { + dev_err(dev, "qcom,num-strings exceeds qcom,enabled-strings\n"); + return -EINVAL; + } + } + + cfg->num_strings = val; + } return 0; } diff --git a/drivers/video/console/Kconfig b/drivers/video/console/Kconfig index ee33b8ec62bb259b2368fa97dc7294c751fbe059..47c4939577725059a905459d818f9c4bad9c0203 100644 --- a/drivers/video/console/Kconfig +++ b/drivers/video/console/Kconfig @@ -78,6 +78,26 @@ config FRAMEBUFFER_CONSOLE help Low-level framebuffer-based console driver. +config FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION + bool "Enable legacy fbcon hardware acceleration code" + depends on FRAMEBUFFER_CONSOLE + default y if PARISC + default n + help + This option enables the fbcon (framebuffer text-based) hardware + acceleration for graphics drivers which were written for the fbdev + graphics interface. + + On modern machines, on mainstream machines (like x86-64) or when + using a modern Linux distribution those fbdev drivers usually aren't used. + So enabling this option wouldn't have any effect, which is why you want + to disable this option on such newer machines. + + If you compile this kernel for older machines which still require the + fbdev drivers, you may want to say Y. + + If unsure, select n. + config FRAMEBUFFER_CONSOLE_DETECT_PRIMARY bool "Map the console to the primary display device" depends on FRAMEBUFFER_CONSOLE diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c index 1b451165311c942dff5304d92302d57b80f21c67..40496e9e9b4381fe81da1d3f60ce722dde4c16f3 100644 --- a/drivers/video/console/sticon.c +++ b/drivers/video/console/sticon.c @@ -332,13 +332,13 @@ static u8 sticon_build_attr(struct vc_data *conp, u8 color, bool blink, bool underline, bool reverse, bool italic) { - u8 attr = ((color & 0x70) >> 1) | ((color & 7)); + u8 fg = color & 7; + u8 bg = (color & 0x70) >> 4; - if (reverse) { - color = ((color >> 3) & 0x7) | ((color & 0x7) << 3); - } - - return attr; + if (reverse) + return (fg << 3) | bg; + else + return (bg << 3) | fg; } static void sticon_invert_region(struct vc_data *conp, u16 *p, int count) diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c index 5dc88a914b3494847a2ad8d8c8445b6d30532cdd..042e166d926824471291949ade7345ea3254eec9 100644 --- a/drivers/video/console/vgacon.c +++ b/drivers/video/console/vgacon.c @@ -370,11 +370,17 @@ static void vgacon_init(struct vc_data *c, int init) struct uni_pagedir *p; /* - * We cannot be loaded as a module, therefore init is always 1, - * but vgacon_init can be called more than once, and init will - * not be 1. + * We cannot be loaded as a module, therefore init will be 1 + * if we are the default console, however if we are a fallback + * console, for example if fbcon has failed registration, then + * init will be 0, so we need to make sure our boot parameters + * have been copied to the console structure for vgacon_resize + * ultimately called by vc_resize. Any subsequent calls to + * vgacon_init init will have init set to 0 too. */ c->vc_can_do_color = vga_can_do_color; + c->vc_scan_lines = vga_scan_lines; + c->vc_font.height = c->vc_cell_height = vga_video_font_height; /* set dimensions manually if init != 0 since vc_resize() will fail */ if (init) { @@ -383,8 +389,6 @@ static void vgacon_init(struct vc_data *c, int init) } else vc_resize(c, vga_video_num_columns, vga_video_num_lines); - c->vc_scan_lines = vga_scan_lines; - c->vc_font.height = c->vc_cell_height = vga_video_font_height; c->vc_complement_mask = 0x7700; if (vga_512_chars) c->vc_hi_font_mask = 0x0800; diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c index 998067b701fa025ebe80657baba51ba8e1b51566..393894af26f849981e71f22b22923e49d6de8d57 100644 --- a/drivers/video/fbdev/chipsfb.c +++ b/drivers/video/fbdev/chipsfb.c @@ -331,7 +331,7 @@ static const struct fb_var_screeninfo chipsfb_var = { static void init_chips(struct fb_info *p, unsigned long addr) { - memset(p->screen_base, 0, 0x100000); + fb_memset(p->screen_base, 0, 0x100000); p->fix = chipsfb_fix; p->fix.smem_start = addr; diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 42c72d051158fa47451187d0450c30a1113c5017..f102519ccefb43be0fb7806e7d0c716c51d1fdb1 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -1033,7 +1033,7 @@ static void fbcon_init(struct vc_data *vc, int init) struct vc_data *svc = *default_mode; struct fbcon_display *t, *p = &fb_display[vc->vc_num]; int logo = 1, new_rows, new_cols, rows, cols, charcnt = 256; - int ret; + int cap, ret; if (WARN_ON(info_idx == -1)) return; @@ -1042,6 +1042,7 @@ static void fbcon_init(struct vc_data *vc, int init) con2fb_map[vc->vc_num] = info_idx; info = registered_fb[con2fb_map[vc->vc_num]]; + cap = info->flags; if (logo_shown < 0 && console_loglevel <= CONSOLE_LOGLEVEL_QUIET) logo_shown = FBCON_LOGO_DONTSHOW; @@ -1146,13 +1147,13 @@ static void fbcon_init(struct vc_data *vc, int init) ops->graphics = 0; - /* - * No more hw acceleration for fbcon. - * - * FIXME: Garbage collect all the now dead code after sufficient time - * has passed. - */ - p->scrollmode = SCROLL_REDRAW; +#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION + if ((cap & FBINFO_HWACCEL_COPYAREA) && + !(cap & FBINFO_HWACCEL_DISABLED)) + p->scrollmode = SCROLL_MOVE; + else /* default to something safe */ + p->scrollmode = SCROLL_REDRAW; +#endif /* * ++guenther: console.c:vc_allocate() relies on initializing @@ -1718,7 +1719,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, count = vc->vc_rows; if (logo_shown >= 0) goto redraw_up; - switch (p->scrollmode) { + switch (fb_scrollmode(p)) { case SCROLL_MOVE: fbcon_redraw_blit(vc, info, p, t, b - t - count, count); @@ -1808,7 +1809,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, count = vc->vc_rows; if (logo_shown >= 0) goto redraw_down; - switch (p->scrollmode) { + switch (fb_scrollmode(p)) { case SCROLL_MOVE: fbcon_redraw_blit(vc, info, p, b - 1, b - t - count, -count); @@ -1959,6 +1960,48 @@ static void fbcon_bmove_rec(struct vc_data *vc, struct fbcon_display *p, int sy, height, width); } +static void updatescrollmode_accel(struct fbcon_display *p, + struct fb_info *info, + struct vc_data *vc) +{ +#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION + struct fbcon_ops *ops = info->fbcon_par; + int cap = info->flags; + u16 t = 0; + int ypan = FBCON_SWAP(ops->rotate, info->fix.ypanstep, + info->fix.xpanstep); + int ywrap = FBCON_SWAP(ops->rotate, info->fix.ywrapstep, t); + int yres = FBCON_SWAP(ops->rotate, info->var.yres, info->var.xres); + int vyres = FBCON_SWAP(ops->rotate, info->var.yres_virtual, + info->var.xres_virtual); + int good_pan = (cap & FBINFO_HWACCEL_YPAN) && + divides(ypan, vc->vc_font.height) && vyres > yres; + int good_wrap = (cap & FBINFO_HWACCEL_YWRAP) && + divides(ywrap, vc->vc_font.height) && + divides(vc->vc_font.height, vyres) && + divides(vc->vc_font.height, yres); + int reading_fast = cap & FBINFO_READS_FAST; + int fast_copyarea = (cap & FBINFO_HWACCEL_COPYAREA) && + !(cap & FBINFO_HWACCEL_DISABLED); + int fast_imageblit = (cap & FBINFO_HWACCEL_IMAGEBLIT) && + !(cap & FBINFO_HWACCEL_DISABLED); + + if (good_wrap || good_pan) { + if (reading_fast || fast_copyarea) + p->scrollmode = good_wrap ? + SCROLL_WRAP_MOVE : SCROLL_PAN_MOVE; + else + p->scrollmode = good_wrap ? SCROLL_REDRAW : + SCROLL_PAN_REDRAW; + } else { + if (reading_fast || (fast_copyarea && !fast_imageblit)) + p->scrollmode = SCROLL_MOVE; + else + p->scrollmode = SCROLL_REDRAW; + } +#endif +} + static void updatescrollmode(struct fbcon_display *p, struct fb_info *info, struct vc_data *vc) @@ -1974,6 +2017,9 @@ static void updatescrollmode(struct fbcon_display *p, p->vrows -= (yres - (fh * vc->vc_rows)) / fh; if ((yres % fh) && (vyres % fh < yres % fh)) p->vrows--; + + /* update scrollmode in case hardware acceleration is used */ + updatescrollmode_accel(p, info, vc); } #define PITCH(w) (((w) + 7) >> 3) @@ -2134,7 +2180,7 @@ static int fbcon_switch(struct vc_data *vc) updatescrollmode(p, info, vc); - switch (p->scrollmode) { + switch (fb_scrollmode(p)) { case SCROLL_WRAP_MOVE: scrollback_phys_max = p->vrows - vc->vc_rows; break; diff --git a/drivers/video/fbdev/core/fbcon.h b/drivers/video/fbdev/core/fbcon.h index 9315b360c898136bb2a65bd348aa16f57dcbecea..0f16cbc99e6a4f483018a46928f9e957917f9179 100644 --- a/drivers/video/fbdev/core/fbcon.h +++ b/drivers/video/fbdev/core/fbcon.h @@ -29,7 +29,9 @@ struct fbcon_display { /* Filled in by the low-level console driver */ const u_char *fontdata; int userfont; /* != 0 if fontdata kmalloc()ed */ - u_short scrollmode; /* Scroll Method */ +#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION + u_short scrollmode; /* Scroll Method, use fb_scrollmode() */ +#endif u_short inverse; /* != 0 text black on white as default */ short yscroll; /* Hardware scrolling */ int vrows; /* number of virtual rows */ @@ -208,6 +210,17 @@ static inline int attr_col_ec(int shift, struct vc_data *vc, #define SCROLL_REDRAW 0x004 #define SCROLL_PAN_REDRAW 0x005 +static inline u_short fb_scrollmode(struct fbcon_display *fb) +{ +#ifdef CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION + return fb->scrollmode; +#else + /* hardcoded to SCROLL_REDRAW if acceleration was disabled. */ + return SCROLL_REDRAW; +#endif +} + + #ifdef CONFIG_FB_TILEBLITTING extern void fbcon_set_tileops(struct vc_data *vc, struct fb_info *info); #endif diff --git a/drivers/video/fbdev/core/fbcon_ccw.c b/drivers/video/fbdev/core/fbcon_ccw.c index bbd869efd03bc94ae9387d4355eee28a280fb9c2..f75b24c32d497e27fdb6c56b512b68e92349b9a2 100644 --- a/drivers/video/fbdev/core/fbcon_ccw.c +++ b/drivers/video/fbdev/core/fbcon_ccw.c @@ -65,7 +65,7 @@ static void ccw_bmove(struct vc_data *vc, struct fb_info *info, int sy, { struct fbcon_ops *ops = info->fbcon_par; struct fb_copyarea area; - u32 vyres = GETVYRES(ops->p->scrollmode, info); + u32 vyres = GETVYRES(ops->p, info); area.sx = sy * vc->vc_font.height; area.sy = vyres - ((sx + width) * vc->vc_font.width); @@ -83,7 +83,7 @@ static void ccw_clear(struct vc_data *vc, struct fb_info *info, int sy, struct fbcon_ops *ops = info->fbcon_par; struct fb_fillrect region; int bgshift = (vc->vc_hi_font_mask) ? 13 : 12; - u32 vyres = GETVYRES(ops->p->scrollmode, info); + u32 vyres = GETVYRES(ops->p, info); region.color = attr_bgcol_ec(bgshift,vc,info); region.dx = sy * vc->vc_font.height; @@ -140,7 +140,7 @@ static void ccw_putcs(struct vc_data *vc, struct fb_info *info, u32 cnt, pitch, size; u32 attribute = get_attribute(info, scr_readw(s)); u8 *dst, *buf = NULL; - u32 vyres = GETVYRES(ops->p->scrollmode, info); + u32 vyres = GETVYRES(ops->p, info); if (!ops->fontbuffer) return; @@ -229,7 +229,7 @@ static void ccw_cursor(struct vc_data *vc, struct fb_info *info, int mode, int attribute, use_sw = vc->vc_cursor_type & CUR_SW; int err = 1, dx, dy; char *src; - u32 vyres = GETVYRES(ops->p->scrollmode, info); + u32 vyres = GETVYRES(ops->p, info); if (!ops->fontbuffer) return; @@ -387,7 +387,7 @@ static int ccw_update_start(struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; u32 yoffset; - u32 vyres = GETVYRES(ops->p->scrollmode, info); + u32 vyres = GETVYRES(ops->p, info); int err; yoffset = (vyres - info->var.yres) - ops->var.xoffset; diff --git a/drivers/video/fbdev/core/fbcon_cw.c b/drivers/video/fbdev/core/fbcon_cw.c index a34cbe8e98744b12ffee9e45fa834f2d7aab8a4f..cf03dc62f35d37869fd6aac1bb679285527a9f97 100644 --- a/drivers/video/fbdev/core/fbcon_cw.c +++ b/drivers/video/fbdev/core/fbcon_cw.c @@ -50,7 +50,7 @@ static void cw_bmove(struct vc_data *vc, struct fb_info *info, int sy, { struct fbcon_ops *ops = info->fbcon_par; struct fb_copyarea area; - u32 vxres = GETVXRES(ops->p->scrollmode, info); + u32 vxres = GETVXRES(ops->p, info); area.sx = vxres - ((sy + height) * vc->vc_font.height); area.sy = sx * vc->vc_font.width; @@ -68,7 +68,7 @@ static void cw_clear(struct vc_data *vc, struct fb_info *info, int sy, struct fbcon_ops *ops = info->fbcon_par; struct fb_fillrect region; int bgshift = (vc->vc_hi_font_mask) ? 13 : 12; - u32 vxres = GETVXRES(ops->p->scrollmode, info); + u32 vxres = GETVXRES(ops->p, info); region.color = attr_bgcol_ec(bgshift,vc,info); region.dx = vxres - ((sy + height) * vc->vc_font.height); @@ -125,7 +125,7 @@ static void cw_putcs(struct vc_data *vc, struct fb_info *info, u32 cnt, pitch, size; u32 attribute = get_attribute(info, scr_readw(s)); u8 *dst, *buf = NULL; - u32 vxres = GETVXRES(ops->p->scrollmode, info); + u32 vxres = GETVXRES(ops->p, info); if (!ops->fontbuffer) return; @@ -212,7 +212,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode, int attribute, use_sw = vc->vc_cursor_type & CUR_SW; int err = 1, dx, dy; char *src; - u32 vxres = GETVXRES(ops->p->scrollmode, info); + u32 vxres = GETVXRES(ops->p, info); if (!ops->fontbuffer) return; @@ -369,7 +369,7 @@ static void cw_cursor(struct vc_data *vc, struct fb_info *info, int mode, static int cw_update_start(struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; - u32 vxres = GETVXRES(ops->p->scrollmode, info); + u32 vxres = GETVXRES(ops->p, info); u32 xoffset; int err; diff --git a/drivers/video/fbdev/core/fbcon_rotate.h b/drivers/video/fbdev/core/fbcon_rotate.h index e233444cda664783d055b372929cafd2f4ac2e89..01cbe303b8a29592d3236add1f8b46db85d2b74f 100644 --- a/drivers/video/fbdev/core/fbcon_rotate.h +++ b/drivers/video/fbdev/core/fbcon_rotate.h @@ -12,11 +12,11 @@ #define _FBCON_ROTATE_H #define GETVYRES(s,i) ({ \ - (s == SCROLL_REDRAW || s == SCROLL_MOVE) ? \ + (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE) ? \ (i)->var.yres : (i)->var.yres_virtual; }) #define GETVXRES(s,i) ({ \ - (s == SCROLL_REDRAW || s == SCROLL_MOVE || !(i)->fix.xpanstep) ? \ + (fb_scrollmode(s) == SCROLL_REDRAW || fb_scrollmode(s) == SCROLL_MOVE || !(i)->fix.xpanstep) ? \ (i)->var.xres : (i)->var.xres_virtual; }) diff --git a/drivers/video/fbdev/core/fbcon_ud.c b/drivers/video/fbdev/core/fbcon_ud.c index 199cbc7abe35315d5adb9c79aaa29ca20eedcd32..c5d2da731d68629b2a2fc4ce15fcdc5d654ced21 100644 --- a/drivers/video/fbdev/core/fbcon_ud.c +++ b/drivers/video/fbdev/core/fbcon_ud.c @@ -50,8 +50,8 @@ static void ud_bmove(struct vc_data *vc, struct fb_info *info, int sy, { struct fbcon_ops *ops = info->fbcon_par; struct fb_copyarea area; - u32 vyres = GETVYRES(ops->p->scrollmode, info); - u32 vxres = GETVXRES(ops->p->scrollmode, info); + u32 vyres = GETVYRES(ops->p, info); + u32 vxres = GETVXRES(ops->p, info); area.sy = vyres - ((sy + height) * vc->vc_font.height); area.sx = vxres - ((sx + width) * vc->vc_font.width); @@ -69,8 +69,8 @@ static void ud_clear(struct vc_data *vc, struct fb_info *info, int sy, struct fbcon_ops *ops = info->fbcon_par; struct fb_fillrect region; int bgshift = (vc->vc_hi_font_mask) ? 13 : 12; - u32 vyres = GETVYRES(ops->p->scrollmode, info); - u32 vxres = GETVXRES(ops->p->scrollmode, info); + u32 vyres = GETVYRES(ops->p, info); + u32 vxres = GETVXRES(ops->p, info); region.color = attr_bgcol_ec(bgshift,vc,info); region.dy = vyres - ((sy + height) * vc->vc_font.height); @@ -162,8 +162,8 @@ static void ud_putcs(struct vc_data *vc, struct fb_info *info, u32 mod = vc->vc_font.width % 8, cnt, pitch, size; u32 attribute = get_attribute(info, scr_readw(s)); u8 *dst, *buf = NULL; - u32 vyres = GETVYRES(ops->p->scrollmode, info); - u32 vxres = GETVXRES(ops->p->scrollmode, info); + u32 vyres = GETVYRES(ops->p, info); + u32 vxres = GETVXRES(ops->p, info); if (!ops->fontbuffer) return; @@ -259,8 +259,8 @@ static void ud_cursor(struct vc_data *vc, struct fb_info *info, int mode, int attribute, use_sw = vc->vc_cursor_type & CUR_SW; int err = 1, dx, dy; char *src; - u32 vyres = GETVYRES(ops->p->scrollmode, info); - u32 vxres = GETVXRES(ops->p->scrollmode, info); + u32 vyres = GETVYRES(ops->p, info); + u32 vxres = GETVXRES(ops->p, info); if (!ops->fontbuffer) return; @@ -410,8 +410,8 @@ static int ud_update_start(struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; int xoffset, yoffset; - u32 vyres = GETVYRES(ops->p->scrollmode, info); - u32 vxres = GETVXRES(ops->p->scrollmode, info); + u32 vyres = GETVYRES(ops->p, info); + u32 vxres = GETVXRES(ops->p, info); int err; xoffset = vxres - info->var.xres - ops->var.xoffset; diff --git a/drivers/video/fbdev/gbefb.c b/drivers/video/fbdev/gbefb.c index 31270a8986e8e4b4866b053744b1089b00ce1ccc..8f8ca1f88fe211688273be6f49255e737148acb6 100644 --- a/drivers/video/fbdev/gbefb.c +++ b/drivers/video/fbdev/gbefb.c @@ -1269,7 +1269,7 @@ static struct platform_device *gbefb_device; static int __init gbefb_init(void) { int ret = platform_driver_register(&gbefb_driver); - if (!ret) { + if (IS_ENABLED(CONFIG_SGI_IP32) && !ret) { gbefb_device = platform_device_alloc("gbefb", 0); if (gbefb_device) { ret = platform_device_add(gbefb_device); diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 4dc9077dd2ac04d07401e210b13fcb052b0f6ab1..3c309ab20887473d3f7f1b97fc1fbf62d9349856 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -286,8 +286,6 @@ struct hvfb_par { static uint screen_width = HVFB_WIDTH; static uint screen_height = HVFB_HEIGHT; -static uint screen_width_max = HVFB_WIDTH; -static uint screen_height_max = HVFB_HEIGHT; static uint screen_depth; static uint screen_fb_size; static uint dio_fb_size; /* FB size for deferred IO */ @@ -581,7 +579,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev) int ret = 0; unsigned long t; u8 index; - int i; memset(msg, 0, sizeof(struct synthvid_msg)); msg->vid_hdr.type = SYNTHVID_RESOLUTION_REQUEST; @@ -612,13 +609,6 @@ static int synthvid_get_supported_resolution(struct hv_device *hdev) goto out; } - for (i = 0; i < msg->resolution_resp.resolution_count; i++) { - screen_width_max = max_t(unsigned int, screen_width_max, - msg->resolution_resp.supported_resolution[i].width); - screen_height_max = max_t(unsigned int, screen_height_max, - msg->resolution_resp.supported_resolution[i].height); - } - screen_width = msg->resolution_resp.supported_resolution[index].width; screen_height = @@ -940,7 +930,7 @@ static void hvfb_get_option(struct fb_info *info) if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN || (synthvid_ver_ge(par->synthvid_version, SYNTHVID_VERSION_WIN10) && - (x > screen_width_max || y > screen_height_max)) || + (x * y * screen_depth / 8 > screen_fb_size)) || (par->synthvid_version == SYNTHVID_VERSION_WIN8 && x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) || (par->synthvid_version == SYNTHVID_VERSION_WIN7 && @@ -1193,8 +1183,8 @@ static int hvfb_probe(struct hv_device *hdev, } hvfb_get_option(info); - pr_info("Screen resolution: %dx%d, Color depth: %d\n", - screen_width, screen_height, screen_depth); + pr_info("Screen resolution: %dx%d, Color depth: %d, Frame buffer size: %d\n", + screen_width, screen_height, screen_depth, screen_fb_size); ret = hvfb_getmem(hdev, info); if (ret) { diff --git a/drivers/video/fbdev/vga16fb.c b/drivers/video/fbdev/vga16fb.c index 1e8a38a7967d890a9e44fb245e2c6ce11f7b0f5c..5c6e9dc88060ba30187bb5727a1c519c32d8d09e 100644 --- a/drivers/video/fbdev/vga16fb.c +++ b/drivers/video/fbdev/vga16fb.c @@ -184,6 +184,25 @@ static inline void setindex(int index) vga_io_w(VGA_GFX_I, index); } +/* Check if the video mode is supported by the driver */ +static inline int check_mode_supported(void) +{ + /* non-x86 architectures treat orig_video_isVGA as a boolean flag */ +#if defined(CONFIG_X86) + /* only EGA and VGA in 16 color graphic mode are supported */ + if (screen_info.orig_video_isVGA != VIDEO_TYPE_EGAC && + screen_info.orig_video_isVGA != VIDEO_TYPE_VGAC) + return -ENODEV; + + if (screen_info.orig_video_mode != 0x0D && /* 320x200/4 (EGA) */ + screen_info.orig_video_mode != 0x0E && /* 640x200/4 (EGA) */ + screen_info.orig_video_mode != 0x10 && /* 640x350/4 (EGA) */ + screen_info.orig_video_mode != 0x12) /* 640x480/4 (VGA) */ + return -ENODEV; +#endif + return 0; +} + static void vga16fb_pan_var(struct fb_info *info, struct fb_var_screeninfo *var) { @@ -1422,6 +1441,11 @@ static int __init vga16fb_init(void) vga16fb_setup(option); #endif + + ret = check_mode_supported(); + if (ret) + return ret; + ret = platform_driver_register(&vga16fb_driver); if (!ret) { diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 84b5dec5d29cd762bfa583f24f6312b250012bef..441bc057896f5fd6521791467f2c152385a1a5b0 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -167,14 +167,13 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status) } EXPORT_SYMBOL_GPL(virtio_add_status); -int virtio_finalize_features(struct virtio_device *dev) +/* Do some validation, then set FEATURES_OK */ +static int virtio_features_ok(struct virtio_device *dev) { - int ret = dev->config->finalize_features(dev); unsigned status; + int ret; might_sleep(); - if (ret) - return ret; ret = arch_has_restricted_virtio_memory_access(); if (ret) { @@ -203,7 +202,6 @@ int virtio_finalize_features(struct virtio_device *dev) } return 0; } -EXPORT_SYMBOL_GPL(virtio_finalize_features); static int virtio_dev_probe(struct device *_d) { @@ -250,13 +248,26 @@ static int virtio_dev_probe(struct device *_d) if (device_features & (1ULL << i)) __virtio_set_bit(dev, i); + err = dev->config->finalize_features(dev); + if (err) + goto err; + if (drv->validate) { + u64 features = dev->features; + err = drv->validate(dev); if (err) goto err; + + /* Did validation change any features? Then write them again. */ + if (features != dev->features) { + err = dev->config->finalize_features(dev); + if (err) + goto err; + } } - err = virtio_finalize_features(dev); + err = virtio_features_ok(dev); if (err) goto err; @@ -427,7 +438,11 @@ int virtio_device_restore(struct virtio_device *dev) /* We have a driver! */ virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER); - ret = virtio_finalize_features(dev); + ret = dev->config->finalize_features(dev); + if (ret) + goto err; + + ret = virtio_features_ok(dev); if (ret) goto err; diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 3d6ae5a5e252e069a66c342cd745802e09247bf5..2158e16251334f1d8bd6048efc0657d8dbe40896 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -63,12 +63,13 @@ static void vp_iowrite64_twopart(u64 val, vp_iowrite32(val >> 32, hi); } -static void __iomem *map_capability(struct pci_dev *dev, int off, +static void __iomem *map_capability(struct virtio_pci_device *vp_dev, int off, size_t minlen, u32 align, u32 start, u32 size, size_t *len) { + struct pci_dev *dev = vp_dev->pci_dev; u8 bar; u32 offset, length; void __iomem *p; @@ -81,6 +82,13 @@ static void __iomem *map_capability(struct pci_dev *dev, int off, pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length), &length); + /* Check if the BAR may have changed since we requested the region. */ + if (bar >= PCI_STD_NUM_BARS || !(vp_dev->modern_bars & (1 << bar))) { + dev_err(&dev->dev, + "virtio_pci: bar unexpectedly changed to %u\n", bar); + return NULL; + } + if (length <= start) { dev_err(&dev->dev, "virtio_pci: bad capability len %u (>%u expected)\n", @@ -370,7 +378,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, vq->priv = (void __force *)vp_dev->notify_base + off * vp_dev->notify_offset_multiplier; } else { - vq->priv = (void __force *)map_capability(vp_dev->pci_dev, + vq->priv = (void __force *)map_capability(vp_dev, vp_dev->notify_map_cap, 2, 2, off * vp_dev->notify_offset_multiplier, 2, NULL); @@ -451,7 +459,7 @@ static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id, for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); pos > 0; pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) { - u8 type, cap_len, id; + u8 type, cap_len, id, res_bar; u32 tmp32; u64 res_offset, res_length; @@ -473,9 +481,14 @@ static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id, if (id != required_id) continue; - /* Type, and ID match, looks good */ pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, - bar), bar); + bar), &res_bar); + if (res_bar >= PCI_STD_NUM_BARS) + continue; + + /* Type and ID match, and the BAR value isn't reserved. + * Looks good. + */ /* Read the lower 32bit of length and offset */ pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap, @@ -495,6 +508,7 @@ static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id, length_hi), &tmp32); res_length |= ((u64)tmp32) << 32; + *bar = res_bar; *offset = res_offset; *len = res_length; @@ -597,7 +611,7 @@ static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type, &bar); /* Ignore structures with reserved BAR values */ - if (bar > 0x5) + if (bar >= PCI_STD_NUM_BARS) continue; if (type == cfg_type) { @@ -743,13 +757,13 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) return err; err = -EINVAL; - vp_dev->common = map_capability(pci_dev, common, + vp_dev->common = map_capability(vp_dev, common, sizeof(struct virtio_pci_common_cfg), 4, 0, sizeof(struct virtio_pci_common_cfg), NULL); if (!vp_dev->common) goto err_map_common; - vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8), 1, + vp_dev->isr = map_capability(vp_dev, isr, sizeof(u8), 1, 0, 1, NULL); if (!vp_dev->isr) @@ -776,7 +790,7 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) * Otherwise, map each VQ individually later. */ if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) { - vp_dev->notify_base = map_capability(pci_dev, notify, 2, 2, + vp_dev->notify_base = map_capability(vp_dev, notify, 2, 2, 0, notify_length, &vp_dev->notify_len); if (!vp_dev->notify_base) @@ -789,7 +803,7 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) * is more than enough for all existing devices. */ if (device) { - vp_dev->device = map_capability(pci_dev, device, 0, 4, + vp_dev->device = map_capability(vp_dev, device, 0, 4, 0, PAGE_SIZE, &vp_dev->device_len); if (!vp_dev->device) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 6c730d6d50f711f24877489279b617d1c9fef7ff..3d5c050d0a3a3b3e06b3086468ef4b91f0feee3d 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -74,14 +74,14 @@ struct vring_desc_state_packed { void *data; /* Data for callback. */ struct vring_packed_desc *indir_desc; /* Indirect descriptor, if any. */ u16 num; /* Descriptor list length. */ - u16 next; /* The next desc state in a list. */ u16 last; /* The last desc state in a list. */ }; -struct vring_desc_extra_packed { +struct vring_desc_extra { dma_addr_t addr; /* Buffer DMA addr. */ u32 len; /* Buffer length. */ u16 flags; /* Descriptor flags. */ + u16 next; /* The next desc state in a list. */ }; struct vring_virtqueue { @@ -130,6 +130,7 @@ struct vring_virtqueue { /* Per-descriptor state. */ struct vring_desc_state_split *desc_state; + struct vring_desc_extra *desc_extra; /* DMA address and size information */ dma_addr_t queue_dma_addr; @@ -166,7 +167,7 @@ struct vring_virtqueue { /* Per-descriptor state. */ struct vring_desc_state_packed *desc_state; - struct vring_desc_extra_packed *desc_extra; + struct vring_desc_extra *desc_extra; /* DMA address and size information */ dma_addr_t ring_dma_addr; @@ -263,7 +264,7 @@ size_t virtio_max_dma_size(struct virtio_device *vdev) size_t max_segment_size = SIZE_MAX; if (vring_use_dma_api(vdev)) - max_segment_size = dma_max_mapping_size(&vdev->dev); + max_segment_size = dma_max_mapping_size(vdev->dev.parent); return max_segment_size; } @@ -364,8 +365,8 @@ static int vring_mapping_error(const struct vring_virtqueue *vq, * Split ring specific functions - *_split(). */ -static void vring_unmap_one_split(const struct vring_virtqueue *vq, - struct vring_desc *desc) +static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, + struct vring_desc *desc) { u16 flags; @@ -389,6 +390,35 @@ static void vring_unmap_one_split(const struct vring_virtqueue *vq, } } +static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, + unsigned int i) +{ + struct vring_desc_extra *extra = vq->split.desc_extra; + u16 flags; + + if (!vq->use_dma_api) + goto out; + + flags = extra[i].flags; + + if (flags & VRING_DESC_F_INDIRECT) { + dma_unmap_single(vring_dma_dev(vq), + extra[i].addr, + extra[i].len, + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } else { + dma_unmap_page(vring_dma_dev(vq), + extra[i].addr, + extra[i].len, + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); + } + +out: + return extra[i].next; +} + static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, unsigned int total_sg, gfp_t gfp) @@ -412,6 +442,35 @@ static struct vring_desc *alloc_indirect_split(struct virtqueue *_vq, return desc; } +static inline unsigned int virtqueue_add_desc_split(struct virtqueue *vq, + struct vring_desc *desc, + unsigned int i, + dma_addr_t addr, + unsigned int len, + u16 flags, + bool indirect) +{ + struct vring_virtqueue *vring = to_vvq(vq); + struct vring_desc_extra *extra = vring->split.desc_extra; + u16 next; + + desc[i].flags = cpu_to_virtio16(vq->vdev, flags); + desc[i].addr = cpu_to_virtio64(vq->vdev, addr); + desc[i].len = cpu_to_virtio32(vq->vdev, len); + + if (!indirect) { + next = extra[i].next; + desc[i].next = cpu_to_virtio16(vq->vdev, next); + + extra[i].addr = addr; + extra[i].len = len; + extra[i].flags = flags; + } else + next = virtio16_to_cpu(vq->vdev, desc[i].next); + + return next; +} + static inline int virtqueue_add_split(struct virtqueue *_vq, struct scatterlist *sgs[], unsigned int total_sg, @@ -484,11 +543,13 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, if (vring_mapping_error(vq, addr)) goto unmap_release; - desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT); - desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); - desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); prev = i; - i = virtio16_to_cpu(_vq->vdev, desc[i].next); + /* Note that we trust indirect descriptor + * table since it use stream DMA mapping. + */ + i = virtqueue_add_desc_split(_vq, desc, i, addr, sg->length, + VRING_DESC_F_NEXT, + indirect); } } for (; n < (out_sgs + in_sgs); n++) { @@ -497,15 +558,22 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, if (vring_mapping_error(vq, addr)) goto unmap_release; - desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE); - desc[i].addr = cpu_to_virtio64(_vq->vdev, addr); - desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length); prev = i; - i = virtio16_to_cpu(_vq->vdev, desc[i].next); + /* Note that we trust indirect descriptor + * table since it use stream DMA mapping. + */ + i = virtqueue_add_desc_split(_vq, desc, i, addr, + sg->length, + VRING_DESC_F_NEXT | + VRING_DESC_F_WRITE, + indirect); } } /* Last one doesn't continue. */ desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT); + if (!indirect && vq->use_dma_api) + vq->split.desc_extra[prev & (vq->split.vring.num - 1)].flags &= + ~VRING_DESC_F_NEXT; if (indirect) { /* Now that the indirect table is filled in, map it. */ @@ -515,13 +583,11 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, if (vring_mapping_error(vq, addr)) goto unmap_release; - vq->split.vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, - VRING_DESC_F_INDIRECT); - vq->split.vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, - addr); - - vq->split.vring.desc[head].len = cpu_to_virtio32(_vq->vdev, - total_sg * sizeof(struct vring_desc)); + virtqueue_add_desc_split(_vq, vq->split.vring.desc, + head, addr, + total_sg * sizeof(struct vring_desc), + VRING_DESC_F_INDIRECT, + false); } /* We're using some buffers from the free list. */ @@ -529,8 +595,7 @@ static inline int virtqueue_add_split(struct virtqueue *_vq, /* Update free pointer */ if (indirect) - vq->free_head = virtio16_to_cpu(_vq->vdev, - vq->split.vring.desc[head].next); + vq->free_head = vq->split.desc_extra[head].next; else vq->free_head = i; @@ -575,8 +640,11 @@ unmap_release: for (n = 0; n < total_sg; n++) { if (i == err_idx) break; - vring_unmap_one_split(vq, &desc[i]); - i = virtio16_to_cpu(_vq->vdev, desc[i].next); + if (indirect) { + vring_unmap_one_split_indirect(vq, &desc[i]); + i = virtio16_to_cpu(_vq->vdev, desc[i].next); + } else + i = vring_unmap_one_split(vq, i); } if (indirect) @@ -630,14 +698,13 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, i = head; while (vq->split.vring.desc[i].flags & nextflag) { - vring_unmap_one_split(vq, &vq->split.vring.desc[i]); - i = virtio16_to_cpu(vq->vq.vdev, vq->split.vring.desc[i].next); + vring_unmap_one_split(vq, i); + i = vq->split.desc_extra[i].next; vq->vq.num_free++; } - vring_unmap_one_split(vq, &vq->split.vring.desc[i]); - vq->split.vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, - vq->free_head); + vring_unmap_one_split(vq, i); + vq->split.desc_extra[i].next = vq->free_head; vq->free_head = head; /* Plus final descriptor */ @@ -652,15 +719,14 @@ static void detach_buf_split(struct vring_virtqueue *vq, unsigned int head, if (!indir_desc) return; - len = virtio32_to_cpu(vq->vq.vdev, - vq->split.vring.desc[head].len); + len = vq->split.desc_extra[head].len; - BUG_ON(!(vq->split.vring.desc[head].flags & - cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))); + BUG_ON(!(vq->split.desc_extra[head].flags & + VRING_DESC_F_INDIRECT)); BUG_ON(len == 0 || len % sizeof(struct vring_desc)); for (j = 0; j < len / sizeof(struct vring_desc); j++) - vring_unmap_one_split(vq, &indir_desc[j]); + vring_unmap_one_split_indirect(vq, &indir_desc[j]); kfree(indir_desc); vq->split.desc_state[head].indir_desc = NULL; @@ -912,7 +978,7 @@ static struct virtqueue *vring_create_virtqueue_split( */ static void vring_unmap_state_packed(const struct vring_virtqueue *vq, - struct vring_desc_extra_packed *state) + struct vring_desc_extra *state) { u16 flags; @@ -992,6 +1058,8 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, head = vq->packed.next_avail_idx; desc = alloc_indirect_packed(total_sg, gfp); + if (!desc) + return -ENOMEM; if (unlikely(vq->vq.num_free < 1)) { pr_debug("Can't add buf len 1 - avail = 0\n"); @@ -1061,7 +1129,7 @@ static int virtqueue_add_indirect_packed(struct vring_virtqueue *vq, 1 << VRING_PACKED_DESC_F_USED; } vq->packed.next_avail_idx = n; - vq->free_head = vq->packed.desc_state[id].next; + vq->free_head = vq->packed.desc_extra[id].next; /* Store token and indirect buffer state. */ vq->packed.desc_state[id].num = 1; @@ -1103,6 +1171,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, unsigned int i, n, c, descs_used, err_idx; __le16 head_flags, flags; u16 head, id, prev, curr, avail_used_flags; + int err; START_USE(vq); @@ -1118,9 +1187,16 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, BUG_ON(total_sg == 0); - if (virtqueue_use_indirect(_vq, total_sg)) - return virtqueue_add_indirect_packed(vq, sgs, total_sg, - out_sgs, in_sgs, data, gfp); + if (virtqueue_use_indirect(_vq, total_sg)) { + err = virtqueue_add_indirect_packed(vq, sgs, total_sg, out_sgs, + in_sgs, data, gfp); + if (err != -ENOMEM) { + END_USE(vq); + return err; + } + + /* fall back on direct */ + } head = vq->packed.next_avail_idx; avail_used_flags = vq->packed.avail_used_flags; @@ -1169,7 +1245,7 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, le16_to_cpu(flags); } prev = curr; - curr = vq->packed.desc_state[curr].next; + curr = vq->packed.desc_extra[curr].next; if ((unlikely(++i >= vq->packed.vring.num))) { i = 0; @@ -1213,13 +1289,16 @@ static inline int virtqueue_add_packed(struct virtqueue *_vq, unmap_release: err_idx = i; i = head; + curr = vq->free_head; vq->packed.avail_used_flags = avail_used_flags; for (n = 0; n < total_sg; n++) { if (i == err_idx) break; - vring_unmap_desc_packed(vq, &desc[i]); + vring_unmap_state_packed(vq, + &vq->packed.desc_extra[curr]); + curr = vq->packed.desc_extra[curr].next; i++; if (i >= vq->packed.vring.num) i = 0; @@ -1290,7 +1369,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, /* Clear data ptr. */ state->data = NULL; - vq->packed.desc_state[state->last].next = vq->free_head; + vq->packed.desc_extra[state->last].next = vq->free_head; vq->free_head = id; vq->vq.num_free += state->num; @@ -1299,7 +1378,7 @@ static void detach_buf_packed(struct vring_virtqueue *vq, for (i = 0; i < state->num; i++) { vring_unmap_state_packed(vq, &vq->packed.desc_extra[curr]); - curr = vq->packed.desc_state[curr].next; + curr = vq->packed.desc_extra[curr].next; } } @@ -1550,6 +1629,25 @@ static void *virtqueue_detach_unused_buf_packed(struct virtqueue *_vq) return NULL; } +static struct vring_desc_extra *vring_alloc_desc_extra(struct vring_virtqueue *vq, + unsigned int num) +{ + struct vring_desc_extra *desc_extra; + unsigned int i; + + desc_extra = kmalloc_array(num, sizeof(struct vring_desc_extra), + GFP_KERNEL); + if (!desc_extra) + return NULL; + + memset(desc_extra, 0, num * sizeof(struct vring_desc_extra)); + + for (i = 0; i < num - 1; i++) + desc_extra[i].next = i + 1; + + return desc_extra; +} + static struct virtqueue *vring_create_virtqueue_packed( unsigned int index, unsigned int num, @@ -1567,7 +1665,6 @@ static struct virtqueue *vring_create_virtqueue_packed( struct vring_packed_desc_event *driver, *device; dma_addr_t ring_dma_addr, driver_event_dma_addr, device_event_dma_addr; size_t ring_size_in_bytes, event_size_in_bytes; - unsigned int i; ring_size_in_bytes = num * sizeof(struct vring_packed_desc); @@ -1649,18 +1746,11 @@ static struct virtqueue *vring_create_virtqueue_packed( /* Put everything in free lists. */ vq->free_head = 0; - for (i = 0; i < num-1; i++) - vq->packed.desc_state[i].next = i + 1; - vq->packed.desc_extra = kmalloc_array(num, - sizeof(struct vring_desc_extra_packed), - GFP_KERNEL); + vq->packed.desc_extra = vring_alloc_desc_extra(vq, num); if (!vq->packed.desc_extra) goto err_desc_extra; - memset(vq->packed.desc_extra, 0, - num * sizeof(struct vring_desc_extra_packed)); - /* No callback? Tell other side not to bother us. */ if (!callback) { vq->packed.event_flags_shadow = VRING_PACKED_EVENT_FLAG_DISABLE; @@ -2064,7 +2154,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, void (*callback)(struct virtqueue *), const char *name) { - unsigned int i; struct vring_virtqueue *vq; if (virtio_has_feature(vdev, VIRTIO_F_RING_PACKED)) @@ -2116,15 +2205,15 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->split.desc_state = kmalloc_array(vring.num, sizeof(struct vring_desc_state_split), GFP_KERNEL); - if (!vq->split.desc_state) { - kfree(vq); - return NULL; - } + if (!vq->split.desc_state) + goto err_state; + + vq->split.desc_extra = vring_alloc_desc_extra(vq, vring.num); + if (!vq->split.desc_extra) + goto err_extra; /* Put everything in free lists. */ vq->free_head = 0; - for (i = 0; i < vring.num-1; i++) - vq->split.vring.desc[i].next = cpu_to_virtio16(vdev, i + 1); memset(vq->split.desc_state, 0, vring.num * sizeof(struct vring_desc_state_split)); @@ -2132,6 +2221,12 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, list_add_tail(&vq->vq.list, &vdev->vqs); spin_unlock(&vdev->vqs_list_lock); return &vq->vq; + +err_extra: + kfree(vq->split.desc_state); +err_state: + kfree(vq); + return NULL; } EXPORT_SYMBOL_GPL(__vring_new_virtqueue); @@ -2212,8 +2307,10 @@ void vring_del_virtqueue(struct virtqueue *_vq) vq->split.queue_dma_addr); } } - if (!vq->packed_ring) + if (!vq->packed_ring) { kfree(vq->split.desc_state); + kfree(vq->split.desc_extra); + } spin_lock(&vq->vq.vdev->vqs_list_lock); list_del(&_vq->list); spin_unlock(&vq->vq.vdev->vqs_list_lock); diff --git a/drivers/w1/slaves/w1_ds28e04.c b/drivers/w1/slaves/w1_ds28e04.c index e4f336111edc6feb51cf0c602b19585bed1b87a5..6cef6e2edb892d710f58b5ee8c606b28554dde7f 100644 --- a/drivers/w1/slaves/w1_ds28e04.c +++ b/drivers/w1/slaves/w1_ds28e04.c @@ -32,7 +32,7 @@ static int w1_strong_pullup = 1; module_param_named(strong_pullup, w1_strong_pullup, int, 0); /* enable/disable CRC checking on DS28E04-100 memory accesses */ -static char w1_enable_crccheck = 1; +static bool w1_enable_crccheck = true; #define W1_EEPROM_SIZE 512 #define W1_PAGE_COUNT 16 @@ -339,32 +339,18 @@ static BIN_ATTR_RW(pio, 1); static ssize_t crccheck_show(struct device *dev, struct device_attribute *attr, char *buf) { - if (put_user(w1_enable_crccheck + 0x30, buf)) - return -EFAULT; - - return sizeof(w1_enable_crccheck); + return sysfs_emit(buf, "%d\n", w1_enable_crccheck); } static ssize_t crccheck_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - char val; - - if (count != 1 || !buf) - return -EINVAL; + int err = kstrtobool(buf, &w1_enable_crccheck); - if (get_user(val, buf)) - return -EFAULT; + if (err) + return err; - /* convert to decimal */ - val = val - 0x30; - if (val != 0 && val != 1) - return -EINVAL; - - /* set the new value */ - w1_enable_crccheck = val; - - return sizeof(w1_enable_crccheck); + return count; } static DEVICE_ATTR_RW(crccheck); diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index db935d6b10c277ddd5b947fd663fc6cd5b764f17..01ce3f41cc219f2b2c841f2936d36d768b67020b 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1723,7 +1723,7 @@ config SIBYTE_WDOG config AR7_WDT tristate "TI AR7 Watchdog Timer" - depends on AR7 || (MIPS && COMPILE_TEST) + depends on AR7 || (MIPS && 32BIT && COMPILE_TEST) help Hardware driver for the TI AR7 Watchdog Timer. diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index f60beec1bbaea3451379f203a500d4d9d113ef7f..f7d82d261913324441bb37258643d05c9ce2bc60 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -228,15 +228,17 @@ static int watchdog_set_timeout(int timeout) mutex_lock(&watchdog.lock); - watchdog.timeout = timeout; if (timeout > 0xff) { watchdog.timer_val = DIV_ROUND_UP(timeout, 60); watchdog.minutes_mode = true; + timeout = watchdog.timer_val * 60; } else { watchdog.timer_val = timeout; watchdog.minutes_mode = false; } + watchdog.timeout = timeout; + mutex_unlock(&watchdog.lock); return 0; diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c index 1616f93dfad7fcdccf7d74a7dc18d979a1643e7a..74d785b2b478f4719c78eae1425f826e7368b526 100644 --- a/drivers/watchdog/omap_wdt.c +++ b/drivers/watchdog/omap_wdt.c @@ -268,8 +268,12 @@ static int omap_wdt_probe(struct platform_device *pdev) wdev->wdog.bootstatus = WDIOF_CARDRESET; } - if (!early_enable) + if (early_enable) { + omap_wdt_start(&wdev->wdog); + set_bit(WDOG_HW_RUNNING, &wdev->wdog.status); + } else { omap_wdt_disable(wdev); + } ret = watchdog_register_device(&wdev->wdog); if (ret) { diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 15d4b1ef19f8394fe1f15dd648bd6f4e021025ad..c5b02365a5fe728eb557184b8e344dbb812c37c0 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include @@ -73,6 +74,12 @@ #include #include +#undef MODULE_PARAM_PREFIX +#define MODULE_PARAM_PREFIX "xen." + +static uint __read_mostly balloon_boot_timeout = 180; +module_param(balloon_boot_timeout, uint, 0444); + static int xen_hotplug_unpopulated; #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG @@ -125,12 +132,12 @@ static struct ctl_table xen_root[] = { * BP_ECANCELED: error, balloon operation canceled. */ -enum bp_state { +static enum bp_state { BP_DONE, BP_WAIT, BP_EAGAIN, BP_ECANCELED -}; +} balloon_state = BP_DONE; /* Main waiting point for xen-balloon thread. */ static DECLARE_WAIT_QUEUE_HEAD(balloon_thread_wq); @@ -199,18 +206,15 @@ static struct page *balloon_next_page(struct page *page) return list_entry(next, struct page, lru); } -static enum bp_state update_schedule(enum bp_state state) +static void update_schedule(void) { - if (state == BP_WAIT) - return BP_WAIT; - - if (state == BP_ECANCELED) - return BP_ECANCELED; + if (balloon_state == BP_WAIT || balloon_state == BP_ECANCELED) + return; - if (state == BP_DONE) { + if (balloon_state == BP_DONE) { balloon_stats.schedule_delay = 1; balloon_stats.retry_count = 1; - return BP_DONE; + return; } ++balloon_stats.retry_count; @@ -219,7 +223,8 @@ static enum bp_state update_schedule(enum bp_state state) balloon_stats.retry_count > balloon_stats.max_retry_count) { balloon_stats.schedule_delay = 1; balloon_stats.retry_count = 1; - return BP_ECANCELED; + balloon_state = BP_ECANCELED; + return; } balloon_stats.schedule_delay <<= 1; @@ -227,7 +232,7 @@ static enum bp_state update_schedule(enum bp_state state) if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay) balloon_stats.schedule_delay = balloon_stats.max_schedule_delay; - return BP_EAGAIN; + balloon_state = BP_EAGAIN; } #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG @@ -491,12 +496,12 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp) } /* - * Stop waiting if either state is not BP_EAGAIN and ballooning action is - * needed, or if the credit has changed while state is BP_EAGAIN. + * Stop waiting if either state is BP_DONE and ballooning action is + * needed, or if the credit has changed while state is not BP_DONE. */ -static bool balloon_thread_cond(enum bp_state state, long credit) +static bool balloon_thread_cond(long credit) { - if (state != BP_EAGAIN) + if (balloon_state == BP_DONE) credit = 0; return current_credit() != credit || kthread_should_stop(); @@ -510,20 +515,28 @@ static bool balloon_thread_cond(enum bp_state state, long credit) */ static int balloon_thread(void *unused) { - enum bp_state state = BP_DONE; long credit; unsigned long timeout; set_freezable(); for (;;) { - if (state == BP_EAGAIN) - timeout = balloon_stats.schedule_delay * HZ; - else + switch (balloon_state) { + case BP_DONE: + case BP_ECANCELED: timeout = 3600 * HZ; + break; + case BP_EAGAIN: + timeout = balloon_stats.schedule_delay * HZ; + break; + case BP_WAIT: + timeout = HZ; + break; + } + credit = current_credit(); wait_event_freezable_timeout(balloon_thread_wq, - balloon_thread_cond(state, credit), timeout); + balloon_thread_cond(credit), timeout); if (kthread_should_stop()) return 0; @@ -534,22 +547,23 @@ static int balloon_thread(void *unused) if (credit > 0) { if (balloon_is_inflated()) - state = increase_reservation(credit); + balloon_state = increase_reservation(credit); else - state = reserve_additional_memory(); + balloon_state = reserve_additional_memory(); } if (credit < 0) { long n_pages; n_pages = min(-credit, si_mem_available()); - state = decrease_reservation(n_pages, GFP_BALLOON); - if (state == BP_DONE && n_pages != -credit && + balloon_state = decrease_reservation(n_pages, + GFP_BALLOON); + if (balloon_state == BP_DONE && n_pages != -credit && n_pages < totalreserve_pages) - state = BP_EAGAIN; + balloon_state = BP_EAGAIN; } - state = update_schedule(state); + update_schedule(); mutex_unlock(&balloon_mutex); @@ -756,3 +770,38 @@ static int __init balloon_init(void) return 0; } subsys_initcall(balloon_init); + +static int __init balloon_wait_finish(void) +{ + long credit, last_credit = 0; + unsigned long last_changed = 0; + + if (!xen_domain()) + return -ENODEV; + + /* PV guests don't need to wait. */ + if (xen_pv_domain() || !current_credit()) + return 0; + + pr_notice("Waiting for initial ballooning down having finished.\n"); + + while ((credit = current_credit()) < 0) { + if (credit != last_credit) { + last_changed = jiffies; + last_credit = credit; + } + if (balloon_state == BP_ECANCELED) { + pr_warn_once("Initial ballooning failed, %ld pages need to be freed.\n", + -credit); + if (jiffies - last_changed >= HZ * balloon_boot_timeout) + panic("Initial ballooning failed!\n"); + } + + schedule_timeout_interruptible(HZ / 10); + } + + pr_notice("Initial ballooning down finished.\n"); + + return 0; +} +late_initcall_sync(balloon_wait_finish); diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 3fa40c723e8e95cb85d8c4ee5449ac716cb43e63..edb0acd0b8323cad2165d23fb4764358af8a45b8 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -169,20 +169,14 @@ undo: __del_gref(gref); } - /* It's possible for the target domain to map the just-allocated grant - * references by blindly guessing their IDs; if this is done, then - * __del_gref will leave them in the queue_gref list. They need to be - * added to the global list so that we can free them when they are no - * longer referenced. - */ - if (unlikely(!list_empty(&queue_gref))) - list_splice_tail(&queue_gref, &gref_list); mutex_unlock(&gref_mutex); return rc; } static void __del_gref(struct gntalloc_gref *gref) { + unsigned long addr; + if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { uint8_t *tmp = kmap(gref->page); tmp[gref->notify.pgoff] = 0; @@ -196,21 +190,16 @@ static void __del_gref(struct gntalloc_gref *gref) gref->notify.flags = 0; if (gref->gref_id) { - if (gnttab_query_foreign_access(gref->gref_id)) - return; - - if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) - return; - - gnttab_free_grant_reference(gref->gref_id); + if (gref->page) { + addr = (unsigned long)page_to_virt(gref->page); + gnttab_end_foreign_access(gref->gref_id, 0, addr); + } else + gnttab_free_grant_reference(gref->gref_id); } gref_size--; list_del(&gref->next_gref); - if (gref->page) - __free_page(gref->page); - kfree(gref); } diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index b9651f797676c47b036c6f7c173ee9130383bcc4..54778aadf618da4a5612404bd4f45851b47aa2df 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -240,13 +240,13 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map) if (!refcount_dec_and_test(&map->users)) return; + if (map->pages && !use_ptemod) + unmap_grant_pages(map, 0, map->count); + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); evtchn_put(map->notify.event); } - - if (map->pages && !use_ptemod) - unmap_grant_pages(map, 0, map->count); gntdev_free_map(map); } diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 3729bea0c98956ac6124c01260dc4d7423445dca..5c83d41766c8522cb0fbf65f28469b818f7b1ed0 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -134,12 +134,9 @@ struct gnttab_ops { */ unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref); /* - * Query the status of a grant entry. Ref parameter is reference of - * queried grant entry, return value is the status of queried entry. - * Detailed status(writing/reading) can be gotten from the return value - * by bit operations. + * Read the frame number related to a given grant reference. */ - int (*query_foreign_access)(grant_ref_t ref); + unsigned long (*read_frame)(grant_ref_t ref); }; struct unmap_refs_callback_data { @@ -284,22 +281,6 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); -static int gnttab_query_foreign_access_v1(grant_ref_t ref) -{ - return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); -} - -static int gnttab_query_foreign_access_v2(grant_ref_t ref) -{ - return grstatus[ref] & (GTF_reading|GTF_writing); -} - -int gnttab_query_foreign_access(grant_ref_t ref) -{ - return gnttab_interface->query_foreign_access(ref); -} -EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); - static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) { u16 flags, nflags; @@ -353,6 +334,16 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); +static unsigned long gnttab_read_frame_v1(grant_ref_t ref) +{ + return gnttab_shared.v1[ref].frame; +} + +static unsigned long gnttab_read_frame_v2(grant_ref_t ref) +{ + return gnttab_shared.v2[ref].full_page.frame; +} + struct deferred_entry { struct list_head list; grant_ref_t ref; @@ -382,12 +373,9 @@ static void gnttab_handle_deferred(struct timer_list *unused) spin_unlock_irqrestore(&gnttab_list_lock, flags); if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) { put_free_entry(entry->ref); - if (entry->page) { - pr_debug("freeing g.e. %#x (pfn %#lx)\n", - entry->ref, page_to_pfn(entry->page)); - put_page(entry->page); - } else - pr_info("freeing g.e. %#x\n", entry->ref); + pr_debug("freeing g.e. %#x (pfn %#lx)\n", + entry->ref, page_to_pfn(entry->page)); + put_page(entry->page); kfree(entry); entry = NULL; } else { @@ -412,9 +400,18 @@ static void gnttab_handle_deferred(struct timer_list *unused) static void gnttab_add_deferred(grant_ref_t ref, bool readonly, struct page *page) { - struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + struct deferred_entry *entry; + gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL; const char *what = KERN_WARNING "leaking"; + entry = kmalloc(sizeof(*entry), gfp); + if (!page) { + unsigned long gfn = gnttab_interface->read_frame(ref); + + page = pfn_to_page(gfn_to_pfn(gfn)); + get_page(page); + } + if (entry) { unsigned long flags; @@ -435,11 +432,21 @@ static void gnttab_add_deferred(grant_ref_t ref, bool readonly, what, ref, page ? page_to_pfn(page) : -1); } +int gnttab_try_end_foreign_access(grant_ref_t ref) +{ + int ret = _gnttab_end_foreign_access_ref(ref, 0); + + if (ret) + put_free_entry(ref); + + return ret; +} +EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access); + void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page) { - if (gnttab_end_foreign_access_ref(ref, readonly)) { - put_free_entry(ref); + if (gnttab_try_end_foreign_access(ref)) { if (page != 0) put_page(virt_to_page(page)); } else @@ -1417,7 +1424,7 @@ static const struct gnttab_ops gnttab_v1_ops = { .update_entry = gnttab_update_entry_v1, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1, - .query_foreign_access = gnttab_query_foreign_access_v1, + .read_frame = gnttab_read_frame_v1, }; static const struct gnttab_ops gnttab_v2_ops = { @@ -1429,7 +1436,7 @@ static const struct gnttab_ops gnttab_v2_ops = { .update_entry = gnttab_update_entry_v2, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, - .query_foreign_access = gnttab_query_foreign_access_v2, + .read_frame = gnttab_read_frame_v2, }; static bool gnttab_need_v2(void) diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 720a7b7abd46d690f2ed86f544064403ab4f1647..fe8df32bb612b253b790a4f6e25abfc4ecfc087a 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -803,11 +803,12 @@ static long privcmd_ioctl_mmap_resource(struct file *file, unsigned int domid = (xdata.flags & XENMEM_rsrc_acq_caller_owned) ? DOMID_SELF : kdata.dom; - int num; + int num, *errs = (int *)pfns; + BUILD_BUG_ON(sizeof(*errs) > sizeof(*pfns)); num = xen_remap_domain_mfn_array(vma, kdata.addr & PAGE_MASK, - pfns, kdata.num, (int *)pfns, + pfns, kdata.num, errs, vma->vm_page_prot, domid, vma->vm_private_data); @@ -817,7 +818,7 @@ static long privcmd_ioctl_mmap_resource(struct file *file, unsigned int i; for (i = 0; i < num; i++) { - rc = pfns[i]; + rc = errs[i]; if (rc < 0) break; } diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index 7984645b59563b7fc974ec4ecfcc10fb2dbf3e39..bbe337dc296e3d6c78ed8afcb48c3ac308b92ea2 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -337,8 +337,8 @@ static void free_active_ring(struct sock_mapping *map) if (!map->active.ring) return; - free_pages((unsigned long)map->active.data.in, - map->active.ring->ring_order); + free_pages_exact(map->active.data.in, + PAGE_SIZE << map->active.ring->ring_order); free_page((unsigned long)map->active.ring); } @@ -352,8 +352,8 @@ static int alloc_active_ring(struct sock_mapping *map) goto out; map->active.ring->ring_order = PVCALLS_RING_ORDER; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - PVCALLS_RING_ORDER); + bytes = alloc_pages_exact(PAGE_SIZE << PVCALLS_RING_ORDER, + GFP_KERNEL | __GFP_ZERO); if (!bytes) goto out; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 936403a6ba588203494f226b66760b1da0023c79..e40902129166c8d803560cb19d419ad14a01464e 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -206,8 +206,8 @@ error: if (repeat--) { /* Min is 2MB */ nslabs = max(1024UL, (nslabs >> 1)); - pr_info("Lowering to %luMB\n", - (nslabs << IO_TLB_SHIFT) >> 20); + bytes = nslabs << IO_TLB_SHIFT; + pr_info("Lowering to %luMB\n", bytes >> 20); goto retry; } pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc); diff --git a/drivers/xen/xen-pciback/conf_space_capability.c b/drivers/xen/xen-pciback/conf_space_capability.c index 22f13abbe91306147a24bbbd7c7ac47e4c2833bf..5e53b4817f16796bec96c67a4af63b97bc9c97cc 100644 --- a/drivers/xen/xen-pciback/conf_space_capability.c +++ b/drivers/xen/xen-pciback/conf_space_capability.c @@ -160,7 +160,7 @@ static void *pm_ctrl_init(struct pci_dev *dev, int offset) } out: - return ERR_PTR(err); + return err ? ERR_PTR(err) : NULL; } static const struct config_field caplist_pm[] = { diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 0cd728961fce9eb004bb77cf71aecfa0637933c0..16cfef09932953167de76fff884a24be4e4cdd1f 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -379,7 +379,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, unsigned int nr_pages, grant_ref_t *grefs) { int err; - int i, j; + unsigned int i; + grant_ref_t gref_head; + + err = gnttab_alloc_grant_references(nr_pages, &gref_head); + if (err) { + xenbus_dev_fatal(dev, err, "granting access to ring page"); + return err; + } for (i = 0; i < nr_pages; i++) { unsigned long gfn; @@ -389,23 +396,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, else gfn = virt_to_gfn(vaddr); - err = gnttab_grant_foreign_access(dev->otherend_id, gfn, 0); - if (err < 0) { - xenbus_dev_fatal(dev, err, - "granting access to ring page"); - goto fail; - } - grefs[i] = err; + grefs[i] = gnttab_claim_grant_reference(&gref_head); + gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id, + gfn, 0); vaddr = vaddr + XEN_PAGE_SIZE; } return 0; - -fail: - for (j = 0; j < i; j++) - gnttab_end_foreign_access_ref(grefs[j], 0); - return err; } EXPORT_SYMBOL_GPL(xenbus_grant_ring); diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 8a75092bb148b6b5ed3efb9ab48197aa9a8589e9..98d870672dc5ef5ff2f02425200976969f2626c8 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -846,7 +846,7 @@ static struct notifier_block xenbus_resume_nb = { static int __init xenbus_init(void) { - int err = 0; + int err; uint64_t v = 0; xen_store_domain_type = XS_UNKNOWN; @@ -886,6 +886,29 @@ static int __init xenbus_init(void) err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); if (err) goto out_error; + /* + * Uninitialized hvm_params are zero and return no error. + * Although it is theoretically possible to have + * HVM_PARAM_STORE_PFN set to zero on purpose, in reality it is + * not zero when valid. If zero, it means that Xenstore hasn't + * been properly initialized. Instead of attempting to map a + * wrong guest physical address return error. + * + * Also recognize all bits set as an invalid value. + */ + if (!v || !~v) { + err = -ENOENT; + goto out_error; + } + /* Avoid truncation on 32-bit. */ +#if BITS_PER_LONG == 32 + if (v > ULONG_MAX) { + pr_err("%s: cannot handle HVM_PARAM_STORE_PFN=%llx > ULONG_MAX\n", + __func__, v); + err = -EINVAL; + goto out_error; + } +#endif xen_store_gfn = (unsigned long)v; xen_store_interface = xen_remap(xen_store_gfn << XEN_PAGE_SHIFT, @@ -920,8 +943,10 @@ static int __init xenbus_init(void) */ proc_create_mount_point("xen"); #endif + return 0; out_error: + xen_store_domain_type = XS_UNKNOWN; return err; } diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 39def020a074b7fab442f60216916ae3c3ac6a7b..6e3e65deb0b93eb1eb08587ca4df536b2d7986e4 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -738,3 +738,4 @@ MODULE_AUTHOR("Latchesar Ionkov "); MODULE_AUTHOR("Eric Van Hensbergen "); MODULE_AUTHOR("Ron Minnich "); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 72b67d810b8c202ac8323c700361920c96dd193c..a13ef836fe4e1b13b7162b0ce6fc5c630e048c70 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -541,7 +541,10 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) { int retval; struct p9_fid *fid = NULL; - struct p9_iattr_dotl p9attr; + struct p9_iattr_dotl p9attr = { + .uid = INVALID_UID, + .gid = INVALID_GID, + }; struct inode *inode = d_inode(dentry); p9_debug(P9_DEBUG_VFS, "\n"); @@ -551,14 +554,22 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr) return retval; p9attr.valid = v9fs_mapped_iattr_valid(iattr->ia_valid); - p9attr.mode = iattr->ia_mode; - p9attr.uid = iattr->ia_uid; - p9attr.gid = iattr->ia_gid; - p9attr.size = iattr->ia_size; - p9attr.atime_sec = iattr->ia_atime.tv_sec; - p9attr.atime_nsec = iattr->ia_atime.tv_nsec; - p9attr.mtime_sec = iattr->ia_mtime.tv_sec; - p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; + if (iattr->ia_valid & ATTR_MODE) + p9attr.mode = iattr->ia_mode; + if (iattr->ia_valid & ATTR_UID) + p9attr.uid = iattr->ia_uid; + if (iattr->ia_valid & ATTR_GID) + p9attr.gid = iattr->ia_gid; + if (iattr->ia_valid & ATTR_SIZE) + p9attr.size = iattr->ia_size; + if (iattr->ia_valid & ATTR_ATIME_SET) { + p9attr.atime_sec = iattr->ia_atime.tv_sec; + p9attr.atime_nsec = iattr->ia_atime.tv_nsec; + } + if (iattr->ia_valid & ATTR_MTIME_SET) { + p9attr.mtime_sec = iattr->ia_mtime.tv_sec; + p9attr.mtime_nsec = iattr->ia_mtime.tv_nsec; + } if (iattr->ia_valid & ATTR_FILE) { fid = iattr->ia_file->private_data; diff --git a/fs/Makefile b/fs/Makefile index 5dca3480ee83b22ce0412ba81a0a0d73ee9a464f..359c63fefa1b88011bb610f24d53e46a2e7156ea 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -6,6 +6,8 @@ # Rewritten to use lists instead of if-statements. # +subdir-ccflags-y += -DANDROID_GKI_VFS_EXPORT_ONLY=VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver + obj-y := open.o read_write.o file_table.o super.o \ char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ ioctl.o readdir.o select.o dcache.o inode.o \ diff --git a/fs/adfs/super.c b/fs/adfs/super.c index bdbd26e571ed37b0a3def3fb25ed70467f6350d1..57044e5372906fa52d59199f9fb2d7f0986274ec 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -492,3 +492,4 @@ static void __exit exit_adfs_fs(void) module_init(init_adfs_fs) module_exit(exit_adfs_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/affs/super.c b/fs/affs/super.c index c6c2a513ec92d1549feff40c2f606bd1c0225154..2d2797ef7cfafd3988870f207922d4e1ddaec97a 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -676,6 +676,7 @@ static void __exit exit_affs_fs(void) MODULE_DESCRIPTION("Amiga filesystem support for Linux"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(init_affs_fs) module_exit(exit_affs_fs) diff --git a/fs/afs/main.c b/fs/afs/main.c index 179004b15566d326d507403500486d7f4377234e..c6bd956454995906983460151a641cb13cffd9ed 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -18,6 +18,7 @@ MODULE_DESCRIPTION("AFS Client File System"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); unsigned afs_debug; module_param_named(debug, afs_debug, uint, S_IWUSR | S_IRUGO); diff --git a/fs/aio.c b/fs/aio.c index 6a21d8919409c4aa2c0378108337724a1f73eb35..2a9dfa58ec3ab4e51f88b88d82011380be4f7bdb 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -182,8 +182,9 @@ struct poll_iocb { struct file *file; struct wait_queue_head *head; __poll_t events; - bool done; bool cancelled; + bool work_scheduled; + bool work_need_resched; struct wait_queue_entry wait; struct work_struct work; }; @@ -1621,6 +1622,51 @@ static void aio_poll_put_work(struct work_struct *work) iocb_put(iocb); } +/* + * Safely lock the waitqueue which the request is on, synchronizing with the + * case where the ->poll() provider decides to free its waitqueue early. + * + * Returns true on success, meaning that req->head->lock was locked, req->wait + * is on req->head, and an RCU read lock was taken. Returns false if the + * request was already removed from its waitqueue (which might no longer exist). + */ +static bool poll_iocb_lock_wq(struct poll_iocb *req) +{ + wait_queue_head_t *head; + + /* + * While we hold the waitqueue lock and the waitqueue is nonempty, + * wake_up_pollfree() will wait for us. However, taking the waitqueue + * lock in the first place can race with the waitqueue being freed. + * + * We solve this as eventpoll does: by taking advantage of the fact that + * all users of wake_up_pollfree() will RCU-delay the actual free. If + * we enter rcu_read_lock() and see that the pointer to the queue is + * non-NULL, we can then lock it without the memory being freed out from + * under us, then check whether the request is still on the queue. + * + * Keep holding rcu_read_lock() as long as we hold the queue lock, in + * case the caller deletes the entry from the queue, leaving it empty. + * In that case, only RCU prevents the queue memory from being freed. + */ + rcu_read_lock(); + head = smp_load_acquire(&req->head); + if (head) { + spin_lock(&head->lock); + if (!list_empty(&req->wait.entry)) + return true; + spin_unlock(&head->lock); + } + rcu_read_unlock(); + return false; +} + +static void poll_iocb_unlock_wq(struct poll_iocb *req) +{ + spin_unlock(&req->head->lock); + rcu_read_unlock(); +} + static void aio_poll_complete_work(struct work_struct *work) { struct poll_iocb *req = container_of(work, struct poll_iocb, work); @@ -1640,14 +1686,27 @@ static void aio_poll_complete_work(struct work_struct *work) * avoid further branches in the fast path. */ spin_lock_irq(&ctx->ctx_lock); - if (!mask && !READ_ONCE(req->cancelled)) { - add_wait_queue(req->head, &req->wait); - spin_unlock_irq(&ctx->ctx_lock); - return; - } + if (poll_iocb_lock_wq(req)) { + if (!mask && !READ_ONCE(req->cancelled)) { + /* + * The request isn't actually ready to be completed yet. + * Reschedule completion if another wakeup came in. + */ + if (req->work_need_resched) { + schedule_work(&req->work); + req->work_need_resched = false; + } else { + req->work_scheduled = false; + } + poll_iocb_unlock_wq(req); + spin_unlock_irq(&ctx->ctx_lock); + return; + } + list_del_init(&req->wait.entry); + poll_iocb_unlock_wq(req); + } /* else, POLLFREE has freed the waitqueue, so we must complete */ list_del_init(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); - req->done = true; spin_unlock_irq(&ctx->ctx_lock); iocb_put(iocb); @@ -1659,13 +1718,14 @@ static int aio_poll_cancel(struct kiocb *iocb) struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw); struct poll_iocb *req = &aiocb->poll; - spin_lock(&req->head->lock); - WRITE_ONCE(req->cancelled, true); - if (!list_empty(&req->wait.entry)) { - list_del_init(&req->wait.entry); - schedule_work(&aiocb->poll.work); - } - spin_unlock(&req->head->lock); + if (poll_iocb_lock_wq(req)) { + WRITE_ONCE(req->cancelled, true); + if (!req->work_scheduled) { + schedule_work(&aiocb->poll.work); + req->work_scheduled = true; + } + poll_iocb_unlock_wq(req); + } /* else, the request was force-cancelled by POLLFREE already */ return 0; } @@ -1682,20 +1742,26 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (mask && !(mask & req->events)) return 0; - list_del_init(&req->wait.entry); - - if (mask && spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { + /* + * Complete the request inline if possible. This requires that three + * conditions be met: + * 1. An event mask must have been passed. If a plain wakeup was done + * instead, then mask == 0 and we have to call vfs_poll() to get + * the events, so inline completion isn't possible. + * 2. The completion work must not have already been scheduled. + * 3. ctx_lock must not be busy. We have to use trylock because we + * already hold the waitqueue lock, so this inverts the normal + * locking order. Use irqsave/irqrestore because not all + * filesystems (e.g. fuse) call this function with IRQs disabled, + * yet IRQs have to be disabled before ctx_lock is obtained. + */ + if (mask && !req->work_scheduled && + spin_trylock_irqsave(&iocb->ki_ctx->ctx_lock, flags)) { struct kioctx *ctx = iocb->ki_ctx; - /* - * Try to complete the iocb inline if we can. Use - * irqsave/irqrestore because not all filesystems (e.g. fuse) - * call this function with IRQs disabled and because IRQs - * have to be disabled before ctx_lock is obtained. - */ + list_del_init(&req->wait.entry); list_del(&iocb->ki_list); iocb->ki_res.res = mangle_poll(mask); - req->done = true; if (iocb->ki_eventfd && eventfd_signal_count()) { iocb = NULL; INIT_WORK(&req->work, aio_poll_put_work); @@ -1705,7 +1771,43 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, if (iocb) iocb_put(iocb); } else { - schedule_work(&req->work); + /* + * Schedule the completion work if needed. If it was already + * scheduled, record that another wakeup came in. + * + * Don't remove the request from the waitqueue here, as it might + * not actually be complete yet (we won't know until vfs_poll() + * is called), and we must not miss any wakeups. POLLFREE is an + * exception to this; see below. + */ + if (req->work_scheduled) { + req->work_need_resched = true; + } else { + schedule_work(&req->work); + req->work_scheduled = true; + } + + /* + * If the waitqueue is being freed early but we can't complete + * the request inline, we have to tear down the request as best + * we can. That means immediately removing the request from its + * waitqueue and preventing all further accesses to the + * waitqueue via the request. We also need to schedule the + * completion work (done above). Also mark the request as + * cancelled, to potentially skip an unneeded call to ->poll(). + */ + if (mask & POLLFREE) { + WRITE_ONCE(req->cancelled, true); + list_del_init(&req->wait.entry); + + /* + * Careful: this *must* be the last step, since as soon + * as req->head is NULL'ed out, the request can be + * completed and freed, since aio_poll_complete_work() + * will no longer need to take the waitqueue lock. + */ + smp_store_release(&req->head, NULL); + } } return 1; } @@ -1713,6 +1815,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct aio_poll_table { struct poll_table_struct pt; struct aio_kiocb *iocb; + bool queued; int error; }; @@ -1723,11 +1826,12 @@ aio_poll_queue_proc(struct file *file, struct wait_queue_head *head, struct aio_poll_table *pt = container_of(p, struct aio_poll_table, pt); /* multiple wait queues per file are not supported */ - if (unlikely(pt->iocb->poll.head)) { + if (unlikely(pt->queued)) { pt->error = -EINVAL; return; } + pt->queued = true; pt->error = 0; pt->iocb->poll.head = head; add_wait_queue(head, &pt->iocb->poll.wait); @@ -1752,12 +1856,14 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP; req->head = NULL; - req->done = false; req->cancelled = false; + req->work_scheduled = false; + req->work_need_resched = false; apt.pt._qproc = aio_poll_queue_proc; apt.pt._key = req->events; apt.iocb = aiocb; + apt.queued = false; apt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */ /* initialized the list so that we can do list_empty checks */ @@ -1766,23 +1872,35 @@ static int aio_poll(struct aio_kiocb *aiocb, const struct iocb *iocb) mask = vfs_poll(req->file, &apt.pt) & req->events; spin_lock_irq(&ctx->ctx_lock); - if (likely(req->head)) { - spin_lock(&req->head->lock); - if (unlikely(list_empty(&req->wait.entry))) { - if (apt.error) + if (likely(apt.queued)) { + bool on_queue = poll_iocb_lock_wq(req); + + if (!on_queue || req->work_scheduled) { + /* + * aio_poll_wake() already either scheduled the async + * completion work, or completed the request inline. + */ + if (apt.error) /* unsupported case: multiple queues */ cancel = true; apt.error = 0; mask = 0; } if (mask || apt.error) { + /* Steal to complete synchronously. */ list_del_init(&req->wait.entry); } else if (cancel) { + /* Cancel if possible (may be too late though). */ WRITE_ONCE(req->cancelled, true); - } else if (!req->done) { /* actually waiting for an event */ + } else if (on_queue) { + /* + * Actually waiting for an event, so add the request to + * active_reqs so that it can be cancelled if needed. + */ list_add_tail(&aiocb->ki_list, &ctx->active_reqs); aiocb->ki_cancel = aio_poll_cancel; } - spin_unlock(&req->head->lock); + if (on_queue) + poll_iocb_unlock_wq(req); } if (mask) { /* no async, we'd stolen it */ aiocb->ki_res.res = mangle_poll(mask); diff --git a/fs/attr.c b/fs/attr.c index b4bbdbd4c8ca096e86451b0443e64bc2ca9e2b1d..d8c1f796d0e91f116b6fd6516a5b3f08f145b2af 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -114,7 +114,7 @@ kill_priv: return 0; } -EXPORT_SYMBOL(setattr_prepare); +EXPORT_SYMBOL_NS(setattr_prepare, ANDROID_GKI_VFS_EXPORT_ONLY); /** * inode_newsize_ok - may this inode be truncated to a given size @@ -158,7 +158,7 @@ out_sig: out_big: return -EFBIG; } -EXPORT_SYMBOL(inode_newsize_ok); +EXPORT_SYMBOL_NS(inode_newsize_ok, ANDROID_GKI_VFS_EXPORT_ONLY); /** * setattr_copy - copy simple metadata updates into the generic inode @@ -345,4 +345,4 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de return error; } -EXPORT_SYMBOL(notify_change); +EXPORT_SYMBOL_NS(notify_change, ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/autofs/init.c b/fs/autofs/init.c index d3f55e87433890384b3a56695b4ca26a24656193..ba08261f4faa74fd498fbbc61324ac922727cbde 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -44,3 +44,4 @@ static void __exit exit_autofs_fs(void) module_init(init_autofs_fs) module_exit(exit_autofs_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 54f0ce44427200ba32ba0db078faf120a9b5f172..b292859f5f85a41c782e7b020b5027a0a85ddc9d 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -207,7 +207,7 @@ void make_bad_inode(struct inode *inode) inode->i_opflags &= ~IOP_XATTR; inode->i_fop = &bad_file_ops; } -EXPORT_SYMBOL(make_bad_inode); +EXPORT_SYMBOL_NS(make_bad_inode, ANDROID_GKI_VFS_EXPORT_ONLY); /* * This tests whether an inode has been flagged as bad. The test uses @@ -227,7 +227,7 @@ bool is_bad_inode(struct inode *inode) return (inode->i_op == &bad_inode_ops); } -EXPORT_SYMBOL(is_bad_inode); +EXPORT_SYMBOL_NS(is_bad_inode, ANDROID_GKI_VFS_EXPORT_ONLY); /** * iget_failed - Mark an under-construction inode as dead and release it diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index c1ba13d19024e4beb625714dca473094d5092dcd..abb8f6bb7e395514fe1f8442f355143e87befc40 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -34,6 +34,7 @@ MODULE_DESCRIPTION("BeOS File System (BeFS) driver"); MODULE_AUTHOR("Will Dyson"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); /* The units the vfs expects inode->i_blocks to be in */ #define VFS_BLOCK_SIZE 512 diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index fd691e4815c56492ff593e30904c15ec3e0c7a6d..293223cd4b375b95ba0c5a61e8971adc5d92972f 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -22,6 +22,7 @@ MODULE_AUTHOR("Tigran Aivazian "); MODULE_DESCRIPTION("SCO UnixWare BFS filesystem for Linux"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); #undef DEBUG diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index fa50e8936f5fc30205bfb845afd70e1f3e7f366a..04c4aa7a1df2c59716abaa12aefdf29fd7d4630c 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -627,7 +627,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, vaddr = eppnt->p_vaddr; if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) - elf_type |= MAP_FIXED_NOREPLACE; + elf_type |= MAP_FIXED; else if (no_base && interp_elf_ex->e_type == ET_DYN) load_addr = -vaddr; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 11b5bf2419555b00fbfb540a4433fe4d8ea20806..3e4791efdf77b81a75e1216881a929874de18f68 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -832,3 +832,4 @@ static void __exit exit_misc_binfmt(void) core_initcall(init_misc_binfmt); module_exit(exit_misc_binfmt); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/block_dev.c b/fs/block_dev.c index e6c99e04f74c11f5df359dab5098cad73edca318..aa0766b7be4f3eac0b606d4be28f23a4972d54c1 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -186,7 +186,7 @@ int sb_set_blocksize(struct super_block *sb, int size) return sb->s_blocksize; } -EXPORT_SYMBOL(sb_set_blocksize); +EXPORT_SYMBOL_NS(sb_set_blocksize, ANDROID_GKI_VFS_EXPORT_ONLY); int sb_min_blocksize(struct super_block *sb, int size) { @@ -196,7 +196,7 @@ int sb_min_blocksize(struct super_block *sb, int size) return sb_set_blocksize(sb, size); } -EXPORT_SYMBOL(sb_min_blocksize); +EXPORT_SYMBOL_NS(sb_min_blocksize, ANDROID_GKI_VFS_EXPORT_ONLY); static int blkdev_get_block(struct inode *inode, sector_t iblock, @@ -235,7 +235,7 @@ static void blkdev_bio_end_io_simple(struct bio *bio) static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, - int nr_pages) + unsigned int nr_pages) { struct file *file = iocb->ki_filp; struct block_device *bdev = I_BDEV(bdev_file_inode(file)); @@ -371,8 +371,8 @@ static void blkdev_bio_end_io(struct bio *bio) } } -static ssize_t -__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) +static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, + unsigned int nr_pages) { struct file *file = iocb->ki_filp; struct inode *inode = bdev_file_inode(file); @@ -504,7 +504,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { - int nr_pages; + unsigned int nr_pages; nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES + 1); if (!nr_pages) @@ -512,7 +512,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES) return __blkdev_direct_IO_simple(iocb, iter, nr_pages); - return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES)); + return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); } static __init int blkdev_init(void) diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 309516e6a96820cd760e378f7110dc887e316855..43c89952b7d25746f1a96cf1fba527d4624782b9 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -234,6 +234,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq, ordered_list); if (!test_bit(WORK_DONE_BIT, &work->flags)) break; + /* + * Orders all subsequent loads after reading WORK_DONE_BIT, + * paired with the smp_mb__before_atomic in btrfs_work_helper + * this guarantees that the ordered function will see all + * updates from ordinary work function. + */ + smp_rmb(); /* * we are going to call the ordered done function, but @@ -317,6 +324,13 @@ static void btrfs_work_helper(struct work_struct *normal_work) thresh_exec_hook(wq); work->func(work); if (need_order) { + /* + * Ensures all memory accesses done in the work function are + * ordered before setting the WORK_DONE_BIT. Ensuring the thread + * which is going to executed the ordered work sees them. + * Pairs with the smp_rmb in run_ordered_work. + */ + smp_mb__before_atomic(); set_bit(WORK_DONE_BIT, &work->flags); run_ordered_work(wq, work); } else { diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6e447bdaf9ec8b381465dfbaeb73a240b6af4416..baff31a147e7dfa802c6a8823dce91837443daf5 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1213,7 +1213,12 @@ again: ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0); if (ret < 0) goto out; - BUG_ON(ret == 0); + if (ret == 0) { + /* This shouldn't happen, indicates a bug or fs corruption. */ + ASSERT(ret != 0); + ret = -EUCLEAN; + goto out; + } #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS if (trans && likely(trans->type != __TRANS_DUMMY) && @@ -1361,10 +1366,18 @@ again: goto out; if (!ret && extent_item_pos) { /* - * we've recorded that parent, so we must extend - * its inode list here + * We've recorded that parent, so we must extend + * its inode list here. + * + * However if there was corruption we may not + * have found an eie, return an error in this + * case. */ - BUG_ON(!eie); + ASSERT(eie); + if (!eie) { + ret = -EUCLEAN; + goto out; + } while (eie->next) eie = eie->next; eie->next = ref->inode_list; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 519cf145f9bd176596bef40a03a17a6b3e81b07e..5addd1e36a8ee1699b5f8be4a4b48c36aeabe282 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2589,12 +2589,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, { struct btrfs_fs_info *fs_info = root->fs_info; struct extent_buffer *b; - int root_lock; + int root_lock = 0; int level = 0; - /* We try very hard to do read locks on the root */ - root_lock = BTRFS_READ_LOCK; - if (p->search_commit_root) { /* * The commit roots are read only so we always do read locks, @@ -2632,6 +2629,9 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, goto out; } + /* We try very hard to do read locks on the root */ + root_lock = BTRFS_READ_LOCK; + /* * If the level is set to maximum, we can skip trying to get the read * lock. @@ -2658,6 +2658,17 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root, level = btrfs_header_level(b); out: + /* + * The root may have failed to write out at some point, and thus is no + * longer valid, return an error in this case. + */ + if (!extent_buffer_uptodate(b)) { + if (root_lock) + btrfs_tree_unlock_rw(b, root_lock); + free_extent_buffer(b); + return ERR_PTR(-EIO); + } + p->nodes[level] = b; if (!p->skip_locking) p->locks[level] = root_lock; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ef7df2141f34f3e60b8865a67334686d8714ad4f..a5bcad0278835789a251bb2e74fb43ca58b8f578 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1603,6 +1603,14 @@ again: } return root; fail: + /* + * If our caller provided us an anonymous device, then it's his + * responsability to free it in case we fail. So we have to set our + * root's anon_dev to 0 to avoid a double free, once by btrfs_put_root() + * and once again by our caller. + */ + if (anon_dev) + root->anon_dev = 0; btrfs_put_root(root); return ERR_PTR(ret); } @@ -3223,7 +3231,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device goto fail_sysfs; } - if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) { + if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices && + !btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "writable mount is not allowed due to too many missing devices"); goto fail_sysfs; @@ -3691,11 +3700,23 @@ static void btrfs_end_empty_barrier(struct bio *bio) */ static void write_dev_flush(struct btrfs_device *device) { - struct request_queue *q = bdev_get_queue(device->bdev); struct bio *bio = device->flush_bio; +#ifndef CONFIG_BTRFS_FS_CHECK_INTEGRITY + /* + * When a disk has write caching disabled, we skip submission of a bio + * with flush and sync requests before writing the superblock, since + * it's not needed. However when the integrity checker is enabled, this + * results in reports that there are metadata blocks referred by a + * superblock that were not properly flushed. So don't skip the bio + * submission only when the integrity checker is enabled for the sake + * of simplicity, since this is a debug tool and not meant for use in + * non-debug builds. + */ + struct request_queue *q = bdev_get_queue(device->bdev); if (!test_bit(QUEUE_FLAG_WC, &q->queue_flags)) return; +#endif bio_reset(bio); bio->bi_end_io = btrfs_end_empty_barrier; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c346c46020e5ede08975797c61424855a6d0345e..284294620e9fa755fe36d2cae88597e6b06ce33f 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4715,6 +4715,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, out_free_delayed: btrfs_free_delayed_extent_op(extent_op); out_free_buf: + btrfs_tree_unlock(buf); free_extent_buffer(buf); out_free_reserved: btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 0); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 81e98a457130f8e6ee52d3f70b54c3eda1dd1663..5fc65a780f83bae66b3035dcec8b2be7cbf2114d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3769,6 +3769,12 @@ static void set_btree_ioerr(struct page *page) if (test_and_set_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) return; + /* + * A read may stumble upon this buffer later, make sure that it gets an + * error and knows there was an error. + */ + clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); + /* * If we error out, we should add back the dirty_metadata_bytes * to make it consistent. diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 48a2ea6d709218bebf6dc966c059a6d028ab2673..2de1d8247494e36b2f7a1009c398ee6b57ff7d14 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -568,7 +568,18 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio, if (!ordered) { ordered = btrfs_lookup_ordered_extent(inode, offset); - BUG_ON(!ordered); /* Logic error */ + /* + * The bio range is not covered by any ordered extent, + * must be a code logic error. + */ + if (unlikely(!ordered)) { + WARN(1, KERN_WARNING + "no ordered extent for root %llu ino %llu offset %llu\n", + inode->root->root_key.objectid, + btrfs_ino(inode), offset); + kvfree(sums); + return BLK_STS_IOERR; + } } nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6ab91661cd261d5bb495fdff5e065ab9ec1f0513..f59ec55e5feb232bbf210837843bd1f87b8a5e4e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -710,8 +710,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, if (start >= inode->disk_i_size && !replace_extent) modify_tree = 0; - update_refs = (test_bit(BTRFS_ROOT_SHAREABLE, &root->state) || - root == fs_info->tree_root); + update_refs = (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID); while (1) { recow = 0; ret = btrfs_lookup_file_extent(trans, root, path, ino, @@ -2662,14 +2661,16 @@ int btrfs_replace_file_extents(struct inode *inode, struct btrfs_path *path, 1, 0, 0, NULL); if (ret != -ENOSPC) { /* - * When cloning we want to avoid transaction aborts when - * nothing was done and we are attempting to clone parts - * of inline extents, in such cases -EOPNOTSUPP is - * returned by __btrfs_drop_extents() without having - * changed anything in the file. + * The only time we don't want to abort is if we are + * attempting to clone a partial inline extent, in which + * case we'll get EOPNOTSUPP. However if we aren't + * clone we need to abort no matter what, because if we + * got EOPNOTSUPP via prealloc then we messed up and + * need to abort. */ - if (extent_info && !extent_info->is_new_extent && - ret && ret != -EOPNOTSUPP) + if (ret && + (ret != -EOPNOTSUPP || + (extent_info && extent_info->is_new_extent))) btrfs_abort_transaction(trans, ret); break; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ff3f0638cdb907af56c04cc32a710ff01da00cd2..1d9262a35473ca7f9d696bc69d9fc0dcec43f01d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10094,9 +10094,19 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis, struct btrfs_swap_info *bsi) { unsigned long nr_pages; + unsigned long max_pages; u64 first_ppage, first_ppage_reported, next_ppage; int ret; + /* + * Our swapfile may have had its size extended after the swap header was + * written. In that case activating the swapfile should not go beyond + * the max size set in the swap header. + */ + if (bsi->nr_pages >= sis->max) + return 0; + + max_pages = sis->max - bsi->nr_pages; first_ppage = ALIGN(bsi->block_start, PAGE_SIZE) >> PAGE_SHIFT; next_ppage = ALIGN_DOWN(bsi->block_start + bsi->block_len, PAGE_SIZE) >> PAGE_SHIFT; @@ -10104,6 +10114,7 @@ static int btrfs_add_swap_extent(struct swap_info_struct *sis, if (first_ppage >= next_ppage) return 0; nr_pages = next_ppage - first_ppage; + nr_pages = min(nr_pages, max_pages); first_ppage_reported = first_ppage; if (bsi->start == 0) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 040db0dfba264315572a4abe3933e4213e4137e8..b5e9bfe884c4b7eec01517fdb708eb2fa13636b7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3103,10 +3103,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, inode_lock(inode); err = btrfs_delete_subvolume(dir, dentry); inode_unlock(inode); - if (!err) { - fsnotify_rmdir(dir, dentry); - d_delete(dentry); - } + if (!err) + d_delete_notify(dir, dentry); out_dput: dput(dentry); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 4bac32a274ceb0b6d4602574e512170be06b1f94..a02e38fb696c17f62d81a9ef0358bf376d411c48 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -941,6 +941,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info) int ret = 0; int slot; + /* + * We need to have subvol_sem write locked, to prevent races between + * concurrent tasks trying to enable quotas, because we will unlock + * and relock qgroup_ioctl_lock before setting fs_info->quota_root + * and before setting BTRFS_FS_QUOTA_ENABLED. + */ + lockdep_assert_held_write(&fs_info->subvol_sem); + mutex_lock(&fs_info->qgroup_ioctl_lock); if (fs_info->quota_root) goto out; @@ -1118,8 +1126,19 @@ out_add_root: goto out_free_path; } + mutex_unlock(&fs_info->qgroup_ioctl_lock); + /* + * Commit the transaction while not holding qgroup_ioctl_lock, to avoid + * a deadlock with tasks concurrently doing other qgroup operations, such + * adding/removing qgroups or adding/deleting qgroup relations for example, + * because all qgroup operations first start or join a transaction and then + * lock the qgroup_ioctl_lock mutex. + * We are safe from a concurrent task trying to enable quotas, by calling + * this function, since we are serialized by fs_info->subvol_sem. + */ ret = btrfs_commit_transaction(trans); trans = NULL; + mutex_lock(&fs_info->qgroup_ioctl_lock); if (ret) goto out_free_path; @@ -1167,11 +1186,33 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) struct btrfs_trans_handle *trans = NULL; int ret = 0; + /* + * We need to have subvol_sem write locked, to prevent races between + * concurrent tasks trying to disable quotas, because we will unlock + * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes. + */ + lockdep_assert_held_write(&fs_info->subvol_sem); + mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; + + /* + * Unlock the qgroup_ioctl_lock mutex before waiting for the rescan worker to + * complete. Otherwise we can deadlock because btrfs_remove_qgroup() needs + * to lock that mutex while holding a transaction handle and the rescan + * worker needs to commit a transaction. + */ mutex_unlock(&fs_info->qgroup_ioctl_lock); + /* + * Request qgroup rescan worker to complete and wait for it. This wait + * must be done before transaction start for quota disable since it may + * deadlock with transaction by the qgroup rescan worker. + */ + clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); + btrfs_qgroup_wait_for_completion(fs_info, false); + /* * 1 For the root item * @@ -1187,14 +1228,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) if (IS_ERR(trans)) { ret = PTR_ERR(trans); trans = NULL; + set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); goto out; } if (!fs_info->quota_root) goto out; - clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags); - btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; fs_info->quota_root = NULL; @@ -3371,6 +3411,9 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, btrfs_warn(fs_info, "qgroup rescan init failed, qgroup is not enabled"); ret = -EINVAL; + } else if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { + /* Quota disable is in progress */ + ret = -EBUSY; } if (ret) { diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index 96ef9fed9a6568545a24bec31e25d3b7c162036f..3a3102bc15a057291154e3a44c1e369fd822bda3 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -634,7 +634,7 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len, static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, struct inode *dst, u64 dst_loff) { - int ret; + int ret = 0; u64 i, tail_len, chunk_count; struct btrfs_root *root_dst = BTRFS_I(dst)->root; diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 702dc5441f039743b4ad1cb99d10624084898705..db37a37996497828fa6124df02547fc6e03a9a34 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -336,7 +336,8 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id, key.offset = ref_id; again: ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); - BUG_ON(ret < 0); + if (ret < 0) + goto out; if (ret == 0) { leaf = path->nodes[0]; ref = btrfs_item_ptr(leaf, path->slots[0], diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 10f020ab1186f1fd049ed9cf80060bf33801ca1e..6b80dee17f49d99219bdd86f5fc130b1bb098a42 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5006,6 +5006,10 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len) lock_page(page); if (!PageUptodate(page)) { unlock_page(page); + btrfs_err(fs_info, + "send: IO error at offset %llu for inode %llu root %llu", + page_offset(page), sctx->cur_ino, + sctx->send_root->root_key.objectid); put_page(page); ret = -EIO; break; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2663485c17cb88a97b8f5a5e4a12b0d86e25a119..b5d2005d3415d76415786261efcd20a7cfddd448 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2597,6 +2597,7 @@ late_initcall(init_btrfs_fs); module_exit(exit_btrfs_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_SOFTDEP("pre: crc32c"); MODULE_SOFTDEP("pre: xxhash64"); MODULE_SOFTDEP("pre: sha256"); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index d4a3a56726aa8cba77739a45e8f591d420370838..32f1b15b25dcc34b027b3f4883e36b12e4793e4c 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -947,6 +947,7 @@ static int check_dev_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot) { struct btrfs_dev_item *ditem; + const u32 item_size = btrfs_item_size_nr(leaf, slot); if (key->objectid != BTRFS_DEV_ITEMS_OBJECTID) { dev_item_err(leaf, slot, @@ -954,6 +955,13 @@ static int check_dev_item(struct extent_buffer *leaf, key->objectid, BTRFS_DEV_ITEMS_OBJECTID); return -EUCLEAN; } + + if (unlikely(item_size != sizeof(*ditem))) { + dev_item_err(leaf, slot, "invalid item size: has %u expect %zu", + item_size, sizeof(*ditem)); + return -EUCLEAN; + } + ditem = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); if (btrfs_device_id(leaf, ditem) != key->offset) { dev_item_err(leaf, slot, @@ -989,6 +997,7 @@ static int check_inode_item(struct extent_buffer *leaf, struct btrfs_inode_item *iitem; u64 super_gen = btrfs_super_generation(fs_info->super_copy); u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777); + const u32 item_size = btrfs_item_size_nr(leaf, slot); u32 mode; int ret; @@ -996,6 +1005,12 @@ static int check_inode_item(struct extent_buffer *leaf, if (ret < 0) return ret; + if (unlikely(item_size != sizeof(*iitem))) { + generic_err(leaf, slot, "invalid item size: has %u expect %zu", + item_size, sizeof(*iitem)); + return -EUCLEAN; + } + iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item); /* Here we use super block generation + 1 to handle log tree */ diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 7bf3936aceda2a3b84e57ae030a6379958477469..62784b99a80741bfecc9ae40d7fffc27cd34f099 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -894,9 +894,11 @@ out: } /* - * helper function to see if a given name and sequence number found - * in an inode back reference are already in a directory and correctly - * point to this inode + * See if a given name and sequence number found in an inode back reference are + * already in a directory and correctly point to this inode. + * + * Returns: < 0 on error, 0 if the directory entry does not exists and 1 if it + * exists. */ static noinline int inode_in_dir(struct btrfs_root *root, struct btrfs_path *path, @@ -905,29 +907,35 @@ static noinline int inode_in_dir(struct btrfs_root *root, { struct btrfs_dir_item *di; struct btrfs_key location; - int match = 0; + int ret = 0; di = btrfs_lookup_dir_index_item(NULL, root, path, dirid, index, name, name_len, 0); - if (di && !IS_ERR(di)) { + if (IS_ERR(di)) { + if (PTR_ERR(di) != -ENOENT) + ret = PTR_ERR(di); + goto out; + } else if (di) { btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); if (location.objectid != objectid) goto out; - } else + } else { goto out; - btrfs_release_path(path); + } + btrfs_release_path(path); di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); - if (di && !IS_ERR(di)) { - btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); - if (location.objectid != objectid) - goto out; - } else + if (IS_ERR(di)) { + ret = PTR_ERR(di); goto out; - match = 1; + } else if (di) { + btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); + if (location.objectid == objectid) + ret = 1; + } out: btrfs_release_path(path); - return match; + return ret; } /* @@ -1101,6 +1109,7 @@ again: parent_objectid, victim_name, victim_name_len); if (ret < 0) { + kfree(victim_name); return ret; } else if (!ret) { ret = -ENOENT; @@ -1137,7 +1146,10 @@ next: /* look for a conflicting sequence number */ di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir), ref_index, name, namelen, 0); - if (di && !IS_ERR(di)) { + if (IS_ERR(di)) { + if (PTR_ERR(di) != -ENOENT) + return PTR_ERR(di); + } else if (di) { ret = drop_one_dir_item(trans, root, path, dir, di); if (ret) return ret; @@ -1147,7 +1159,9 @@ next: /* look for a conflicting name */ di = btrfs_lookup_dir_item(trans, root, path, btrfs_ino(dir), name, namelen, 0); - if (di && !IS_ERR(di)) { + if (IS_ERR(di)) { + return PTR_ERR(di); + } else if (di) { ret = drop_one_dir_item(trans, root, path, dir, di); if (ret) return ret; @@ -1272,6 +1286,15 @@ again: inode, name, namelen); kfree(name); iput(dir); + /* + * Whenever we need to check if a name exists or not, we + * check the subvolume tree. So after an unlink we must + * run delayed items, so that future checks for a name + * during log replay see that the name does not exists + * anymore. + */ + if (!ret) + ret = btrfs_run_delayed_items(trans); if (ret) goto out; goto again; @@ -1472,10 +1495,12 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, if (ret) goto out; - /* if we already have a perfect match, we're done */ - if (!inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), - btrfs_ino(BTRFS_I(inode)), ref_index, - name, namelen)) { + ret = inode_in_dir(root, path, btrfs_ino(BTRFS_I(dir)), + btrfs_ino(BTRFS_I(inode)), ref_index, + name, namelen); + if (ret < 0) { + goto out; + } else if (ret == 0) { /* * look for a conflicting back reference in the * metadata. if we find one we have to unlink that name @@ -1521,6 +1546,15 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, */ if (!ret && inode->i_nlink == 0) inc_nlink(inode); + /* + * Whenever we need to check if a name exists or + * not, we check the subvolume tree. So after an + * unlink we must run delayed items, so that future + * checks for a name during log replay see that the + * name does not exists anymore. + */ + if (!ret) + ret = btrfs_run_delayed_items(trans); } if (ret < 0) goto out; @@ -1533,6 +1567,7 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, btrfs_update_inode(trans, root, inode); } + /* Else, ret == 1, we already have a perfect match, we're done. */ ref_ptr = (unsigned long)(ref_ptr + ref_struct_size) + namelen; kfree(name); @@ -1897,8 +1932,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, struct btrfs_key log_key; struct inode *dir; u8 log_type; - int exists; - int ret = 0; + bool exists; + int ret; bool update_size = (key->type == BTRFS_DIR_INDEX_KEY); bool name_added = false; @@ -1918,12 +1953,12 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, name_len); btrfs_dir_item_key_to_cpu(eb, di, &log_key); - exists = btrfs_lookup_inode(trans, root, path, &log_key, 0); - if (exists == 0) - exists = 1; - else - exists = 0; + ret = btrfs_lookup_inode(trans, root, path, &log_key, 0); btrfs_release_path(path); + if (ret < 0) + goto out; + exists = (ret == 0); + ret = 0; if (key->type == BTRFS_DIR_ITEM_KEY) { dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, @@ -1938,7 +1973,14 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, ret = -EINVAL; goto out; } - if (IS_ERR_OR_NULL(dst_di)) { + + if (dst_di == ERR_PTR(-ENOENT)) + dst_di = NULL; + + if (IS_ERR(dst_di)) { + ret = PTR_ERR(dst_di); + goto out; + } else if (!dst_di) { /* we need a sequence number to insert, so we only * do inserts for the BTRFS_DIR_INDEX_KEY types */ @@ -2443,7 +2485,9 @@ again: else { ret = find_dir_range(log, path, dirid, key_type, &range_start, &range_end); - if (ret != 0) + if (ret < 0) + goto out; + else if (ret > 0) break; } @@ -4271,7 +4315,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, /* * Log all prealloc extents beyond the inode's i_size to make sure we do not - * lose them after doing a fast fsync and replaying the log. We scan the + * lose them after doing a full/fast fsync and replaying the log. We scan the * subvolume's root instead of iterating the inode's extent map tree because * otherwise we can log incorrect extent items based on extent map conversion. * That can happen due to the fact that extent maps are merged when they @@ -5058,6 +5102,7 @@ static int copy_inode_items_to_log(struct btrfs_trans_handle *trans, struct btrfs_log_ctx *ctx, bool *need_log_inode_item) { + const u64 i_size = i_size_read(&inode->vfs_inode); struct btrfs_root *root = inode->root; int ins_start_slot = 0; int ins_nr = 0; @@ -5078,13 +5123,21 @@ again: if (min_key->type > max_key->type) break; - if (min_key->type == BTRFS_INODE_ITEM_KEY) + if (min_key->type == BTRFS_INODE_ITEM_KEY) { *need_log_inode_item = false; - - if ((min_key->type == BTRFS_INODE_REF_KEY || - min_key->type == BTRFS_INODE_EXTREF_KEY) && - inode->generation == trans->transid && - !recursive_logging) { + } else if (min_key->type == BTRFS_EXTENT_DATA_KEY && + min_key->offset >= i_size) { + /* + * Extents at and beyond eof are logged with + * btrfs_log_prealloc_extents(). + * Only regular files have BTRFS_EXTENT_DATA_KEY keys, + * and no keys greater than that, so bail out. + */ + break; + } else if ((min_key->type == BTRFS_INODE_REF_KEY || + min_key->type == BTRFS_INODE_EXTREF_KEY) && + inode->generation == trans->transid && + !recursive_logging) { u64 other_ino = 0; u64 other_parent = 0; @@ -5115,10 +5168,8 @@ again: btrfs_release_path(path); goto next_key; } - } - - /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ - if (min_key->type == BTRFS_XATTR_ITEM_KEY) { + } else if (min_key->type == BTRFS_XATTR_ITEM_KEY) { + /* Skip xattrs, logged later with btrfs_log_all_xattrs() */ if (ins_nr == 0) goto next_slot; ret = copy_items(trans, inode, dst_path, path, @@ -5171,9 +5222,21 @@ next_key: break; } } - if (ins_nr) + if (ins_nr) { ret = copy_items(trans, inode, dst_path, path, ins_start_slot, ins_nr, inode_only, logged_isize); + if (ret) + return ret; + } + + if (inode_only == LOG_INODE_ALL && S_ISREG(inode->vfs_inode.i_mode)) { + /* + * Release the path because otherwise we might attempt to double + * lock the same leaf with btrfs_log_prealloc_extents() below. + */ + btrfs_release_path(path); + ret = btrfs_log_prealloc_extents(trans, inode, dst_path); + } return ret; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d8b8764f5bd107142fddd8846d2370ebeb1596ef..e462de9917237bc5b558aa4365bb11293a71c8f8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "misc.h" #include "ctree.h" #include "extent_map.h" @@ -1133,8 +1134,10 @@ static void btrfs_close_one_device(struct btrfs_device *device) if (device->devid == BTRFS_DEV_REPLACE_DEVID) clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state); - if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) + if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) { + clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state); fs_devices->missing_devices--; + } btrfs_close_bdev(device); if (device->bdev) { @@ -1147,6 +1150,19 @@ static void btrfs_close_one_device(struct btrfs_device *device) atomic_set(&device->dev_stats_ccnt, 0); extent_io_tree_release(&device->alloc_state); + /* + * Reset the flush error record. We might have a transient flush error + * in this mount, and if so we aborted the current transaction and set + * the fs to an error state, guaranteeing no super blocks can be further + * committed. However that error might be transient and if we unmount the + * filesystem and mount it again, we should allow the mount to succeed + * (btrfs_check_rw_degradable() should not fail) - if after mounting the + * filesystem again we still get flush errors, then we will again abort + * any transaction and set the error state, guaranteeing no commits of + * unsafe super blocks. + */ + device->last_flush_error = 0; + /* Verify the device is back in a pristine state */ ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)); ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)); @@ -1856,18 +1872,22 @@ out: /* * Function to update ctime/mtime for a given device path. * Mainly used for ctime/mtime based probe like libblkid. + * + * We don't care about errors here, this is just to be kind to userspace. */ -static void update_dev_time(struct block_device *bdev) +static void update_dev_time(const char *device_path) { - struct inode *inode = bdev->bd_inode; + struct path path; struct timespec64 now; + int ret; - /* Shouldn't happen but just in case. */ - if (!inode) + ret = kern_path(device_path, LOOKUP_FOLLOW, &path); + if (ret) return; - now = current_time(inode); - generic_update_time(inode, &now, S_MTIME | S_CTIME); + now = current_time(d_inode(path.dentry)); + inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME); + path_put(&path); } static int btrfs_rm_dev_item(struct btrfs_device *device) @@ -2042,7 +2062,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, btrfs_kobject_uevent(bdev, KOBJ_CHANGE); /* Update ctime/mtime for device path for libblkid */ - update_dev_time(bdev); + update_dev_time(device_path); } int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, @@ -2054,8 +2074,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, u64 num_devices; int ret = 0; - mutex_lock(&uuid_mutex); - + /* + * The device list in fs_devices is accessed without locks (neither + * uuid_mutex nor device_list_mutex) as it won't change on a mounted + * filesystem and another device rm cannot run. + */ num_devices = btrfs_num_devices(fs_info); ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1); @@ -2099,11 +2122,9 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, mutex_unlock(&fs_info->chunk_mutex); } - mutex_unlock(&uuid_mutex); ret = btrfs_shrink_device(device, 0); if (!ret) btrfs_reada_remove_dev(device); - mutex_lock(&uuid_mutex); if (ret) goto error_undo; @@ -2179,7 +2200,6 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path, } out: - mutex_unlock(&uuid_mutex); return ret; error_undo: @@ -2685,7 +2705,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path btrfs_forget_devices(device_path); /* Update ctime/mtime for blkid or udev */ - update_dev_time(bdev); + update_dev_time(device_path); return ret; diff --git a/fs/buffer.c b/fs/buffer.c index 63afd6ddb85f7a8ff2fe639047c614b0d321932d..13dd0f71f7623d87e0155500ab55dfe686e7c8ab 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -173,7 +173,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) unlock_buffer(bh); put_bh(bh); } -EXPORT_SYMBOL(end_buffer_write_sync); +EXPORT_SYMBOL_NS(end_buffer_write_sync, ANDROID_GKI_VFS_EXPORT_ONLY); /* * Various filesystems appear to want __find_get_block to be non-blocking. @@ -419,7 +419,7 @@ void mark_buffer_async_write(struct buffer_head *bh) { mark_buffer_async_write_endio(bh, end_buffer_async_write); } -EXPORT_SYMBOL(mark_buffer_async_write); +EXPORT_SYMBOL_NS(mark_buffer_async_write, ANDROID_GKI_VFS_EXPORT_ONLY); /* @@ -674,7 +674,7 @@ int __set_page_dirty_buffers(struct page *page) return newly_dirty; } -EXPORT_SYMBOL(__set_page_dirty_buffers); +EXPORT_SYMBOL_NS(__set_page_dirty_buffers, ANDROID_GKI_VFS_EXPORT_ONLY); /* * Write out and wait upon a list of buffers. @@ -1141,7 +1141,7 @@ void mark_buffer_dirty(struct buffer_head *bh) __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } } -EXPORT_SYMBOL(mark_buffer_dirty); +EXPORT_SYMBOL_NS(mark_buffer_dirty, ANDROID_GKI_VFS_EXPORT_ONLY); void mark_buffer_write_io_error(struct buffer_head *bh) { @@ -1159,7 +1159,7 @@ void mark_buffer_write_io_error(struct buffer_head *bh) errseq_set(&sb->s_wb_err, -EIO); rcu_read_unlock(); } -EXPORT_SYMBOL(mark_buffer_write_io_error); +EXPORT_SYMBOL_NS(mark_buffer_write_io_error, ANDROID_GKI_VFS_EXPORT_ONLY); /* * Decrement a buffer_head's reference count. If all buffers against a page @@ -1176,7 +1176,7 @@ void __brelse(struct buffer_head * buf) } WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n"); } -EXPORT_SYMBOL(__brelse); +EXPORT_SYMBOL_NS(__brelse, ANDROID_GKI_VFS_EXPORT_ONLY); /* * bforget() is like brelse(), except it discards any @@ -1195,7 +1195,7 @@ void __bforget(struct buffer_head *bh) } __brelse(bh); } -EXPORT_SYMBOL(__bforget); +EXPORT_SYMBOL_NS(__bforget, ANDROID_GKI_VFS_EXPORT_ONLY); static struct buffer_head *__bread_slow(struct buffer_head *bh) { @@ -1376,7 +1376,7 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) brelse(bh); } } -EXPORT_SYMBOL(__breadahead); +EXPORT_SYMBOL_NS(__breadahead, ANDROID_GKI_VFS_EXPORT_ONLY); void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size, gfp_t gfp) @@ -1411,7 +1411,7 @@ __bread_gfp(struct block_device *bdev, sector_t block, bh = __bread_slow(bh); return bh; } -EXPORT_SYMBOL(__bread_gfp); +EXPORT_SYMBOL_NS(__bread_gfp, ANDROID_GKI_VFS_EXPORT_ONLY); static void __invalidate_bh_lrus(struct bh_lru *b) { @@ -1569,7 +1569,7 @@ void block_invalidatepage(struct page *page, unsigned int offset, out: return; } -EXPORT_SYMBOL(block_invalidatepage); +EXPORT_SYMBOL_NS(block_invalidatepage, ANDROID_GKI_VFS_EXPORT_ONLY); /* @@ -1605,7 +1605,7 @@ void create_empty_buffers(struct page *page, attach_page_private(page, head); spin_unlock(&page->mapping->private_lock); } -EXPORT_SYMBOL(create_empty_buffers); +EXPORT_SYMBOL_NS(create_empty_buffers, ANDROID_GKI_VFS_EXPORT_ONLY); /** * clean_bdev_aliases: clean a range of buffers in block device @@ -1679,7 +1679,7 @@ unlock_page: break; } } -EXPORT_SYMBOL(clean_bdev_aliases); +EXPORT_SYMBOL_NS(clean_bdev_aliases, ANDROID_GKI_VFS_EXPORT_ONLY); /* * Size is a power-of-two in the range 512..PAGE_SIZE, @@ -1937,7 +1937,7 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) bh = bh->b_this_page; } while (bh != head); } -EXPORT_SYMBOL(page_zero_new_buffers); +EXPORT_SYMBOL_NS(page_zero_new_buffers, ANDROID_GKI_VFS_EXPORT_ONLY); static void iomap_to_bh(struct inode *inode, sector_t block, struct buffer_head *bh, @@ -2271,7 +2271,7 @@ int block_is_partially_uptodate(struct page *page, unsigned long from, return ret; } -EXPORT_SYMBOL(block_is_partially_uptodate); +EXPORT_SYMBOL_NS(block_is_partially_uptodate, ANDROID_GKI_VFS_EXPORT_ONLY); /* * Generic "read page" function for block devices that have the normal @@ -3138,7 +3138,7 @@ void ll_rw_block(int op, int op_flags, int nr, struct buffer_head *bhs[]) unlock_buffer(bh); } } -EXPORT_SYMBOL(ll_rw_block); +EXPORT_SYMBOL_NS(ll_rw_block, ANDROID_GKI_VFS_EXPORT_ONLY); void write_dirty_buffer(struct buffer_head *bh, int op_flags) { @@ -3185,13 +3185,13 @@ int __sync_dirty_buffer(struct buffer_head *bh, int op_flags) } return ret; } -EXPORT_SYMBOL(__sync_dirty_buffer); +EXPORT_SYMBOL_NS(__sync_dirty_buffer, ANDROID_GKI_VFS_EXPORT_ONLY); int sync_dirty_buffer(struct buffer_head *bh) { return __sync_dirty_buffer(bh, REQ_SYNC); } -EXPORT_SYMBOL(sync_dirty_buffer); +EXPORT_SYMBOL_NS(sync_dirty_buffer, ANDROID_GKI_VFS_EXPORT_ONLY); /* * try_to_free_buffers() checks if all the buffers on this particular page diff --git a/fs/cachefiles/main.c b/fs/cachefiles/main.c index ddf0cd58d60cce416b1c00b5bc2d69eca8dc79bb..0649e7e6013410d47e0b11ba3b90a4aff253f052 100644 --- a/fs/cachefiles/main.c +++ b/fs/cachefiles/main.c @@ -28,6 +28,7 @@ MODULE_PARM_DESC(cachefiles_debug, "CacheFiles debugging mask"); MODULE_DESCRIPTION("Mounted-filesystem based cache"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); struct kmem_cache *cachefiles_object_jar; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 48ea95b81df84dbcdbb67664e257a27d02284ed0..d3f67271d3c724f7e22ef0233f304cb4a1976403 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2334,7 +2334,6 @@ static int unsafe_request_wait(struct inode *inode) int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - struct ceph_file_info *fi = file->private_data; struct inode *inode = file->f_mapping->host; struct ceph_inode_info *ci = ceph_inode(inode); u64 flush_tid; @@ -2369,14 +2368,9 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync) if (err < 0) ret = err; - if (errseq_check(&ci->i_meta_err, READ_ONCE(fi->meta_err))) { - spin_lock(&file->f_lock); - err = errseq_check_and_advance(&ci->i_meta_err, - &fi->meta_err); - spin_unlock(&file->f_lock); - if (err < 0) - ret = err; - } + err = file_check_and_advance_wb_err(file); + if (err < 0) + ret = err; out: dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret); return ret; @@ -4365,7 +4359,7 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) { struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb); int bits = (fmode << 1) | 1; - bool is_opened = false; + bool already_opened = false; int i; if (count == 1) @@ -4373,19 +4367,19 @@ void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count) spin_lock(&ci->i_ceph_lock); for (i = 0; i < CEPH_FILE_MODE_BITS; i++) { - if (bits & (1 << i)) - ci->i_nr_by_mode[i] += count; - /* - * If any of the mode ref is larger than 1, + * If any of the mode ref is larger than 0, * that means it has been already opened by * others. Just skip checking the PIN ref. */ - if (i && ci->i_nr_by_mode[i] > 1) - is_opened = true; + if (i && ci->i_nr_by_mode[i]) + already_opened = true; + + if (bits & (1 << i)) + ci->i_nr_by_mode[i] += count; } - if (!is_opened) + if (!already_opened) percpu_counter_inc(&mdsc->metric.opened_inodes); spin_unlock(&ci->i_ceph_lock); } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index f1895f78ab45288f7af5e346dc73449484255cdb..450050801f3b6ef3a1fd094aaeddd0d5d11d8e06 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -233,7 +233,6 @@ static int ceph_init_file_info(struct inode *inode, struct file *file, spin_lock_init(&fi->rw_contexts_lock); INIT_LIST_HEAD(&fi->rw_contexts); - fi->meta_err = errseq_sample(&ci->i_meta_err); fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen); return 0; @@ -578,6 +577,7 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, struct ceph_inode_info *ci = ceph_inode(dir); struct inode *inode; struct timespec64 now; + struct ceph_string *pool_ns; struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb); struct ceph_vino vino = { .ino = req->r_deleg_ino, .snap = CEPH_NOSNAP }; @@ -604,17 +604,35 @@ static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry, in.cap.realm = cpu_to_le64(ci->i_snap_realm->ino); in.cap.flags = CEPH_CAP_FLAG_AUTH; in.ctime = in.mtime = in.atime = iinfo.btime; - in.mode = cpu_to_le32((u32)mode); in.truncate_seq = cpu_to_le32(1); in.truncate_size = cpu_to_le64(-1ULL); in.xattr_version = cpu_to_le64(1); in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid())); - in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_mode & S_ISGID ? - dir->i_gid : current_fsgid())); + if (dir->i_mode & S_ISGID) { + in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid)); + + /* Directories always inherit the setgid bit. */ + if (S_ISDIR(mode)) + mode |= S_ISGID; + else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && + !in_group_p(dir->i_gid) && + !capable_wrt_inode_uidgid(dir, CAP_FSETID)) + mode &= ~S_ISGID; + } else { + in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid())); + } + in.mode = cpu_to_le32((u32)mode); + in.nlink = cpu_to_le32(1); in.max_size = cpu_to_le64(lo->stripe_unit); ceph_file_layout_to_legacy(lo, &in.layout); + /* lo is private, so pool_ns can't change */ + pool_ns = rcu_dereference_raw(lo->pool_ns); + if (pool_ns) { + iinfo.pool_ns_len = pool_ns->len; + iinfo.pool_ns_data = pool_ns->str; + } down_read(&mdsc->snap_rwsem); ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session, @@ -732,8 +750,10 @@ retry: restore_deleg_ino(dir, req->r_deleg_ino); ceph_mdsc_put_request(req); try_async = false; + ceph_put_string(rcu_dereference_raw(lo.pool_ns)); goto retry; } + ceph_put_string(rcu_dereference_raw(lo.pool_ns)); goto out_req; } } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 63e781e4f7e442880844040309a05151d77468c4..76be50f6f041a1e132e8645a2545096c67077307 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -529,8 +529,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) ceph_fscache_inode_init(ci); - ci->i_meta_err = 0; - return &ci->vfs_inode; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 0f57b7d094578d0cf50f11ae63b7a807687029ad..981a915906314b6c259879210e6da65140934ff7 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1481,7 +1481,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, { struct ceph_mds_request *req; struct rb_node *p; - struct ceph_inode_info *ci; dout("cleanup_session_requests mds%d\n", session->s_mds); mutex_lock(&mdsc->mutex); @@ -1490,16 +1489,10 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc, struct ceph_mds_request, r_unsafe_item); pr_warn_ratelimited(" dropping unsafe request %llu\n", req->r_tid); - if (req->r_target_inode) { - /* dropping unsafe change of inode's attributes */ - ci = ceph_inode(req->r_target_inode); - errseq_set(&ci->i_meta_err, -EIO); - } - if (req->r_unsafe_dir) { - /* dropping unsafe directory operation */ - ci = ceph_inode(req->r_unsafe_dir); - errseq_set(&ci->i_meta_err, -EIO); - } + if (req->r_target_inode) + mapping_set_error(req->r_target_inode->i_mapping, -EIO); + if (req->r_unsafe_dir) + mapping_set_error(req->r_unsafe_dir->i_mapping, -EIO); __unregister_request(mdsc, req); } /* zero r_attempts, so kick_requests() will re-send requests */ @@ -1668,7 +1661,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, spin_unlock(&mdsc->cap_dirty_lock); if (dirty_dropped) { - errseq_set(&ci->i_meta_err, -EIO); + mapping_set_error(inode->i_mapping, -EIO); if (ci->i_wrbuffer_ref_head == 0 && ci->i_wr_ref == 0 && @@ -3703,7 +3696,7 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, struct ceph_pagelist *pagelist = recon_state->pagelist; struct dentry *dentry; char *path; - int pathlen, err; + int pathlen = 0, err; u64 pathbase; u64 snap_follows; @@ -3723,7 +3716,6 @@ static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap, } } else { path = NULL; - pathlen = 0; pathbase = 0; } diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 33ba6f0aa55ce7d24bb281d2e6a2d32115fece80..e0562c55384f6ed5c3d198d45f47a383a47807d4 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -52,8 +52,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) struct ceph_fs_client *fsc = ceph_inode_to_client(d_inode(dentry)); struct ceph_mon_client *monc = &fsc->client->monc; struct ceph_statfs st; - u64 fsid; - int err; + int i, err; u64 data_pool; if (fsc->mdsc->mdsmap->m_num_data_pg_pools == 1) { @@ -99,12 +98,14 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_namelen = NAME_MAX; /* Must convert the fsid, for consistent values across arches */ + buf->f_fsid.val[0] = 0; mutex_lock(&monc->mutex); - fsid = le64_to_cpu(*(__le64 *)(&monc->monmap->fsid)) ^ - le64_to_cpu(*((__le64 *)&monc->monmap->fsid + 1)); + for (i = 0 ; i < sizeof(monc->monmap->fsid) / sizeof(__le32) ; ++i) + buf->f_fsid.val[0] ^= le32_to_cpu(((__le32 *)&monc->monmap->fsid)[i]); mutex_unlock(&monc->mutex); - buf->f_fsid = u64_to_fsid(fsid); + /* fold the fs_cluster_id into the upper bits */ + buf->f_fsid.val[1] = monc->fs_cluster_id; return 0; } @@ -997,16 +998,16 @@ static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) struct ceph_fs_client *new = fc->s_fs_info; struct ceph_mount_options *fsopt = new->mount_options; struct ceph_options *opt = new->client->options; - struct ceph_fs_client *other = ceph_sb_to_client(sb); + struct ceph_fs_client *fsc = ceph_sb_to_client(sb); dout("ceph_compare_super %p\n", sb); - if (compare_mount_options(fsopt, opt, other)) { + if (compare_mount_options(fsopt, opt, fsc)) { dout("monitor(s)/mount options don't match\n"); return 0; } if ((opt->flags & CEPH_OPT_FSID) && - ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { + ceph_fsid_compare(&opt->fsid, &fsc->client->fsid)) { dout("fsid doesn't match\n"); return 0; } @@ -1014,6 +1015,17 @@ static int ceph_compare_super(struct super_block *sb, struct fs_context *fc) dout("flags differ\n"); return 0; } + + if (fsc->blocklisted && !ceph_test_mount_opt(fsc, CLEANRECOVER)) { + dout("client is blocklisted (and CLEANRECOVER is not set)\n"); + return 0; + } + + if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { + dout("client has been forcibly unmounted\n"); + return 0; + } + return 1; } @@ -1323,3 +1335,4 @@ MODULE_AUTHOR("Yehuda Sadeh "); MODULE_AUTHOR("Patience Warnick "); MODULE_DESCRIPTION("Ceph filesystem for Linux"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 9362eeb5812d9358359d9f7fec56d5cb6f1f5af1..4db305fd2a02a52ec419381cab9f2714b693d2a5 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -430,8 +430,6 @@ struct ceph_inode_info { struct fscache_cookie *fscache; u32 i_fscache_gen; #endif - errseq_t i_meta_err; - struct inode vfs_inode; /* at end */ }; @@ -773,7 +771,6 @@ struct ceph_file_info { spinlock_t rw_contexts_lock; struct list_head rw_contexts; - errseq_t meta_err; u32 filp_gen; atomic_t num_locks; }; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f0ed29a9a6f11a3e923d4eb61574324cc4482747..8563e7b78c02debc9e18be3b8a0fe289b9571c61 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -864,6 +864,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, out_super: deactivate_locked_super(sb); + return root; out: cifs_cleanup_volume_info(volume_info); return root; @@ -1692,6 +1693,7 @@ exit_cifs(void) MODULE_AUTHOR("Steve French"); MODULE_LICENSE("GPL"); /* combination of LGPL + GPL source behaves as GPL */ +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_DESCRIPTION ("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and " "also older servers complying with the SNIA CIFS Specification)"); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 67139f9d583f2dda3e160004c9cfe139cff57be2..6c06870f90184e03861edac28a74e4e506b03b35 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2618,12 +2618,23 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { server = tcon->ses->server; - if (server->ops->flush) - rc = server->ops->flush(xid, tcon, &smbfile->fid); - else + if (server->ops->flush == NULL) { rc = -ENOSYS; + goto strict_fsync_exit; + } + + if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { + smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); + if (smbfile) { + rc = server->ops->flush(xid, tcon, &smbfile->fid); + cifsFileInfo_put(smbfile); + } else + cifs_dbg(FYI, "ignore fsync for file not open for write\n"); + } else + rc = server->ops->flush(xid, tcon, &smbfile->fid); } +strict_fsync_exit: free_xid(xid); return rc; } @@ -2635,6 +2646,7 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) struct cifs_tcon *tcon; struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; + struct inode *inode = file_inode(file); struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); rc = file_write_and_wait_range(file, start, end); @@ -2651,12 +2663,23 @@ int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) tcon = tlink_tcon(smbfile->tlink); if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { server = tcon->ses->server; - if (server->ops->flush) - rc = server->ops->flush(xid, tcon, &smbfile->fid); - else + if (server->ops->flush == NULL) { rc = -ENOSYS; + goto fsync_exit; + } + + if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { + smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); + if (smbfile) { + rc = server->ops->flush(xid, tcon, &smbfile->fid); + cifsFileInfo_put(smbfile); + } else + cifs_dbg(FYI, "ignore fsync for file not open for write\n"); + } else + rc = server->ops->flush(xid, tcon, &smbfile->fid); } +fsync_exit: free_xid(xid); return rc; } diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ca5102773b72b40949ef1dcf60c3653e4ddef2e8..88554b640b0da9f4b98b092f3c45c078c0a3327d 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -2339,7 +2339,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) buf->sd.OffsetDacl = cpu_to_le32(ptr - (__u8 *)&buf->sd); /* Ship the ACL for now. we will copy it into buf later. */ aclptr = ptr; - ptr += sizeof(struct cifs_acl); + ptr += sizeof(struct smb3_acl); /* create one ACE to hold the mode embedded in reserved special SID */ acelen = setup_special_mode_ACE((struct cifs_ace *)ptr, (__u64)mode); @@ -2364,7 +2364,7 @@ create_sd_buf(umode_t mode, bool set_owner, unsigned int *len) acl.AclRevision = ACL_REVISION; /* See 2.4.4.1 of MS-DTYP */ acl.AclSize = cpu_to_le16(acl_size); acl.AceCount = cpu_to_le16(ace_count); - memcpy(aclptr, &acl, sizeof(struct cifs_acl)); + memcpy(aclptr, &acl, sizeof(struct smb3_acl)); buf->ccontext.DataLength = cpu_to_le32(ptr - (__u8 *)&buf->sd); *len = roundup(ptr - (__u8 *)buf, 8); diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 240669f51eac36d8d6c677170341f835bfc43a8a..897c7cb93f47c80efeb04492b7ef67f74e759c55 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -388,6 +388,7 @@ MODULE_AUTHOR("Jan Harkes, Peter J. Braam"); MODULE_DESCRIPTION("Coda Distributed File System VFS interface"); MODULE_ALIAS_CHARDEV_MAJOR(CODA_PSDEV_MAJOR); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_VERSION("7.0"); static int __init init_coda(void) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index a6d9f222039234645d67e90621dddd9e80b0681e..86bbd25090d50ceaa721cad1844d077e4dc642f8 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -36,6 +36,14 @@ */ DEFINE_SPINLOCK(configfs_dirent_lock); +/* + * All of link_obj/unlink_obj/link_group/unlink_group require that + * subsys->su_mutex is held. + * But parent configfs_subsystem is NULL when config_item is root. + * Use this mutex when config_item is root. + */ +static DEFINE_MUTEX(configfs_subsystem_mutex); + static void configfs_d_iput(struct dentry * dentry, struct inode * inode) { @@ -1820,8 +1828,8 @@ void configfs_unregister_group(struct config_group *group) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); + d_drop(dentry); fsnotify_rmdir(d_inode(parent), dentry); - d_delete(dentry); inode_unlock(d_inode(parent)); dput(dentry); @@ -1899,7 +1907,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) group->cg_item.ci_name = group->cg_item.ci_namebuf; sd = root->d_fsdata; + mutex_lock(&configfs_subsystem_mutex); link_group(to_config_group(sd->s_element), group); + mutex_unlock(&configfs_subsystem_mutex); inode_lock_nested(d_inode(root), I_MUTEX_PARENT); @@ -1924,7 +1934,9 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) inode_unlock(d_inode(root)); if (err) { + mutex_lock(&configfs_subsystem_mutex); unlink_group(group); + mutex_unlock(&configfs_subsystem_mutex); configfs_release_fs(); } put_fragment(frag); @@ -1962,16 +1974,18 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) configfs_detach_group(&group->cg_item); d_inode(dentry)->i_flags |= S_DEAD; dont_mount(dentry); - fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(dentry)); - d_delete(dentry); + d_drop(dentry); + fsnotify_rmdir(d_inode(root), dentry); inode_unlock(d_inode(root)); dput(dentry); + mutex_lock(&configfs_subsystem_mutex); unlink_group(group); + mutex_unlock(&configfs_subsystem_mutex); configfs_release_fs(); } diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 0c6e8cf6195300c706bb04f2be4738a54f0d785f..c01eb7264b4d18c2c3ec1651084f9fa896951819 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -175,6 +175,7 @@ MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); MODULE_VERSION("0.0.2"); MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration."); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); core_initcall(configfs_init); module_exit(configfs_exit); diff --git a/fs/coredump.c b/fs/coredump.c index 4ab1c32db81259e82b2c91d0da696f97adc88aed..c56a3bdce7cd4552168f6d9ac52b2cdb39221ed3 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -900,7 +900,7 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start, stop = !dump_emit(cprm, kaddr, PAGE_SIZE); kunmap(page); - put_user_page(page); + put_page(page); } else { stop = !dump_skip(cprm, PAGE_SIZE); } diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 4b90cfd1ec36035e0ca24914fa35b0d8563a9b8b..6245470112a1d3e70362c86d48788d67ca69a998 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -1010,3 +1010,4 @@ static void __exit exit_cramfs_fs(void) module_init(init_cramfs_fs) module_exit(exit_cramfs_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile index 652c7180ec6de942611ff550652548f05c6af666..d39077502e0620aec31111d6503901f5e4173bbc 100644 --- a/fs/crypto/Makefile +++ b/fs/crypto/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=ANDROID_GKI_VFS_EXPORT_ONLY + fscrypto-y := crypto.o \ fname.o \ hkdf.o \ diff --git a/fs/crypto/bio.c b/fs/crypto/bio.c index b048a0e3851629081eba466cbe85e2d5eb73abb6..d507cc1f12eb94963a5b72435c8aa0167baa2ba6 100644 --- a/fs/crypto/bio.c +++ b/fs/crypto/bio.c @@ -1,23 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* - * This contains encryption functions for per-file encryption. + * Utility functions for file contents encryption/decryption on + * block device-based filesystems. * * Copyright (C) 2015, Google, Inc. * Copyright (C) 2015, Motorola Mobility - * - * Written by Michael Halcrow, 2014. - * - * Filename encryption additions - * Uday Savagaonkar, 2014 - * Encryption policy handling additions - * Ildar Muslukhov, 2014 - * Add fscrypt_pullback_bio_page() - * Jaegeuk Kim, 2015. - * - * This has not yet undergone a rigorous security audit. - * - * The usage of AES-XTS should conform to recommendations in NIST - * Special Publication 800-38E and IEEE P1619/D16. */ #include @@ -26,6 +13,21 @@ #include #include "fscrypt_private.h" +/** + * fscrypt_decrypt_bio() - decrypt the contents of a bio + * @bio: the bio to decrypt + * + * Decrypt the contents of a "read" bio following successful completion of the + * underlying disk read. The bio must be reading a whole number of blocks of an + * encrypted file directly into the page cache. If the bio is reading the + * ciphertext into bounce pages instead of the page cache (for example, because + * the file is also compressed, so decompression is required after decryption), + * then this function isn't applicable. This function may sleep, so it must be + * called from a workqueue rather than from the bio's bi_end_io callback. + * + * This function sets PG_error on any pages that contain any blocks that failed + * to be decrypted. The filesystem must not mark such pages uptodate. + */ void fscrypt_decrypt_bio(struct bio *bio) { struct bio_vec *bv; diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c index bb5b121d182fda7fe3c53d519a2d4a95a2e1d064..ac1d349863550dd9a73ad0e7a6aca5eb49510cd1 100644 --- a/fs/crypto/fname.c +++ b/fs/crypto/fname.c @@ -26,7 +26,7 @@ * it to find the directory entry again if requested. Naively, that would just * mean using the ciphertext filenames. However, since the ciphertext filenames * can contain illegal characters ('\0' and '/'), they must be encoded in some - * way. We use base64. But that can cause names to exceed NAME_MAX (255 + * way. We use base64url. But that can cause names to exceed NAME_MAX (255 * bytes), so we also need to use a strong hash to abbreviate long names. * * The filesystem may also need another kind of hash, the "dirhash", to quickly @@ -38,7 +38,7 @@ * casefolded directories use this type of dirhash. At least in these cases, * each no-key name must include the name's dirhash too. * - * To meet all these requirements, we base64-encode the following + * To meet all these requirements, we base64url-encode the following * variable-length structure. It contains the dirhash, or 0's if the filesystem * didn't provide one; up to 149 bytes of the ciphertext name; and for * ciphertexts longer than 149 bytes, also the SHA-256 of the remaining bytes. @@ -52,15 +52,19 @@ struct fscrypt_nokey_name { u32 dirhash[2]; u8 bytes[149]; u8 sha256[SHA256_DIGEST_SIZE]; -}; /* 189 bytes => 252 bytes base64-encoded, which is <= NAME_MAX (255) */ +}; /* 189 bytes => 252 bytes base64url-encoded, which is <= NAME_MAX (255) */ /* - * Decoded size of max-size nokey name, i.e. a name that was abbreviated using + * Decoded size of max-size no-key name, i.e. a name that was abbreviated using * the strong hash and thus includes the 'sha256' field. This isn't simply * sizeof(struct fscrypt_nokey_name), as the padding at the end isn't included. */ #define FSCRYPT_NOKEY_NAME_MAX offsetofend(struct fscrypt_nokey_name, sha256) +/* Encoded size of max-size no-key name */ +#define FSCRYPT_NOKEY_NAME_MAX_ENCODED \ + FSCRYPT_BASE64URL_CHARS(FSCRYPT_NOKEY_NAME_MAX) + static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) { if (str->len == 1 && str->name[0] == '.') @@ -175,62 +179,82 @@ static int fname_decrypt(const struct inode *inode, return 0; } -static const char lookup_table[65] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,"; +static const char base64url_table[65] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; -#define BASE64_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3) +#define FSCRYPT_BASE64URL_CHARS(nbytes) DIV_ROUND_UP((nbytes) * 4, 3) /** - * base64_encode() - base64-encode some bytes - * @src: the bytes to encode - * @len: number of bytes to encode - * @dst: (output) the base64-encoded string. Not NUL-terminated. + * fscrypt_base64url_encode() - base64url-encode some binary data + * @src: the binary data to encode + * @srclen: the length of @src in bytes + * @dst: (output) the base64url-encoded string. Not NUL-terminated. * - * Encodes the input string using characters from the set [A-Za-z0-9+,]. - * The encoded string is roughly 4/3 times the size of the input string. + * Encodes data using base64url encoding, i.e. the "Base 64 Encoding with URL + * and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't used, + * as it's unneeded and not required by the RFC. base64url is used instead of + * base64 to avoid the '/' character, which isn't allowed in filenames. * - * Return: length of the encoded string + * Return: the length of the resulting base64url-encoded string in bytes. + * This will be equal to FSCRYPT_BASE64URL_CHARS(srclen). */ -static int base64_encode(const u8 *src, int len, char *dst) +static int fscrypt_base64url_encode(const u8 *src, int srclen, char *dst) { - int i, bits = 0, ac = 0; + u32 ac = 0; + int bits = 0; + int i; char *cp = dst; - for (i = 0; i < len; i++) { - ac += src[i] << bits; + for (i = 0; i < srclen; i++) { + ac = (ac << 8) | src[i]; bits += 8; do { - *cp++ = lookup_table[ac & 0x3f]; - ac >>= 6; bits -= 6; + *cp++ = base64url_table[(ac >> bits) & 0x3f]; } while (bits >= 6); } if (bits) - *cp++ = lookup_table[ac & 0x3f]; + *cp++ = base64url_table[(ac << (6 - bits)) & 0x3f]; return cp - dst; } -static int base64_decode(const char *src, int len, u8 *dst) +/** + * fscrypt_base64url_decode() - base64url-decode a string + * @src: the string to decode. Doesn't need to be NUL-terminated. + * @srclen: the length of @src in bytes + * @dst: (output) the decoded binary data + * + * Decodes a string using base64url encoding, i.e. the "Base 64 Encoding with + * URL and Filename Safe Alphabet" specified by RFC 4648. '='-padding isn't + * accepted, nor are non-encoding characters such as whitespace. + * + * This implementation hasn't been optimized for performance. + * + * Return: the length of the resulting decoded binary data in bytes, + * or -1 if the string isn't a valid base64url string. + */ +static int fscrypt_base64url_decode(const char *src, int srclen, u8 *dst) { - int i, bits = 0, ac = 0; - const char *p; - u8 *cp = dst; + u32 ac = 0; + int bits = 0; + int i; + u8 *bp = dst; + + for (i = 0; i < srclen; i++) { + const char *p = strchr(base64url_table, src[i]); - for (i = 0; i < len; i++) { - p = strchr(lookup_table, src[i]); if (p == NULL || src[i] == 0) - return -2; - ac += (p - lookup_table) << bits; + return -1; + ac = (ac << 6) | (p - base64url_table); bits += 6; if (bits >= 8) { - *cp++ = ac & 0xff; - ac >>= 8; bits -= 8; + *bp++ = (u8)(ac >> bits); } } - if (ac) + if (ac & ((1 << bits) - 1)) return -1; - return cp - dst; + return bp - dst; } bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy, @@ -263,10 +287,8 @@ bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy, int fscrypt_fname_alloc_buffer(u32 max_encrypted_len, struct fscrypt_str *crypto_str) { - const u32 max_encoded_len = BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX); - u32 max_presented_len; - - max_presented_len = max(max_encoded_len, max_encrypted_len); + u32 max_presented_len = max_t(u32, FSCRYPT_NOKEY_NAME_MAX_ENCODED, + max_encrypted_len); crypto_str->name = kmalloc(max_presented_len + 1, GFP_NOFS); if (!crypto_str->name) @@ -342,7 +364,7 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, offsetof(struct fscrypt_nokey_name, bytes)); BUILD_BUG_ON(offsetofend(struct fscrypt_nokey_name, bytes) != offsetof(struct fscrypt_nokey_name, sha256)); - BUILD_BUG_ON(BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX) > NAME_MAX); + BUILD_BUG_ON(FSCRYPT_NOKEY_NAME_MAX_ENCODED > NAME_MAX); nokey_name.dirhash[0] = hash; nokey_name.dirhash[1] = minor_hash; @@ -358,7 +380,8 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode, nokey_name.sha256); size = FSCRYPT_NOKEY_NAME_MAX; } - oname->len = base64_encode((const u8 *)&nokey_name, size, oname->name); + oname->len = fscrypt_base64url_encode((const u8 *)&nokey_name, size, + oname->name); return 0; } EXPORT_SYMBOL(fscrypt_fname_disk_to_usr); @@ -406,8 +429,7 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, if (fscrypt_has_encryption_key(dir)) { if (!fscrypt_fname_encrypted_size(&dir->i_crypt_info->ci_policy, - iname->len, - dir->i_sb->s_cop->max_namelen, + iname->len, NAME_MAX, &fname->crypto_buf.len)) return -ENAMETOOLONG; fname->crypto_buf.name = kmalloc(fname->crypto_buf.len, @@ -432,14 +454,15 @@ int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, * user-supplied name */ - if (iname->len > BASE64_CHARS(FSCRYPT_NOKEY_NAME_MAX)) + if (iname->len > FSCRYPT_NOKEY_NAME_MAX_ENCODED) return -ENOENT; fname->crypto_buf.name = kmalloc(FSCRYPT_NOKEY_NAME_MAX, GFP_KERNEL); if (fname->crypto_buf.name == NULL) return -ENOMEM; - ret = base64_decode(iname->name, iname->len, fname->crypto_buf.name); + ret = fscrypt_base64url_decode(iname->name, iname->len, + fname->crypto_buf.name); if (ret < (int)offsetof(struct fscrypt_nokey_name, bytes[1]) || (ret > offsetof(struct fscrypt_nokey_name, sha256) && ret != FSCRYPT_NOKEY_NAME_MAX)) { diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h index a455e29e807e9bf78368d45cfc47a8aea6e0fa81..bf75ecda9459f9b9c43ac7992c711b45ce7259b4 100644 --- a/fs/crypto/fscrypt_private.h +++ b/fs/crypto/fscrypt_private.h @@ -20,6 +20,11 @@ #define FSCRYPT_FILE_NONCE_SIZE 16 +/* + * Minimum size of an fscrypt master key. Note: a longer key will be required + * if ciphers with a 256-bit security strength are used. This is just the + * absolute minimum, which applies when only 128-bit encryption is used. + */ #define FSCRYPT_MIN_KEY_SIZE 16 #define FSCRYPT_MAX_HW_WRAPPED_KEY_SIZE 128 @@ -437,7 +442,11 @@ struct fscrypt_master_key_secret { */ struct fscrypt_hkdf hkdf; - /* Size of the raw key in bytes. Set even if ->raw isn't set. */ + /* + * Size of the raw key in bytes. This remains set even if ->raw was + * zeroized due to no longer being needed. I.e. we still remember the + * size of the key even if we don't need to remember the key itself. + */ u32 size; /* True if the key in ->raw is a hardware-wrapped key. */ @@ -580,8 +589,9 @@ int __init fscrypt_init_keyring(void); struct fscrypt_mode { const char *friendly_name; const char *cipher_str; - int keysize; - int ivsize; + int keysize; /* key size in bytes */ + int security_strength; /* security strength in bytes */ + int ivsize; /* IV size in bytes */ int logged_impl_name; enum blk_crypto_mode_num blk_crypto_mode; }; diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c index 0cba7928446d34732886ff9cdb7b4c5c9bc81571..24172bf3e8c6f82ae73da0825ea1cc3410346835 100644 --- a/fs/crypto/hkdf.c +++ b/fs/crypto/hkdf.c @@ -16,9 +16,14 @@ /* * HKDF supports any unkeyed cryptographic hash algorithm, but fscrypt uses - * SHA-512 because it is reasonably secure and efficient; and since it produces - * a 64-byte digest, deriving an AES-256-XTS key preserves all 64 bytes of - * entropy from the master key and requires only one iteration of HKDF-Expand. + * SHA-512 because it is well-established, secure, and reasonably efficient. + * + * HKDF-SHA256 was also considered, as its 256-bit security strength would be + * sufficient here. A 512-bit security strength is "nice to have", though. + * Also, on 64-bit CPUs, SHA-512 is usually just as fast as SHA-256. In the + * common case of deriving an AES-256-XTS key (512 bits), that can result in + * HKDF-SHA512 being much faster than HKDF-SHA256, as the longer digest size of + * SHA-512 causes HKDF-Expand to only need to do one iteration rather than two. */ #define HKDF_HMAC_ALG "hmac(sha512)" #define HKDF_HASHLEN SHA512_DIGEST_SIZE diff --git a/fs/crypto/inline_crypt.c b/fs/crypto/inline_crypt.c index 7df5877fac9a9efbd899ac0be489bf78773268ce..68ab6830767a49dbbdc8cfac890b8617e78b4e6f 100644 --- a/fs/crypto/inline_crypt.c +++ b/fs/crypto/inline_crypt.c @@ -345,6 +345,10 @@ EXPORT_SYMBOL_GPL(fscrypt_set_bio_crypt_ctx_bh); * * fscrypt_set_bio_crypt_ctx() must have already been called on the bio. * + * This function isn't required in cases where crypto-mergeability is ensured in + * another way, such as I/O targeting only a single file (and thus a single key) + * combined with fscrypt_limit_io_blocks() to ensure DUN contiguity. + * * This function also returns false if the next part of the I/O would need to * have a different value for the bi_skip_dm_default_key flag. * @@ -402,12 +406,15 @@ bool fscrypt_mergeable_bio_bh(struct bio *bio, EXPORT_SYMBOL_GPL(fscrypt_mergeable_bio_bh); /** - * fscrypt_dio_supported() - check whether a direct I/O request is unsupported - * due to encryption constraints + * fscrypt_dio_supported() - check whether a DIO (direct I/O) request is + * supported as far as encryption is concerned * @iocb: the file and position the I/O is targeting * @iter: the I/O data segment(s) * - * Return: true if direct I/O is supported + * Return: %true if there are no encryption constraints that prevent DIO from + * being supported; %false if DIO is unsupported. (Note that in the + * %true case, the filesystem might have other, non-encryption-related + * constraints that prevent DIO from actually being supported.) */ bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter) { @@ -418,13 +425,22 @@ bool fscrypt_dio_supported(struct kiocb *iocb, struct iov_iter *iter) if (!fscrypt_needs_contents_encryption(inode)) return true; - /* We only support direct I/O with inline crypto, not fs-layer crypto */ + /* We only support DIO with inline crypto, not fs-layer crypto. */ if (!fscrypt_inode_uses_inline_crypto(inode)) return false; /* - * Since the granularity of encryption is filesystem blocks, the I/O - * must be block aligned -- not just disk sector aligned. + * Since the granularity of encryption is filesystem blocks, the file + * position and total I/O length must be aligned to the filesystem block + * size -- not just to the block device's logical block size as is + * traditionally the case for DIO on many filesystems. + * + * We require that the user-provided memory buffers be filesystem block + * aligned too. It is simpler to have a single alignment value required + * for all properties of the I/O, as is normally the case for DIO. + * Also, allowing less aligned buffers would imply that data units could + * cross bvecs, which would greatly complicate the I/O stack, which + * assumes that bios can be split at any bvec boundary. */ if (!IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), blocksize)) return false; @@ -437,24 +453,25 @@ EXPORT_SYMBOL_GPL(fscrypt_dio_supported); * fscrypt_limit_io_blocks() - limit I/O blocks to avoid discontiguous DUNs * @inode: the file on which I/O is being done * @lblk: the block at which the I/O is being started from - * @nr_blocks: the number of blocks we want to submit starting at @pos + * @nr_blocks: the number of blocks we want to submit starting at @lblk * - * Determine the limit to the number of blocks that can be submitted in the bio - * targeting @pos without causing a data unit number (DUN) discontinuity. + * Determine the limit to the number of blocks that can be submitted in a bio + * targeting @lblk without causing a data unit number (DUN) discontiguity. * * This is normally just @nr_blocks, as normally the DUNs just increment along * with the logical blocks. (Or the file is not encrypted.) * * In rare cases, fscrypt can be using an IV generation method that allows the - * DUN to wrap around within logically continuous blocks, and that wraparound + * DUN to wrap around within logically contiguous blocks, and that wraparound * will occur. If this happens, a value less than @nr_blocks will be returned - * so that the wraparound doesn't occur in the middle of the bio. + * so that the wraparound doesn't occur in the middle of a bio, which would + * cause encryption/decryption to produce wrong results. * * Return: the actual number of blocks that can be submitted */ u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks) { - const struct fscrypt_info *ci = inode->i_crypt_info; + const struct fscrypt_info *ci; u32 dun; if (!fscrypt_inode_uses_inline_crypto(inode)) @@ -463,6 +480,7 @@ u64 fscrypt_limit_io_blocks(const struct inode *inode, u64 lblk, u64 nr_blocks) if (nr_blocks <= 1) return nr_blocks; + ci = inode->i_crypt_info; if (!(fscrypt_policy_flags(&ci->ci_policy) & FSCRYPT_POLICY_FLAG_IV_INO_LBLK_32)) return nr_blocks; diff --git a/fs/crypto/keysetup.c b/fs/crypto/keysetup.c index 32dc97e908fd1d771471c1ada7bd3672d2c34cde..bec2915effe587a3d0583fd5ba737b8aec7b1e60 100644 --- a/fs/crypto/keysetup.c +++ b/fs/crypto/keysetup.c @@ -19,6 +19,7 @@ struct fscrypt_mode fscrypt_modes[] = { .friendly_name = "AES-256-XTS", .cipher_str = "xts(aes)", .keysize = 64, + .security_strength = 32, .ivsize = 16, .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_256_XTS, }, @@ -26,12 +27,14 @@ struct fscrypt_mode fscrypt_modes[] = { .friendly_name = "AES-256-CTS-CBC", .cipher_str = "cts(cbc(aes))", .keysize = 32, + .security_strength = 32, .ivsize = 16, }, [FSCRYPT_MODE_AES_128_CBC] = { .friendly_name = "AES-128-CBC-ESSIV", .cipher_str = "essiv(cbc(aes),sha256)", .keysize = 16, + .security_strength = 16, .ivsize = 16, .blk_crypto_mode = BLK_ENCRYPTION_MODE_AES_128_CBC_ESSIV, }, @@ -39,12 +42,14 @@ struct fscrypt_mode fscrypt_modes[] = { .friendly_name = "AES-128-CTS-CBC", .cipher_str = "cts(cbc(aes))", .keysize = 16, + .security_strength = 16, .ivsize = 16, }, [FSCRYPT_MODE_ADIANTUM] = { .friendly_name = "Adiantum", .cipher_str = "adiantum(xchacha12,aes)", .keysize = 32, + .security_strength = 32, .ivsize = 32, .blk_crypto_mode = BLK_ENCRYPTION_MODE_ADIANTUM, }, @@ -117,8 +122,9 @@ err_free_tfm: /* * Prepare the crypto transform object or blk-crypto key in @prep_key, given the - * raw key, encryption mode, and flag indicating which encryption implementation - * (fs-layer or blk-crypto) will be used. + * raw key, encryption mode (@ci->ci_mode), flag indicating which encryption + * implementation (fs-layer or blk-crypto) will be used (@ci->ci_inlinecrypt), + * and IV generation method (@ci->ci_policy.flags). */ int fscrypt_prepare_key(struct fscrypt_prepared_key *prep_key, const u8 *raw_key, unsigned int raw_key_size, @@ -396,6 +402,45 @@ static int fscrypt_setup_v2_file_key(struct fscrypt_info *ci, return 0; } +/* + * Check whether the size of the given master key (@mk) is appropriate for the + * encryption settings which a particular file will use (@ci). + * + * If the file uses a v1 encryption policy, then the master key must be at least + * as long as the derived key, as this is a requirement of the v1 KDF. + * + * Otherwise, the KDF can accept any size key, so we enforce a slightly looser + * requirement: we require that the size of the master key be at least the + * maximum security strength of any algorithm whose key will be derived from it + * (but in practice we only need to consider @ci->ci_mode, since any other + * possible subkeys such as DIRHASH and INODE_HASH will never increase the + * required key size over @ci->ci_mode). This allows AES-256-XTS keys to be + * derived from a 256-bit master key, which is cryptographically sufficient, + * rather than requiring a 512-bit master key which is unnecessarily long. (We + * still allow 512-bit master keys if the user chooses to use them, though.) + */ +static bool fscrypt_valid_master_key_size(const struct fscrypt_master_key *mk, + const struct fscrypt_info *ci) +{ + unsigned int min_keysize; + + if (ci->ci_policy.version == FSCRYPT_POLICY_V1) + min_keysize = ci->ci_mode->keysize; + else + min_keysize = ci->ci_mode->security_strength; + + if (mk->mk_secret.size < min_keysize) { + fscrypt_warn(NULL, + "key with %s %*phN is too short (got %u bytes, need %u+ bytes)", + master_key_spec_type(&mk->mk_spec), + master_key_spec_len(&mk->mk_spec), + (u8 *)&mk->mk_spec.u, + mk->mk_secret.size, min_keysize); + return false; + } + return true; +} + /* * Find the master key, then set up the inode's actual encryption key. * @@ -461,18 +506,7 @@ static int setup_file_encryption_key(struct fscrypt_info *ci, goto out_release_key; } - /* - * Require that the master key be at least as long as the derived key. - * Otherwise, the derived key cannot possibly contain as much entropy as - * that required by the encryption mode it will be used for. For v1 - * policies it's also required for the KDF to work at all. - */ - if (mk->mk_secret.size < ci->ci_mode->keysize) { - fscrypt_warn(NULL, - "key with %s %*phN is too short (got %u bytes, need %u+ bytes)", - master_key_spec_type(&mk_spec), - master_key_spec_len(&mk_spec), (u8 *)&mk_spec.u, - mk->mk_secret.size, ci->ci_mode->keysize); + if (!fscrypt_valid_master_key_size(mk, ci)) { err = -ENOKEY; goto out_release_key; } diff --git a/fs/dcache.c b/fs/dcache.c index ea0485861d9377311a858359ab1183a1bae4e042..cb588cee9669af0664ca8589076d73e598ea57be 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2110,7 +2110,7 @@ struct dentry *d_obtain_alias(struct inode *inode) { return __d_obtain_alias(inode, true); } -EXPORT_SYMBOL(d_obtain_alias); +EXPORT_SYMBOL_NS(d_obtain_alias, ANDROID_GKI_VFS_EXPORT_ONLY); /** * d_obtain_root - find or allocate a dentry for a given inode @@ -2184,7 +2184,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, } return found; } -EXPORT_SYMBOL(d_add_ci); +EXPORT_SYMBOL_NS(d_add_ci, ANDROID_GKI_VFS_EXPORT_ONLY); static inline bool d_same_name(const struct dentry *dentry, @@ -3065,7 +3065,7 @@ out: __d_add(dentry, inode); return NULL; } -EXPORT_SYMBOL(d_splice_alias); +EXPORT_SYMBOL_NS(d_splice_alias, ANDROID_GKI_VFS_EXPORT_ONLY); /* * Test whether new_dentry is a subdirectory of old_dentry. diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 3aa5eb9ce498e46b89ef40e7cc900378de3bb29a..96059af28f5084827dceda7aaac2bb8094fe36cc 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -147,7 +147,7 @@ static int debugfs_locked_down(struct inode *inode, struct file *filp, const struct file_operations *real_fops) { - if ((inode->i_mode & 07777) == 0444 && + if ((inode->i_mode & 07777 & ~0444) == 0 && !(filp->f_mode & FMODE_WRITE) && !real_fops->unlocked_ioctl && !real_fops->compat_ioctl && diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 720d65f224f090217df390801682dffa8998f2e2..848e0aaa8da5d6c94551b7ac5d2c08faadc01622 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -524,7 +524,7 @@ void debugfs_create_file_size(const char *name, umode_t mode, { struct dentry *de = debugfs_create_file(name, mode, parent, data, fops); - if (de) + if (!IS_ERR(de)) d_inode(de)->i_size = file_size; } EXPORT_SYMBOL_GPL(debugfs_create_file_size); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 42e5a766d33c7d1fea3ca9c05be73639c38c421e..4f25015aa5342a332c76ad7d931a507784eea764 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -621,8 +621,8 @@ void devpts_pty_kill(struct dentry *dentry) dentry->d_fsdata = NULL; drop_nlink(dentry->d_inode); - fsnotify_unlink(d_inode(dentry->d_parent), dentry); d_drop(dentry); + fsnotify_unlink(d_inode(dentry->d_parent), dentry); dput(dentry); /* d_alloc_name() in devpts_pty_new() */ } diff --git a/fs/direct-io.c b/fs/direct-io.c index a70fc3267dadb90a80e9b92b4c5b1c17121a298f..771017cf8fcc96a1f8b25f0db767d41824b43966 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -393,7 +392,6 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, sector_t first_sector, int nr_vecs) { struct bio *bio; - struct inode *inode = dio->inode; /* * bio_alloc() is guaranteed to return a bio when allowed to sleep and @@ -401,9 +399,6 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio, */ bio = bio_alloc(GFP_KERNEL, nr_vecs); - fscrypt_set_bio_crypt_ctx(bio, inode, - sdio->cur_page_fs_offset >> inode->i_blkbits, - GFP_KERNEL); bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = first_sector; bio_set_op_attrs(bio, dio->op, dio->op_flags); @@ -698,7 +693,7 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio, if (ret) goto out; sector = start_sector << (sdio->blkbits - 9); - nr_pages = min(sdio->pages_in_io, BIO_MAX_PAGES); + nr_pages = bio_max_segs(sdio->pages_in_io); BUG_ON(nr_pages <= 0); dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages); sdio->boundary = 0; @@ -768,17 +763,9 @@ static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio, * current logical offset in the file does not equal what would * be the next logical offset in the bio, submit the bio we * have. - * - * When fscrypt inline encryption is used, data unit number - * (DUN) contiguity is also required. Normally that's implied - * by logical contiguity. However, certain IV generation - * methods (e.g. IV_INO_LBLK_32) don't guarantee it. So, we - * must explicitly check fscrypt_mergeable_bio() too. */ if (sdio->final_block_in_bio != sdio->cur_page_block || - cur_offset != bio_next_offset || - !fscrypt_mergeable_bio(sdio->bio, dio->inode, - cur_offset >> dio->inode->i_blkbits)) + cur_offset != bio_next_offset) dio_bio_submit(dio, sdio); } @@ -1380,7 +1367,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, end_io, submit_io, flags); } -EXPORT_SYMBOL(__blockdev_direct_IO); +EXPORT_SYMBOL_NS(__blockdev_direct_IO, ANDROID_GKI_VFS_EXPORT_ONLY); static __init int dio_init(void) { diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 002123efc6b052346ee8c7093789fbbb08929bd1..1e9d8999b9390a32c4dcbf217a5b1be93d183885 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -3975,6 +3975,14 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) int from = ms->m_header.h_nodeid; int error = 0; + /* currently mixing of user/kernel locks are not supported */ + if (ms->m_flags & DLM_IFL_USER && ~lkb->lkb_flags & DLM_IFL_USER) { + log_error(lkb->lkb_resource->res_ls, + "got user dlm message for a kernel lock"); + error = -EINVAL; + goto out; + } + switch (ms->m_type) { case DLM_MSG_CONVERT: case DLM_MSG_UNLOCK: @@ -4003,6 +4011,7 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms) error = -EINVAL; } +out: if (error) log_error(lkb->lkb_resource->res_ls, "ignore invalid message %d from %d %x %x %x %d", diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 0c78fdfb1f6faaec81253a6535055cef23a0acc8..68b765369c928fcda16b0a92f61910a96d9b6dd1 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -471,8 +471,8 @@ int dlm_lowcomms_connect_node(int nodeid) static void lowcomms_error_report(struct sock *sk) { struct connection *con; - struct sockaddr_storage saddr; void (*orig_report)(struct sock *) = NULL; + struct inet_sock *inet; read_lock_bh(&sk->sk_callback_lock); con = sock2con(sk); @@ -480,34 +480,33 @@ static void lowcomms_error_report(struct sock *sk) goto out; orig_report = listen_sock.sk_error_report; - if (con->sock == NULL || - kernel_getpeername(con->sock, (struct sockaddr *)&saddr) < 0) { - printk_ratelimited(KERN_ERR "dlm: node %d: socket error " - "sending to node %d, port %d, " - "sk_err=%d/%d\n", dlm_our_nodeid(), - con->nodeid, dlm_config.ci_tcp_port, - sk->sk_err, sk->sk_err_soft); - } else if (saddr.ss_family == AF_INET) { - struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr; + inet = inet_sk(sk); + switch (sk->sk_family) { + case AF_INET: printk_ratelimited(KERN_ERR "dlm: node %d: socket error " - "sending to node %d at %pI4, port %d, " + "sending to node %d at %pI4, dport %d, " "sk_err=%d/%d\n", dlm_our_nodeid(), - con->nodeid, &sin4->sin_addr.s_addr, - dlm_config.ci_tcp_port, sk->sk_err, + con->nodeid, &inet->inet_daddr, + ntohs(inet->inet_dport), sk->sk_err, sk->sk_err_soft); - } else { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr; - + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: printk_ratelimited(KERN_ERR "dlm: node %d: socket error " - "sending to node %d at %u.%u.%u.%u, " - "port %d, sk_err=%d/%d\n", dlm_our_nodeid(), - con->nodeid, sin6->sin6_addr.s6_addr32[0], - sin6->sin6_addr.s6_addr32[1], - sin6->sin6_addr.s6_addr32[2], - sin6->sin6_addr.s6_addr32[3], - dlm_config.ci_tcp_port, sk->sk_err, + "sending to node %d at %pI6c, " + "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(), + con->nodeid, &sk->sk_v6_daddr, + ntohs(inet->inet_dport), sk->sk_err, sk->sk_err_soft); + break; +#endif + default: + printk_ratelimited(KERN_ERR "dlm: node %d: socket error " + "invalid socket family %d set, " + "sk_err=%d/%d\n", dlm_our_nodeid(), + sk->sk_family, sk->sk_err, sk->sk_err_soft); + goto out; } out: read_unlock_bh(&sk->sk_callback_lock); diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index b2f6a1937d2390953289b8f02ef8d9170c54c1c9..eb12d38d000dfb0235cd13d3d53efda17ef54bf7 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -891,6 +891,7 @@ MODULE_AUTHOR("Michael A. Halcrow "); MODULE_DESCRIPTION("eCryptfs"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(ecryptfs_init) module_exit(ecryptfs_exit) diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 15880a68faadc58ba34d1a3f68ac412416202f2e..b62aefe3b4b8296c5e72962141dd8499b01e1fbf 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -272,6 +272,7 @@ static __exit void efivarfs_exit(void) MODULE_AUTHOR("Matthew Garrett, Jeremy Kerr"); MODULE_DESCRIPTION("EFI Variable Filesystem"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_ALIAS_FS("efivarfs"); module_init(efivarfs_init); diff --git a/fs/efs/inode.c b/fs/efs/inode.c index 89e73a6f0d361c890cd57c7db33888c4df6a37e9..8c0ecaa62de2b474e538e06e88ed91e82add9473 100644 --- a/fs/efs/inode.c +++ b/fs/efs/inode.c @@ -311,3 +311,4 @@ efs_block_t efs_map_block(struct inode *inode, efs_block_t block) { } MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig index 74b0aaa7114c238788d9e78ad9a0c9439b368f79..f57255ab88ed4a0858ed10c96aea532a887c248c 100644 --- a/fs/erofs/Kconfig +++ b/fs/erofs/Kconfig @@ -3,18 +3,25 @@ config EROFS_FS tristate "EROFS filesystem support" depends on BLOCK + select FS_IOMAP select LIBCRC32C help - EROFS (Enhanced Read-Only File System) is a lightweight - read-only file system with modern designs (eg. page-sized - blocks, inline xattrs/data, etc.) for scenarios which need - high-performance read-only requirements, e.g. Android OS - for mobile phones and LIVECDs. - - It also provides fixed-sized output compression support, - which improves storage density, keeps relatively higher - compression ratios, which is more useful to achieve high - performance for embedded devices with limited memory. + EROFS (Enhanced Read-Only File System) is a lightweight read-only + file system with modern designs (e.g. no buffer heads, inline + xattrs/data, chunk-based deduplication, multiple devices, etc.) for + scenarios which need high-performance read-only solutions, e.g. + smartphones with Android OS, LiveCDs and high-density hosts with + numerous containers; + + It also provides fixed-sized output compression support in order to + improve storage density as well as keep relatively higher compression + ratios and implements in-place decompression to reuse the file page + for compressed data temporarily with proper strategies, which is + quite useful to ensure guaranteed end-to-end runtime decompression + performance under extremely memory pressure without extra cost. + + See the documentation at + for more details. If unsure, say N. @@ -76,17 +83,18 @@ config EROFS_FS_ZIP If you don't want to enable compression feature, say N. -config EROFS_FS_CLUSTER_PAGE_LIMIT - int "EROFS Cluster Pages Hard Limit" +config EROFS_FS_ZIP_LZMA + bool "EROFS LZMA compressed data support" depends on EROFS_FS_ZIP - range 1 256 - default "1" + select XZ_DEC + select XZ_DEC_MICROLZMA help - Indicates maximum # of pages of a compressed - physical cluster. + Saying Y here includes support for reading EROFS file systems + containing LZMA compressed data, specifically called microLZMA. it + gives better compression ratios than the LZ4 algorithm, at the + expense of more CPU overhead. - For example, if files in a image were compressed - into 8k-unit, hard limit should not be configured - less than 2. Otherwise, the image will be refused - to mount on this kernel. + LZMA support is an experimental feature for now and so most file + systems will be readable without selecting this option. + If unsure, say N. diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile index 46f2aa4ba46c226e2b85aa2f82aefa508b586a1a..8a3317e38e5a8a12ca6aeb07eb47123d1a074f47 100644 --- a/fs/erofs/Makefile +++ b/fs/erofs/Makefile @@ -1,11 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only -EROFS_VERSION = "1.0" - -ccflags-y += -DEROFS_VERSION=\"$(EROFS_VERSION)\" - obj-$(CONFIG_EROFS_FS) += erofs.o -erofs-objs := super.o inode.o data.o namei.o dir.o utils.o +erofs-objs := super.o inode.o data.o namei.o dir.o utils.o pcpubuf.o sysfs.o erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o - +erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index aea129ddda74bff6673ef07b06fbfaf8c36f2ace..5794065049190500764004360e9476f3d829aaaf 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -2,18 +2,12 @@ /* * Copyright (C) 2019 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #ifndef __EROFS_FS_COMPRESS_H #define __EROFS_FS_COMPRESS_H #include "internal.h" -enum { - Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, - Z_EROFS_COMPRESSION_RUNTIME_MAX -}; - struct z_erofs_decompress_req { struct super_block *sb; struct page **in, **out; @@ -26,6 +20,12 @@ struct z_erofs_decompress_req { bool inplace_io, partial_decoding; }; +struct z_erofs_decompressor { + int (*decompress)(struct z_erofs_decompress_req *rq, + struct page **pagepool); + char *name; +}; + /* some special page->private (unsigned long, see below) */ #define Z_EROFS_SHORTLIVED_PAGE (-1UL << 2) #define Z_EROFS_PREALLOCATED_PAGE (-2UL << 2) @@ -64,7 +64,7 @@ static inline bool z_erofs_is_shortlived_page(struct page *page) return true; } -static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool, +static inline bool z_erofs_put_shortlivedpage(struct page **pagepool, struct page *page) { if (!z_erofs_is_shortlived_page(page)) @@ -75,14 +75,22 @@ static inline bool z_erofs_put_shortlivedpage(struct list_head *pagepool, put_page(page); } else { /* follow the pcluster rule above. */ - set_page_private(page, 0); - list_add(&page->lru, pagepool); + erofs_pagepool_add(pagepool, page); } return true; } +#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) +static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, + struct page *page) +{ + return page->mapping == MNGD_MAPPING(sbi); +} + int z_erofs_decompress(struct z_erofs_decompress_req *rq, - struct list_head *pagepool); + struct page **pagepool); +/* prototypes for specific algorithms */ +int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, + struct page **pagepool); #endif - diff --git a/fs/erofs/data.c b/fs/erofs/data.c index ea4f693bee2247327f870af0e6ba3da77b487e58..69454382f680af82b167a86fb2f2a7089db060c1 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -2,36 +2,14 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang + * Copyright (C) 2021, Alibaba Cloud */ #include "internal.h" #include - +#include +#include #include -static void erofs_readendio(struct bio *bio) -{ - struct bio_vec *bvec; - blk_status_t err = bio->bi_status; - struct bvec_iter_all iter_all; - - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; - - /* page is already locked */ - DBG_BUGON(PageUptodate(page)); - - if (err) - SetPageError(page); - else - SetPageUptodate(page); - - unlock_page(page); - /* page could be reclaimed now */ - } - bio_put(bio); -} - struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr) { struct address_space *const mapping = sb->s_bdev->bd_inode->i_mapping; @@ -60,13 +38,6 @@ static int erofs_map_blocks_flatmode(struct inode *inode, nblocks = DIV_ROUND_UP(inode->i_size, PAGE_SIZE); lastblk = nblocks - tailendpacking; - if (offset >= inode->i_size) { - /* leave out-of-bound access unmapped */ - map->m_flags = 0; - map->m_plen = 0; - goto out; - } - /* there is no hole in flatmode */ map->m_flags = EROFS_MAP_MAPPED; @@ -101,241 +72,320 @@ static int erofs_map_blocks_flatmode(struct inode *inode, goto err_out; } -out: map->m_llen = map->m_plen; - err_out: trace_erofs_map_blocks_flatmode_exit(inode, map, flags, 0); return err; } -int erofs_map_blocks(struct inode *inode, - struct erofs_map_blocks *map, int flags) +static int erofs_map_blocks(struct inode *inode, + struct erofs_map_blocks *map, int flags) { - if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) { - int err = z_erofs_map_blocks_iter(inode, map, flags); + struct super_block *sb = inode->i_sb; + struct erofs_inode *vi = EROFS_I(inode); + struct erofs_inode_chunk_index *idx; + struct page *page; + u64 chunknr; + unsigned int unit; + erofs_off_t pos; + int err = 0; - if (map->mpage) { - put_page(map->mpage); - map->mpage = NULL; - } - return err; + map->m_deviceid = 0; + if (map->m_la >= inode->i_size) { + /* leave out-of-bound access unmapped */ + map->m_flags = 0; + map->m_plen = 0; + goto out; } - return erofs_map_blocks_flatmode(inode, map, flags); -} -static inline struct bio *erofs_read_raw_page(struct bio *bio, - struct address_space *mapping, - struct page *page, - erofs_off_t *last_block, - unsigned int nblocks, - bool ra) -{ - struct inode *const inode = mapping->host; - struct super_block *const sb = inode->i_sb; - erofs_off_t current_block = (erofs_off_t)page->index; - int err; + if (vi->datalayout != EROFS_INODE_CHUNK_BASED) + return erofs_map_blocks_flatmode(inode, map, flags); - DBG_BUGON(!nblocks); + if (vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES) + unit = sizeof(*idx); /* chunk index */ + else + unit = EROFS_BLOCK_MAP_ENTRY_SIZE; /* block map */ - if (PageUptodate(page)) { - err = 0; - goto has_updated; - } + chunknr = map->m_la >> vi->chunkbits; + pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + + vi->xattr_isize, unit) + unit * chunknr; - /* note that for readpage case, bio also equals to NULL */ - if (bio && - /* not continuous */ - *last_block + 1 != current_block) { -submit_bio_retry: - submit_bio(bio); - bio = NULL; - } - - if (!bio) { - struct erofs_map_blocks map = { - .m_la = blknr_to_addr(current_block), - }; - erofs_blk_t blknr; - unsigned int blkoff; + page = erofs_get_meta_page(inode->i_sb, erofs_blknr(pos)); + if (IS_ERR(page)) + return PTR_ERR(page); - err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); - if (err) - goto err_out; + map->m_la = chunknr << vi->chunkbits; + map->m_plen = min_t(erofs_off_t, 1UL << vi->chunkbits, + roundup(inode->i_size - map->m_la, EROFS_BLKSIZ)); - /* zero out the holed page */ - if (!(map.m_flags & EROFS_MAP_MAPPED)) { - zero_user_segment(page, 0, PAGE_SIZE); - SetPageUptodate(page); + /* handle block map */ + if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_INDEXES)) { + __le32 *blkaddr = page_address(page) + erofs_blkoff(pos); - /* imply err = 0, see erofs_map_blocks */ - goto has_updated; + if (le32_to_cpu(*blkaddr) == EROFS_NULL_ADDR) { + map->m_flags = 0; + } else { + map->m_pa = blknr_to_addr(le32_to_cpu(*blkaddr)); + map->m_flags = EROFS_MAP_MAPPED; } + goto out_unlock; + } + /* parse chunk indexes */ + idx = page_address(page) + erofs_blkoff(pos); + switch (le32_to_cpu(idx->blkaddr)) { + case EROFS_NULL_ADDR: + map->m_flags = 0; + break; + default: + map->m_deviceid = le16_to_cpu(idx->device_id) & + EROFS_SB(sb)->device_id_mask; + map->m_pa = blknr_to_addr(le32_to_cpu(idx->blkaddr)); + map->m_flags = EROFS_MAP_MAPPED; + break; + } +out_unlock: + unlock_page(page); + put_page(page); +out: + map->m_llen = map->m_plen; + return err; +} - /* for RAW access mode, m_plen must be equal to m_llen */ - DBG_BUGON(map.m_plen != map.m_llen); - - blknr = erofs_blknr(map.m_pa); - blkoff = erofs_blkoff(map.m_pa); - - /* deal with inline page */ - if (map.m_flags & EROFS_MAP_META) { - void *vsrc, *vto; - struct page *ipage; - - DBG_BUGON(map.m_plen > PAGE_SIZE); - - ipage = erofs_get_meta_page(inode->i_sb, blknr); - - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto err_out; +int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) +{ + struct erofs_dev_context *devs = EROFS_SB(sb)->devs; + struct erofs_device_info *dif; + int id; + + /* primary device by default */ + map->m_bdev = sb->s_bdev; + map->m_daxdev = EROFS_SB(sb)->dax_dev; + + if (map->m_deviceid) { + down_read(&devs->rwsem); + dif = idr_find(&devs->tree, map->m_deviceid - 1); + if (!dif) { + up_read(&devs->rwsem); + return -ENODEV; + } + map->m_bdev = dif->bdev; + map->m_daxdev = dif->dax_dev; + up_read(&devs->rwsem); + } else if (devs->extra_devices) { + down_read(&devs->rwsem); + idr_for_each_entry(&devs->tree, dif, id) { + erofs_off_t startoff, length; + + if (!dif->mapped_blkaddr) + continue; + startoff = blknr_to_addr(dif->mapped_blkaddr); + length = blknr_to_addr(dif->blocks); + + if (map->m_pa >= startoff && + map->m_pa < startoff + length) { + map->m_pa -= startoff; + map->m_bdev = dif->bdev; + map->m_daxdev = dif->dax_dev; + break; } - - vsrc = kmap_atomic(ipage); - vto = kmap_atomic(page); - memcpy(vto, vsrc + blkoff, map.m_plen); - memset(vto + map.m_plen, 0, PAGE_SIZE - map.m_plen); - kunmap_atomic(vto); - kunmap_atomic(vsrc); - flush_dcache_page(page); - - SetPageUptodate(page); - /* TODO: could we unlock the page earlier? */ - unlock_page(ipage); - put_page(ipage); - - /* imply err = 0, see erofs_map_blocks */ - goto has_updated; } + up_read(&devs->rwsem); + } + return 0; +} - /* pa must be block-aligned for raw reading */ - DBG_BUGON(erofs_blkoff(map.m_pa)); +static int erofs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, struct iomap *srcmap) +{ + int ret; + struct erofs_map_blocks map; + struct erofs_map_dev mdev; - /* max # of continuous pages */ - if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE)) - nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE); - if (nblocks > BIO_MAX_PAGES) - nblocks = BIO_MAX_PAGES; + map.m_la = offset; + map.m_llen = length; - bio = bio_alloc(GFP_NOIO, nblocks); + ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW); + if (ret < 0) + return ret; - bio->bi_end_io = erofs_readendio; - bio_set_dev(bio, sb->s_bdev); - bio->bi_iter.bi_sector = (sector_t)blknr << - LOG_SECTORS_PER_BLOCK; - bio->bi_opf = REQ_OP_READ | (ra ? REQ_RAHEAD : 0); + mdev = (struct erofs_map_dev) { + .m_deviceid = map.m_deviceid, + .m_pa = map.m_pa, + }; + ret = erofs_map_dev(inode->i_sb, &mdev); + if (ret) + return ret; + + iomap->bdev = mdev.m_bdev; + iomap->dax_dev = mdev.m_daxdev; + iomap->offset = map.m_la; + iomap->length = map.m_llen; + iomap->flags = 0; + iomap->private = NULL; + + if (!(map.m_flags & EROFS_MAP_MAPPED)) { + iomap->type = IOMAP_HOLE; + iomap->addr = IOMAP_NULL_ADDR; + if (!iomap->length) + iomap->length = length; + return 0; } - err = bio_add_page(bio, page, PAGE_SIZE, 0); - /* out of the extent or bio is full */ - if (err < PAGE_SIZE) - goto submit_bio_retry; + if (map.m_flags & EROFS_MAP_META) { + struct page *ipage; + + iomap->type = IOMAP_INLINE; + ipage = erofs_get_meta_page(inode->i_sb, + erofs_blknr(mdev.m_pa)); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + iomap->inline_data = page_address(ipage) + + erofs_blkoff(mdev.m_pa); + iomap->private = ipage; + } else { + iomap->type = IOMAP_MAPPED; + iomap->addr = mdev.m_pa; + } + return 0; +} - *last_block = current_block; +static int erofs_iomap_end(struct inode *inode, loff_t pos, loff_t length, + ssize_t written, unsigned int flags, struct iomap *iomap) +{ + struct page *ipage = iomap->private; - /* shift in advance in case of it followed by too many gaps */ - if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) { - /* err should reassign to 0 after submitting */ - err = 0; - goto submit_bio_out; + if (ipage) { + DBG_BUGON(iomap->type != IOMAP_INLINE); + unlock_page(ipage); + put_page(ipage); + } else { + DBG_BUGON(iomap->type == IOMAP_INLINE); } + return written; +} - return bio; +static const struct iomap_ops erofs_iomap_ops = { + .iomap_begin = erofs_iomap_begin, + .iomap_end = erofs_iomap_end, +}; -err_out: - /* for sync reading, set page error immediately */ - if (!ra) { - SetPageError(page); - ClearPageUptodate(page); +int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len) +{ + if (erofs_inode_is_data_compressed(EROFS_I(inode)->datalayout)) { +#ifdef CONFIG_EROFS_FS_ZIP + return iomap_fiemap(inode, fieinfo, start, len, + &z_erofs_iomap_report_ops); +#else + return -EOPNOTSUPP; +#endif } -has_updated: - unlock_page(page); - - /* if updated manually, continuous pages has a gap */ - if (bio) -submit_bio_out: - submit_bio(bio); - return err ? ERR_PTR(err) : NULL; + return iomap_fiemap(inode, fieinfo, start, len, &erofs_iomap_ops); } /* * since we dont have write or truncate flows, so no inode * locking needs to be held at the moment. */ -static int erofs_raw_access_readpage(struct file *file, struct page *page) +static int erofs_readpage(struct file *file, struct page *page) { - erofs_off_t last_block; - struct bio *bio; - - trace_erofs_readpage(page, true); + return iomap_readpage(page, &erofs_iomap_ops); +} - bio = erofs_read_raw_page(NULL, page->mapping, - page, &last_block, 1, false); +static void erofs_readahead(struct readahead_control *rac) +{ + return iomap_readahead(rac, &erofs_iomap_ops); +} - if (IS_ERR(bio)) - return PTR_ERR(bio); +static sector_t erofs_bmap(struct address_space *mapping, sector_t block) +{ + return iomap_bmap(mapping, block, &erofs_iomap_ops); +} - DBG_BUGON(bio); /* since we have only one bio -- must be NULL */ +static int erofs_prepare_dio(struct kiocb *iocb, struct iov_iter *to) +{ + struct inode *inode = file_inode(iocb->ki_filp); + loff_t align = iocb->ki_pos | iov_iter_count(to) | + iov_iter_alignment(to); + struct block_device *bdev = inode->i_sb->s_bdev; + unsigned int blksize_mask; + + if (bdev) + blksize_mask = (1 << ilog2(bdev_logical_block_size(bdev))) - 1; + else + blksize_mask = (1 << inode->i_blkbits) - 1; + + if (align & blksize_mask) + return -EINVAL; return 0; } -static void erofs_raw_access_readahead(struct readahead_control *rac) +static ssize_t erofs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { - erofs_off_t last_block; - struct bio *bio = NULL; - struct page *page; - - trace_erofs_readpages(rac->mapping->host, readahead_index(rac), - readahead_count(rac), true); - - while ((page = readahead_page(rac))) { - prefetchw(&page->flags); - - bio = erofs_read_raw_page(bio, rac->mapping, page, &last_block, - readahead_count(rac), true); - - /* all the page errors are ignored when readahead */ - if (IS_ERR(bio)) { - pr_err("%s, readahead error at page %lu of nid %llu\n", - __func__, page->index, - EROFS_I(rac->mapping->host)->nid); - - bio = NULL; - } - - put_page(page); + /* no need taking (shared) inode lock since it's a ro filesystem */ + if (!iov_iter_count(to)) + return 0; + +#ifdef CONFIG_FS_DAX + if (IS_DAX(iocb->ki_filp->f_mapping->host)) + return dax_iomap_rw(iocb, to, &erofs_iomap_ops); +#endif + if (iocb->ki_flags & IOCB_DIRECT) { + int err = erofs_prepare_dio(iocb, to); + + if (!err) + return iomap_dio_rw(iocb, to, &erofs_iomap_ops, + NULL, 0); + if (err < 0) + return err; } + return generic_file_buffered_read(iocb, to, 0); +} + +/* for uncompressed (aligned) files and raw access for other files */ +const struct address_space_operations erofs_raw_access_aops = { + .readpage = erofs_readpage, + .readahead = erofs_readahead, + .bmap = erofs_bmap, + .direct_IO = noop_direct_IO, +}; - /* the rare case (end in gaps) */ - if (bio) - submit_bio(bio); +#ifdef CONFIG_FS_DAX +static vm_fault_t erofs_dax_huge_fault(struct vm_fault *vmf, + enum page_entry_size pe_size) +{ + return dax_iomap_fault(vmf, pe_size, NULL, NULL, &erofs_iomap_ops); } -static sector_t erofs_bmap(struct address_space *mapping, sector_t block) +static vm_fault_t erofs_dax_fault(struct vm_fault *vmf) { - struct inode *inode = mapping->host; - struct erofs_map_blocks map = { - .m_la = blknr_to_addr(block), - }; + return erofs_dax_huge_fault(vmf, PE_SIZE_PTE); +} - if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) { - erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE; +static const struct vm_operations_struct erofs_dax_vm_ops = { + .fault = erofs_dax_fault, + .huge_fault = erofs_dax_huge_fault, +}; - if (block >> LOG_SECTORS_PER_BLOCK >= blks) - return 0; - } +static int erofs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!IS_DAX(file_inode(file))) + return generic_file_readonly_mmap(file, vma); - if (!erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW)) - return erofs_blknr(map.m_pa); + if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) + return -EINVAL; + vma->vm_ops = &erofs_dax_vm_ops; + vma->vm_flags |= VM_HUGEPAGE; return 0; } - -/* for uncompressed (aligned) files and raw access for other files */ -const struct address_space_operations erofs_raw_access_aops = { - .readpage = erofs_raw_access_readpage, - .readahead = erofs_raw_access_readahead, - .bmap = erofs_bmap, +#else +#define erofs_file_mmap generic_file_readonly_mmap +#endif + +const struct file_operations erofs_file_fops = { + .llseek = generic_file_llseek, + .read_iter = erofs_file_read_iter, + .mmap = erofs_file_mmap, + .splice_read = generic_file_splice_read, }; - diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 04624954a119eb0eb141a43ba7f1bb6209f24187..bf37fc76b1824d68c530bd62b4f2526d226600f1 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2019 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #include "compress.h" #include @@ -17,30 +16,48 @@ #define LZ4_DECOMPRESS_INPLACE_MARGIN(srcsize) (((srcsize) >> 8) + 32) #endif -struct z_erofs_decompressor { - /* - * if destpages have sparsed pages, fill them with bounce pages. - * it also check whether destpages indicate continuous physical memory. - */ - int (*prepare_destpages)(struct z_erofs_decompress_req *rq, - struct list_head *pagepool); - int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out); - char *name; -}; - int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb) + struct erofs_super_block *dsb, + struct z_erofs_lz4_cfgs *lz4, int size) { - u16 distance = le16_to_cpu(dsb->lz4_max_distance); + struct erofs_sb_info *sbi = EROFS_SB(sb); + u16 distance; + + if (lz4) { + if (size < sizeof(struct z_erofs_lz4_cfgs)) { + erofs_err(sb, "invalid lz4 cfgs, size=%u", size); + return -EINVAL; + } + distance = le16_to_cpu(lz4->max_distance); + + sbi->lz4.max_pclusterblks = le16_to_cpu(lz4->max_pclusterblks); + if (!sbi->lz4.max_pclusterblks) { + sbi->lz4.max_pclusterblks = 1; /* reserved case */ + } else if (sbi->lz4.max_pclusterblks > + Z_EROFS_PCLUSTER_MAX_SIZE / EROFS_BLKSIZ) { + erofs_err(sb, "too large lz4 pclusterblks %u", + sbi->lz4.max_pclusterblks); + return -EINVAL; + } else if (sbi->lz4.max_pclusterblks >= 2) { + erofs_info(sb, "EXPERIMENTAL big pcluster feature in use. Use at your own risk!"); + } + } else { + distance = le16_to_cpu(dsb->u1.lz4_max_distance); + sbi->lz4.max_pclusterblks = 1; + } - EROFS_SB(sb)->lz4.max_distance_pages = distance ? + sbi->lz4.max_distance_pages = distance ? DIV_ROUND_UP(distance, PAGE_SIZE) + 1 : LZ4_MAX_DISTANCE_PAGES; - return 0; + return erofs_pcpubuf_growsize(sbi->lz4.max_pclusterblks); } -static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) +/* + * Fill all gaps with bounce pages if it's a sparse page list. Also check if + * all physical pages are consecutive, which can be seen for moderate CR. + */ +static int z_erofs_lz4_prepare_dstpages(struct z_erofs_decompress_req *rq, + struct page **pagepool) { const unsigned int nr = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -95,212 +112,180 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq, return kaddr ? 1 : 0; } -static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq, - u8 *src, unsigned int pageofs_in) +static void *z_erofs_lz4_handle_inplace_io(struct z_erofs_decompress_req *rq, + void *inpage, unsigned int *inputmargin, int *maptype, + bool support_0padding) { - /* - * if in-place decompression is ongoing, those decompressed - * pages should be copied in order to avoid being overlapped. - */ - struct page **in = rq->in; - u8 *const tmp = erofs_get_pcpubuf(0); - u8 *tmpp = tmp; - unsigned int inlen = rq->inputsize - pageofs_in; - unsigned int count = min_t(uint, inlen, PAGE_SIZE - pageofs_in); - - while (tmpp < tmp + inlen) { - if (!src) - src = kmap_atomic(*in); - memcpy(tmpp, src + pageofs_in, count); - kunmap_atomic(src); - src = NULL; - tmpp += count; - pageofs_in = 0; - count = PAGE_SIZE; + unsigned int nrpages_in, nrpages_out; + unsigned int ofull, oend, inputsize, total, i, j; + struct page **in; + void *src, *tmp; + + inputsize = rq->inputsize; + nrpages_in = PAGE_ALIGN(inputsize) >> PAGE_SHIFT; + oend = rq->pageofs_out + rq->outputsize; + ofull = PAGE_ALIGN(oend); + nrpages_out = ofull >> PAGE_SHIFT; + + if (rq->inplace_io) { + if (rq->partial_decoding || !support_0padding || + ofull - oend < LZ4_DECOMPRESS_INPLACE_MARGIN(inputsize)) + goto docopy; + + for (i = 0; i < nrpages_in; ++i) { + DBG_BUGON(rq->in[i] == NULL); + for (j = 0; j < nrpages_out - nrpages_in + i; ++j) + if (rq->out[j] == rq->in[i]) + goto docopy; + } + } + + if (nrpages_in <= 1) { + *maptype = 0; + return inpage; + } + kunmap_atomic(inpage); + might_sleep(); + src = erofs_vm_map_ram(rq->in, nrpages_in); + if (!src) + return ERR_PTR(-ENOMEM); + *maptype = 1; + return src; + +docopy: + /* Or copy compressed data which can be overlapped to per-CPU buffer */ + in = rq->in; + src = erofs_get_pcpubuf(nrpages_in); + if (!src) { + DBG_BUGON(1); + kunmap_atomic(inpage); + return ERR_PTR(-EFAULT); + } + + tmp = src; + total = rq->inputsize; + while (total) { + unsigned int page_copycnt = + min_t(unsigned int, total, PAGE_SIZE - *inputmargin); + + if (!inpage) + inpage = kmap_atomic(*in); + memcpy(tmp, inpage + *inputmargin, page_copycnt); + kunmap_atomic(inpage); + inpage = NULL; + tmp += page_copycnt; + total -= page_copycnt; ++in; + *inputmargin = 0; } - return tmp; + *maptype = 2; + return src; } -static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out) +static int z_erofs_lz4_decompress_mem(struct z_erofs_decompress_req *rq, + u8 *out) { - unsigned int inputmargin, inlen; - u8 *src; - bool copied, support_0padding; - int ret; + unsigned int inputmargin; + u8 *headpage, *src; + bool support_0padding; + int ret, maptype; - if (rq->inputsize > PAGE_SIZE) - return -EOPNOTSUPP; - - src = kmap_atomic(*rq->in); + DBG_BUGON(*rq->in == NULL); + headpage = kmap_atomic(*rq->in); inputmargin = 0; support_0padding = false; /* decompression inplace is only safe when 0padding is enabled */ - if (EROFS_SB(rq->sb)->feature_incompat & - EROFS_FEATURE_INCOMPAT_LZ4_0PADDING) { + if (erofs_sb_has_lz4_0padding(EROFS_SB(rq->sb))) { support_0padding = true; - while (!src[inputmargin & ~PAGE_MASK]) + while (!headpage[inputmargin & ~PAGE_MASK]) if (!(++inputmargin & ~PAGE_MASK)) break; if (inputmargin >= rq->inputsize) { - kunmap_atomic(src); + kunmap_atomic(headpage); return -EIO; } } - copied = false; - inlen = rq->inputsize - inputmargin; - if (rq->inplace_io) { - const uint oend = (rq->pageofs_out + - rq->outputsize) & ~PAGE_MASK; - const uint nr = PAGE_ALIGN(rq->pageofs_out + - rq->outputsize) >> PAGE_SHIFT; - - if (rq->partial_decoding || !support_0padding || - rq->out[nr - 1] != rq->in[0] || - rq->inputsize - oend < - LZ4_DECOMPRESS_INPLACE_MARGIN(inlen)) { - src = generic_copy_inplace_data(rq, src, inputmargin); - inputmargin = 0; - copied = true; - } - } + rq->inputsize -= inputmargin; + src = z_erofs_lz4_handle_inplace_io(rq, headpage, &inputmargin, + &maptype, support_0padding); + if (IS_ERR(src)) + return PTR_ERR(src); /* legacy format could compress extra data in a pcluster. */ if (rq->partial_decoding || !support_0padding) ret = LZ4_decompress_safe_partial(src + inputmargin, out, - inlen, rq->outputsize, - rq->outputsize); + rq->inputsize, rq->outputsize, rq->outputsize); else ret = LZ4_decompress_safe(src + inputmargin, out, - inlen, rq->outputsize); + rq->inputsize, rq->outputsize); if (ret != rq->outputsize) { erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]", - ret, inlen, inputmargin, rq->outputsize); + ret, rq->inputsize, inputmargin, rq->outputsize); - WARN_ON(1); print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET, - 16, 1, src + inputmargin, inlen, true); + 16, 1, src + inputmargin, rq->inputsize, true); print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET, 16, 1, out, rq->outputsize, true); if (ret >= 0) memset(out + ret, 0, rq->outputsize - ret); ret = -EIO; + } else { + ret = 0; } - if (copied) - erofs_put_pcpubuf(src); - else + if (maptype == 0) { kunmap_atomic(src); - return ret; -} - -static struct z_erofs_decompressor decompressors[] = { - [Z_EROFS_COMPRESSION_SHIFTED] = { - .name = "shifted" - }, - [Z_EROFS_COMPRESSION_LZ4] = { - .prepare_destpages = z_erofs_lz4_prepare_destpages, - .decompress = z_erofs_lz4_decompress, - .name = "lz4" - }, -}; - -static void copy_from_pcpubuf(struct page **out, const char *dst, - unsigned short pageofs_out, - unsigned int outputsize) -{ - const char *end = dst + outputsize; - const unsigned int righthalf = PAGE_SIZE - pageofs_out; - const char *cur = dst - pageofs_out; - - while (cur < end) { - struct page *const page = *out++; - - if (page) { - char *buf = kmap_atomic(page); - - if (cur >= dst) { - memcpy(buf, cur, min_t(uint, PAGE_SIZE, - end - cur)); - } else { - memcpy(buf + pageofs_out, cur + pageofs_out, - min_t(uint, righthalf, end - cur)); - } - kunmap_atomic(buf); - } - cur += PAGE_SIZE; + } else if (maptype == 1) { + vm_unmap_ram(src, PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT); + } else if (maptype == 2) { + erofs_put_pcpubuf(src); + } else { + DBG_BUGON(1); + return -EFAULT; } + return ret; } -static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) +static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, + struct page **pagepool) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; - const struct z_erofs_decompressor *alg = decompressors + rq->alg; unsigned int dst_maptype; void *dst; - int ret, i; + int ret; - if (nrpages_out == 1 && !rq->inplace_io) { + /* one optimized fast path only for non bigpcluster cases yet */ + if (rq->inputsize <= PAGE_SIZE && nrpages_out == 1 && !rq->inplace_io) { DBG_BUGON(!*rq->out); dst = kmap_atomic(*rq->out); dst_maptype = 0; goto dstmap_out; } - /* - * For the case of small output size (especially much less - * than PAGE_SIZE), memcpy the decompressed data rather than - * compressed data is preferred. - */ - if (rq->outputsize <= PAGE_SIZE * 7 / 8) { - dst = erofs_get_pcpubuf(0); - if (IS_ERR(dst)) - return PTR_ERR(dst); - - rq->inplace_io = false; - ret = alg->decompress(rq, dst); - if (!ret) - copy_from_pcpubuf(rq->out, dst, rq->pageofs_out, - rq->outputsize); - - erofs_put_pcpubuf(dst); + /* general decoding path which can be used for all cases */ + ret = z_erofs_lz4_prepare_dstpages(rq, pagepool); + if (ret < 0) return ret; - } - - ret = alg->prepare_destpages(rq, pagepool); - if (ret < 0) { - return ret; - } else if (ret) { + if (ret) { dst = page_address(*rq->out); dst_maptype = 1; goto dstmap_out; } - i = 0; - while (1) { - dst = vm_map_ram(rq->out, nrpages_out, -1); - - /* retry two more times (totally 3 times) */ - if (dst || ++i >= 3) - break; - vm_unmap_aliases(); - } - + dst = erofs_vm_map_ram(rq->out, nrpages_out); if (!dst) return -ENOMEM; - dst_maptype = 2; dstmap_out: - ret = alg->decompress(rq, dst + rq->pageofs_out); + ret = z_erofs_lz4_decompress_mem(rq, dst + rq->pageofs_out); if (!dst_maptype) kunmap_atomic(dst); @@ -309,8 +294,8 @@ dstmap_out: return ret; } -static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq, - struct list_head *pagepool) +static int z_erofs_shifted_transform(struct z_erofs_decompress_req *rq, + struct page **pagepool) { const unsigned int nrpages_out = PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; @@ -348,11 +333,25 @@ static int z_erofs_shifted_transform(const struct z_erofs_decompress_req *rq, return 0; } +static struct z_erofs_decompressor decompressors[] = { + [Z_EROFS_COMPRESSION_SHIFTED] = { + .decompress = z_erofs_shifted_transform, + .name = "shifted" + }, + [Z_EROFS_COMPRESSION_LZ4] = { + .decompress = z_erofs_lz4_decompress, + .name = "lz4" + }, +#ifdef CONFIG_EROFS_FS_ZIP_LZMA + [Z_EROFS_COMPRESSION_LZMA] = { + .decompress = z_erofs_lzma_decompress, + .name = "lzma" + }, +#endif +}; + int z_erofs_decompress(struct z_erofs_decompress_req *rq, - struct list_head *pagepool) + struct page **pagepool) { - if (rq->alg == Z_EROFS_COMPRESSION_SHIFTED) - return z_erofs_shifted_transform(rq, pagepool); - return z_erofs_decompress_generic(rq, pagepool); + return decompressors[rq->alg].decompress(rq, pagepool); } - diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c new file mode 100644 index 0000000000000000000000000000000000000000..50045510a1f4161876945c31ab72a8970563d788 --- /dev/null +++ b/fs/erofs/decompressor_lzma.c @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include "compress.h" + +struct z_erofs_lzma { + struct z_erofs_lzma *next; + struct xz_dec_microlzma *state; + struct xz_buf buf; + u8 bounce[PAGE_SIZE]; +}; + +/* considering the LZMA performance, no need to use a lockless list for now */ +static DEFINE_SPINLOCK(z_erofs_lzma_lock); +static unsigned int z_erofs_lzma_max_dictsize; +static unsigned int z_erofs_lzma_nstrms, z_erofs_lzma_avail_strms; +static struct z_erofs_lzma *z_erofs_lzma_head; +static DECLARE_WAIT_QUEUE_HEAD(z_erofs_lzma_wq); + +module_param_named(lzma_streams, z_erofs_lzma_nstrms, uint, 0444); + +void z_erofs_lzma_exit(void) +{ + /* there should be no running fs instance */ + while (z_erofs_lzma_avail_strms) { + struct z_erofs_lzma *strm; + + spin_lock(&z_erofs_lzma_lock); + strm = z_erofs_lzma_head; + if (!strm) { + spin_unlock(&z_erofs_lzma_lock); + DBG_BUGON(1); + return; + } + z_erofs_lzma_head = NULL; + spin_unlock(&z_erofs_lzma_lock); + + while (strm) { + struct z_erofs_lzma *n = strm->next; + + if (strm->state) + xz_dec_microlzma_end(strm->state); + kfree(strm); + --z_erofs_lzma_avail_strms; + strm = n; + } + } +} + +int z_erofs_lzma_init(void) +{ + unsigned int i; + + /* by default, use # of possible CPUs instead */ + if (!z_erofs_lzma_nstrms) + z_erofs_lzma_nstrms = num_possible_cpus(); + + for (i = 0; i < z_erofs_lzma_nstrms; ++i) { + struct z_erofs_lzma *strm = kzalloc(sizeof(*strm), GFP_KERNEL); + + if (!strm) { + z_erofs_lzma_exit(); + return -ENOMEM; + } + spin_lock(&z_erofs_lzma_lock); + strm->next = z_erofs_lzma_head; + z_erofs_lzma_head = strm; + spin_unlock(&z_erofs_lzma_lock); + ++z_erofs_lzma_avail_strms; + } + return 0; +} + +int z_erofs_load_lzma_config(struct super_block *sb, + struct erofs_super_block *dsb, + struct z_erofs_lzma_cfgs *lzma, int size) +{ + static DEFINE_MUTEX(lzma_resize_mutex); + unsigned int dict_size, i; + struct z_erofs_lzma *strm, *head = NULL; + int err; + + if (!lzma || size < sizeof(struct z_erofs_lzma_cfgs)) { + erofs_err(sb, "invalid lzma cfgs, size=%u", size); + return -EINVAL; + } + if (lzma->format) { + erofs_err(sb, "unidentified lzma format %x, please check kernel version", + le16_to_cpu(lzma->format)); + return -EINVAL; + } + dict_size = le32_to_cpu(lzma->dict_size); + if (dict_size > Z_EROFS_LZMA_MAX_DICT_SIZE || dict_size < 4096) { + erofs_err(sb, "unsupported lzma dictionary size %u", + dict_size); + return -EINVAL; + } + + erofs_info(sb, "EXPERIMENTAL MicroLZMA in use. Use at your own risk!"); + + /* in case 2 z_erofs_load_lzma_config() race to avoid deadlock */ + mutex_lock(&lzma_resize_mutex); + + if (z_erofs_lzma_max_dictsize >= dict_size) { + mutex_unlock(&lzma_resize_mutex); + return 0; + } + + /* 1. collect/isolate all streams for the following check */ + for (i = 0; i < z_erofs_lzma_avail_strms; ++i) { + struct z_erofs_lzma *last; + +again: + spin_lock(&z_erofs_lzma_lock); + strm = z_erofs_lzma_head; + if (!strm) { + spin_unlock(&z_erofs_lzma_lock); + wait_event(z_erofs_lzma_wq, + READ_ONCE(z_erofs_lzma_head)); + goto again; + } + z_erofs_lzma_head = NULL; + spin_unlock(&z_erofs_lzma_lock); + + for (last = strm; last->next; last = last->next) + ++i; + last->next = head; + head = strm; + } + + err = 0; + /* 2. walk each isolated stream and grow max dict_size if needed */ + for (strm = head; strm; strm = strm->next) { + if (strm->state) + xz_dec_microlzma_end(strm->state); + strm->state = xz_dec_microlzma_alloc(XZ_PREALLOC, dict_size); + if (!strm->state) + err = -ENOMEM; + } + + /* 3. push back all to the global list and update max dict_size */ + spin_lock(&z_erofs_lzma_lock); + DBG_BUGON(z_erofs_lzma_head); + z_erofs_lzma_head = head; + spin_unlock(&z_erofs_lzma_lock); + + z_erofs_lzma_max_dictsize = dict_size; + mutex_unlock(&lzma_resize_mutex); + return err; +} + +int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq, + struct page **pagepool) +{ + const unsigned int nrpages_out = + PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT; + const unsigned int nrpages_in = + PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT; + unsigned int inputmargin, inlen, outlen, pageofs; + struct z_erofs_lzma *strm; + u8 *kin; + bool bounced = false; + int no, ni, j, err = 0; + + /* 1. get the exact LZMA compressed size */ + kin = kmap(*rq->in); + inputmargin = 0; + while (!kin[inputmargin & ~PAGE_MASK]) + if (!(++inputmargin & ~PAGE_MASK)) + break; + + if (inputmargin >= PAGE_SIZE) { + kunmap(*rq->in); + return -EFSCORRUPTED; + } + rq->inputsize -= inputmargin; + + /* 2. get an available lzma context */ +again: + spin_lock(&z_erofs_lzma_lock); + strm = z_erofs_lzma_head; + if (!strm) { + spin_unlock(&z_erofs_lzma_lock); + wait_event(z_erofs_lzma_wq, READ_ONCE(z_erofs_lzma_head)); + goto again; + } + z_erofs_lzma_head = strm->next; + spin_unlock(&z_erofs_lzma_lock); + + /* 3. multi-call decompress */ + inlen = rq->inputsize; + outlen = rq->outputsize; + xz_dec_microlzma_reset(strm->state, inlen, outlen, + !rq->partial_decoding); + pageofs = rq->pageofs_out; + strm->buf.in = kin + inputmargin; + strm->buf.in_pos = 0; + strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE - inputmargin); + inlen -= strm->buf.in_size; + strm->buf.out = NULL; + strm->buf.out_pos = 0; + strm->buf.out_size = 0; + + for (ni = 0, no = -1;;) { + enum xz_ret xz_err; + + if (strm->buf.out_pos == strm->buf.out_size) { + if (strm->buf.out) { + kunmap(rq->out[no]); + strm->buf.out = NULL; + } + + if (++no >= nrpages_out || !outlen) { + erofs_err(rq->sb, "decompressed buf out of bound"); + err = -EFSCORRUPTED; + break; + } + strm->buf.out_pos = 0; + strm->buf.out_size = min_t(u32, outlen, + PAGE_SIZE - pageofs); + outlen -= strm->buf.out_size; + if (rq->out[no]) + strm->buf.out = kmap(rq->out[no]) + pageofs; + pageofs = 0; + } else if (strm->buf.in_pos == strm->buf.in_size) { + kunmap(rq->in[ni]); + + if (++ni >= nrpages_in || !inlen) { + erofs_err(rq->sb, "compressed buf out of bound"); + err = -EFSCORRUPTED; + break; + } + strm->buf.in_pos = 0; + strm->buf.in_size = min_t(u32, inlen, PAGE_SIZE); + inlen -= strm->buf.in_size; + kin = kmap(rq->in[ni]); + strm->buf.in = kin; + bounced = false; + } + + /* + * Handle overlapping: Use bounced buffer if the compressed + * data is under processing; Otherwise, Use short-lived pages + * from the on-stack pagepool where pages share with the same + * request. + */ + if (!bounced && rq->out[no] == rq->in[ni]) { + memcpy(strm->bounce, strm->buf.in, strm->buf.in_size); + strm->buf.in = strm->bounce; + bounced = true; + } + for (j = ni + 1; j < nrpages_in; ++j) { + struct page *tmppage; + + if (rq->out[no] != rq->in[j]) + continue; + + DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb), + rq->in[j])); + tmppage = erofs_allocpage(pagepool, + GFP_KERNEL | __GFP_NOFAIL); + set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE); + copy_highpage(tmppage, rq->in[j]); + rq->in[j] = tmppage; + } + xz_err = xz_dec_microlzma_run(strm->state, &strm->buf); + DBG_BUGON(strm->buf.out_pos > strm->buf.out_size); + DBG_BUGON(strm->buf.in_pos > strm->buf.in_size); + + if (xz_err != XZ_OK) { + if (xz_err == XZ_STREAM_END && !outlen) + break; + erofs_err(rq->sb, "failed to decompress %d in[%u] out[%u]", + xz_err, rq->inputsize, rq->outputsize); + err = -EFSCORRUPTED; + break; + } + } + if (no < nrpages_out && strm->buf.out) + kunmap(rq->in[no]); + if (ni < nrpages_in) + kunmap(rq->in[ni]); + /* 4. push back LZMA stream context to the global list */ + spin_lock(&z_erofs_lzma_lock); + strm->next = z_erofs_lzma_head; + z_erofs_lzma_head = strm; + spin_unlock(&z_erofs_lzma_lock); + wake_up(&z_erofs_lzma_wq); + return err; +} diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2776bb832127d79d55c91da1b0a1a6fe4705a27e..eee9b0b31b639b17f35e63158abfa5b7cd72dcf8 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #include "internal.h" @@ -139,4 +138,3 @@ const struct file_operations erofs_dir_fops = { .read = generic_read_dir, .iterate_shared = erofs_readdir, }; - diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h index dc7cc79410dfd08c32017fe3c65f72b1644ba0fd..083997a034e5279d64021acb39da8507ddb0afa9 100644 --- a/fs/erofs/erofs_fs.h +++ b/fs/erofs/erofs_fs.h @@ -4,7 +4,7 @@ * * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang + * Copyright (C) 2021, Alibaba Cloud */ #ifndef __EROFS_FS_H #define __EROFS_FS_H @@ -18,15 +18,39 @@ * be incompatible with this kernel version. */ #define EROFS_FEATURE_INCOMPAT_LZ4_0PADDING 0x00000001 -#define EROFS_ALL_FEATURE_INCOMPAT EROFS_FEATURE_INCOMPAT_LZ4_0PADDING +#define EROFS_FEATURE_INCOMPAT_COMPR_CFGS 0x00000002 +#define EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER 0x00000002 +#define EROFS_FEATURE_INCOMPAT_CHUNKED_FILE 0x00000004 +#define EROFS_FEATURE_INCOMPAT_DEVICE_TABLE 0x00000008 +#define EROFS_FEATURE_INCOMPAT_COMPR_HEAD2 0x00000008 +#define EROFS_ALL_FEATURE_INCOMPAT \ + (EROFS_FEATURE_INCOMPAT_LZ4_0PADDING | \ + EROFS_FEATURE_INCOMPAT_COMPR_CFGS | \ + EROFS_FEATURE_INCOMPAT_BIG_PCLUSTER | \ + EROFS_FEATURE_INCOMPAT_CHUNKED_FILE | \ + EROFS_FEATURE_INCOMPAT_DEVICE_TABLE | \ + EROFS_FEATURE_INCOMPAT_COMPR_HEAD2) + +#define EROFS_SB_EXTSLOT_SIZE 16 + +struct erofs_deviceslot { + union { + u8 uuid[16]; /* used for device manager later */ + u8 userdata[64]; /* digest(sha256), etc. */ + } u; + __le32 blocks; /* total fs blocks of this device */ + __le32 mapped_blkaddr; /* map starting at mapped_blkaddr */ + u8 reserved[56]; +}; +#define EROFS_DEVT_SLOT_SIZE sizeof(struct erofs_deviceslot) -/* 128-byte erofs on-disk super block */ +/* erofs on-disk super block (currently 128 bytes) */ struct erofs_super_block { __le32 magic; /* file system magic number */ __le32 checksum; /* crc32c(super_block) */ __le32 feature_compat; __u8 blkszbits; /* support block_size == PAGE_SIZE only */ - __u8 reserved; + __u8 sb_extslots; /* superblock size = 128 + sb_extslots * 16 */ __le16 root_nid; /* nid of root directory */ __le64 inos; /* total valid ino # (== f_files - f_favail) */ @@ -39,9 +63,15 @@ struct erofs_super_block { __u8 uuid[16]; /* 128-bit uuid for volume */ __u8 volume_name[16]; /* volume name */ __le32 feature_incompat; - /* customized lz4 sliding window size instead of 64k by default */ - __le16 lz4_max_distance; - __u8 reserved2[42]; + union { + /* bitmap for available compression algorithms */ + __le16 available_compr_algs; + /* customized sliding window size instead of 64k by default */ + __le16 lz4_max_distance; + } __packed u1; + __le16 extra_devices; /* # of devices besides the primary device */ + __le16 devt_slotoff; /* startoff = devt_slotoff * devt_slotsize */ + __u8 reserved2[38]; }; /* @@ -54,13 +84,16 @@ struct erofs_super_block { * inode, [xattrs], last_inline_data, ... | ... | no-holed data * 3 - inode compression D: * inode, [xattrs], map_header, extents ... | ... - * 4~7 - reserved + * 4 - inode chunk-based E: + * inode, [xattrs], chunk indexes ... | ... + * 5~7 - reserved */ enum { EROFS_INODE_FLAT_PLAIN = 0, EROFS_INODE_FLAT_COMPRESSION_LEGACY = 1, EROFS_INODE_FLAT_INLINE = 2, EROFS_INODE_FLAT_COMPRESSION = 3, + EROFS_INODE_CHUNK_BASED = 4, EROFS_INODE_DATALAYOUT_MAX }; @@ -80,6 +113,19 @@ static inline bool erofs_inode_is_data_compressed(unsigned int datamode) #define EROFS_I_ALL \ ((1 << (EROFS_I_DATALAYOUT_BIT + EROFS_I_DATALAYOUT_BITS)) - 1) +/* indicate chunk blkbits, thus 'chunksize = blocksize << chunk blkbits' */ +#define EROFS_CHUNK_FORMAT_BLKBITS_MASK 0x001F +/* with chunk indexes or just a 4-byte blkaddr array */ +#define EROFS_CHUNK_FORMAT_INDEXES 0x0020 + +#define EROFS_CHUNK_FORMAT_ALL \ + (EROFS_CHUNK_FORMAT_BLKBITS_MASK | EROFS_CHUNK_FORMAT_INDEXES) + +struct erofs_inode_chunk_info { + __le16 format; /* chunk blkbits, etc. */ + __le16 reserved; +}; + /* 32-byte reduced form of an ondisk inode */ struct erofs_inode_compact { __le16 i_format; /* inode format hints */ @@ -97,6 +143,9 @@ struct erofs_inode_compact { /* for device files, used to indicate old/new device # */ __le32 rdev; + + /* for chunk-based files, it contains the summary info */ + struct erofs_inode_chunk_info c; } i_u; __le32 i_ino; /* only used for 32-bit stat compatibility */ __le16 i_uid; @@ -125,6 +174,9 @@ struct erofs_inode_extended { /* for device files, used to indicate old/new device # */ __le32 rdev; + + /* for chunk-based files, it contains the summary info */ + struct erofs_inode_chunk_info c; } i_u; /* only used for 32-bit stat compatibility */ @@ -194,20 +246,56 @@ static inline unsigned int erofs_xattr_entry_size(struct erofs_xattr_entry *e) e->e_name_len + le16_to_cpu(e->e_value_size)); } +/* represent a zeroed chunk (hole) */ +#define EROFS_NULL_ADDR -1 + +/* 4-byte block address array */ +#define EROFS_BLOCK_MAP_ENTRY_SIZE sizeof(__le32) + +/* 8-byte inode chunk indexes */ +struct erofs_inode_chunk_index { + __le16 advise; /* always 0, don't care for now */ + __le16 device_id; /* back-end storage id (with bits masked) */ + __le32 blkaddr; /* start block address of this inode chunk */ +}; + +/* maximum supported size of a physical compression cluster */ +#define Z_EROFS_PCLUSTER_MAX_SIZE (1024 * 1024) + /* available compression algorithm types (for h_algorithmtype) */ enum { - Z_EROFS_COMPRESSION_LZ4 = 0, + Z_EROFS_COMPRESSION_LZ4 = 0, + Z_EROFS_COMPRESSION_LZMA = 1, Z_EROFS_COMPRESSION_MAX }; +#define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1) + +/* 14 bytes (+ length field = 16 bytes) */ +struct z_erofs_lz4_cfgs { + __le16 max_distance; + __le16 max_pclusterblks; + u8 reserved[10]; +} __packed; + +/* 14 bytes (+ length field = 16 bytes) */ +struct z_erofs_lzma_cfgs { + __le32 dict_size; + __le16 format; + u8 reserved[8]; +} __packed; + +#define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE) /* * bit 0 : COMPACTED_2B indexes (0 - off; 1 - on) * e.g. for 4k logical cluster size, 4B if compacted 2B is off; * (4B) + 2B + (4B) if compacted 2B is on. + * bit 1 : HEAD1 big pcluster (0 - off; 1 - on) + * bit 2 : HEAD2 big pcluster (0 - off; 1 - on) */ -#define Z_EROFS_ADVISE_COMPACTED_2B_BIT 0 - -#define Z_EROFS_ADVISE_COMPACTED_2B (1 << Z_EROFS_ADVISE_COMPACTED_2B_BIT) +#define Z_EROFS_ADVISE_COMPACTED_2B 0x0001 +#define Z_EROFS_ADVISE_BIG_PCLUSTER_1 0x0002 +#define Z_EROFS_ADVISE_BIG_PCLUSTER_2 0x0004 struct z_erofs_map_header { __le32 h_reserved1; @@ -219,9 +307,7 @@ struct z_erofs_map_header { __u8 h_algorithmtype; /* * bit 0-2 : logical cluster bits - 12, e.g. 0 for 4096; - * bit 3-4 : (physical - logical) cluster bits of head 1: - * For example, if logical clustersize = 4096, 1 for 8192. - * bit 5-7 : (physical - logical) cluster bits of head 2. + * bit 3-7 : reserved. */ __u8 h_clusterbits; }; @@ -229,41 +315,47 @@ struct z_erofs_map_header { #define Z_EROFS_VLE_LEGACY_HEADER_PADDING 8 /* - * Fixed-sized output compression ondisk Logical Extent cluster type: - * 0 - literal (uncompressed) cluster - * 1 - compressed cluster (for the head logical cluster) - * 2 - compressed cluster (for the other logical clusters) + * Fixed-sized output compression on-disk logical cluster type: + * 0 - literal (uncompressed) lcluster + * 1,3 - compressed lcluster (for HEAD lclusters) + * 2 - compressed lcluster (for NONHEAD lclusters) * * In detail, - * 0 - literal (uncompressed) cluster, + * 0 - literal (uncompressed) lcluster, * di_advise = 0 - * di_clusterofs = the literal data offset of the cluster - * di_blkaddr = the blkaddr of the literal cluster + * di_clusterofs = the literal data offset of the lcluster + * di_blkaddr = the blkaddr of the literal pcluster * - * 1 - compressed cluster (for the head logical cluster) - * di_advise = 1 - * di_clusterofs = the decompressed data offset of the cluster - * di_blkaddr = the blkaddr of the compressed cluster + * 1,3 - compressed lcluster (for HEAD lclusters) + * di_advise = 1 or 3 + * di_clusterofs = the decompressed data offset of the lcluster + * di_blkaddr = the blkaddr of the compressed pcluster * - * 2 - compressed cluster (for the other logical clusters) + * 2 - compressed lcluster (for NONHEAD lclusters) * di_advise = 2 * di_clusterofs = - * the decompressed data offset in its own head cluster - * di_u.delta[0] = distance to its corresponding head cluster - * di_u.delta[1] = distance to its corresponding tail cluster - * (di_advise could be 0, 1 or 2) + * the decompressed data offset in its own HEAD lcluster + * di_u.delta[0] = distance to this HEAD lcluster + * di_u.delta[1] = distance to the next HEAD lcluster */ enum { Z_EROFS_VLE_CLUSTER_TYPE_PLAIN = 0, - Z_EROFS_VLE_CLUSTER_TYPE_HEAD = 1, + Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 = 1, Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD = 2, - Z_EROFS_VLE_CLUSTER_TYPE_RESERVED = 3, + Z_EROFS_VLE_CLUSTER_TYPE_HEAD2 = 3, Z_EROFS_VLE_CLUSTER_TYPE_MAX }; #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS 2 #define Z_EROFS_VLE_DI_CLUSTER_TYPE_BIT 0 +/* + * D0_CBLKCNT will be marked _only_ at the 1st non-head lcluster to store the + * compressed block count of a compressed extent (in logical clusters, aka. + * block count of a pcluster). + */ +#define Z_EROFS_VLE_DI_D0_CBLKCNT (1 << 11) + struct z_erofs_vle_decompressed_index { __le16 di_advise; /* where to decompress in the head cluster */ @@ -310,13 +402,18 @@ static inline void erofs_check_ondisk_layout_definitions(void) BUILD_BUG_ON(sizeof(struct erofs_inode_extended) != 64); BUILD_BUG_ON(sizeof(struct erofs_xattr_ibody_header) != 12); BUILD_BUG_ON(sizeof(struct erofs_xattr_entry) != 4); + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_info) != 4); + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != 8); BUILD_BUG_ON(sizeof(struct z_erofs_map_header) != 8); BUILD_BUG_ON(sizeof(struct z_erofs_vle_decompressed_index) != 8); BUILD_BUG_ON(sizeof(struct erofs_dirent) != 12); + /* keep in sync between 2 index structures for better extendibility */ + BUILD_BUG_ON(sizeof(struct erofs_inode_chunk_index) != + sizeof(struct z_erofs_vle_decompressed_index)); + BUILD_BUG_ON(sizeof(struct erofs_deviceslot) != 128); BUILD_BUG_ON(BIT(Z_EROFS_VLE_DI_CLUSTER_TYPE_BITS) < Z_EROFS_VLE_CLUSTER_TYPE_MAX - 1); } #endif - diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 0a94a52a119fbcb89f2d8a1e8662ee362dc2a0a9..53a7d3cab2aed3c4816597ea45f96216ee4d8195 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -2,7 +2,7 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang + * Copyright (C) 2021, Alibaba Cloud */ #include "xattr.h" @@ -123,8 +123,11 @@ static struct page *erofs_read_inode(struct inode *inode, /* total blocks for compressed files */ if (erofs_inode_is_data_compressed(vi->datalayout)) nblks = le32_to_cpu(die->i_u.compressed_blocks); - + else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) + /* fill chunked inode summary info */ + vi->chunkformat = le16_to_cpu(die->i_u.c.format); kfree(copied); + copied = NULL; break; case EROFS_INODE_LAYOUT_COMPACT: vi->inode_isize = sizeof(struct erofs_inode_compact); @@ -161,6 +164,8 @@ static struct page *erofs_read_inode(struct inode *inode, inode->i_size = le32_to_cpu(dic->i_size); if (erofs_inode_is_data_compressed(vi->datalayout)) nblks = le32_to_cpu(dic->i_u.compressed_blocks); + else if (vi->datalayout == EROFS_INODE_CHUNK_BASED) + vi->chunkformat = le16_to_cpu(dic->i_u.c.format); break; default: erofs_err(inode->i_sb, @@ -170,11 +175,26 @@ static struct page *erofs_read_inode(struct inode *inode, goto err_out; } + if (vi->datalayout == EROFS_INODE_CHUNK_BASED) { + if (!(vi->chunkformat & EROFS_CHUNK_FORMAT_ALL)) { + erofs_err(inode->i_sb, + "unsupported chunk format %x of nid %llu", + vi->chunkformat, vi->nid); + err = -EOPNOTSUPP; + goto err_out; + } + vi->chunkbits = LOG_BLOCK_SIZE + + (vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK); + } inode->i_mtime.tv_sec = inode->i_ctime.tv_sec; inode->i_atime.tv_sec = inode->i_ctime.tv_sec; inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec; inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec; + inode->i_flags &= ~S_DAX; + if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) && + vi->datalayout == EROFS_INODE_FLAT_PLAIN) + inode->i_flags |= S_DAX; if (!nblks) /* measure inode.i_blocks as generic filesystems */ inode->i_blocks = roundup(inode->i_size, EROFS_BLKSIZ) >> 9; @@ -248,7 +268,10 @@ static int erofs_fill_inode(struct inode *inode, int isdir) switch (inode->i_mode & S_IFMT) { case S_IFREG: inode->i_op = &erofs_generic_iops; - inode->i_fop = &generic_ro_fops; + if (erofs_inode_is_data_compressed(vi->datalayout)) + inode->i_fop = &generic_ro_fops; + else + inode->i_fop = &erofs_file_fops; break; case S_IFDIR: inode->i_op = &erofs_dir_iops; @@ -358,6 +381,7 @@ const struct inode_operations erofs_generic_iops = { .getattr = erofs_getattr, .listxattr = erofs_listxattr, .get_acl = erofs_get_acl, + .fiemap = erofs_fiemap, }; const struct inode_operations erofs_symlink_iops = { @@ -373,4 +397,3 @@ const struct inode_operations erofs_fast_symlink_iops = { .listxattr = erofs_listxattr, .get_acl = erofs_get_acl, }; - diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 4758b3ceeee41ab046d47df4376f866acbbc0ef9..d75a45c6913468ff4d1cd179bca33ab583674d5b 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -2,7 +2,7 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang + * Copyright (C) 2021, Alibaba Cloud */ #ifndef __EROFS_INTERNAL_H #define __EROFS_INTERNAL_H @@ -16,6 +16,7 @@ #include #include #include +#include #include "erofs_fs.h" /* redefine pr_fmt "erofs: " */ @@ -46,7 +47,16 @@ typedef u64 erofs_off_t; /* data type for filesystem-wide blocks number */ typedef u32 erofs_blk_t; -struct erofs_fs_context { +struct erofs_device_info { + char *path; + struct block_device *bdev; + struct dax_device *dax_dev; + + u32 blocks; + u32 mapped_blkaddr; +}; + +struct erofs_mount_opts { #ifdef CONFIG_EROFS_FS_ZIP /* current strategy of how to use managed cache */ unsigned char cache_strategy; @@ -59,13 +69,28 @@ struct erofs_fs_context { unsigned int mount_opt; }; +struct erofs_dev_context { + struct idr tree; + struct rw_semaphore rwsem; + + unsigned int extra_devices; +}; + +struct erofs_fs_context { + struct erofs_mount_opts opt; + struct erofs_dev_context *devs; +}; + /* all filesystem-wide lz4 configurations */ struct erofs_sb_lz4_info { /* # of pages needed for EROFS lz4 rolling decompression */ u16 max_distance_pages; + /* maximum possible blocks for pclusters in the filesystem */ + u16 max_pclusterblks; }; struct erofs_sb_info { + struct erofs_mount_opts opt; /* options */ #ifdef CONFIG_EROFS_FS_ZIP /* list for all registered superblocks, mainly for shrinker */ struct list_head list; @@ -75,21 +100,28 @@ struct erofs_sb_info { struct xarray managed_pslots; unsigned int shrinker_run_no; + u16 available_compr_algs; /* pseudo inode to manage cached pages */ struct inode *managed_cache; struct erofs_sb_lz4_info lz4; #endif /* CONFIG_EROFS_FS_ZIP */ - u32 blocks; + struct erofs_dev_context *devs; + struct dax_device *dax_dev; + u64 total_blocks; + u32 primarydevice_blocks; + u32 meta_blkaddr; #ifdef CONFIG_EROFS_FS_XATTR u32 xattr_blkaddr; #endif + u16 device_id_mask; /* valid bits of device id to be used */ /* inode slot unit size in bit shift */ unsigned char islotbits; + u32 sb_size; /* total superblock size */ u32 build_time_nsec; u64 build_time; @@ -103,7 +135,9 @@ struct erofs_sb_info { u32 feature_compat; u32 feature_incompat; - struct erofs_fs_context ctx; /* options */ + /* sysfs support */ + struct kobject s_kobj; /* /sys/fs/erofs/ */ + struct completion s_kobj_unregister; }; #define EROFS_SB(sb) ((struct erofs_sb_info *)(sb)->s_fs_info) @@ -112,10 +146,12 @@ struct erofs_sb_info { /* Mount flags set via mount options or defaults */ #define EROFS_MOUNT_XATTR_USER 0x00000010 #define EROFS_MOUNT_POSIX_ACL 0x00000020 +#define EROFS_MOUNT_DAX_ALWAYS 0x00000040 +#define EROFS_MOUNT_DAX_NEVER 0x00000080 -#define clear_opt(ctx, option) ((ctx)->mount_opt &= ~EROFS_MOUNT_##option) -#define set_opt(ctx, option) ((ctx)->mount_opt |= EROFS_MOUNT_##option) -#define test_opt(ctx, option) ((ctx)->mount_opt & EROFS_MOUNT_##option) +#define clear_opt(opt, option) ((opt)->mount_opt &= ~EROFS_MOUNT_##option) +#define set_opt(opt, option) ((opt)->mount_opt |= EROFS_MOUNT_##option) +#define test_opt(opt, option) ((opt)->mount_opt & EROFS_MOUNT_##option) enum { EROFS_ZIP_CACHE_DISABLED, @@ -192,12 +228,6 @@ static inline int erofs_wait_on_workgroup_freezed(struct erofs_workgroup *grp) return v; } #endif /* !CONFIG_SMP */ - -/* hard limit of pages per compressed cluster */ -#define Z_EROFS_CLUSTER_MAX_PAGES (CONFIG_EROFS_FS_CLUSTER_PAGE_LIMIT) -#define EROFS_PCPUBUF_NR_PAGES Z_EROFS_CLUSTER_MAX_PAGES -#else -#define EROFS_PCPUBUF_NR_PAGES 0 #endif /* !CONFIG_EROFS_FS_ZIP */ /* we strictly follow PAGE_SIZE and no buffer head yet */ @@ -226,6 +256,20 @@ static inline erofs_off_t iloc(struct erofs_sb_info *sbi, erofs_nid_t nid) return blknr_to_addr(sbi->meta_blkaddr) + (nid << sbi->islotbits); } +#define EROFS_FEATURE_FUNCS(name, compat, feature) \ +static inline bool erofs_sb_has_##name(struct erofs_sb_info *sbi) \ +{ \ + return sbi->feature_##compat & EROFS_FEATURE_##feature; \ +} + +EROFS_FEATURE_FUNCS(lz4_0padding, incompat, INCOMPAT_LZ4_0PADDING) +EROFS_FEATURE_FUNCS(compr_cfgs, incompat, INCOMPAT_COMPR_CFGS) +EROFS_FEATURE_FUNCS(big_pcluster, incompat, INCOMPAT_BIG_PCLUSTER) +EROFS_FEATURE_FUNCS(chunked_file, incompat, INCOMPAT_CHUNKED_FILE) +EROFS_FEATURE_FUNCS(device_table, incompat, INCOMPAT_DEVICE_TABLE) +EROFS_FEATURE_FUNCS(compr_head2, incompat, INCOMPAT_COMPR_HEAD2) +EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM) + /* atomic flag definitions */ #define EROFS_I_EA_INITED_BIT 0 #define EROFS_I_Z_INITED_BIT 1 @@ -249,12 +293,15 @@ struct erofs_inode { union { erofs_blk_t raw_blkaddr; + struct { + unsigned short chunkformat; + unsigned char chunkbits; + }; #ifdef CONFIG_EROFS_FS_ZIP struct { unsigned short z_advise; unsigned char z_algorithmtype[2]; unsigned char z_logical_clusterbits; - unsigned char z_physical_clusterbits[2]; }; #endif /* CONFIG_EROFS_FS_ZIP */ }; @@ -297,7 +344,7 @@ extern const struct address_space_operations erofs_raw_access_aops; extern const struct address_space_operations z_erofs_aops; /* - * Logical to physical block mapping, used by erofs_map_blocks() + * Logical to physical block mapping * * Different with other file systems, it is used for 2 access modes: * @@ -322,7 +369,7 @@ extern const struct address_space_operations z_erofs_aops; * of the corresponding uncompressed data in the file. */ enum { - BH_Zipped = BH_PrivateStart, + BH_Encoded = BH_PrivateStart, BH_FullMapped, }; @@ -330,8 +377,8 @@ enum { #define EROFS_MAP_MAPPED (1 << BH_Mapped) /* Located in metadata (could be copied from bd_inode) */ #define EROFS_MAP_META (1 << BH_Meta) -/* The extent has been compressed */ -#define EROFS_MAP_ZIPPED (1 << BH_Zipped) +/* The extent is encoded */ +#define EROFS_MAP_ENCODED (1 << BH_Encoded) /* The length of extent is full */ #define EROFS_MAP_FULL_MAPPED (1 << BH_FullMapped) @@ -339,15 +386,31 @@ struct erofs_map_blocks { erofs_off_t m_pa, m_la; u64 m_plen, m_llen; + unsigned short m_deviceid; + char m_algorithmformat; unsigned int m_flags; struct page *mpage; }; -/* Flags used by erofs_map_blocks() */ +/* Flags used by erofs_map_blocks_flatmode() */ #define EROFS_GET_BLOCKS_RAW 0x0001 +/* + * Used to get the exact decompressed length, e.g. fiemap (consider lookback + * approach instead if possible since it's more metadata lightweight.) + */ +#define EROFS_GET_BLOCKS_FIEMAP 0x0002 +/* Used to map the whole extent if non-negligible data is requested for LZMA */ +#define EROFS_GET_BLOCKS_READMORE 0x0004 + +enum { + Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX, + Z_EROFS_COMPRESSION_RUNTIME_MAX +}; /* zmap.c */ +extern const struct iomap_ops z_erofs_iomap_report_ops; + #ifdef CONFIG_EROFS_FS_ZIP int z_erofs_fill_inode(struct inode *inode); int z_erofs_map_blocks_iter(struct inode *inode, @@ -363,10 +426,20 @@ static inline int z_erofs_map_blocks_iter(struct inode *inode, } #endif /* !CONFIG_EROFS_FS_ZIP */ +struct erofs_map_dev { + struct block_device *m_bdev; + struct dax_device *m_daxdev; + + erofs_off_t m_pa; + unsigned int m_deviceid; +}; + /* data.c */ +extern const struct file_operations erofs_file_fops; struct page *erofs_get_meta_page(struct super_block *sb, erofs_blk_t blkaddr); - -int erofs_map_blocks(struct inode *, struct erofs_map_blocks *, int); +int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *dev); +int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + u64 start, u64 len); /* inode.c */ static inline unsigned long erofs_inode_hash(erofs_nid_t nid) @@ -395,23 +468,43 @@ int erofs_namei(struct inode *dir, struct qstr *name, /* dir.c */ extern const struct file_operations erofs_dir_fops; -/* utils.c / zdata.c */ -struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp); - -#if (EROFS_PCPUBUF_NR_PAGES > 0) -void *erofs_get_pcpubuf(unsigned int pagenr); -#define erofs_put_pcpubuf(buf) do { \ - (void)&(buf); \ - preempt_enable(); \ -} while (0) -#else -static inline void *erofs_get_pcpubuf(unsigned int pagenr) +static inline void *erofs_vm_map_ram(struct page **pages, unsigned int count) { - return ERR_PTR(-EOPNOTSUPP); + int retried = 0; + + while (1) { + void *p = vm_map_ram(pages, count, -1); + + /* retry two more times (totally 3 times) */ + if (p || ++retried >= 3) + return p; + vm_unmap_aliases(); + } + return NULL; } -#define erofs_put_pcpubuf(buf) do {} while (0) -#endif +/* pcpubuf.c */ +void *erofs_get_pcpubuf(unsigned int requiredpages); +void erofs_put_pcpubuf(void *ptr); +int erofs_pcpubuf_growsize(unsigned int nrpages); +void erofs_pcpubuf_init(void); +void erofs_pcpubuf_exit(void); + +/* sysfs.c */ +int erofs_register_sysfs(struct super_block *sb); +void erofs_unregister_sysfs(struct super_block *sb); +int __init erofs_init_sysfs(void); +void erofs_exit_sysfs(void); + +/* utils.c / zdata.c */ +struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp); +static inline void erofs_pagepool_add(struct page **pagepool, + struct page *page) +{ + set_page_private(page, (unsigned long)*pagepool); + *pagepool = page; +} +void erofs_release_pages(struct page **pagepool); #ifdef CONFIG_EROFS_FS_ZIP int erofs_workgroup_put(struct erofs_workgroup *grp); @@ -428,10 +521,10 @@ int __init z_erofs_init_zip_subsystem(void); void z_erofs_exit_zip_subsystem(void); int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, struct erofs_workgroup *egrp); -int erofs_try_to_free_cached_page(struct address_space *mapping, - struct page *page); +int erofs_try_to_free_cached_page(struct page *page); int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb); + struct erofs_super_block *dsb, + struct z_erofs_lz4_cfgs *lz4, int len); #else static inline void erofs_shrinker_register(struct super_block *sb) {} static inline void erofs_shrinker_unregister(struct super_block *sb) {} @@ -440,9 +533,10 @@ static inline void erofs_exit_shrinker(void) {} static inline int z_erofs_init_zip_subsystem(void) { return 0; } static inline void z_erofs_exit_zip_subsystem(void) {} static inline int z_erofs_load_lz4_config(struct super_block *sb, - struct erofs_super_block *dsb) + struct erofs_super_block *dsb, + struct z_erofs_lz4_cfgs *lz4, int len) { - if (dsb->lz4_max_distance) { + if (lz4 || dsb->u1.lz4_max_distance) { erofs_err(sb, "lz4 algorithm isn't enabled"); return -EINVAL; } @@ -450,7 +544,26 @@ static inline int z_erofs_load_lz4_config(struct super_block *sb, } #endif /* !CONFIG_EROFS_FS_ZIP */ +#ifdef CONFIG_EROFS_FS_ZIP_LZMA +int z_erofs_lzma_init(void); +void z_erofs_lzma_exit(void); +int z_erofs_load_lzma_config(struct super_block *sb, + struct erofs_super_block *dsb, + struct z_erofs_lzma_cfgs *lzma, int size); +#else +static inline int z_erofs_lzma_init(void) { return 0; } +static inline int z_erofs_lzma_exit(void) { return 0; } +static inline int z_erofs_load_lzma_config(struct super_block *sb, + struct erofs_super_block *dsb, + struct z_erofs_lzma_cfgs *lzma, int size) { + if (lzma) { + erofs_err(sb, "lzma algorithm isn't enabled"); + return -EINVAL; + } + return 0; +} +#endif /* !CONFIG_EROFS_FS_ZIP */ + #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #endif /* __EROFS_INTERNAL_H */ - diff --git a/fs/erofs/namei.c b/fs/erofs/namei.c index 5f8cc7346c69415409e554c34d22f25f2879737f..8629e616028cbc79f98b18d672d17d58738af55b 100644 --- a/fs/erofs/namei.c +++ b/fs/erofs/namei.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #include "xattr.h" @@ -234,8 +233,8 @@ static struct dentry *erofs_lookup(struct inode *dir, } else if (err) { inode = ERR_PTR(err); } else { - erofs_dbg("%s, %s (nid %llu) found, d_type %u", __func__, - dentry->d_name.name, nid, d_type); + erofs_dbg("%s, %pd (nid %llu) found, d_type %u", __func__, + dentry, nid, d_type); inode = erofs_iget(dir->i_sb, nid, d_type == FT_DIR); } return d_splice_alias(inode, dentry); @@ -246,5 +245,5 @@ const struct inode_operations erofs_dir_iops = { .getattr = erofs_getattr, .listxattr = erofs_listxattr, .get_acl = erofs_get_acl, + .fiemap = erofs_fiemap, }; - diff --git a/fs/erofs/pcpubuf.c b/fs/erofs/pcpubuf.c new file mode 100644 index 0000000000000000000000000000000000000000..a2efd833d1b6c32791c3ed380d5792361160fd05 --- /dev/null +++ b/fs/erofs/pcpubuf.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) Gao Xiang + * + * For low-latency decompression algorithms (e.g. lz4), reserve consecutive + * per-CPU virtual memory (in pages) in advance to store such inplace I/O + * data if inplace decompression is failed (due to unmet inplace margin for + * example). + */ +#include "internal.h" + +struct erofs_pcpubuf { + raw_spinlock_t lock; + void *ptr; + struct page **pages; + unsigned int nrpages; +}; + +static DEFINE_PER_CPU(struct erofs_pcpubuf, erofs_pcb); + +void *erofs_get_pcpubuf(unsigned int requiredpages) + __acquires(pcb->lock) +{ + struct erofs_pcpubuf *pcb = &get_cpu_var(erofs_pcb); + + raw_spin_lock(&pcb->lock); + /* check if the per-CPU buffer is too small */ + if (requiredpages > pcb->nrpages) { + raw_spin_unlock(&pcb->lock); + put_cpu_var(erofs_pcb); + /* (for sparse checker) pretend pcb->lock is still taken */ + __acquire(pcb->lock); + return NULL; + } + return pcb->ptr; +} + +void erofs_put_pcpubuf(void *ptr) __releases(pcb->lock) +{ + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, smp_processor_id()); + + DBG_BUGON(pcb->ptr != ptr); + raw_spin_unlock(&pcb->lock); + put_cpu_var(erofs_pcb); +} + +/* the next step: support per-CPU page buffers hotplug */ +int erofs_pcpubuf_growsize(unsigned int nrpages) +{ + static DEFINE_MUTEX(pcb_resize_mutex); + static unsigned int pcb_nrpages; + struct page *pagepool = NULL; + int delta, cpu, ret, i; + + mutex_lock(&pcb_resize_mutex); + delta = nrpages - pcb_nrpages; + ret = 0; + /* avoid shrinking pcpubuf, since no idea how many fses rely on */ + if (delta <= 0) + goto out; + + for_each_possible_cpu(cpu) { + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); + struct page **pages, **oldpages; + void *ptr, *old_ptr; + + pages = kmalloc_array(nrpages, sizeof(*pages), GFP_KERNEL); + if (!pages) { + ret = -ENOMEM; + break; + } + + for (i = 0; i < nrpages; ++i) { + pages[i] = erofs_allocpage(&pagepool, GFP_KERNEL); + if (!pages[i]) { + ret = -ENOMEM; + oldpages = pages; + goto free_pagearray; + } + } + ptr = vmap(pages, nrpages, VM_MAP, PAGE_KERNEL); + if (!ptr) { + ret = -ENOMEM; + oldpages = pages; + goto free_pagearray; + } + raw_spin_lock(&pcb->lock); + old_ptr = pcb->ptr; + pcb->ptr = ptr; + oldpages = pcb->pages; + pcb->pages = pages; + i = pcb->nrpages; + pcb->nrpages = nrpages; + raw_spin_unlock(&pcb->lock); + + if (!oldpages) { + DBG_BUGON(old_ptr); + continue; + } + + if (old_ptr) + vunmap(old_ptr); +free_pagearray: + while (i) + erofs_pagepool_add(&pagepool, oldpages[--i]); + kfree(oldpages); + if (ret) + break; + } + pcb_nrpages = nrpages; + erofs_release_pages(&pagepool); +out: + mutex_unlock(&pcb_resize_mutex); + return ret; +} + +void erofs_pcpubuf_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); + + raw_spin_lock_init(&pcb->lock); + } +} + +void erofs_pcpubuf_exit(void) +{ + int cpu, i; + + for_each_possible_cpu(cpu) { + struct erofs_pcpubuf *pcb = &per_cpu(erofs_pcb, cpu); + + if (pcb->ptr) { + vunmap(pcb->ptr); + pcb->ptr = NULL; + } + if (!pcb->pages) + continue; + + for (i = 0; i < pcb->nrpages; ++i) + if (pcb->pages[i]) + put_page(pcb->pages[i]); + kfree(pcb->pages); + pcb->pages = NULL; + } +} diff --git a/fs/erofs/super.c b/fs/erofs/super.c index b7ac8510feba0eb088696b6717c1acab69a07684..46c80eab75274cfcdfe2705c80a654cea8cf110a 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #include #include @@ -12,6 +11,7 @@ #include #include #include +#include #include "xattr.h" #define CREATE_TRACE_POINTS @@ -122,6 +122,212 @@ static bool check_layout_compatibility(struct super_block *sb, return true; } +#ifdef CONFIG_EROFS_FS_ZIP +/* read variable-sized metadata, offset will be aligned by 4-byte */ +static void *erofs_read_metadata(struct super_block *sb, struct page **pagep, + erofs_off_t *offset, int *lengthp) +{ + struct page *page = *pagep; + u8 *buffer, *ptr; + int len, i, cnt; + erofs_blk_t blk; + + *offset = round_up(*offset, 4); + blk = erofs_blknr(*offset); + + if (!page || page->index != blk) { + if (page) { + unlock_page(page); + put_page(page); + } + page = erofs_get_meta_page(sb, blk); + if (IS_ERR(page)) + goto err_nullpage; + } + + ptr = kmap(page); + len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]); + if (!len) + len = U16_MAX + 1; + buffer = kmalloc(len, GFP_KERNEL); + if (!buffer) { + buffer = ERR_PTR(-ENOMEM); + goto out; + } + *offset += sizeof(__le16); + *lengthp = len; + + for (i = 0; i < len; i += cnt) { + cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i); + blk = erofs_blknr(*offset); + + if (!page || page->index != blk) { + if (page) { + kunmap(page); + unlock_page(page); + put_page(page); + } + page = erofs_get_meta_page(sb, blk); + if (IS_ERR(page)) { + kfree(buffer); + goto err_nullpage; + } + ptr = kmap(page); + } + memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt); + *offset += cnt; + } +out: + kunmap(page); + *pagep = page; + return buffer; +err_nullpage: + *pagep = NULL; + return page; +} + +static int erofs_load_compr_cfgs(struct super_block *sb, + struct erofs_super_block *dsb) +{ + struct erofs_sb_info *sbi; + struct page *page; + unsigned int algs, alg; + erofs_off_t offset; + int size, ret; + + sbi = EROFS_SB(sb); + sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); + + if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { + erofs_err(sb, "try to load compressed fs with unsupported algorithms %x", + sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); + return -EINVAL; + } + + offset = EROFS_SUPER_OFFSET + sbi->sb_size; + page = NULL; + alg = 0; + ret = 0; + + for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { + void *data; + + if (!(algs & 1)) + continue; + + data = erofs_read_metadata(sb, &page, &offset, &size); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + goto err; + } + + switch (alg) { + case Z_EROFS_COMPRESSION_LZ4: + ret = z_erofs_load_lz4_config(sb, dsb, data, size); + break; + case Z_EROFS_COMPRESSION_LZMA: + ret = z_erofs_load_lzma_config(sb, dsb, data, size); + break; + default: + DBG_BUGON(1); + ret = -EFAULT; + } + kfree(data); + if (ret) + goto err; + } +err: + if (page) { + unlock_page(page); + put_page(page); + } + return ret; +} +#else +static int erofs_load_compr_cfgs(struct super_block *sb, + struct erofs_super_block *dsb) +{ + if (dsb->u1.available_compr_algs) { + erofs_err(sb, "try to load compressed fs when compression is disabled"); + return -EINVAL; + } + return 0; +} +#endif + +static int erofs_init_devices(struct super_block *sb, + struct erofs_super_block *dsb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + unsigned int ondisk_extradevs; + erofs_off_t pos; + struct page *page = NULL; + struct erofs_device_info *dif; + struct erofs_deviceslot *dis; + void *ptr; + int id, err = 0; + + sbi->total_blocks = sbi->primarydevice_blocks; + if (!erofs_sb_has_device_table(sbi)) + ondisk_extradevs = 0; + else + ondisk_extradevs = le16_to_cpu(dsb->extra_devices); + + if (ondisk_extradevs != sbi->devs->extra_devices) { + erofs_err(sb, "extra devices don't match (ondisk %u, given %u)", + ondisk_extradevs, sbi->devs->extra_devices); + return -EINVAL; + } + if (!ondisk_extradevs) + return 0; + + sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1; + pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE; + down_read(&sbi->devs->rwsem); + idr_for_each_entry(&sbi->devs->tree, dif, id) { + erofs_blk_t blk = erofs_blknr(pos); + struct block_device *bdev; + + if (!page || page->index != blk) { + if (page) { + kunmap(page); + unlock_page(page); + put_page(page); + } + + page = erofs_get_meta_page(sb, blk); + if (IS_ERR(page)) { + up_read(&sbi->devs->rwsem); + return PTR_ERR(page); + } + ptr = kmap(page); + } + dis = ptr + erofs_blkoff(pos); + + bdev = blkdev_get_by_path(dif->path, + FMODE_READ | FMODE_EXCL, + sb->s_type); + if (IS_ERR(bdev)) { + err = PTR_ERR(bdev); + goto err_out; + } + dif->bdev = bdev; + dif->dax_dev = fs_dax_get_by_bdev(bdev); + dif->blocks = le32_to_cpu(dis->blocks); + dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr); + sbi->total_blocks += dif->blocks; + pos += EROFS_DEVT_SLOT_SIZE; + } +err_out: + up_read(&sbi->devs->rwsem); + if (page) { + kunmap(page); + unlock_page(page); + put_page(page); + } + return err; +} + static int erofs_read_superblock(struct super_block *sb) { struct erofs_sb_info *sbi; @@ -149,7 +355,7 @@ static int erofs_read_superblock(struct super_block *sb) } sbi->feature_compat = le32_to_cpu(dsb->feature_compat); - if (sbi->feature_compat & EROFS_FEATURE_COMPAT_SB_CHKSUM) { + if (erofs_sb_has_sb_chksum(sbi)) { ret = erofs_superblock_csum_verify(sb, data); if (ret) goto out; @@ -167,7 +373,13 @@ static int erofs_read_superblock(struct super_block *sb) if (!check_layout_compatibility(sb, dsb)) goto out; - sbi->blocks = le32_to_cpu(dsb->blocks); + sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE; + if (sbi->sb_size > EROFS_BLKSIZ) { + erofs_err(sb, "invalid sb_extslots %u (more than a fs block)", + sbi->sb_size); + goto out; + } + sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks); sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); #ifdef CONFIG_EROFS_FS_XATTR sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); @@ -190,7 +402,15 @@ static int erofs_read_superblock(struct super_block *sb) } /* parse on-disk compression configurations */ - ret = z_erofs_load_lz4_config(sb, dsb); + if (erofs_sb_has_compr_cfgs(sbi)) + ret = erofs_load_compr_cfgs(sb, dsb); + else + ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0); + if (ret < 0) + goto out; + + /* handle multiple devices */ + ret = erofs_init_devices(sb, dsb); out: kunmap(page); put_page(page); @@ -201,15 +421,15 @@ out: static void erofs_default_options(struct erofs_fs_context *ctx) { #ifdef CONFIG_EROFS_FS_ZIP - ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND; - ctx->max_sync_decompress_pages = 3; - ctx->readahead_sync_decompress = false; + ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; + ctx->opt.max_sync_decompress_pages = 3; + ctx->opt.readahead_sync_decompress = false; #endif #ifdef CONFIG_EROFS_FS_XATTR - set_opt(ctx, XATTR_USER); + set_opt(&ctx->opt, XATTR_USER); #endif #ifdef CONFIG_EROFS_FS_POSIX_ACL - set_opt(ctx, POSIX_ACL); + set_opt(&ctx->opt, POSIX_ACL); #endif } @@ -217,6 +437,9 @@ enum { Opt_user_xattr, Opt_acl, Opt_cache_strategy, + Opt_dax, + Opt_dax_enum, + Opt_device, Opt_err }; @@ -227,20 +450,55 @@ static const struct constant_table erofs_param_cache_strategy[] = { {} }; +static const struct constant_table erofs_dax_param_enums[] = { + {"always", EROFS_MOUNT_DAX_ALWAYS}, + {"never", EROFS_MOUNT_DAX_NEVER}, + {} +}; + static const struct fs_parameter_spec erofs_fs_parameters[] = { fsparam_flag_no("user_xattr", Opt_user_xattr), fsparam_flag_no("acl", Opt_acl), fsparam_enum("cache_strategy", Opt_cache_strategy, erofs_param_cache_strategy), + fsparam_flag("dax", Opt_dax), + fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums), + fsparam_string("device", Opt_device), {} }; +static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) +{ +#ifdef CONFIG_FS_DAX + struct erofs_fs_context *ctx = fc->fs_private; + + switch (mode) { + case EROFS_MOUNT_DAX_ALWAYS: + warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + set_opt(&ctx->opt, DAX_ALWAYS); + clear_opt(&ctx->opt, DAX_NEVER); + return true; + case EROFS_MOUNT_DAX_NEVER: + set_opt(&ctx->opt, DAX_NEVER); + clear_opt(&ctx->opt, DAX_ALWAYS); + return true; + default: + DBG_BUGON(1); + return false; + } +#else + errorfc(fc, "dax options not supported"); + return false; +#endif +} + static int erofs_fc_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct erofs_fs_context *ctx __maybe_unused = fc->fs_private; + struct erofs_fs_context *ctx = fc->fs_private; struct fs_parse_result result; - int opt; + struct erofs_device_info *dif; + int opt, ret; opt = fs_parse(fc, erofs_fs_parameters, param, &result); if (opt < 0) @@ -250,9 +508,9 @@ static int erofs_fc_parse_param(struct fs_context *fc, case Opt_user_xattr: #ifdef CONFIG_EROFS_FS_XATTR if (result.boolean) - set_opt(ctx, XATTR_USER); + set_opt(&ctx->opt, XATTR_USER); else - clear_opt(ctx, XATTR_USER); + clear_opt(&ctx->opt, XATTR_USER); #else errorfc(fc, "{,no}user_xattr options not supported"); #endif @@ -260,20 +518,47 @@ static int erofs_fc_parse_param(struct fs_context *fc, case Opt_acl: #ifdef CONFIG_EROFS_FS_POSIX_ACL if (result.boolean) - set_opt(ctx, POSIX_ACL); + set_opt(&ctx->opt, POSIX_ACL); else - clear_opt(ctx, POSIX_ACL); + clear_opt(&ctx->opt, POSIX_ACL); #else errorfc(fc, "{,no}acl options not supported"); #endif break; case Opt_cache_strategy: #ifdef CONFIG_EROFS_FS_ZIP - ctx->cache_strategy = result.uint_32; + ctx->opt.cache_strategy = result.uint_32; #else errorfc(fc, "compression not supported, cache_strategy ignored"); #endif break; + case Opt_dax: + if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS)) + return -EINVAL; + break; + case Opt_dax_enum: + if (!erofs_fc_set_dax_mode(fc, result.uint_32)) + return -EINVAL; + break; + case Opt_device: + dif = kzalloc(sizeof(*dif), GFP_KERNEL); + if (!dif) + return -ENOMEM; + dif->path = kstrdup(param->string, GFP_KERNEL); + if (!dif->path) { + kfree(dif); + return -ENOMEM; + } + down_write(&ctx->devs->rwsem); + ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL); + up_write(&ctx->devs->rwsem); + if (ret < 0) { + kfree(dif->path); + kfree(dif); + return ret; + } + ++ctx->devs->extra_devices; + break; default: return -ENOPARAM; } @@ -292,7 +577,7 @@ static int erofs_managed_cache_releasepage(struct page *page, gfp_t gfp_mask) DBG_BUGON(mapping->a_ops != &managed_cache_aops); if (PagePrivate(page)) - ret = erofs_try_to_free_cached_page(mapping, page); + ret = erofs_try_to_free_cached_page(page); return ret; } @@ -358,10 +643,20 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) return -ENOMEM; sb->s_fs_info = sbi; + sbi->opt = ctx->opt; + sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev); + sbi->devs = ctx->devs; + ctx->devs = NULL; + err = erofs_read_superblock(sb); if (err) return err; + if (test_opt(&sbi->opt, DAX_ALWAYS) && + !bdev_dax_supported(sb->s_bdev, EROFS_BLKSIZ)) { + errorfc(fc, "DAX unsupported by block device. Turning off DAX."); + clear_opt(&sbi->opt, DAX_ALWAYS); + } sb->s_flags |= SB_RDONLY | SB_NOATIME; sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_time_gran = 1; @@ -369,13 +664,11 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_op = &erofs_sops; sb->s_xattr = erofs_xattr_handlers; - if (test_opt(ctx, POSIX_ACL)) + if (test_opt(&sbi->opt, POSIX_ACL)) sb->s_flags |= SB_POSIXACL; else sb->s_flags &= ~SB_POSIXACL; - sbi->ctx = *ctx; - #ifdef CONFIG_EROFS_FS_ZIP xa_init(&sbi->managed_pslots); #endif @@ -402,6 +695,10 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) if (err) return err; + err = erofs_register_sysfs(sb); + if (err) + return err; + erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi)); return 0; } @@ -419,20 +716,44 @@ static int erofs_fc_reconfigure(struct fs_context *fc) DBG_BUGON(!sb_rdonly(sb)); - if (test_opt(ctx, POSIX_ACL)) + if (test_opt(&ctx->opt, POSIX_ACL)) fc->sb_flags |= SB_POSIXACL; else fc->sb_flags &= ~SB_POSIXACL; - sbi->ctx = *ctx; + sbi->opt = ctx->opt; fc->sb_flags |= SB_RDONLY; return 0; } +static int erofs_release_device_info(int id, void *ptr, void *data) +{ + struct erofs_device_info *dif = ptr; + + fs_put_dax(dif->dax_dev); + if (dif->bdev) + blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); + kfree(dif->path); + kfree(dif); + return 0; +} + +static void erofs_free_dev_context(struct erofs_dev_context *devs) +{ + if (!devs) + return; + idr_for_each(&devs->tree, &erofs_release_device_info, NULL); + idr_destroy(&devs->tree); + kfree(devs); +} + static void erofs_fc_free(struct fs_context *fc) { - kfree(fc->fs_private); + struct erofs_fs_context *ctx = fc->fs_private; + + erofs_free_dev_context(ctx->devs); + kfree(ctx); } static const struct fs_context_operations erofs_context_ops = { @@ -444,15 +765,21 @@ static const struct fs_context_operations erofs_context_ops = { static int erofs_init_fs_context(struct fs_context *fc) { - fc->fs_private = kzalloc(sizeof(struct erofs_fs_context), GFP_KERNEL); - if (!fc->fs_private) - return -ENOMEM; + struct erofs_fs_context *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - /* set default mount options */ - erofs_default_options(fc->fs_private); + if (!ctx) + return -ENOMEM; + ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); + if (!ctx->devs) { + kfree(ctx); + return -ENOMEM; + } + fc->fs_private = ctx; + idr_init(&ctx->devs->tree); + init_rwsem(&ctx->devs->rwsem); + erofs_default_options(ctx); fc->ops = &erofs_context_ops; - return 0; } @@ -471,6 +798,9 @@ static void erofs_kill_sb(struct super_block *sb) sbi = EROFS_SB(sb); if (!sbi) return; + + erofs_free_dev_context(sbi->devs); + fs_put_dax(sbi->dax_dev); kfree(sbi); sb->s_fs_info = NULL; } @@ -482,6 +812,7 @@ static void erofs_put_super(struct super_block *sb) DBG_BUGON(!sbi); + erofs_unregister_sysfs(sb); erofs_shrinker_unregister(sb); #ifdef CONFIG_EROFS_FS_ZIP iput(sbi->managed_cache); @@ -517,10 +848,19 @@ static int __init erofs_module_init(void) if (err) goto shrinker_err; + err = z_erofs_lzma_init(); + if (err) + goto lzma_err; + + erofs_pcpubuf_init(); err = z_erofs_init_zip_subsystem(); if (err) goto zip_err; + err = erofs_init_sysfs(); + if (err) + goto sysfs_err; + err = register_filesystem(&erofs_fs_type); if (err) goto fs_err; @@ -528,8 +868,12 @@ static int __init erofs_module_init(void) return 0; fs_err: + erofs_exit_sysfs(); +sysfs_err: z_erofs_exit_zip_subsystem(); zip_err: + z_erofs_lzma_exit(); +lzma_err: erofs_exit_shrinker(); shrinker_err: kmem_cache_destroy(erofs_inode_cachep); @@ -540,12 +884,16 @@ icache_err: static void __exit erofs_module_exit(void) { unregister_filesystem(&erofs_fs_type); - z_erofs_exit_zip_subsystem(); - erofs_exit_shrinker(); - /* Ensure all RCU free inodes are safe before cache is destroyed. */ + /* Ensure all RCU free inodes / pclusters are safe to be destroyed. */ rcu_barrier(); + + erofs_exit_sysfs(); + z_erofs_exit_zip_subsystem(); + z_erofs_lzma_exit(); + erofs_exit_shrinker(); kmem_cache_destroy(erofs_inode_cachep); + erofs_pcpubuf_exit(); } /* get filesystem statistics */ @@ -557,7 +905,7 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_type = sb->s_magic; buf->f_bsize = EROFS_BLKSIZ; - buf->f_blocks = sbi->blocks; + buf->f_blocks = sbi->total_blocks; buf->f_bfree = buf->f_bavail = 0; buf->f_files = ULLONG_MAX; @@ -571,29 +919,33 @@ static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf) static int erofs_show_options(struct seq_file *seq, struct dentry *root) { - struct erofs_sb_info *sbi __maybe_unused = EROFS_SB(root->d_sb); - struct erofs_fs_context *ctx __maybe_unused = &sbi->ctx; + struct erofs_sb_info *sbi = EROFS_SB(root->d_sb); + struct erofs_mount_opts *opt = &sbi->opt; #ifdef CONFIG_EROFS_FS_XATTR - if (test_opt(ctx, XATTR_USER)) + if (test_opt(opt, XATTR_USER)) seq_puts(seq, ",user_xattr"); else seq_puts(seq, ",nouser_xattr"); #endif #ifdef CONFIG_EROFS_FS_POSIX_ACL - if (test_opt(ctx, POSIX_ACL)) + if (test_opt(opt, POSIX_ACL)) seq_puts(seq, ",acl"); else seq_puts(seq, ",noacl"); #endif #ifdef CONFIG_EROFS_FS_ZIP - if (ctx->cache_strategy == EROFS_ZIP_CACHE_DISABLED) + if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED) seq_puts(seq, ",cache_strategy=disabled"); - else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAHEAD) + else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD) seq_puts(seq, ",cache_strategy=readahead"); - else if (ctx->cache_strategy == EROFS_ZIP_CACHE_READAROUND) + else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND) seq_puts(seq, ",cache_strategy=readaround"); #endif + if (test_opt(opt, DAX_ALWAYS)) + seq_puts(seq, ",dax=always"); + if (test_opt(opt, DAX_NEVER)) + seq_puts(seq, ",dax=never"); return 0; } @@ -611,4 +963,4 @@ module_exit(erofs_module_exit); MODULE_DESCRIPTION("Enhanced ROM File System"); MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc."); MODULE_LICENSE("GPL"); - +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/erofs/sysfs.c b/fs/erofs/sysfs.c new file mode 100644 index 0000000000000000000000000000000000000000..33e15fa63c82d91bb7a1c0cc87d2ea1cb0094d2e --- /dev/null +++ b/fs/erofs/sysfs.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C), 2008-2021, OPPO Mobile Comm Corp., Ltd. + * https://www.oppo.com/ + */ +#include +#include + +#include "internal.h" + +enum { + attr_feature, + attr_pointer_ui, + attr_pointer_bool, +}; + +enum { + struct_erofs_sb_info, +}; + +struct erofs_attr { + struct attribute attr; + short attr_id; + int struct_type, offset; +}; + +#define EROFS_ATTR(_name, _mode, _id) \ +static struct erofs_attr erofs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr_id = attr_##_id, \ +} +#define EROFS_ATTR_FUNC(_name, _mode) EROFS_ATTR(_name, _mode, _name) +#define EROFS_ATTR_FEATURE(_name) EROFS_ATTR(_name, 0444, feature) + +#define EROFS_ATTR_OFFSET(_name, _mode, _id, _struct) \ +static struct erofs_attr erofs_attr_##_name = { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr_id = attr_##_id, \ + .struct_type = struct_##_struct, \ + .offset = offsetof(struct _struct, _name),\ +} + +#define EROFS_ATTR_RW(_name, _id, _struct) \ + EROFS_ATTR_OFFSET(_name, 0644, _id, _struct) + +#define EROFS_RO_ATTR(_name, _id, _struct) \ + EROFS_ATTR_OFFSET(_name, 0444, _id, _struct) + +#define EROFS_ATTR_RW_UI(_name, _struct) \ + EROFS_ATTR_RW(_name, pointer_ui, _struct) + +#define EROFS_ATTR_RW_BOOL(_name, _struct) \ + EROFS_ATTR_RW(_name, pointer_bool, _struct) + +#define ATTR_LIST(name) (&erofs_attr_##name.attr) + +static struct attribute *erofs_attrs[] = { + NULL, +}; +ATTRIBUTE_GROUPS(erofs); + +/* Features this copy of erofs supports */ +EROFS_ATTR_FEATURE(zero_padding); +EROFS_ATTR_FEATURE(compr_cfgs); +EROFS_ATTR_FEATURE(big_pcluster); +EROFS_ATTR_FEATURE(chunked_file); +EROFS_ATTR_FEATURE(device_table); +EROFS_ATTR_FEATURE(compr_head2); +EROFS_ATTR_FEATURE(sb_chksum); + +static struct attribute *erofs_feat_attrs[] = { + ATTR_LIST(zero_padding), + ATTR_LIST(compr_cfgs), + ATTR_LIST(big_pcluster), + ATTR_LIST(chunked_file), + ATTR_LIST(device_table), + ATTR_LIST(compr_head2), + ATTR_LIST(sb_chksum), + NULL, +}; +ATTRIBUTE_GROUPS(erofs_feat); + +static unsigned char *__struct_ptr(struct erofs_sb_info *sbi, + int struct_type, int offset) +{ + if (struct_type == struct_erofs_sb_info) + return (unsigned char *)sbi + offset; + return NULL; +} + +static ssize_t erofs_attr_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info, + s_kobj); + struct erofs_attr *a = container_of(attr, struct erofs_attr, attr); + unsigned char *ptr = __struct_ptr(sbi, a->struct_type, a->offset); + + switch (a->attr_id) { + case attr_feature: + return sysfs_emit(buf, "supported\n"); + case attr_pointer_ui: + if (!ptr) + return 0; + return sysfs_emit(buf, "%u\n", *(unsigned int *)ptr); + case attr_pointer_bool: + if (!ptr) + return 0; + return sysfs_emit(buf, "%d\n", *(bool *)ptr); + } + return 0; +} + +static ssize_t erofs_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info, + s_kobj); + struct erofs_attr *a = container_of(attr, struct erofs_attr, attr); + unsigned char *ptr = __struct_ptr(sbi, a->struct_type, a->offset); + unsigned long t; + int ret; + + switch (a->attr_id) { + case attr_pointer_ui: + if (!ptr) + return 0; + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + if (t != (unsigned int)t) + return -ERANGE; + *(unsigned int *)ptr = t; + return len; + case attr_pointer_bool: + if (!ptr) + return 0; + ret = kstrtoul(skip_spaces(buf), 0, &t); + if (ret) + return ret; + if (t != 0 && t != 1) + return -EINVAL; + *(bool *)ptr = !!t; + return len; + } + return 0; +} + +static void erofs_sb_release(struct kobject *kobj) +{ + struct erofs_sb_info *sbi = container_of(kobj, struct erofs_sb_info, + s_kobj); + complete(&sbi->s_kobj_unregister); +} + +static const struct sysfs_ops erofs_attr_ops = { + .show = erofs_attr_show, + .store = erofs_attr_store, +}; + +static struct kobj_type erofs_sb_ktype = { + .default_groups = erofs_groups, + .sysfs_ops = &erofs_attr_ops, + .release = erofs_sb_release, +}; + +static struct kobj_type erofs_ktype = { + .sysfs_ops = &erofs_attr_ops, +}; + +static struct kset erofs_root = { + .kobj = {.ktype = &erofs_ktype}, +}; + +static struct kobj_type erofs_feat_ktype = { + .default_groups = erofs_feat_groups, + .sysfs_ops = &erofs_attr_ops, +}; + +static struct kobject erofs_feat = { + .kset = &erofs_root, +}; + +int erofs_register_sysfs(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + int err; + + sbi->s_kobj.kset = &erofs_root; + init_completion(&sbi->s_kobj_unregister); + err = kobject_init_and_add(&sbi->s_kobj, &erofs_sb_ktype, NULL, + "%s", sb->s_id); + if (err) + goto put_sb_kobj; + return 0; + +put_sb_kobj: + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); + return err; +} + +void erofs_unregister_sysfs(struct super_block *sb) +{ + struct erofs_sb_info *sbi = EROFS_SB(sb); + + kobject_del(&sbi->s_kobj); + kobject_put(&sbi->s_kobj); + wait_for_completion(&sbi->s_kobj_unregister); +} + +int __init erofs_init_sysfs(void) +{ + int ret; + + kobject_set_name(&erofs_root.kobj, "erofs"); + erofs_root.kobj.parent = fs_kobj; + ret = kset_register(&erofs_root); + if (ret) + goto root_err; + + ret = kobject_init_and_add(&erofs_feat, &erofs_feat_ktype, + NULL, "features"); + if (ret) + goto feat_err; + return ret; + +feat_err: + kobject_put(&erofs_feat); + kset_unregister(&erofs_root); +root_err: + return ret; +} + +void erofs_exit_sysfs(void) +{ + kobject_put(&erofs_feat); + kset_unregister(&erofs_root); +} diff --git a/fs/erofs/tagptr.h b/fs/erofs/tagptr.h index a72897c86744c057240a2ee6fc6461c5cbe19924..64ceb7270b5c1845ef2a85d18ccdc56755710277 100644 --- a/fs/erofs/tagptr.h +++ b/fs/erofs/tagptr.h @@ -1,8 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * A tagged pointer implementation - * - * Copyright (C) 2018 Gao Xiang */ #ifndef __EROFS_FS_TAGPTR_H #define __EROFS_FS_TAGPTR_H @@ -107,4 +105,3 @@ tagptr_init(o, cmpxchg(&ptptr->v, o.v, n.v)); }) *ptptr; }) #endif /* __EROFS_FS_TAGPTR_H */ - diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index de9986d2f82fd5f46b16c90e14fbb4024569e9fa..ec9a1d780dc143c0bd8ab3d6b42b8532f022fe48 100644 --- a/fs/erofs/utils.c +++ b/fs/erofs/utils.c @@ -2,36 +2,32 @@ /* * Copyright (C) 2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #include "internal.h" #include -struct page *erofs_allocpage(struct list_head *pool, gfp_t gfp) +struct page *erofs_allocpage(struct page **pagepool, gfp_t gfp) { - struct page *page; + struct page *page = *pagepool; - if (!list_empty(pool)) { - page = lru_to_page(pool); + if (page) { DBG_BUGON(page_ref_count(page) != 1); - list_del(&page->lru); + *pagepool = (struct page *)page_private(page); } else { page = alloc_page(gfp); } return page; } -#if (EROFS_PCPUBUF_NR_PAGES > 0) -static struct { - u8 data[PAGE_SIZE * EROFS_PCPUBUF_NR_PAGES]; -} ____cacheline_aligned_in_smp erofs_pcpubuf[NR_CPUS]; - -void *erofs_get_pcpubuf(unsigned int pagenr) +void erofs_release_pages(struct page **pagepool) { - preempt_disable(); - return &erofs_pcpubuf[smp_processor_id()].data[pagenr * PAGE_SIZE]; + while (*pagepool) { + struct page *page = *pagepool; + + *pagepool = (struct page *)page_private(page); + put_page(page); + } } -#endif #ifdef CONFIG_EROFS_FS_ZIP /* global shrink count (for all mounted EROFS instances) */ @@ -154,7 +150,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, * however in order to avoid some race conditions, add a * DBG_BUGON to observe this in advance. */ - DBG_BUGON(xa_erase(&sbi->managed_pslots, grp->index) != grp); + DBG_BUGON(__xa_erase(&sbi->managed_pslots, grp->index) != grp); /* last refcount should be connected with its managed pslot. */ erofs_workgroup_unfreeze(grp, 0); @@ -169,15 +165,19 @@ static unsigned long erofs_shrink_workstation(struct erofs_sb_info *sbi, unsigned int freed = 0; unsigned long index; + xa_lock(&sbi->managed_pslots); xa_for_each(&sbi->managed_pslots, index, grp) { /* try to shrink each valid workgroup */ if (!erofs_try_to_release_workgroup(sbi, grp)) continue; + xa_unlock(&sbi->managed_pslots); ++freed; if (!--nr_shrink) - break; + return freed; + xa_lock(&sbi->managed_pslots); } + xa_unlock(&sbi->managed_pslots); return freed; } @@ -290,4 +290,3 @@ void erofs_exit_shrinker(void) unregister_shrinker(&erofs_shrinker_info); } #endif /* !CONFIG_EROFS_FS_ZIP */ - diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 6330bca9d91dae87a41c2ea0b59259bc168b5824..f89e6666a5a96ce4abeb90c315a3849253bdc8ad 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #include #include "xattr.h" @@ -430,7 +429,7 @@ static int shared_getxattr(struct inode *inode, struct getxattr_iter *it) static bool erofs_xattr_user_list(struct dentry *dentry) { - return test_opt(&EROFS_SB(dentry->d_sb)->ctx, XATTR_USER); + return test_opt(&EROFS_SB(dentry->d_sb)->opt, XATTR_USER); } static bool erofs_xattr_trusted_list(struct dentry *dentry) @@ -478,7 +477,7 @@ static int erofs_xattr_generic_get(const struct xattr_handler *handler, switch (handler->flags) { case EROFS_XATTR_INDEX_USER: - if (!test_opt(&sbi->ctx, XATTR_USER)) + if (!test_opt(&sbi->opt, XATTR_USER)) return -EOPNOTSUPP; break; case EROFS_XATTR_INDEX_TRUSTED: @@ -710,4 +709,3 @@ struct posix_acl *erofs_get_acl(struct inode *inode, int type) return acl; } #endif - diff --git a/fs/erofs/xattr.h b/fs/erofs/xattr.h index 815304bd335f53286e4b1a826e7373538b270324..366dcb400525f01297236a11644ce797628e033b 100644 --- a/fs/erofs/xattr.h +++ b/fs/erofs/xattr.h @@ -2,7 +2,6 @@ /* * Copyright (C) 2017-2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #ifndef __EROFS_XATTR_H #define __EROFS_XATTR_H diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index 59a969d7ed1f0527cd06f1363a528f21a785122c..03a3dafc82f3e54c62b23daac8790f0151aceedd 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #include "zdata.h" #include "compress.h" @@ -11,15 +10,95 @@ #include /* - * a compressed_pages[] placeholder in order to avoid - * being filled with file pages for in-place decompression. + * since pclustersize is variable for big pcluster feature, introduce slab + * pools implementation for different pcluster sizes. */ -#define PAGE_UNALLOCATED ((void *)0x5F0E4B1D) +struct z_erofs_pcluster_slab { + struct kmem_cache *slab; + unsigned int maxpages; + char name[48]; +}; + +#define _PCLP(n) { .maxpages = n } + +static struct z_erofs_pcluster_slab pcluster_pool[] __read_mostly = { + _PCLP(1), _PCLP(4), _PCLP(16), _PCLP(64), _PCLP(128), + _PCLP(Z_EROFS_PCLUSTER_MAX_PAGES) +}; + +static void z_erofs_destroy_pcluster_pool(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { + if (!pcluster_pool[i].slab) + continue; + kmem_cache_destroy(pcluster_pool[i].slab); + pcluster_pool[i].slab = NULL; + } +} + +static int z_erofs_create_pcluster_pool(void) +{ + struct z_erofs_pcluster_slab *pcs; + struct z_erofs_pcluster *a; + unsigned int size; + + for (pcs = pcluster_pool; + pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) { + size = struct_size(a, compressed_pages, pcs->maxpages); + + sprintf(pcs->name, "erofs_pcluster-%u", pcs->maxpages); + pcs->slab = kmem_cache_create(pcs->name, size, 0, + SLAB_RECLAIM_ACCOUNT, NULL); + if (pcs->slab) + continue; + + z_erofs_destroy_pcluster_pool(); + return -ENOMEM; + } + return 0; +} + +static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { + struct z_erofs_pcluster_slab *pcs = pcluster_pool + i; + struct z_erofs_pcluster *pcl; + + if (nrpages > pcs->maxpages) + continue; + + pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS); + if (!pcl) + return ERR_PTR(-ENOMEM); + pcl->pclusterpages = nrpages; + return pcl; + } + return ERR_PTR(-EINVAL); +} + +static void z_erofs_free_pcluster(struct z_erofs_pcluster *pcl) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) { + struct z_erofs_pcluster_slab *pcs = pcluster_pool + i; + + if (pcl->pclusterpages > pcs->maxpages) + continue; + + kmem_cache_free(pcs->slab, pcl); + return; + } + DBG_BUGON(1); +} /* how to allocate cached pages for a pcluster */ enum z_erofs_cache_alloctype { DONTALLOC, /* don't allocate any cached pages */ - DELAYEDALLOC, /* delayed allocation (at the time of submitting io) */ /* * try to use cached I/O if page allocation succeeds or fallback * to in-place I/O instead to avoid any direct reclaim. @@ -37,12 +116,11 @@ typedef tagptr1_t compressed_page_t; tagptr_fold(compressed_page_t, page, 1) static struct workqueue_struct *z_erofs_workqueue __read_mostly; -static struct kmem_cache *pcluster_cachep __read_mostly; void z_erofs_exit_zip_subsystem(void) { destroy_workqueue(z_erofs_workqueue); - kmem_cache_destroy(pcluster_cachep); + z_erofs_destroy_pcluster_pool(); } static inline int z_erofs_init_workqueue(void) @@ -59,32 +137,16 @@ static inline int z_erofs_init_workqueue(void) return z_erofs_workqueue ? 0 : -ENOMEM; } -static void z_erofs_pcluster_init_once(void *ptr) -{ - struct z_erofs_pcluster *pcl = ptr; - struct z_erofs_collection *cl = z_erofs_primarycollection(pcl); - unsigned int i; - - mutex_init(&cl->lock); - cl->nr_pages = 0; - cl->vcnt = 0; - for (i = 0; i < Z_EROFS_CLUSTER_MAX_PAGES; ++i) - pcl->compressed_pages[i] = NULL; -} - int __init z_erofs_init_zip_subsystem(void) { - pcluster_cachep = kmem_cache_create("erofs_compress", - Z_EROFS_WORKGROUP_SIZE, 0, - SLAB_RECLAIM_ACCOUNT, - z_erofs_pcluster_init_once); - if (pcluster_cachep) { - if (!z_erofs_init_workqueue()) - return 0; - - kmem_cache_destroy(pcluster_cachep); - } - return -ENOMEM; + int err = z_erofs_create_pcluster_pool(); + + if (err) + return err; + err = z_erofs_init_workqueue(); + if (err) + z_erofs_destroy_pcluster_pool(); + return err; } enum z_erofs_collectmode { @@ -104,6 +166,12 @@ enum z_erofs_collectmode { * |_______PRIMARY_FOLLOWED_______|________PRIMARY_HOOKED___________| */ COLLECT_PRIMARY_HOOKED, + /* + * a weak form of COLLECT_PRIMARY_FOLLOWED, the difference is that it + * could be dispatched into bypass queue later due to uptodated managed + * pages. All related online pages cannot be reused for inplace I/O (or + * pagevec) since it can be directly decoded without I/O submission. + */ COLLECT_PRIMARY_FOLLOWED_NOINPLACE, /* * The current collection has been linked with the owned chain, and @@ -128,7 +196,8 @@ struct z_erofs_collector { struct z_erofs_pcluster *pcl, *tailpcl; struct z_erofs_collection *cl; - struct page **compressedpages; + /* a pointer used to pick up inplace I/O pages */ + struct page **icpage_ptr; z_erofs_next_pcluster_t owned_head; enum z_erofs_collectmode mode; @@ -160,20 +229,21 @@ static DEFINE_MUTEX(z_pagemap_global_lock); static void preload_compressed_pages(struct z_erofs_collector *clt, struct address_space *mc, enum z_erofs_cache_alloctype type, - struct list_head *pagepool) + struct page **pagepool) { - const struct z_erofs_pcluster *pcl = clt->pcl; - const unsigned int clusterpages = BIT(pcl->clusterbits); - struct page **pages = clt->compressedpages; - pgoff_t index = pcl->obj.index + (pages - pcl->compressed_pages); + struct z_erofs_pcluster *pcl = clt->pcl; bool standalone = true; gfp_t gfp = (mapping_gfp_mask(mc) & ~__GFP_DIRECT_RECLAIM) | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN; + struct page **pages; + pgoff_t index; if (clt->mode < COLLECT_PRIMARY_FOLLOWED) return; - for (; pages < pcl->compressed_pages + clusterpages; ++pages) { + pages = pcl->compressed_pages; + index = pcl->obj.index; + for (; index < pcl->obj.index + pcl->pclusterpages; ++index, ++pages) { struct page *page; compressed_page_t t; struct page *newpage = NULL; @@ -186,35 +256,37 @@ static void preload_compressed_pages(struct z_erofs_collector *clt, if (page) { t = tag_compressed_page_justfound(page); - } else if (type == DELAYEDALLOC) { - t = tagptr_init(compressed_page_t, PAGE_UNALLOCATED); - } else if (type == TRYALLOC) { - newpage = erofs_allocpage(pagepool, gfp); - if (!newpage) - goto dontalloc; - - set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); - t = tag_compressed_page_justfound(newpage); - } else { /* DONTALLOC */ -dontalloc: - if (standalone) - clt->compressedpages = pages; + } else { + /* I/O is needed, no possible to decompress directly */ standalone = false; - continue; + switch (type) { + case TRYALLOC: + newpage = erofs_allocpage(pagepool, gfp); + if (!newpage) + continue; + set_page_private(newpage, + Z_EROFS_PREALLOCATED_PAGE); + t = tag_compressed_page_justfound(newpage); + break; + default: /* DONTALLOC */ + continue; + } } if (!cmpxchg_relaxed(pages, NULL, tagptr_cast_ptr(t))) continue; - if (page) { + if (page) put_page(page); - } else if (newpage) { - set_page_private(newpage, 0); - list_add(&newpage->lru, pagepool); - } + else if (newpage) + erofs_pagepool_add(pagepool, newpage); } - if (standalone) /* downgrade to PRIMARY_FOLLOWED_NOINPLACE */ + /* + * don't do inplace I/O if all compressed pages are available in + * managed cache since it can be moved to the bypass queue instead. + */ + if (standalone) clt->mode = COLLECT_PRIMARY_FOLLOWED_NOINPLACE; } @@ -224,15 +296,13 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, { struct z_erofs_pcluster *const pcl = container_of(grp, struct z_erofs_pcluster, obj); - struct address_space *const mapping = MNGD_MAPPING(sbi); - const unsigned int clusterpages = BIT(pcl->clusterbits); int i; /* * refcount of workgroup is now freezed as 1, * therefore no need to worry about available decompression users. */ - for (i = 0; i < clusterpages; ++i) { + for (i = 0; i < pcl->pclusterpages; ++i) { struct page *page = pcl->compressed_pages[i]; if (!page) @@ -242,7 +312,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, if (!trylock_page(page)) return -EBUSY; - if (page->mapping != mapping) + if (!erofs_page_is_managed(sbi, page)) continue; /* barrier is implied in the following 'unlock_page' */ @@ -253,17 +323,15 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, return 0; } -int erofs_try_to_free_cached_page(struct address_space *mapping, - struct page *page) +int erofs_try_to_free_cached_page(struct page *page) { struct z_erofs_pcluster *const pcl = (void *)page_private(page); - const unsigned int clusterpages = BIT(pcl->clusterbits); int ret = 0; /* 0 - busy */ if (erofs_workgroup_try_to_freeze(&pcl->obj, 1)) { unsigned int i; - for (i = 0; i < clusterpages; ++i) { + for (i = 0; i < pcl->pclusterpages; ++i) { if (pcl->compressed_pages[i] == page) { WRITE_ONCE(pcl->compressed_pages[i], NULL); ret = 1; @@ -279,26 +347,23 @@ int erofs_try_to_free_cached_page(struct address_space *mapping, } /* page_type must be Z_EROFS_PAGE_TYPE_EXCLUSIVE */ -static inline bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, - struct page *page) +static bool z_erofs_try_inplace_io(struct z_erofs_collector *clt, + struct page *page) { struct z_erofs_pcluster *const pcl = clt->pcl; - const unsigned int clusterpages = BIT(pcl->clusterbits); - while (clt->compressedpages < pcl->compressed_pages + clusterpages) { - if (!cmpxchg(clt->compressedpages++, NULL, page)) + while (clt->icpage_ptr > pcl->compressed_pages) + if (!cmpxchg(--clt->icpage_ptr, NULL, page)) return true; - } return false; } /* callers must be with collection lock held */ static int z_erofs_attach_page(struct z_erofs_collector *clt, - struct page *page, - enum z_erofs_page_type type) + struct page *page, enum z_erofs_page_type type, + bool pvec_safereuse) { int ret; - bool occupied; /* give priority for inplaceio */ if (clt->mode >= COLLECT_PRIMARY && @@ -306,41 +371,39 @@ static int z_erofs_attach_page(struct z_erofs_collector *clt, z_erofs_try_inplace_io(clt, page)) return 0; - ret = z_erofs_pagevec_enqueue(&clt->vector, - page, type, &occupied); + ret = z_erofs_pagevec_enqueue(&clt->vector, page, type, + pvec_safereuse); clt->cl->vcnt += (unsigned int)ret; - return ret ? 0 : -EAGAIN; } -static enum z_erofs_collectmode -try_to_claim_pcluster(struct z_erofs_pcluster *pcl, - z_erofs_next_pcluster_t *owned_head) +static void z_erofs_try_to_claim_pcluster(struct z_erofs_collector *clt) { - /* let's claim these following types of pclusters */ -retry: - if (pcl->next == Z_EROFS_PCLUSTER_NIL) { - /* type 1, nil pcluster */ - if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL, - *owned_head) != Z_EROFS_PCLUSTER_NIL) - goto retry; + struct z_erofs_pcluster *pcl = clt->pcl; + z_erofs_next_pcluster_t *owned_head = &clt->owned_head; + /* type 1, nil pcluster (this pcluster doesn't belong to any chain.) */ + if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_NIL, + *owned_head) == Z_EROFS_PCLUSTER_NIL) { *owned_head = &pcl->next; - /* lucky, I am the followee :) */ - return COLLECT_PRIMARY_FOLLOWED; - } else if (pcl->next == Z_EROFS_PCLUSTER_TAIL) { - /* - * type 2, link to the end of a existing open chain, - * be careful that its submission itself is governed - * by the original owned chain. - */ - if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, - *owned_head) != Z_EROFS_PCLUSTER_TAIL) - goto retry; + /* so we can attach this pcluster to our submission chain. */ + clt->mode = COLLECT_PRIMARY_FOLLOWED; + return; + } + + /* + * type 2, link to the end of an existing open chain, be careful + * that its submission is controlled by the original attached chain. + */ + if (cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, + *owned_head) == Z_EROFS_PCLUSTER_TAIL) { *owned_head = Z_EROFS_PCLUSTER_TAIL; - return COLLECT_PRIMARY_HOOKED; + clt->mode = COLLECT_PRIMARY_HOOKED; + clt->tailpcl = NULL; + return; } - return COLLECT_PRIMARY; /* :( better luck next time */ + /* type 3, it belongs to a chain, but it isn't the end of the chain */ + clt->mode = COLLECT_PRIMARY; } static int z_erofs_lookup_collection(struct z_erofs_collector *clt, @@ -385,10 +448,8 @@ static int z_erofs_lookup_collection(struct z_erofs_collector *clt, /* used to check tail merging loop due to corrupted images */ if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) clt->tailpcl = pcl; - clt->mode = try_to_claim_pcluster(pcl, &clt->owned_head); - /* clean tailpcl if the current owned_head is Z_EROFS_PCLUSTER_TAIL */ - if (clt->owned_head == Z_EROFS_PCLUSTER_TAIL) - clt->tailpcl = NULL; + + z_erofs_try_to_claim_pcluster(clt); clt->cl = cl; return 0; } @@ -402,42 +463,35 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, struct erofs_workgroup *grp; int err; - /* no available workgroup, let's allocate one */ - pcl = kmem_cache_alloc(pcluster_cachep, GFP_NOFS); - if (!pcl) - return -ENOMEM; + if (!(map->m_flags & EROFS_MAP_ENCODED)) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + + /* no available pcluster, let's allocate one */ + pcl = z_erofs_alloc_pcluster(map->m_plen >> PAGE_SHIFT); + if (IS_ERR(pcl)) + return PTR_ERR(pcl); atomic_set(&pcl->obj.refcount, 1); pcl->obj.index = map->m_pa >> PAGE_SHIFT; - + pcl->algorithmformat = map->m_algorithmformat; pcl->length = (map->m_llen << Z_EROFS_PCLUSTER_LENGTH_BIT) | (map->m_flags & EROFS_MAP_FULL_MAPPED ? Z_EROFS_PCLUSTER_FULL_LENGTH : 0); - if (map->m_flags & EROFS_MAP_ZIPPED) - pcl->algorithmformat = Z_EROFS_COMPRESSION_LZ4; - else - pcl->algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; - - pcl->clusterbits = EROFS_I(inode)->z_physical_clusterbits[0]; - pcl->clusterbits -= PAGE_SHIFT; - /* new pclusters should be claimed as type 1, primary and followed */ pcl->next = clt->owned_head; clt->mode = COLLECT_PRIMARY_FOLLOWED; cl = z_erofs_primarycollection(pcl); - - /* must be cleaned before freeing to slab */ - DBG_BUGON(cl->nr_pages); - DBG_BUGON(cl->vcnt); - cl->pageofs = map->m_la & ~PAGE_MASK; /* * lock all primary followed works before visible to others * and mutex_trylock *never* fails for a new pcluster. */ + mutex_init(&cl->lock); DBG_BUGON(!mutex_trylock(&cl->lock)); grp = erofs_insert_workgroup(inode->i_sb, &pcl->obj); @@ -461,7 +515,7 @@ static int z_erofs_register_collection(struct z_erofs_collector *clt, err_out: mutex_unlock(&cl->lock); - kmem_cache_free(pcluster_cachep, pcl); + z_erofs_free_pcluster(pcl); return err; } @@ -505,9 +559,8 @@ out: z_erofs_pagevec_ctor_init(&clt->vector, Z_EROFS_NR_INLINE_PAGEVECS, clt->cl->pagevec, clt->cl->vcnt); - clt->compressedpages = clt->pcl->compressed_pages; - if (clt->mode <= COLLECT_PRIMARY) /* cannot do in-place I/O */ - clt->compressedpages += Z_EROFS_CLUSTER_MAX_PAGES; + /* since file-backed online pages are traversed in reverse order */ + clt->icpage_ptr = clt->pcl->compressed_pages + clt->pcl->pclusterpages; return 0; } @@ -520,9 +573,8 @@ static void z_erofs_rcu_callback(struct rcu_head *head) struct z_erofs_collection *const cl = container_of(head, struct z_erofs_collection, rcu); - kmem_cache_free(pcluster_cachep, - container_of(cl, struct z_erofs_pcluster, - primary_collection)); + z_erofs_free_pcluster(container_of(cl, struct z_erofs_pcluster, + primary_collection)); } void erofs_workgroup_free_rcu(struct erofs_workgroup *grp) @@ -578,7 +630,7 @@ static bool should_alloc_managed_pages(struct z_erofs_decompress_frontend *fe, } static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, - struct page *page, struct list_head *pagepool) + struct page *page, struct page **pagepool) { struct inode *const inode = fe->inode; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); @@ -630,7 +682,7 @@ restart_now: goto err_out; /* preload all compressed pages (maybe downgrade role if necessary) */ - if (should_alloc_managed_pages(fe, sbi->ctx.cache_strategy, map->m_la)) + if (should_alloc_managed_pages(fe, sbi->opt.cache_strategy, map->m_la)) cache_strategy = TRYALLOC; else cache_strategy = DONTALLOC; @@ -664,7 +716,8 @@ hitted: tight &= (clt->mode >= COLLECT_PRIMARY_FOLLOWED); retry: - err = z_erofs_attach_page(clt, page, page_type); + err = z_erofs_attach_page(clt, page, page_type, + clt->mode >= COLLECT_PRIMARY_FOLLOWED); /* should allocate an additional short-lived page for pagevec */ if (err == -EAGAIN) { struct page *const newpage = @@ -672,7 +725,7 @@ retry: set_page_private(newpage, Z_EROFS_SHORTLIVED_PAGE); err = z_erofs_attach_page(clt, newpage, - Z_EROFS_PAGE_TYPE_EXCLUSIVE); + Z_EROFS_PAGE_TYPE_EXCLUSIVE, true); if (!err) goto retry; } @@ -731,7 +784,7 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io, /* Use workqueue and sync decompression for atomic contexts only */ if (in_atomic() || irqs_disabled()) { queue_work(z_erofs_workqueue, &io->u.work); - sbi->ctx.readahead_sync_decompress = true; + sbi->opt.readahead_sync_decompress = true; return; } z_erofs_decompressqueue_work(&io->u.work); @@ -771,12 +824,11 @@ static void z_erofs_decompressqueue_endio(struct bio *bio) static int z_erofs_decompress_pcluster(struct super_block *sb, struct z_erofs_pcluster *pcl, - struct list_head *pagepool) + struct page **pagepool) { struct erofs_sb_info *const sbi = EROFS_SB(sb); - const unsigned int clusterpages = BIT(pcl->clusterbits); struct z_erofs_pagevec_ctor ctor; - unsigned int i, outputsize, llen, nr_pages; + unsigned int i, inputsize, outputsize, llen, nr_pages; struct page *pages_onstack[Z_EROFS_VMAP_ONSTACK_PAGES]; struct page **pages, **compressed_pages, *page; @@ -856,7 +908,7 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, overlapped = false; compressed_pages = pcl->compressed_pages; - for (i = 0; i < clusterpages; ++i) { + for (i = 0; i < pcl->pclusterpages; ++i) { unsigned int pagenr; page = compressed_pages[i]; @@ -909,12 +961,13 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, partial = true; } + inputsize = pcl->pclusterpages * PAGE_SIZE; err = z_erofs_decompress(&(struct z_erofs_decompress_req) { .sb = sb, .in = compressed_pages, .out = pages, .pageofs_out = cl->pageofs, - .inputsize = PAGE_SIZE, + .inputsize = inputsize, .outputsize = outputsize, .alg = pcl->algorithmformat, .inplace_io = overlapped, @@ -922,8 +975,8 @@ static int z_erofs_decompress_pcluster(struct super_block *sb, }, pagepool); out: - /* must handle all compressed pages before endding pages */ - for (i = 0; i < clusterpages; ++i) { + /* must handle all compressed pages before ending pages */ + for (i = 0; i < pcl->pclusterpages; ++i) { page = compressed_pages[i]; if (erofs_page_is_managed(sbi, page)) @@ -971,7 +1024,7 @@ out: } static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io, - struct list_head *pagepool) + struct page **pagepool) { z_erofs_next_pcluster_t owned = io->head; @@ -995,18 +1048,18 @@ static void z_erofs_decompressqueue_work(struct work_struct *work) { struct z_erofs_decompressqueue *bgq = container_of(work, struct z_erofs_decompressqueue, u.work); - LIST_HEAD(pagepool); + struct page *pagepool = NULL; DBG_BUGON(bgq->head == Z_EROFS_PCLUSTER_TAIL_CLOSED); z_erofs_decompress_queue(bgq, &pagepool); - put_pages_list(&pagepool); + erofs_release_pages(&pagepool); kvfree(bgq); } static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl, unsigned int nr, - struct list_head *pagepool, + struct page **pagepool, struct address_space *mc, gfp_t gfp) { @@ -1026,15 +1079,6 @@ repeat: if (!page) goto out_allocpage; - /* - * the cached page has not been allocated and - * an placeholder is out there, prepare it now. - */ - if (page == PAGE_UNALLOCATED) { - tocache = true; - goto out_allocpage; - } - /* process the target tagged pointer */ t = tagptr_init(compressed_page_t, page); justfound = tagptr_unfold_tags(t); @@ -1108,7 +1152,7 @@ repeat: out_allocpage: page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL); if (oldpage != cmpxchg(&pcl->compressed_pages[nr], oldpage, page)) { - list_add(&page->lru, pagepool); + erofs_pagepool_add(pagepool, page); cond_resched(); goto repeat; } @@ -1192,7 +1236,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, static void z_erofs_submit_queue(struct super_block *sb, struct z_erofs_decompress_frontend *f, - struct list_head *pagepool, + struct page **pagepool, struct z_erofs_decompressqueue *fgq, bool *force_fg) { @@ -1201,8 +1245,9 @@ static void z_erofs_submit_queue(struct super_block *sb, struct z_erofs_decompressqueue *q[NR_JOBQUEUES]; void *bi_private; z_erofs_next_pcluster_t owned_head = f->clt.owned_head; - /* since bio will be NULL, no need to initialize last_index */ + /* bio is NULL initially, so no need to initialize last_{index,bdev} */ pgoff_t last_index; + struct block_device *last_bdev; unsigned int nr_bios = 0; struct bio *bio = NULL; @@ -1214,6 +1259,7 @@ static void z_erofs_submit_queue(struct super_block *sb, q[JQ_SUBMIT]->head = owned_head; do { + struct erofs_map_dev mdev; struct z_erofs_pcluster *pcl; pgoff_t cur, end; unsigned int i = 0; @@ -1225,8 +1271,14 @@ static void z_erofs_submit_queue(struct super_block *sb, pcl = container_of(owned_head, struct z_erofs_pcluster, next); - cur = pcl->obj.index; - end = cur + BIT(pcl->clusterbits); + /* no device id here, thus it will always succeed */ + mdev = (struct erofs_map_dev) { + .m_pa = blknr_to_addr(pcl->obj.index), + }; + (void)erofs_map_dev(sb, &mdev); + + cur = erofs_blknr(mdev.m_pa); + end = cur + pcl->pclusterpages; /* close the main owned chain at first */ owned_head = cmpxchg(&pcl->next, Z_EROFS_PCLUSTER_TAIL, @@ -1241,7 +1293,8 @@ static void z_erofs_submit_queue(struct super_block *sb, if (!page) continue; - if (bio && cur != last_index + 1) { + if (bio && (cur != last_index + 1 || + last_bdev != mdev.m_bdev)) { submit_bio_retry: submit_bio(bio); bio = NULL; @@ -1251,7 +1304,9 @@ submit_bio_retry: bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); bio->bi_end_io = z_erofs_decompressqueue_endio; - bio_set_dev(bio, sb->s_bdev); + + bio_set_dev(bio, mdev.m_bdev); + last_bdev = mdev.m_bdev; bio->bi_iter.bi_sector = (sector_t)cur << LOG_SECTORS_PER_BLOCK; bio->bi_private = bi_private; @@ -1290,7 +1345,7 @@ submit_bio_retry: static void z_erofs_runqueue(struct super_block *sb, struct z_erofs_decompress_frontend *f, - struct list_head *pagepool, bool force_fg) + struct page **pagepool, bool force_fg) { struct z_erofs_decompressqueue io[NR_JOBQUEUES]; @@ -1316,8 +1371,8 @@ static int z_erofs_readpage(struct file *file, struct page *page) { struct inode *const inode = page->mapping->host; struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); + struct page *pagepool = NULL; int err; - LIST_HEAD(pagepool); trace_erofs_readpage(page, false); @@ -1335,8 +1390,7 @@ static int z_erofs_readpage(struct file *file, struct page *page) if (f.map.mpage) put_page(f.map.mpage); - /* clean up the remaining free pages */ - put_pages_list(&pagepool); + erofs_release_pages(&pagepool); return err; } @@ -1346,11 +1400,10 @@ static void z_erofs_readahead(struct readahead_control *rac) struct erofs_sb_info *const sbi = EROFS_I_SB(inode); unsigned int nr_pages = readahead_count(rac); - bool sync = (sbi->ctx.readahead_sync_decompress && - nr_pages <= sbi->ctx.max_sync_decompress_pages); + bool sync = (sbi->opt.readahead_sync_decompress && + nr_pages <= sbi->opt.max_sync_decompress_pages); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); - struct page *page, *head = NULL; - LIST_HEAD(pagepool); + struct page *pagepool = NULL, *head = NULL, *page; trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false); @@ -1392,13 +1445,10 @@ static void z_erofs_readahead(struct readahead_control *rac) if (f.map.mpage) put_page(f.map.mpage); - - /* clean up the remaining free pages */ - put_pages_list(&pagepool); + erofs_release_pages(&pagepool); } const struct address_space_operations z_erofs_aops = { .readpage = z_erofs_readpage, .readahead = z_erofs_readahead, }; - diff --git a/fs/erofs/zdata.h b/fs/erofs/zdata.h index b503b353d4abc3e5603cf3899ce04a5c45613363..4a69515dea7552d990f9c31c8a884033e5483aa5 100644 --- a/fs/erofs/zdata.h +++ b/fs/erofs/zdata.h @@ -2,7 +2,6 @@ /* * Copyright (C) 2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #ifndef __EROFS_FS_ZDATA_H #define __EROFS_FS_ZDATA_H @@ -10,6 +9,7 @@ #include "internal.h" #include "zpvec.h" +#define Z_EROFS_PCLUSTER_MAX_PAGES (Z_EROFS_PCLUSTER_MAX_SIZE / PAGE_SIZE) #define Z_EROFS_NR_INLINE_PAGEVECS 3 /* @@ -59,16 +59,17 @@ struct z_erofs_pcluster { /* A: point to next chained pcluster or TAILs */ z_erofs_next_pcluster_t next; - /* A: compressed pages (including multi-usage pages) */ - struct page *compressed_pages[Z_EROFS_CLUSTER_MAX_PAGES]; - /* A: lower limit of decompressed length and if full length or not */ unsigned int length; + /* I: physical cluster size in pages */ + unsigned short pclusterpages; + /* I: compression algorithm format */ unsigned char algorithmformat; - /* I: bit shift of physical cluster size */ - unsigned char clusterbits; + + /* A: compressed pages (can be cached or inplaced pages) */ + struct page *compressed_pages[]; }; #define z_erofs_primarycollection(pcluster) (&(pcluster)->primary_collection) @@ -82,8 +83,6 @@ struct z_erofs_pcluster { #define Z_EROFS_PCLUSTER_NIL (NULL) -#define Z_EROFS_WORKGROUP_SIZE sizeof(struct z_erofs_pcluster) - struct z_erofs_decompressqueue { struct super_block *sb; atomic_t pending_bios; @@ -95,13 +94,6 @@ struct z_erofs_decompressqueue { } u; }; -#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) -static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, - struct page *page) -{ - return page->mapping == MNGD_MAPPING(sbi); -} - #define Z_EROFS_ONLINEPAGE_COUNT_BITS 2 #define Z_EROFS_ONLINEPAGE_COUNT_MASK ((1 << Z_EROFS_ONLINEPAGE_COUNT_BITS) - 1) #define Z_EROFS_ONLINEPAGE_INDEX_SHIFT (Z_EROFS_ONLINEPAGE_COUNT_BITS) @@ -187,4 +179,3 @@ static inline void z_erofs_onlinepage_endio(struct page *page) #define Z_EROFS_VMAP_GLOBAL_PAGES 2048 #endif - diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index 14d2de35110ccd25c005ac96865c660c06d43f44..09624cb38b7ee1ce6348347babf399bd45640f7e 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -2,7 +2,6 @@ /* * Copyright (C) 2018-2019 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #include "internal.h" #include @@ -11,17 +10,16 @@ int z_erofs_fill_inode(struct inode *inode) { struct erofs_inode *const vi = EROFS_I(inode); + struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb); - if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { + if (!erofs_sb_has_big_pcluster(sbi) && + vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY) { vi->z_advise = 0; vi->z_algorithmtype[0] = 0; vi->z_algorithmtype[1] = 0; vi->z_logical_clusterbits = LOG_BLOCK_SIZE; - vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits; - vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits; set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); } - inode->i_mapping->a_ops = &z_erofs_aops; return 0; } @@ -30,7 +28,7 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) { struct erofs_inode *const vi = EROFS_I(inode); struct super_block *const sb = inode->i_sb; - int err; + int err, headnr; erofs_off_t pos; struct page *page; void *kaddr; @@ -52,7 +50,8 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) goto out_unlock; - DBG_BUGON(vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY); + DBG_BUGON(!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && + vi->datalayout == EROFS_INODE_FLAT_COMPRESSION_LEGACY); pos = ALIGN(iloc(EROFS_SB(sb), vi->nid) + vi->inode_isize + vi->xattr_isize, 8); @@ -69,26 +68,32 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) vi->z_algorithmtype[0] = h->h_algorithmtype & 15; vi->z_algorithmtype[1] = h->h_algorithmtype >> 4; - if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX) { - erofs_err(sb, "unknown compression format %u for nid %llu, please upgrade kernel", - vi->z_algorithmtype[0], vi->nid); + headnr = 0; + if (vi->z_algorithmtype[0] >= Z_EROFS_COMPRESSION_MAX || + vi->z_algorithmtype[++headnr] >= Z_EROFS_COMPRESSION_MAX) { + erofs_err(sb, "unknown HEAD%u format %u for nid %llu, please upgrade kernel", + headnr + 1, vi->z_algorithmtype[headnr], vi->nid); err = -EOPNOTSUPP; goto unmap_done; } vi->z_logical_clusterbits = LOG_BLOCK_SIZE + (h->h_clusterbits & 7); - vi->z_physical_clusterbits[0] = vi->z_logical_clusterbits + - ((h->h_clusterbits >> 3) & 3); - - if (vi->z_physical_clusterbits[0] != LOG_BLOCK_SIZE) { - erofs_err(sb, "unsupported physical clusterbits %u for nid %llu, please upgrade kernel", - vi->z_physical_clusterbits[0], vi->nid); - err = -EOPNOTSUPP; + if (!erofs_sb_has_big_pcluster(EROFS_SB(sb)) && + vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | + Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { + erofs_err(sb, "per-inode big pcluster without sb feature for nid %llu", + vi->nid); + err = -EFSCORRUPTED; + goto unmap_done; + } + if (vi->datalayout == EROFS_INODE_FLAT_COMPRESSION && + !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1) ^ + !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2)) { + erofs_err(sb, "big pcluster head1/2 of compact indexes should be consistent for nid %llu", + vi->nid); + err = -EFSCORRUPTED; goto unmap_done; } - - vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits + - ((h->h_clusterbits >> 5) & 7); /* paired with smp_mb() at the beginning of the function */ smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); @@ -108,10 +113,10 @@ struct z_erofs_maprecorder { unsigned long lcn; /* compression extent information gathered */ - u8 type; + u8 type, headtype; u16 clusterofs; u16 delta[2]; - erofs_blk_t pblk; + erofs_blk_t pblk, compressedlcs; }; static int z_erofs_reload_indexes(struct z_erofs_maprecorder *m, @@ -174,10 +179,21 @@ static int legacy_load_cluster_from_disk(struct z_erofs_maprecorder *m, case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: m->clusterofs = 1 << vi->z_logical_clusterbits; m->delta[0] = le16_to_cpu(di->di_u.delta[0]); + if (m->delta[0] & Z_EROFS_VLE_DI_D0_CBLKCNT) { + if (!(vi->z_advise & (Z_EROFS_ADVISE_BIG_PCLUSTER_1 | + Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + m->compressedlcs = m->delta[0] & + ~Z_EROFS_VLE_DI_D0_CBLKCNT; + m->delta[0] = 1; + } m->delta[1] = le16_to_cpu(di->di_u.delta[1]); break; case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: m->clusterofs = le16_to_cpu(di->di_clusterofs); m->pblk = le32_to_cpu(di->di_u.blkaddr); break; @@ -200,9 +216,34 @@ static unsigned int decode_compactedbits(unsigned int lobits, return lo; } +static int get_compacted_la_distance(unsigned int lclusterbits, + unsigned int encodebits, + unsigned int vcnt, u8 *in, int i) +{ + const unsigned int lomask = (1 << lclusterbits) - 1; + unsigned int lo, d1 = 0; + u8 type; + + DBG_BUGON(i >= vcnt); + + do { + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * i, &type); + + if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) + return d1; + ++d1; + } while (++i < vcnt); + + /* vcnt - 1 (Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) item */ + if (!(lo & Z_EROFS_VLE_DI_D0_CBLKCNT)) + d1 += lo - 1; + return d1; +} + static int unpack_compacted_index(struct z_erofs_maprecorder *m, unsigned int amortizedshift, - unsigned int eofs) + unsigned int eofs, bool lookahead) { struct erofs_inode *const vi = EROFS_I(m->inode); const unsigned int lclusterbits = vi->z_logical_clusterbits; @@ -210,6 +251,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, unsigned int vcnt, base, lo, encodebits, nblk; int i; u8 *in, type; + bool big_pcluster; if (1 << amortizedshift == 4) vcnt = 2; @@ -218,6 +260,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, else return -EOPNOTSUPP; + big_pcluster = vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1; encodebits = ((vcnt << amortizedshift) - sizeof(__le32)) * 8 / vcnt; base = round_down(eofs, vcnt << amortizedshift); in = m->kaddr + base; @@ -229,7 +272,20 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, m->type = type; if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { m->clusterofs = 1 << lclusterbits; - if (i + 1 != vcnt) { + + /* figure out lookahead_distance: delta[1] if needed */ + if (lookahead) + m->delta[1] = get_compacted_la_distance(lclusterbits, + encodebits, vcnt, in, i); + if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { + if (!big_pcluster) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + m->compressedlcs = lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; + m->delta[0] = 1; + return 0; + } else if (i + 1 != (int)vcnt) { m->delta[0] = lo; return 0; } @@ -242,22 +298,48 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, in, encodebits * (i - 1), &type); if (type != Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) lo = 0; + else if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) + lo = 1; m->delta[0] = lo + 1; return 0; } m->clusterofs = lo; m->delta[0] = 0; /* figout out blkaddr (pblk) for HEAD lclusters */ - nblk = 1; - while (i > 0) { - --i; - lo = decode_compactedbits(lclusterbits, lomask, - in, encodebits * i, &type); - if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) - i -= lo; - - if (i >= 0) + if (!big_pcluster) { + nblk = 1; + while (i > 0) { + --i; + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * i, &type); + if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) + i -= lo; + + if (i >= 0) + ++nblk; + } + } else { + nblk = 0; + while (i > 0) { + --i; + lo = decode_compactedbits(lclusterbits, lomask, + in, encodebits * i, &type); + if (type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { + if (lo & Z_EROFS_VLE_DI_D0_CBLKCNT) { + --i; + nblk += lo & ~Z_EROFS_VLE_DI_D0_CBLKCNT; + continue; + } + /* bigpcluster shouldn't have plain d0 == 1 */ + if (lo <= 1) { + DBG_BUGON(1); + return -EFSCORRUPTED; + } + i -= lo - 2; + continue; + } ++nblk; + } } in += (vcnt << amortizedshift) - sizeof(__le32); m->pblk = le32_to_cpu(*(__le32 *)in) + nblk; @@ -265,7 +347,7 @@ static int unpack_compacted_index(struct z_erofs_maprecorder *m, } static int compacted_load_cluster_from_disk(struct z_erofs_maprecorder *m, - unsigned long lcn) + unsigned long lcn, bool lookahead) { struct inode *const inode = m->inode; struct erofs_inode *const vi = EROFS_I(inode); @@ -316,11 +398,12 @@ out: err = z_erofs_reload_indexes(m, erofs_blknr(pos)); if (err) return err; - return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos)); + return unpack_compacted_index(m, amortizedshift, erofs_blkoff(pos), + lookahead); } static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m, - unsigned int lcn) + unsigned int lcn, bool lookahead) { const unsigned int datamode = EROFS_I(m->inode)->datalayout; @@ -328,7 +411,7 @@ static int z_erofs_load_cluster_from_disk(struct z_erofs_maprecorder *m, return legacy_load_cluster_from_disk(m, lcn); if (datamode == EROFS_INODE_FLAT_COMPRESSION) - return compacted_load_cluster_from_disk(m, lcn); + return compacted_load_cluster_from_disk(m, lcn, lookahead); return -EINVAL; } @@ -351,7 +434,7 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, /* load extent head logical cluster if needed */ lcn -= lookback_distance; - err = z_erofs_load_cluster_from_disk(m, lcn); + err = z_erofs_load_cluster_from_disk(m, lcn, false); if (err) return err; @@ -366,9 +449,9 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, } return z_erofs_extent_lookback(m, m->delta[0]); case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - map->m_flags &= ~EROFS_MAP_ZIPPED; - fallthrough; - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: + m->headtype = m->type; map->m_la = (lcn << lclusterbits) | m->clusterofs; break; default: @@ -381,6 +464,124 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m, return 0; } +static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m, + unsigned int initial_lcn) +{ + struct erofs_inode *const vi = EROFS_I(m->inode); + struct erofs_map_blocks *const map = m->map; + const unsigned int lclusterbits = vi->z_logical_clusterbits; + unsigned long lcn; + int err; + + DBG_BUGON(m->type != Z_EROFS_VLE_CLUSTER_TYPE_PLAIN && + m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 && + m->type != Z_EROFS_VLE_CLUSTER_TYPE_HEAD2); + DBG_BUGON(m->type != m->headtype); + + if (m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN || + ((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD1) && + !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_1)) || + ((m->headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) && + !(vi->z_advise & Z_EROFS_ADVISE_BIG_PCLUSTER_2))) { + map->m_plen = 1 << lclusterbits; + return 0; + } + lcn = m->lcn + 1; + if (m->compressedlcs) + goto out; + + err = z_erofs_load_cluster_from_disk(m, lcn, false); + if (err) + return err; + + /* + * If the 1st NONHEAD lcluster has already been handled initially w/o + * valid compressedlcs, which means at least it mustn't be CBLKCNT, or + * an internal implemenatation error is detected. + * + * The following code can also handle it properly anyway, but let's + * BUG_ON in the debugging mode only for developers to notice that. + */ + DBG_BUGON(lcn == initial_lcn && + m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD); + + switch (m->type) { + case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: + /* + * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type + * rather than CBLKCNT, it's a 1 lcluster-sized pcluster. + */ + m->compressedlcs = 1; + break; + case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: + if (m->delta[0] != 1) + goto err_bonus_cblkcnt; + if (m->compressedlcs) + break; + fallthrough; + default: + erofs_err(m->inode->i_sb, + "cannot found CBLKCNT @ lcn %lu of nid %llu", + lcn, vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; + } +out: + map->m_plen = m->compressedlcs << lclusterbits; + return 0; +err_bonus_cblkcnt: + erofs_err(m->inode->i_sb, + "bogus CBLKCNT @ lcn %lu of nid %llu", + lcn, vi->nid); + DBG_BUGON(1); + return -EFSCORRUPTED; +} + +static int z_erofs_get_extent_decompressedlen(struct z_erofs_maprecorder *m) +{ + struct inode *inode = m->inode; + struct erofs_inode *vi = EROFS_I(inode); + struct erofs_map_blocks *map = m->map; + unsigned int lclusterbits = vi->z_logical_clusterbits; + u64 lcn = m->lcn, headlcn = map->m_la >> lclusterbits; + int err; + + do { + /* handle the last EOF pcluster (no next HEAD lcluster) */ + if ((lcn << lclusterbits) >= inode->i_size) { + map->m_llen = inode->i_size - map->m_la; + return 0; + } + + err = z_erofs_load_cluster_from_disk(m, lcn, true); + if (err) + return err; + + if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD) { + DBG_BUGON(!m->delta[1] && + m->clusterofs != 1 << lclusterbits); + } else if (m->type == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN || + m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD1 || + m->type == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) { + /* go on until the next HEAD lcluster */ + if (lcn != headlcn) + break; + m->delta[1] = 1; + } else { + erofs_err(inode->i_sb, "unknown type %u @ lcn %llu of nid %llu", + m->type, lcn, vi->nid); + DBG_BUGON(1); + return -EOPNOTSUPP; + } + lcn += m->delta[1]; + } while (m->delta[1]); + + map->m_llen = (lcn << lclusterbits) + m->clusterofs - map->m_la; + return 0; +} + int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int flags) @@ -392,6 +593,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, }; int err = 0; unsigned int lclusterbits, endoff; + unsigned long initial_lcn; unsigned long long ofs, end; trace_z_erofs_map_blocks_iter_enter(inode, map, flags); @@ -410,23 +612,22 @@ int z_erofs_map_blocks_iter(struct inode *inode, lclusterbits = vi->z_logical_clusterbits; ofs = map->m_la; - m.lcn = ofs >> lclusterbits; + initial_lcn = ofs >> lclusterbits; endoff = ofs & ((1 << lclusterbits) - 1); - err = z_erofs_load_cluster_from_disk(&m, m.lcn); + err = z_erofs_load_cluster_from_disk(&m, initial_lcn, false); if (err) goto unmap_out; - map->m_flags = EROFS_MAP_ZIPPED; /* by default, compressed */ + map->m_flags = EROFS_MAP_MAPPED | EROFS_MAP_ENCODED; end = (m.lcn + 1ULL) << lclusterbits; switch (m.type) { case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN: - if (endoff >= m.clusterofs) - map->m_flags &= ~EROFS_MAP_ZIPPED; - fallthrough; - case Z_EROFS_VLE_CLUSTER_TYPE_HEAD: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD1: + case Z_EROFS_VLE_CLUSTER_TYPE_HEAD2: if (endoff >= m.clusterofs) { + m.headtype = m.type; map->m_la = (m.lcn << lclusterbits) | m.clusterofs; break; } @@ -443,7 +644,7 @@ int z_erofs_map_blocks_iter(struct inode *inode, m.delta[0] = 1; fallthrough; case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD: - /* get the correspoinding first chunk */ + /* get the corresponding first chunk */ err = z_erofs_extent_lookback(&m, m.delta[0]); if (err) goto unmap_out; @@ -457,10 +658,27 @@ int z_erofs_map_blocks_iter(struct inode *inode, } map->m_llen = end - map->m_la; - map->m_plen = 1 << lclusterbits; map->m_pa = blknr_to_addr(m.pblk); - map->m_flags |= EROFS_MAP_MAPPED; + err = z_erofs_get_extent_compressedlen(&m, initial_lcn); + if (err) + goto out; + + if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_PLAIN) + map->m_algorithmformat = Z_EROFS_COMPRESSION_SHIFTED; + else if (m.headtype == Z_EROFS_VLE_CLUSTER_TYPE_HEAD2) + map->m_algorithmformat = vi->z_algorithmtype[1]; + else + map->m_algorithmformat = vi->z_algorithmtype[0]; + + if ((flags & EROFS_GET_BLOCKS_FIEMAP) || + ((flags & EROFS_GET_BLOCKS_READMORE) && + map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA && + map->m_llen >= EROFS_BLKSIZ)) { + err = z_erofs_get_extent_decompressedlen(&m); + if (!err) + map->m_flags |= EROFS_MAP_FULL_MAPPED; + } unmap_out: if (m.kaddr) kunmap_atomic(m.kaddr); @@ -477,3 +695,40 @@ out: return err; } +static int z_erofs_iomap_begin_report(struct inode *inode, loff_t offset, + loff_t length, unsigned int flags, + struct iomap *iomap, struct iomap *srcmap) +{ + int ret; + struct erofs_map_blocks map = { .m_la = offset }; + + ret = z_erofs_map_blocks_iter(inode, &map, EROFS_GET_BLOCKS_FIEMAP); + if (map.mpage) + put_page(map.mpage); + if (ret < 0) + return ret; + + iomap->bdev = inode->i_sb->s_bdev; + iomap->offset = map.m_la; + iomap->length = map.m_llen; + if (map.m_flags & EROFS_MAP_MAPPED) { + iomap->type = IOMAP_MAPPED; + iomap->addr = map.m_pa; + } else { + iomap->type = IOMAP_HOLE; + iomap->addr = IOMAP_NULL_ADDR; + /* + * No strict rule how to describe extents for post EOF, yet + * we need do like below. Otherwise, iomap itself will get + * into an endless loop on post EOF. + */ + if (iomap->offset >= inode->i_size) + iomap->length = length + map.m_la - offset; + } + iomap->flags = 0; + return 0; +} + +const struct iomap_ops z_erofs_iomap_report_ops = { + .iomap_begin = z_erofs_iomap_begin_report, +}; diff --git a/fs/erofs/zpvec.h b/fs/erofs/zpvec.h index 1d67cbd3870421429dc7ac06c878001a873a8755..b05464f4a80831a9a4f5bdeaed21bd6d5eae4208 100644 --- a/fs/erofs/zpvec.h +++ b/fs/erofs/zpvec.h @@ -2,7 +2,6 @@ /* * Copyright (C) 2018 HUAWEI, Inc. * https://www.huawei.com/ - * Created by Gao Xiang */ #ifndef __EROFS_FS_ZPVEC_H #define __EROFS_FS_ZPVEC_H @@ -108,12 +107,17 @@ static inline void z_erofs_pagevec_ctor_init(struct z_erofs_pagevec_ctor *ctor, static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor, struct page *page, enum z_erofs_page_type type, - bool *occupied) + bool pvec_safereuse) { - *occupied = false; - if (!ctor->next && type) - if (ctor->index + 1 == ctor->nr) + if (!ctor->next) { + /* some pages cannot be reused as pvec safely without I/O */ + if (type == Z_EROFS_PAGE_TYPE_EXCLUSIVE && !pvec_safereuse) + type = Z_EROFS_VLE_PAGE_TYPE_TAIL_SHARED; + + if (type != Z_EROFS_PAGE_TYPE_EXCLUSIVE && + ctor->index + 1 == ctor->nr) return false; + } if (ctor->index >= ctor->nr) z_erofs_pagevec_ctor_pagedown(ctor, false); @@ -125,7 +129,6 @@ static inline bool z_erofs_pagevec_enqueue(struct z_erofs_pagevec_ctor *ctor, /* should remind that collector->next never equal to 1, 2 */ if (type == (uintptr_t)ctor->next) { ctor->next = page; - *occupied = true; } ctor->pages[ctor->index++] = tagptr_fold(erofs_vtptr_t, page, type); return true; @@ -154,4 +157,3 @@ z_erofs_pagevec_dequeue(struct z_erofs_pagevec_ctor *ctor, return tagptr_unfold_ptr(t); } #endif - diff --git a/fs/exec.c b/fs/exec.c index ec5ef10ce2dbbd70cf271a2f3ebc714f88cfc193..5b45f14f67f9f9fc2252518f43c2a727d15f1f7e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -235,7 +235,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, static void put_arg_page(struct page *page) { - put_user_page(page); + put_page(page); } static void free_arg_pages(struct linux_binprm *bprm) @@ -1010,6 +1010,7 @@ static int exec_mmap(struct mm_struct *mm) active_mm = tsk->active_mm; tsk->active_mm = mm; tsk->mm = mm; + lru_gen_add_mm(mm); /* * This prevents preemption while active_mm is being loaded and * it and mm are being updated, which could cause problems for @@ -1022,6 +1023,7 @@ static int exec_mmap(struct mm_struct *mm) activate_mm(active_mm, mm); if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM)) local_irq_enable(); + lru_gen_use_mm(mm); tsk->mm->vmacache_seqnum = 0; vmacache_flush(tsk); task_unlock(tsk); diff --git a/fs/exfat/file.c b/fs/exfat/file.c index a92478eabfa4e43f2198ccf9b62b8632b697c8dc..c819e8427ea577f1734c783986fbddc19445b721 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -109,8 +109,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size) exfat_set_volume_dirty(sb); num_clusters_new = EXFAT_B_TO_CLU_ROUND_UP(i_size_read(inode), sbi); - num_clusters_phys = - EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, sbi); + num_clusters_phys = EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags); @@ -227,12 +226,13 @@ void exfat_truncate(struct inode *inode, loff_t size) { struct super_block *sb = inode->i_sb; struct exfat_sb_info *sbi = EXFAT_SB(sb); + struct exfat_inode_info *ei = EXFAT_I(inode); unsigned int blocksize = i_blocksize(inode); loff_t aligned_size; int err; mutex_lock(&sbi->s_lock); - if (EXFAT_I(inode)->start_clu == 0) { + if (ei->start_clu == 0) { /* * Empty start_clu != ~0 (not allocated) */ @@ -250,8 +250,8 @@ void exfat_truncate(struct inode *inode, loff_t size) else mark_inode_dirty(inode); - inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) & - ~(sbi->cluster_size - 1)) >> inode->i_blkbits; + inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> + inode->i_blkbits; write_size: aligned_size = i_size_read(inode); if (aligned_size & (blocksize - 1)) { @@ -259,11 +259,11 @@ write_size: aligned_size++; } - if (EXFAT_I(inode)->i_size_ondisk > i_size_read(inode)) - EXFAT_I(inode)->i_size_ondisk = aligned_size; + if (ei->i_size_ondisk > i_size_read(inode)) + ei->i_size_ondisk = aligned_size; - if (EXFAT_I(inode)->i_size_aligned > i_size_read(inode)) - EXFAT_I(inode)->i_size_aligned = aligned_size; + if (ei->i_size_aligned > i_size_read(inode)) + ei->i_size_aligned = aligned_size; mutex_unlock(&sbi->s_lock); } diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 730373e0965aff2a8c366b34482c61afeec9cd56..2a9f6a80584ee3d63d9325b3f540d5457fa9a217 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -114,10 +114,9 @@ static int exfat_map_cluster(struct inode *inode, unsigned int clu_offset, unsigned int local_clu_offset = clu_offset; unsigned int num_to_be_allocated = 0, num_clusters = 0; - if (EXFAT_I(inode)->i_size_ondisk > 0) + if (ei->i_size_ondisk > 0) num_clusters = - EXFAT_B_TO_CLU_ROUND_UP(EXFAT_I(inode)->i_size_ondisk, - sbi); + EXFAT_B_TO_CLU_ROUND_UP(ei->i_size_ondisk, sbi); if (clu_offset >= num_clusters) num_to_be_allocated = clu_offset - num_clusters + 1; @@ -415,10 +414,10 @@ static int exfat_write_end(struct file *file, struct address_space *mapping, err = generic_write_end(file, mapping, pos, len, copied, pagep, fsdata); - if (EXFAT_I(inode)->i_size_aligned < i_size_read(inode)) { + if (ei->i_size_aligned < i_size_read(inode)) { exfat_fs_error(inode->i_sb, "invalid size(size(%llu) > aligned(%llu)\n", - i_size_read(inode), EXFAT_I(inode)->i_size_aligned); + i_size_read(inode), ei->i_size_aligned); return -EIO; } @@ -601,8 +600,8 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info) exfat_save_attr(inode, info->attr); - inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) & - ~(sbi->cluster_size - 1)) >> inode->i_blkbits; + inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> + inode->i_blkbits; inode->i_mtime = info->mtime; inode->i_ctime = info->mtime; ei->i_crtime = info->crtime; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index 2932b23a3b6c36ebfb82e5a5e7621c07fbb2822d..935f6005090091342c109456c50c0e5a27b8bc5b 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -395,9 +395,9 @@ static int exfat_find_empty_entry(struct inode *inode, /* directory inode should be updated in here */ i_size_write(inode, size); - EXFAT_I(inode)->i_size_ondisk += sbi->cluster_size; - EXFAT_I(inode)->i_size_aligned += sbi->cluster_size; - EXFAT_I(inode)->flags = p_dir->flags; + ei->i_size_ondisk += sbi->cluster_size; + ei->i_size_aligned += sbi->cluster_size; + ei->flags = p_dir->flags; inode->i_blocks += 1 << sbi->sect_per_clus_bits; } diff --git a/fs/exfat/super.c b/fs/exfat/super.c index c6d8d2e534865236d16386575627e0a442a8db52..d3bf120e31e1dad7a45ff709b204cb9570e722c3 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -364,11 +364,11 @@ static int exfat_read_root(struct inode *inode) inode->i_op = &exfat_dir_inode_operations; inode->i_fop = &exfat_dir_operations; - inode->i_blocks = ((i_size_read(inode) + (sbi->cluster_size - 1)) - & ~(sbi->cluster_size - 1)) >> inode->i_blkbits; - EXFAT_I(inode)->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; - EXFAT_I(inode)->i_size_aligned = i_size_read(inode); - EXFAT_I(inode)->i_size_ondisk = i_size_read(inode); + inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> + inode->i_blkbits; + ei->i_pos = ((loff_t)sbi->root_dir << 32) | 0xffffffff; + ei->i_size_aligned = i_size_read(inode); + ei->i_size_ondisk = i_size_read(inode); exfat_save_attr(inode, ATTR_SUBDIR); inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = @@ -836,5 +836,6 @@ module_exit(exit_exfat_fs); MODULE_ALIAS_FS("exfat"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_DESCRIPTION("exFAT filesystem support"); MODULE_AUTHOR("Samsung Electronics Co., Ltd."); diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 1f3f4326bf3ce82d1a2fb350479ce59a13128fdb..c17ccc19b938e22d996476bc8567c5df22716daa 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -48,10 +48,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, struct ext2_sb_info *sbi = EXT2_SB(sb); if (block_group >= sbi->s_groups_count) { - ext2_error (sb, "ext2_get_group_desc", - "block_group >= groups_count - " - "block_group = %d, groups_count = %lu", - block_group, sbi->s_groups_count); + WARN(1, "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); return NULL; } @@ -59,10 +58,9 @@ struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb, group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb); offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1); if (!sbi->s_group_desc[group_desc]) { - ext2_error (sb, "ext2_get_group_desc", - "Group descriptor not loaded - " - "block_group = %d, group_desc = %lu, desc = %lu", - block_group, group_desc, offset); + WARN(1, "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); return NULL; } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 09f1fe67697278d4963ab036ea75fdc0dd234488..3b772568cccc72aaf46d061852bac3d543befd01 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1650,5 +1650,6 @@ static void __exit exit_ext2_fs(void) MODULE_AUTHOR("Remy Card and others"); MODULE_DESCRIPTION("Second Extended Filesystem"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(init_ext2_fs) module_exit(exit_ext2_fs) diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 21a98288de4951ac854074666fe6b70b716d0efb..c943d24815237107530883600a9c55fe2525cac8 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -551,7 +551,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) struct dir_private_info *info = file->private_data; struct inode *inode = file_inode(file); struct fname *fname; - int ret; + int ret = 0; if (!info) { info = ext4_htree_create_dir_info(file, ctx->pos); @@ -599,7 +599,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) info->curr_minor_hash, &info->next_hash); if (ret < 0) - return ret; + goto finished; if (ret == 0) { ctx->pos = ext4_get_htree_eof(file); break; @@ -630,7 +630,7 @@ static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) } finished: info->last_pos = ctx->pos; - return 0; + return ret < 0 ? ret : 0; } static int ext4_release_dir(struct inode *inode, struct file *filp) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b29f325a3d08b7a3669a88b6f580cfabb26e86d6..7317d32850f4718910b18d9b4fade029976c2b55 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2822,6 +2822,10 @@ bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t block); void ext4_fc_replay_cleanup(struct super_block *sb); int ext4_fc_commit(journal_t *journal, tid_t commit_tid); int __init ext4_fc_init_dentry_cache(void); +void ext4_fc_destroy_dentry_cache(void); +int ext4_fc_record_regions(struct super_block *sb, int ino, + ext4_lblk_t lblk, ext4_fsblk_t pblk, + int len, int replay); /* mballoc.c */ extern const struct seq_operations ext4_mb_seq_groups_ops; diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 0fd0c42a4f7db23689f98bbc21bef44ca2d1896a..6ff7b4020df8ad18899f88495fb112ae90db8428 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -162,6 +162,8 @@ int __ext4_journal_ensure_credits(handle_t *handle, int check_cred, { if (!ext4_handle_valid(handle)) return 0; + if (is_handle_aborted(handle)) + return -EROFS; if (jbd2_handle_buffer_credits(handle) >= check_cred && handle->h_revoke_credits >= revoke_cred) return 0; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e00a35530a4e00d91326f063f34ad4321e10674d..0fda3051760d1fe6ae976ffc2b1cb5d62fae74ed 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -136,14 +136,24 @@ int ext4_datasem_ensure_credits(handle_t *handle, struct inode *inode, static int ext4_ext_get_access(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) { + int err = 0; + if (path->p_bh) { /* path points to block */ BUFFER_TRACE(path->p_bh, "get_write_access"); - return ext4_journal_get_write_access(handle, path->p_bh); + err = ext4_journal_get_write_access(handle, path->p_bh); + /* + * The extent buffer's verified bit will be set again in + * __ext4_ext_dirty(). We could leave an inconsistent + * buffer if the extents updating procudure break off du + * to some error happens, force to check it again. + */ + if (!err) + clear_buffer_verified(path->p_bh); } /* path points to leaf/index in inode body */ /* we use in-core data, no need to protect them */ - return 0; + return err; } /* @@ -164,6 +174,9 @@ static int __ext4_ext_dirty(const char *where, unsigned int line, /* path points to block */ err = __ext4_handle_dirty_metadata(where, line, handle, inode, path->p_bh); + /* Extents updating done, re-set verified flag */ + if (!err) + set_buffer_verified(path->p_bh); } else { /* path points to leaf/index in inode body */ err = ext4_mark_inode_dirty(handle, inode); @@ -353,9 +366,13 @@ static int ext4_valid_extent_idx(struct inode *inode, static int ext4_valid_extent_entries(struct inode *inode, struct ext4_extent_header *eh, - ext4_fsblk_t *pblk, int depth) + ext4_lblk_t lblk, ext4_fsblk_t *pblk, + int depth) { unsigned short entries; + ext4_lblk_t lblock = 0; + ext4_lblk_t prev = 0; + if (eh->eh_entries == 0) return 1; @@ -364,31 +381,51 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); - ext4_lblk_t lblock = 0; - ext4_lblk_t prev = 0; - int len = 0; + + /* + * The logical block in the first entry should equal to + * the number in the index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext->ee_block)) + return 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; /* Check for overlapping extents */ lblock = le32_to_cpu(ext->ee_block); - len = ext4_ext_get_actual_len(ext); if ((lblock <= prev) && prev) { *pblk = ext4_ext_pblock(ext); return 0; } + prev = lblock + ext4_ext_get_actual_len(ext) - 1; ext++; entries--; - prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); + + /* + * The logical block in the first entry should equal to + * the number in the parent index block. + */ + if (depth != ext_depth(inode) && + lblk != le32_to_cpu(ext_idx->ei_block)) + return 0; while (entries) { if (!ext4_valid_extent_idx(inode, ext_idx)) return 0; + + /* Check for overlapping index extents */ + lblock = le32_to_cpu(ext_idx->ei_block); + if ((lblock <= prev) && prev) { + *pblk = ext4_idx_pblock(ext_idx); + return 0; + } ext_idx++; entries--; + prev = lblock; } } return 1; @@ -396,7 +433,7 @@ static int ext4_valid_extent_entries(struct inode *inode, static int __ext4_ext_check(const char *function, unsigned int line, struct inode *inode, struct ext4_extent_header *eh, - int depth, ext4_fsblk_t pblk) + int depth, ext4_fsblk_t pblk, ext4_lblk_t lblk) { const char *error_msg; int max = 0, err = -EFSCORRUPTED; @@ -422,7 +459,7 @@ static int __ext4_ext_check(const char *function, unsigned int line, error_msg = "invalid eh_entries"; goto corrupted; } - if (!ext4_valid_extent_entries(inode, eh, &pblk, depth)) { + if (!ext4_valid_extent_entries(inode, eh, lblk, &pblk, depth)) { error_msg = "invalid extent entries"; goto corrupted; } @@ -452,7 +489,7 @@ corrupted: } #define ext4_ext_check(inode, eh, depth, pblk) \ - __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk)) + __ext4_ext_check(__func__, __LINE__, (inode), (eh), (depth), (pblk), 0) int ext4_ext_check_inode(struct inode *inode) { @@ -485,16 +522,18 @@ static void ext4_cache_extents(struct inode *inode, static struct buffer_head * __read_extent_tree_block(const char *function, unsigned int line, - struct inode *inode, ext4_fsblk_t pblk, int depth, - int flags) + struct inode *inode, struct ext4_extent_idx *idx, + int depth, int flags) { struct buffer_head *bh; int err; gfp_t gfp_flags = __GFP_MOVABLE | GFP_NOFS; + ext4_fsblk_t pblk; if (flags & EXT4_EX_NOFAIL) gfp_flags |= __GFP_NOFAIL; + pblk = ext4_idx_pblock(idx); bh = sb_getblk_gfp(inode->i_sb, pblk, gfp_flags); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -507,8 +546,8 @@ __read_extent_tree_block(const char *function, unsigned int line, } if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) return bh; - err = __ext4_ext_check(function, line, inode, - ext_block_hdr(bh), depth, pblk); + err = __ext4_ext_check(function, line, inode, ext_block_hdr(bh), + depth, pblk, le32_to_cpu(idx->ei_block)); if (err) goto errout; set_buffer_verified(bh); @@ -526,8 +565,8 @@ errout: } -#define read_extent_tree_block(inode, pblk, depth, flags) \ - __read_extent_tree_block(__func__, __LINE__, (inode), (pblk), \ +#define read_extent_tree_block(inode, idx, depth, flags) \ + __read_extent_tree_block(__func__, __LINE__, (inode), (idx), \ (depth), (flags)) /* @@ -577,8 +616,7 @@ int ext4_ext_precache(struct inode *inode) i--; continue; } - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx++), + bh = read_extent_tree_block(inode, path[i].p_idx++, depth - i - 1, EXT4_EX_FORCE_CACHE); if (IS_ERR(bh)) { @@ -883,8 +921,7 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, path[ppos].p_depth = i; path[ppos].p_ext = NULL; - bh = read_extent_tree_block(inode, path[ppos].p_block, --i, - flags); + bh = read_extent_tree_block(inode, path[ppos].p_idx, --i, flags); if (IS_ERR(bh)) { ret = PTR_ERR(bh); goto err; @@ -1489,7 +1526,6 @@ static int ext4_ext_search_right(struct inode *inode, struct ext4_extent_header *eh; struct ext4_extent_idx *ix; struct ext4_extent *ex; - ext4_fsblk_t block; int depth; /* Note, NOT eh_depth; depth from top of tree */ int ee_len; @@ -1556,20 +1592,17 @@ got_index: * follow it and find the closest allocated * block to the right */ ix++; - block = ext4_idx_pblock(ix); while (++depth < path->p_depth) { /* subtract from p_depth to get proper eh_depth */ - bh = read_extent_tree_block(inode, block, - path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); ix = EXT_FIRST_INDEX(eh); - block = ext4_idx_pblock(ix); put_bh(bh); } - bh = read_extent_tree_block(inode, block, path->p_depth - depth, 0); + bh = read_extent_tree_block(inode, ix, path->p_depth - depth, 0); if (IS_ERR(bh)) return PTR_ERR(bh); eh = ext_block_hdr(bh); @@ -2948,9 +2981,9 @@ again: ext_debug(inode, "move to level %d (block %llu)\n", i + 1, ext4_idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); - bh = read_extent_tree_block(inode, - ext4_idx_pblock(path[i].p_idx), depth - i - 1, - EXT4_EX_NOCACHE); + bh = read_extent_tree_block(inode, path[i].p_idx, + depth - i - 1, + EXT4_EX_NOCACHE); if (IS_ERR(bh)) { /* should we reset i_size? */ err = PTR_ERR(bh); @@ -4605,8 +4638,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, ret = ext4_mark_inode_dirty(handle, inode); if (unlikely(ret)) goto out_handle; - ext4_fc_track_range(handle, inode, offset >> inode->i_sb->s_blocksize_bits, - (offset + len - 1) >> inode->i_sb->s_blocksize_bits); /* Zero out partial block at the edges of the range */ ret = ext4_zero_partial_blocks(handle, inode, offset, len); if (ret >= 0) @@ -4970,36 +5001,6 @@ int ext4_get_es_cache(struct inode *inode, struct fiemap_extent_info *fieinfo, return ext4_fill_es_cache_info(inode, start_blk, len_blks, fieinfo); } -/* - * ext4_access_path: - * Function to access the path buffer for marking it dirty. - * It also checks if there are sufficient credits left in the journal handle - * to update path. - */ -static int -ext4_access_path(handle_t *handle, struct inode *inode, - struct ext4_ext_path *path) -{ - int credits, err; - - if (!ext4_handle_valid(handle)) - return 0; - - /* - * Check if need to extend journal credits - * 3 for leaf, sb, and inode plus 2 (bmap and group - * descriptor) for each block group; assume two block - * groups - */ - credits = ext4_writepage_trans_blocks(inode); - err = ext4_datasem_ensure_credits(handle, inode, 7, credits, 0); - if (err < 0) - return err; - - err = ext4_ext_get_access(handle, inode, path); - return err; -} - /* * ext4_ext_shift_path_extents: * Shift the extents of a path structure lying between path[depth].p_ext @@ -5014,6 +5015,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, int depth, err = 0; struct ext4_extent *ex_start, *ex_last; bool update = false; + int credits, restart_credits; depth = path->p_depth; while (depth >= 0) { @@ -5023,13 +5025,26 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, return -EFSCORRUPTED; ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); + /* leaf + sb + inode */ + credits = 3; + if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) { + update = true; + /* extent tree + sb + inode */ + credits = depth + 2; + } - err = ext4_access_path(handle, inode, path + depth); - if (err) + restart_credits = ext4_writepage_trans_blocks(inode); + err = ext4_datasem_ensure_credits(handle, inode, credits, + restart_credits, 0); + if (err) { + if (err > 0) + err = -EAGAIN; goto out; + } - if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) - update = true; + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; while (ex_start <= ex_last) { if (SHIFT == SHIFT_LEFT) { @@ -5060,7 +5075,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, } /* Update index too */ - err = ext4_access_path(handle, inode, path + depth); + err = ext4_ext_get_access(handle, inode, path + depth); if (err) goto out; @@ -5099,6 +5114,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, int ret = 0, depth; struct ext4_extent *extent; ext4_lblk_t stop, *iterator, ex_start, ex_end; + ext4_lblk_t tmp = EXT_MAX_BLOCKS; /* Let path point to the last extent */ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, @@ -5152,11 +5168,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, * till we reach stop. In case of right shift, iterator points to stop * and it is decreased till we reach start. */ +again: if (SHIFT == SHIFT_LEFT) iterator = &start; else iterator = &stop; + if (tmp != EXT_MAX_BLOCKS) + *iterator = tmp; + /* * Its safe to start updating extents. Start and stop are unsigned, so * in case of right shift if extent with 0 block is reached, iterator @@ -5185,6 +5205,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } } + tmp = *iterator; if (SHIFT == SHIFT_LEFT) { extent = EXT_LAST_EXTENT(path[depth].p_hdr); *iterator = le32_to_cpu(extent->ee_block) + @@ -5203,6 +5224,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, } ret = ext4_ext_shift_path_extents(path, shift, inode, handle, SHIFT); + /* iterator can be NULL which means we should break */ + if (ret == -EAGAIN) + goto again; if (ret) break; } @@ -5907,7 +5931,7 @@ void ext4_ext_replay_shrink_inode(struct inode *inode, ext4_lblk_t end) } /* Check if *cur is a hole and if it is, skip it */ -static void skip_hole(struct inode *inode, ext4_lblk_t *cur) +static int skip_hole(struct inode *inode, ext4_lblk_t *cur) { int ret; struct ext4_map_blocks map; @@ -5916,9 +5940,12 @@ static void skip_hole(struct inode *inode, ext4_lblk_t *cur) map.m_len = ((inode->i_size) >> inode->i_sb->s_blocksize_bits) - *cur; ret = ext4_map_blocks(NULL, inode, &map, 0); + if (ret < 0) + return ret; if (ret != 0) - return; + return 0; *cur = *cur + map.m_len; + return 0; } /* Count number of blocks used by this inode and update i_blocks */ @@ -5967,7 +5994,9 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) * iblocks by total number of differences found. */ cur = 0; - skip_hole(inode, &cur); + ret = skip_hole(inode, &cur); + if (ret < 0) + goto out; path = ext4_find_extent(inode, cur, NULL, 0); if (IS_ERR(path)) goto out; @@ -5986,8 +6015,12 @@ int ext4_ext_replay_set_iblocks(struct inode *inode) } cur = max(cur + 1, le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex)); - skip_hole(inode, &cur); - + ret = skip_hole(inode, &cur); + if (ret < 0) { + ext4_ext_drop_refs(path); + kfree(path); + break; + } path2 = ext4_find_extent(inode, cur, NULL, 0); if (IS_ERR(path2)) { ext4_ext_drop_refs(path); @@ -6055,11 +6088,15 @@ int ext4_ext_clear_bb(struct inode *inode) ext4_mb_mark_bb(inode->i_sb, path[j].p_block, 1, 0); + ext4_fc_record_regions(inode->i_sb, inode->i_ino, + 0, path[j].p_block, 1, 1); } ext4_ext_drop_refs(path); kfree(path); } ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0); + ext4_fc_record_regions(inode->i_sb, inode->i_ino, + map.m_lblk, map.m_pblk, map.m_len, 1); } cur = cur + map.m_len; } diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 53647fa0387731bf1fbc25d59237cb77d14fca2d..501e60713010e57a0f84ad3b4335b9a43d23d497 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -832,6 +832,12 @@ static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc) sizeof(lrange), (u8 *)&lrange, crc)) return -ENOSPC; } else { + unsigned int max = (map.m_flags & EXT4_MAP_UNWRITTEN) ? + EXT_UNWRITTEN_MAX_LEN : EXT_INIT_MAX_LEN; + + /* Limit the number of blocks in one extent */ + map.m_len = min(max, map.m_len); + fc_ext.fc_ino = cpu_to_le32(inode->i_ino); ex = (struct ext4_extent *)&fc_ext.fc_ex; ex->ee_block = cpu_to_le32(map.m_lblk); @@ -1382,14 +1388,15 @@ static int ext4_fc_record_modified_inode(struct super_block *sb, int ino) if (state->fc_modified_inodes[i] == ino) return 0; if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) { - state->fc_modified_inodes_size += - EXT4_FC_REPLAY_REALLOC_INCREMENT; state->fc_modified_inodes = krealloc( - state->fc_modified_inodes, sizeof(int) * - state->fc_modified_inodes_size, - GFP_KERNEL); + state->fc_modified_inodes, + sizeof(int) * (state->fc_modified_inodes_size + + EXT4_FC_REPLAY_REALLOC_INCREMENT), + GFP_KERNEL); if (!state->fc_modified_inodes) return -ENOMEM; + state->fc_modified_inodes_size += + EXT4_FC_REPLAY_REALLOC_INCREMENT; } state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino; return 0; @@ -1421,7 +1428,9 @@ static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl, } inode = NULL; - ext4_fc_record_modified_inode(sb, ino); + ret = ext4_fc_record_modified_inode(sb, ino); + if (ret) + goto out; raw_fc_inode = (struct ext4_inode *) (val + offsetof(struct ext4_fc_inode, fc_raw_inode)); @@ -1552,16 +1561,23 @@ out: } /* - * Record physical disk regions which are in use as per fast commit area. Our - * simple replay phase allocator excludes these regions from allocation. + * Record physical disk regions which are in use as per fast commit area, + * and used by inodes during replay phase. Our simple replay phase + * allocator excludes these regions from allocation. */ -static int ext4_fc_record_regions(struct super_block *sb, int ino, - ext4_lblk_t lblk, ext4_fsblk_t pblk, int len) +int ext4_fc_record_regions(struct super_block *sb, int ino, + ext4_lblk_t lblk, ext4_fsblk_t pblk, int len, int replay) { struct ext4_fc_replay_state *state; struct ext4_fc_alloc_region *region; state = &EXT4_SB(sb)->s_fc_replay_state; + /* + * during replay phase, the fc_regions_valid may not same as + * fc_regions_used, update it when do new additions. + */ + if (replay && state->fc_regions_used != state->fc_regions_valid) + state->fc_regions_used = state->fc_regions_valid; if (state->fc_regions_used == state->fc_regions_size) { state->fc_regions_size += EXT4_FC_REPLAY_REALLOC_INCREMENT; @@ -1579,6 +1595,9 @@ static int ext4_fc_record_regions(struct super_block *sb, int ino, region->pblk = pblk; region->len = len; + if (replay) + state->fc_regions_valid++; + return 0; } @@ -1610,6 +1629,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, } ret = ext4_fc_record_modified_inode(sb, inode->i_ino); + if (ret) + goto out; start = le32_to_cpu(ex->ee_block); start_pblk = ext4_ext_pblock(ex); @@ -1627,18 +1648,14 @@ static int ext4_fc_replay_add_range(struct super_block *sb, map.m_pblk = 0; ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret < 0) { - iput(inode); - return 0; - } + if (ret < 0) + goto out; if (ret == 0) { /* Range is not mapped */ path = ext4_find_extent(inode, cur, NULL, 0); - if (IS_ERR(path)) { - iput(inode); - return 0; - } + if (IS_ERR(path)) + goto out; memset(&newex, 0, sizeof(newex)); newex.ee_block = cpu_to_le32(cur); ext4_ext_store_pblock( @@ -1652,10 +1669,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, up_write((&EXT4_I(inode)->i_data_sem)); ext4_ext_drop_refs(path); kfree(path); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; goto next; } @@ -1668,10 +1683,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ext4_ext_is_unwritten(ex), start_pblk + cur - start); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; /* * Mark the old blocks as free since they aren't used * anymore. We maintain an array of all the modified @@ -1691,10 +1704,8 @@ static int ext4_fc_replay_add_range(struct super_block *sb, ext4_ext_is_unwritten(ex), map.m_pblk); ret = ext4_ext_replay_update_ex(inode, cur, map.m_len, ext4_ext_is_unwritten(ex), map.m_pblk); - if (ret) { - iput(inode); - return 0; - } + if (ret) + goto out; /* * We may have split the extent tree while toggling the state. * Try to shrink the extent tree now. @@ -1706,6 +1717,7 @@ next: } ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); +out: iput(inode); return 0; } @@ -1735,6 +1747,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } ret = ext4_fc_record_modified_inode(sb, inode->i_ino); + if (ret) + goto out; jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n", inode->i_ino, le32_to_cpu(lrange.fc_lblk), @@ -1744,10 +1758,8 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, map.m_len = remaining; ret = ext4_map_blocks(NULL, inode, &map, 0); - if (ret < 0) { - iput(inode); - return 0; - } + if (ret < 0) + goto out; if (ret > 0) { remaining -= ret; cur += ret; @@ -1758,16 +1770,18 @@ ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl, } } - ret = ext4_punch_hole(inode, - le32_to_cpu(lrange.fc_lblk) << sb->s_blocksize_bits, - le32_to_cpu(lrange.fc_len) << sb->s_blocksize_bits); + down_write(&EXT4_I(inode)->i_data_sem); + ret = ext4_ext_remove_space(inode, le32_to_cpu(lrange.fc_lblk), + le32_to_cpu(lrange.fc_lblk) + + le32_to_cpu(lrange.fc_len) - 1); + up_write(&EXT4_I(inode)->i_data_sem); if (ret) - jbd_debug(1, "ext4_punch_hole returned %d", ret); + goto out; ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >> sb->s_blocksize_bits); ext4_mark_inode_dirty(NULL, inode); +out: iput(inode); - return 0; } @@ -1945,7 +1959,7 @@ static int ext4_fc_replay_scan(journal_t *journal, ret = ext4_fc_record_regions(sb, le32_to_cpu(ext.fc_ino), le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex), - ext4_ext_get_actual_len(ex)); + ext4_ext_get_actual_len(ex), 0); if (ret < 0) break; ret = JBD2_FC_REPLAY_CONTINUE; @@ -2160,3 +2174,8 @@ int __init ext4_fc_init_dentry_cache(void) return 0; } + +void ext4_fc_destroy_dentry_cache(void) +{ + kmem_cache_destroy(ext4_fc_dentry_cachep); +} diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index a5c60d9c3b82eb81d360cf8ae4fa593515923631..afc5f656c3e76fab26cf1d0defa50b185a7a30de 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -747,18 +747,13 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, void *kaddr; struct ext4_iloc iloc; - if (unlikely(copied < len)) { - if (!PageUptodate(page)) { - copied = 0; - goto out; - } - } + if (unlikely(copied < len) && !PageUptodate(page)) + return 0; ret = ext4_get_inode_loc(inode, &iloc); if (ret) { ext4_std_error(inode->i_sb, ret); - copied = 0; - goto out; + return ret; } ext4_write_lock_xattr(inode, &no_expand); @@ -771,7 +766,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, (void) ext4_find_inline_data_nolock(inode); kaddr = kmap_atomic(page); - ext4_write_inline_data(inode, &iloc, kaddr, pos, len); + ext4_write_inline_data(inode, &iloc, kaddr, pos, copied); kunmap_atomic(kaddr); SetPageUptodate(page); /* clear page dirty so that writepages wouldn't work for us. */ @@ -780,7 +775,7 @@ int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, ext4_write_unlock_xattr(inode, &no_expand); brelse(iloc.bh); mark_inode_dirty(inode); -out: + return copied; } @@ -1139,7 +1134,15 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc, void *buf, int inline_size) { - ext4_create_inline_data(handle, inode, inline_size); + int ret; + + ret = ext4_create_inline_data(handle, inode, inline_size); + if (ret) { + ext4_msg(inode->i_sb, KERN_EMERG, + "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)", + inode->i_ino, ret); + return; + } ext4_write_inline_data(inode, iloc, buf, 0, inline_size); ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index a5f5a00e95ff282f02d23dd6090a3bf7378a52c9..04c51065c1c99d99f763681ec61c356e7206d638 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -742,10 +742,11 @@ out_sem: if (ret) return ret; } - ext4_fc_track_range(handle, inode, map->m_lblk, - map->m_lblk + map->m_len - 1); } - + if (retval > 0 && (map->m_flags & EXT4_MAP_UNWRITTEN || + map->m_flags & EXT4_MAP_MAPPED)) + ext4_fc_track_range(handle, inode, map->m_lblk, + map->m_lblk + map->m_len - 1); if (retval < 0) ext_debug(inode, "failed with err %d\n", retval); return retval; @@ -1308,6 +1309,7 @@ static int ext4_write_end(struct file *file, goto errout; } copied = ret; + ret = 0; } else copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); @@ -1334,13 +1336,14 @@ static int ext4_write_end(struct file *file, if (i_size_changed || inline_data) ret = ext4_mark_inode_dirty(handle, inode); +errout: if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied * less. We will have blocks allocated outside * inode->i_size. So truncate them */ ext4_orphan_add(handle, inode); -errout: + ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; @@ -1424,6 +1427,7 @@ static int ext4_journalled_write_end(struct file *file, goto errout; } copied = ret; + ret = 0; } else if (unlikely(copied < len) && !PageUptodate(page)) { copied = 0; ext4_journalled_zero_new_buffers(handle, page, from, to); @@ -1453,6 +1457,7 @@ static int ext4_journalled_write_end(struct file *file, ret = ret2; } +errout: if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) /* if we have allocated more blocks and copied * less. We will have blocks allocated outside @@ -1460,7 +1465,6 @@ static int ext4_journalled_write_end(struct file *file, */ ext4_orphan_add(handle, inode); -errout: ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; @@ -1654,6 +1658,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk) struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); int ret; bool allocated = false; + bool reserved = false; /* * If the cluster containing lblk is shared with a delayed, @@ -1670,6 +1675,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk) ret = ext4_da_reserve_space(inode); if (ret != 0) /* ENOSPC */ goto errout; + reserved = true; } else { /* bigalloc */ if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) { if (!ext4_es_scan_clu(inode, @@ -1682,6 +1688,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk) ret = ext4_da_reserve_space(inode); if (ret != 0) /* ENOSPC */ goto errout; + reserved = true; } else { allocated = true; } @@ -1692,6 +1699,8 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk) } ret = ext4_es_insert_delayed_block(inode, lblk, allocated); + if (ret && reserved) + ext4_da_release_space(inode, 1); errout: return ret; @@ -2027,6 +2036,15 @@ static int ext4_writepage(struct page *page, else len = PAGE_SIZE; + /* Should never happen but for bugs in other kernel subsystems */ + if (!page_has_buffers(page)) { + ext4_warning_inode(inode, + "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + return 0; + } + page_bufs = page_buffers(page); /* * We cannot do block allocation or other extent handling in this @@ -2630,6 +2648,22 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) wait_on_page_writeback(page); BUG_ON(PageWriteback(page)); + /* + * Should never happen but for buggy code in + * other subsystems that call + * set_page_dirty() without properly warning + * the file system first. See [1] for more + * information. + * + * [1] https://lore.kernel.org/linux-mm/20180103100430.GE4911@quack2.suse.cz + */ + if (!page_has_buffers(page)) { + ext4_warning_inode(mpd->inode, "page %lu does not have buffers attached", page->index); + ClearPageDirty(page); + unlock_page(page); + continue; + } + if (mpd->map.m_len == 0) mpd->first_page = page->index; mpd->next_page = page->index + 1; @@ -3109,35 +3143,37 @@ static int ext4_da_write_end(struct file *file, end = start + copied - 1; /* - * generic_write_end() will run mark_inode_dirty() if i_size - * changes. So let's piggyback the i_disksize mark_inode_dirty - * into that. + * Since we are holding inode lock, we are sure i_disksize <= + * i_size. We also know that if i_disksize < i_size, there are + * delalloc writes pending in the range upto i_size. If the end of + * the current write is <= i_size, there's no need to touch + * i_disksize since writeback will push i_disksize upto i_size + * eventually. If the end of the current write is > i_size and + * inside an allocated block (ext4_da_should_update_i_disksize() + * check), we need to update i_disksize here as neither + * ext4_writepage() nor certain ext4_writepages() paths not + * allocating blocks update i_disksize. + * + * Note that we defer inode dirtying to generic_write_end() / + * ext4_da_write_inline_data_end(). */ new_i_size = pos + copied; - if (copied && new_i_size > EXT4_I(inode)->i_disksize) { + if (copied && new_i_size > inode->i_size) { if (ext4_has_inline_data(inode) || - ext4_da_should_update_i_disksize(page, end)) { + ext4_da_should_update_i_disksize(page, end)) ext4_update_i_disksize(inode, new_i_size); - /* We need to mark inode dirty even if - * new_i_size is less that inode->i_size - * bu greater than i_disksize.(hint delalloc) - */ - ret = ext4_mark_inode_dirty(handle, inode); - } } if (write_mode != CONVERT_INLINE_DATA && ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && ext4_has_inline_data(inode)) - ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied, + ret = ext4_da_write_inline_data_end(inode, pos, len, copied, page); else - ret2 = generic_write_end(file, mapping, pos, len, copied, + ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - copied = ret2; - if (ret2 < 0) - ret = ret2; + copied = ret; ret2 = ext4_journal_stop(handle); if (unlikely(ret2 && !ret)) ret = ret2; @@ -3506,10 +3542,9 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (ret < 0) return ret; out: - /* * When inline encryption is enabled, sometimes I/O to an encrypted file - * has to be broken up to guarantee DUN contiguity. Handle this by + * has to be broken up to guarantee DUN contiguity. Handle this by * limiting the length of the mapping returned. */ map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len); @@ -4467,7 +4502,7 @@ has_buffer: static int __ext4_get_inode_loc_noinmem(struct inode *inode, struct ext4_iloc *iloc) { - ext4_fsblk_t err_blk; + ext4_fsblk_t err_blk = 0; int ret; ret = __ext4_get_inode_loc(inode->i_sb, inode->i_ino, iloc, 0, @@ -4482,7 +4517,7 @@ static int __ext4_get_inode_loc_noinmem(struct inode *inode, int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) { - ext4_fsblk_t err_blk; + ext4_fsblk_t err_blk = 0; int ret; /* We have all inode data except xattrs in memory here. */ @@ -5489,8 +5524,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_fc_track_range(handle, inode, (attr->ia_size > 0 ? attr->ia_size - 1 : 0) >> inode->i_sb->s_blocksize_bits, - (oldsize > 0 ? oldsize - 1 : 0) >> - inode->i_sb->s_blocksize_bits); + EXT_MAX_BLOCKS - 1); else ext4_fc_track_range( handle, inode, diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 8d54a8aabab3137450cc519eee99a730a205abeb..a740d7c3716c77e57555c82b98867c0902ec7e24 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1123,8 +1123,6 @@ resizefs_out: sizeof(range))) return -EFAULT; - range.minlen = max((unsigned int)range.minlen, - q->limits.discard_granularity); ret = ext4_trim_fs(sb, &range); if (ret < 0) return ret; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d7cb7d719ee5845924146bd92c5837b4042de266..110c25824a67fd1ead6c89dfdec1e614a2a07671 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4234,7 +4234,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, */ static noinline_for_stack int ext4_mb_discard_group_preallocations(struct super_block *sb, - ext4_group_t group, int needed) + ext4_group_t group, int *busy) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct buffer_head *bitmap_bh = NULL; @@ -4242,8 +4242,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, struct list_head list; struct ext4_buddy e4b; int err; - int busy = 0; - int free, free_total = 0; + int free = 0; mb_debug(sb, "discard preallocation for group %u\n", group); if (list_empty(&grp->bb_prealloc_list)) @@ -4266,19 +4265,14 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, goto out_dbg; } - if (needed == 0) - needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; - INIT_LIST_HEAD(&list); -repeat: - free = 0; ext4_lock_group(sb, group); list_for_each_entry_safe(pa, tmp, &grp->bb_prealloc_list, pa_group_list) { spin_lock(&pa->pa_lock); if (atomic_read(&pa->pa_count)) { spin_unlock(&pa->pa_lock); - busy = 1; + *busy = 1; continue; } if (pa->pa_deleted) { @@ -4318,22 +4312,13 @@ repeat: call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); } - free_total += free; - - /* if we still need more blocks and some PAs were used, try again */ - if (free_total < needed && busy) { - ext4_unlock_group(sb, group); - cond_resched(); - busy = 0; - goto repeat; - } ext4_unlock_group(sb, group); ext4_mb_unload_buddy(&e4b); put_bh(bitmap_bh); out_dbg: mb_debug(sb, "discarded (%d) blocks preallocated for group %u bb_free (%d)\n", - free_total, group, grp->bb_free); - return free_total; + free, group, grp->bb_free); + return free; } /* @@ -4875,13 +4860,24 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) { ext4_group_t i, ngroups = ext4_get_groups_count(sb); int ret; - int freed = 0; + int freed = 0, busy = 0; + int retry = 0; trace_ext4_mb_discard_preallocations(sb, needed); + + if (needed == 0) + needed = EXT4_CLUSTERS_PER_GROUP(sb) + 1; + repeat: for (i = 0; i < ngroups && needed > 0; i++) { - ret = ext4_mb_discard_group_preallocations(sb, i, needed); + ret = ext4_mb_discard_group_preallocations(sb, i, &busy); freed += ret; needed -= ret; + cond_resched(); + } + + if (needed > 0 && busy && ++retry < 3) { + busy = 0; + goto repeat; } return freed; @@ -5177,7 +5173,8 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, struct super_block *sb = ar->inode->i_sb; ext4_group_t group; ext4_grpblk_t blkoff; - int i = sb->s_blocksize; + ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb); + ext4_grpblk_t i = 0; ext4_fsblk_t goal, block; struct ext4_super_block *es = EXT4_SB(sb)->s_es; @@ -5199,19 +5196,26 @@ static ext4_fsblk_t ext4_mb_new_blocks_simple(handle_t *handle, ext4_get_group_no_and_offset(sb, max(ext4_group_first_block_no(sb, group), goal), NULL, &blkoff); - i = mb_find_next_zero_bit(bitmap_bh->b_data, sb->s_blocksize, + while (1) { + i = mb_find_next_zero_bit(bitmap_bh->b_data, max, blkoff); + if (i >= max) + break; + if (ext4_fc_replay_check_excluded(sb, + ext4_group_first_block_no(sb, group) + i)) { + blkoff = i + 1; + } else + break; + } brelse(bitmap_bh); - if (i >= sb->s_blocksize) - continue; - if (ext4_fc_replay_check_excluded(sb, - ext4_group_first_block_no(sb, group) + i)) - continue; - break; + if (i < max) + break; } - if (group >= ext4_get_groups_count(sb) && i >= sb->s_blocksize) + if (group >= ext4_get_groups_count(sb) || i >= max) { + *errp = -ENOSPC; return 0; + } block = ext4_group_first_block_no(sb, group) + i; ext4_mb_mark_bb(sb, block, 1, 1); @@ -5815,6 +5819,7 @@ out: */ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; ext4_grpblk_t cnt = 0, first_cluster, last_cluster; @@ -5833,6 +5838,13 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) start >= max_blks || range->len < sb->s_blocksize) return -EINVAL; + /* No point to try to trim less than discard granularity */ + if (range->minlen < q->limits.discard_granularity) { + minlen = EXT4_NUM_B2C(EXT4_SB(sb), + q->limits.discard_granularity >> sb->s_blocksize_bits); + if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) + goto out; + } if (end >= max_blks) end = max_blks - 1; if (end <= first_data_blk) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index c5e3fc998211acfe9449e8be42ef2c6cd96542d7..49912814f3d8dda93f50b98f664f586a81c8d072 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -437,12 +437,12 @@ int ext4_ext_migrate(struct inode *inode) percpu_down_write(&sbi->s_writepages_rwsem); /* - * Worst case we can touch the allocation bitmaps, a bgd - * block, and a block to link in the orphan list. We do need - * need to worry about credits for modifying the quota inode. + * Worst case we can touch the allocation bitmaps and a block + * group descriptor block. We do need need to worry about + * credits for modifying the quota inode. */ handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, - 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); + 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); @@ -459,6 +459,13 @@ int ext4_ext_migrate(struct inode *inode) ext4_journal_stop(handle); goto out_unlock; } + /* + * Use the correct seed for checksum (i.e. the seed from 'inode'). This + * is so that the metadata blocks will have the correct checksum after + * the migration. + */ + ei = EXT4_I(inode); + EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed; i_size_write(tmp_inode, i_size_read(inode)); /* * Set the i_nlink to zero so it will be deleted later @@ -467,7 +474,6 @@ int ext4_ext_migrate(struct inode *inode) clear_nlink(tmp_inode); ext4_ext_tree_init(handle, tmp_inode); - ext4_orphan_add(handle, tmp_inode); ext4_journal_stop(handle); /* @@ -492,17 +498,10 @@ int ext4_ext_migrate(struct inode *inode) handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) { - /* - * It is impossible to update on-disk structures without - * a handle, so just rollback in-core changes and live other - * work to orphan_list_cleanup() - */ - ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); goto out_tmp_inode; } - ei = EXT4_I(inode); i_data = ei->i_data; memset(&lb, 0, sizeof(lb)); diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index 6cb598b549ca169af7bc533c87298ff369b0d539..bc364c119af6ac00bcfedcec131af0178ecfa428 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -156,7 +156,12 @@ static int kmmpd(void *data) memcpy(mmp->mmp_nodename, init_utsname()->nodename, sizeof(mmp->mmp_nodename)); - while (!kthread_should_stop()) { + while (!kthread_should_stop() && !sb_rdonly(sb)) { + if (!ext4_has_feature_mmp(sb)) { + ext4_warning(sb, "kmmpd being stopped since MMP feature" + " has been disabled."); + goto wait_to_exit; + } if (++seq > EXT4_MMP_SEQ_MAX) seq = 1; @@ -177,16 +182,6 @@ static int kmmpd(void *data) failed_writes++; } - if (!(le32_to_cpu(es->s_feature_incompat) & - EXT4_FEATURE_INCOMPAT_MMP)) { - ext4_warning(sb, "kmmpd being stopped since MMP feature" - " has been disabled."); - goto exit_thread; - } - - if (sb_rdonly(sb)) - break; - diff = jiffies - last_update_time; if (diff < mmp_update_interval * HZ) schedule_timeout_interruptible(mmp_update_interval * @@ -207,7 +202,7 @@ static int kmmpd(void *data) ext4_error_err(sb, -retval, "error reading MMP data: %d", retval); - goto exit_thread; + goto wait_to_exit; } mmp_check = (struct mmp_struct *)(bh_check->b_data); @@ -221,7 +216,7 @@ static int kmmpd(void *data) ext4_error_err(sb, EBUSY, "abort"); put_bh(bh_check); retval = -EBUSY; - goto exit_thread; + goto wait_to_exit; } put_bh(bh_check); } @@ -244,7 +239,13 @@ static int kmmpd(void *data) retval = write_mmp_block(sb, bh); -exit_thread: +wait_to_exit: + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + if (!kthread_should_stop()) + schedule(); + } + set_current_state(TASK_RUNNING); return retval; } @@ -391,5 +392,3 @@ failed: brelse(bh); return 1; } - - diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index 0deb2565178e8c9ebf29f9968dd645fa86505298..fdabff565d62fd7a482706e2e9ce822294415bcd 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -410,8 +410,7 @@ int ext4_mpage_readpages(struct inode *inode, * bio_alloc will _always_ be able to allocate a bio if * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset(). */ - bio = bio_alloc(GFP_KERNEL, - min_t(int, nr_pages, BIO_MAX_PAGES)); + bio = bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages)); fscrypt_set_bio_crypt_ctx(bio, inode, next_block, GFP_KERNEL); ext4_set_bio_post_read_ctx(bio, inode, page->index); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 928700d57eb67e5bc01340af2dd8826a6e9c6718..6513079c728be35448812ca120127f390f62a2fc 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -74,6 +74,11 @@ int ext4_resize_begin(struct super_block *sb) return -EPERM; } + if (ext4_has_feature_sparse_super2(sb)) { + ext4_msg(sb, KERN_ERR, "Online resizing not supported with sparse_super2"); + return -EOPNOTSUPP; + } + if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags)) ret = -EBUSY; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f486648c6292f2c6dc06061291f86d300acfca39..bccf4f403bf8b4f8d764f8da2a99993e704e6a4f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1356,6 +1356,12 @@ static void ext4_destroy_inode(struct inode *inode) true); dump_stack(); } + + if (EXT4_I(inode)->i_reserved_data_blocks) + ext4_msg(inode->i_sb, KERN_ERR, + "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!", + inode->i_ino, EXT4_I(inode), + EXT4_I(inode)->i_reserved_data_blocks); } static void init_once(void *foo) @@ -1593,7 +1599,6 @@ static const struct fscrypt_operations ext4_cryptops = { .set_context = ext4_set_context, .get_dummy_policy = ext4_get_dummy_policy, .empty_dir = ext4_empty_dir, - .max_namelen = EXT4_NAME_LEN, .has_stable_inodes = ext4_has_stable_inodes, .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, }; @@ -3194,17 +3199,17 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) */ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) { - loff_t res = EXT4_NDIR_BLOCKS; + unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS; int meta_blocks; - loff_t upper_limit; - /* This is calculated to be the largest file size for a dense, block + + /* + * This is calculated to be the largest file size for a dense, block * mapped file such that the file's total number of 512-byte sectors, * including data and all indirect blocks, does not exceed (2^48 - 1). * * __u32 i_blocks_lo and _u16 i_blocks_high represent the total * number of 512-byte sectors of the file. */ - if (!has_huge_files) { /* * !has_huge_files or implies that the inode i_block field @@ -3247,7 +3252,7 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) if (res > MAX_LFS_FILESIZE) res = MAX_LFS_FILESIZE; - return res; + return (loff_t)res; } static ext4_fsblk_t descriptor_loc(struct super_block *sb, @@ -3430,9 +3435,9 @@ static int ext4_run_li_request(struct ext4_li_request *elr) struct super_block *sb = elr->lr_super; ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count; ext4_group_t group = elr->lr_next_group; - unsigned long timeout = 0; unsigned int prefetch_ios = 0; int ret = 0; + u64 start_time; if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) { elr->lr_next_group = ext4_mb_prefetch(sb, group, @@ -3469,14 +3474,13 @@ static int ext4_run_li_request(struct ext4_li_request *elr) ret = 1; if (!ret) { - timeout = jiffies; + start_time = ktime_get_real_ns(); ret = ext4_init_inode_table(sb, group, elr->lr_timeout ? 0 : 1); trace_ext4_lazy_itable_init(sb, group); if (elr->lr_timeout == 0) { - timeout = (jiffies - timeout) * - EXT4_SB(elr->lr_super)->s_li_wait_mult; - elr->lr_timeout = timeout; + elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) * + EXT4_SB(elr->lr_super)->s_li_wait_mult); } elr->lr_next_sched = jiffies + elr->lr_timeout; elr->lr_next_group = group + 1; @@ -5915,7 +5919,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) */ ext4_mark_recovery_complete(sb, es); } - ext4_stop_mmpd(sbi); } else { /* Make sure we can mount this feature set readwrite */ if (ext4_has_feature_readonly(sb) || @@ -6029,6 +6032,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks) ext4_release_system_zone(sb); + if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) + ext4_stop_mmpd(sbi); + /* * Some options can be enabled by ext4 and/or by VFS mount flag * either way we need to make sure it matches in both *flags and @@ -6061,6 +6067,8 @@ restore_opts: for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(to_free[i]); #endif + if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb)) + ext4_stop_mmpd(sbi); kfree(orig_data); return err; } @@ -6321,10 +6329,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); err = dquot_quota_on(sb, type, format_id, path); - if (err) { - lockdep_set_quota_inode(path->dentry->d_inode, - I_DATA_SEM_NORMAL); - } else { + if (!err) { struct inode *inode = d_inode(path->dentry); handle_t *handle; @@ -6344,7 +6349,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, ext4_journal_stop(handle); unlock_inode: inode_unlock(inode); + if (err) + dquot_quota_off(sb, type); } + if (err) + lockdep_set_quota_inode(path->dentry->d_inode, + I_DATA_SEM_NORMAL); return err; } @@ -6407,8 +6417,19 @@ static int ext4_enable_quotas(struct super_block *sb) "Failed to enable quota tracking " "(type=%d, err=%d). Please run " "e2fsck to fix.", type, err); - for (type--; type >= 0; type--) + for (type--; type >= 0; type--) { + struct inode *inode; + + inode = sb_dqopt(sb)->files[type]; + if (inode) + inode = igrab(inode); dquot_quota_off(sb, type); + if (inode) { + lockdep_set_quota_inode(inode, + I_DATA_SEM_NORMAL); + iput(inode); + } + } return err; } @@ -6512,7 +6533,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); - if (EXT4_SB(sb)->s_journal && !handle) { + if (!handle) { ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)" " cancelled because transaction is not started", (unsigned long long)off, (unsigned long long)len); @@ -6696,6 +6717,7 @@ static int __init ext4_init_fs(void) out: unregister_as_ext2(); unregister_as_ext3(); + ext4_fc_destroy_dentry_cache(); out05: destroy_inodecache(); out1: @@ -6722,6 +6744,7 @@ static void __exit ext4_exit_fs(void) unregister_as_ext2(); unregister_as_ext3(); unregister_filesystem(&ext4_fs_type); + ext4_fc_destroy_dentry_cache(); destroy_inodecache(); ext4_exit_mballoc(); ext4_exit_sysfs(); @@ -6735,6 +6758,7 @@ static void __exit ext4_exit_fs(void) MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); MODULE_DESCRIPTION("Fourth Extended Filesystem"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_SOFTDEP("pre: crc32c"); module_init(ext4_init_fs) module_exit(ext4_exit_fs) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 7eea3cfd894d1dfb0ec7d554f0094083f4aa01a7..03ef087537c7c48d251e936a6d4c029b9dcc0592 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -7,6 +7,7 @@ config F2FS_FS select CRYPTO_CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION select FS_ENCRYPTION_ALGS if FS_ENCRYPTION + select FS_IOMAP select LZ4_COMPRESS if F2FS_FS_LZ4 select LZ4_DECOMPRESS if F2FS_FS_LZ4 select LZ4HC_COMPRESS if F2FS_FS_LZ4HC @@ -142,3 +143,10 @@ config F2FS_IOSTAT Support getting IO statistics through sysfs and printing out periodic IO statistics tracepoint events. You have to turn on "iostat_enable" sysfs node to enable this feature. + +config F2FS_UNFAIR_RWSEM + bool "F2FS unfair rw_semaphore" + depends on F2FS_FS && BLK_CGROUP + help + Use unfair rw_semaphore, if system configured IO priority by block + cgroup. diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0b3b1b53b356b7b53e5d2e536cbfafbc679a12c1..bd2a0f5854aaa048831b46ee1e6a6fea198d9c92 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -98,6 +98,13 @@ repeat: } if (unlikely(!PageUptodate(page))) { + if (page->index == sbi->metapage_eio_ofs && + sbi->metapage_eio_cnt++ == MAX_RETRY_META_PAGE_EIO) { + set_ckpt_flags(sbi, CP_ERROR_FLAG); + } else { + sbi->metapage_eio_ofs = page->index; + sbi->metapage_eio_cnt = 0; + } f2fs_put_page(page, 1); return ERR_PTR(-EIO); } @@ -282,18 +289,22 @@ out: return blkno - start; } -void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, + unsigned int ra_blocks) { struct page *page; bool readahead = false; + if (ra_blocks == RECOVERY_MIN_RA_BLOCKS) + return; + page = find_get_page(META_MAPPING(sbi), index); if (!page || !PageUptodate(page)) readahead = true; f2fs_put_page(page, 0); if (readahead) - f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true); + f2fs_ra_meta_pages(sbi, index, ra_blocks, META_POR, true); } static int __f2fs_write_meta_page(struct page *page, @@ -351,13 +362,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping, goto skip_write; /* if locked failed, cp will flush dirty pages instead */ - if (!down_write_trylock(&sbi->cp_global_sem)) + if (!f2fs_down_write_trylock(&sbi->cp_global_sem)) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO); - up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->cp_global_sem); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; @@ -653,7 +664,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) return PTR_ERR(inode); } - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) { iput(inode); goto err_out; @@ -664,7 +675,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) /* truncate all the data during iput */ iput(inode); - err = f2fs_get_node_info(sbi, ino, &ni); + err = f2fs_get_node_info(sbi, ino, &ni, false); if (err) goto err_out; @@ -705,9 +716,6 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi) } #ifdef CONFIG_QUOTA - /* Needed for iput() to work correctly and not trash data */ - sbi->sb->s_flags |= SB_ACTIVE; - /* * Turn on quotas which were not enabled for read-only mounts if * filesystem has quota feature, so that they are updated correctly. @@ -867,6 +875,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, struct page *cp_page_1 = NULL, *cp_page_2 = NULL; struct f2fs_checkpoint *cp_block = NULL; unsigned long long cur_version = 0, pre_version = 0; + unsigned int cp_blocks; int err; err = get_checkpoint_version(sbi, cp_addr, &cp_block, @@ -874,15 +883,16 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (err) return NULL; - if (le32_to_cpu(cp_block->cp_pack_total_block_count) > - sbi->blocks_per_seg) { + cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count); + + if (cp_blocks > sbi->blocks_per_seg || cp_blocks <= F2FS_CP_PACKS) { f2fs_warn(sbi, "invalid cp_pack_total_block_count:%u", le32_to_cpu(cp_block->cp_pack_total_block_count)); goto invalid_cp; } pre_version = *version; - cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + cp_addr += cp_blocks - 1; err = get_checkpoint_version(sbi, cp_addr, &cp_block, &cp_page_2, version); if (err) @@ -1162,7 +1172,8 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi) if (!is_journalled_quota(sbi)) return false; - down_write(&sbi->quota_sem); + if (!f2fs_down_write_trylock(&sbi->quota_sem)) + return true; if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) { ret = false; } else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) { @@ -1173,7 +1184,7 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi) } else if (get_pages(sbi, F2FS_DIRTY_QDATA)) { ret = true; } - up_write(&sbi->quota_sem); + f2fs_up_write(&sbi->quota_sem); return ret; } @@ -1230,10 +1241,10 @@ retry_flush_dents: * POR: we should ensure that there are no dirty node pages * until finishing nat/sit flush. inode->i_blocks can be updated. */ - down_write(&sbi->node_change); + f2fs_down_write(&sbi->node_change); if (get_pages(sbi, F2FS_DIRTY_IMETA)) { - up_write(&sbi->node_change); + f2fs_up_write(&sbi->node_change); f2fs_unlock_all(sbi); err = f2fs_sync_inode_meta(sbi); if (err) @@ -1243,15 +1254,15 @@ retry_flush_dents: } retry_flush_nodes: - down_write(&sbi->node_write); + f2fs_down_write(&sbi->node_write); if (get_pages(sbi, F2FS_DIRTY_NODES)) { - up_write(&sbi->node_write); + f2fs_up_write(&sbi->node_write); atomic_inc(&sbi->wb_sync_req[NODE]); err = f2fs_sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO); atomic_dec(&sbi->wb_sync_req[NODE]); if (err) { - up_write(&sbi->node_change); + f2fs_up_write(&sbi->node_change); f2fs_unlock_all(sbi); return err; } @@ -1264,13 +1275,13 @@ retry_flush_nodes: * dirty node blocks and some checkpoint values by block allocation. */ __prepare_cp_block(sbi); - up_write(&sbi->node_change); + f2fs_up_write(&sbi->node_change); return err; } static void unblock_operations(struct f2fs_sb_info *sbi) { - up_write(&sbi->node_write); + f2fs_up_write(&sbi->node_write); f2fs_unlock_all(sbi); } @@ -1304,8 +1315,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long flags; if (cpc->reason & CP_UMOUNT) { - if (le32_to_cpu(ckpt->cp_pack_total_block_count) > - sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) { + if (le32_to_cpu(ckpt->cp_pack_total_block_count) + + NM_I(sbi)->nat_bits_blocks > sbi->blocks_per_seg) { clear_ckpt_flags(sbi, CP_NAT_BITS_FLAG); f2fs_notice(sbi, "Disable nat_bits due to no space"); } else if (!is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG) && @@ -1545,6 +1556,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* update user_block_counts */ sbi->last_valid_block_count = sbi->total_valid_block_count; percpu_counter_set(&sbi->alloc_valid_block_count, 0); + percpu_counter_set(&sbi->rf_node_block_count, 0); /* Here, we have one bio having CP pack except cp pack 2 page */ f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO); @@ -1614,7 +1626,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_warn(sbi, "Start checkpoint disabled!"); } if (cpc->reason != CP_RESIZE) - down_write(&sbi->cp_global_sem); + f2fs_down_write(&sbi->cp_global_sem); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || @@ -1695,7 +1707,7 @@ stop: trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint"); out: if (cpc->reason != CP_RESIZE) - up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->cp_global_sem); return err; } @@ -1743,9 +1755,9 @@ static int __write_checkpoint_sync(struct f2fs_sb_info *sbi) struct cp_control cpc = { .reason = CP_SYNC, }; int err; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); return err; } @@ -1833,9 +1845,9 @@ int f2fs_issue_checkpoint(struct f2fs_sb_info *sbi) if (!test_opt(sbi, MERGE_CHECKPOINT) || cpc.reason != CP_SYNC) { int ret; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); ret = f2fs_write_checkpoint(sbi, &cpc); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); return ret; } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index c1bf9ad4c2207fb9ff1d8aab94046ddbb280d389..750573d1feaaa5168d5bb3c65163c6c14e3c18ec 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -153,6 +153,7 @@ void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse) cc->rpages = NULL; cc->nr_rpages = 0; cc->nr_cpages = 0; + cc->valid_nr_cpages = 0; if (!reuse) cc->cluster_idx = NULL_CLUSTER; } @@ -312,10 +313,9 @@ static int lz4_decompress_pages(struct decompress_io_ctx *dic) } if (ret != PAGE_SIZE << dic->log_cluster_size) { - printk_ratelimited("%sF2FS-fs (%s): lz4 invalid rlen:%zu, " + printk_ratelimited("%sF2FS-fs (%s): lz4 invalid ret:%d, " "expected:%lu\n", KERN_ERR, - F2FS_I_SB(dic->inode)->sb->s_id, - dic->rlen, + F2FS_I_SB(dic->inode)->sb->s_id, ret, PAGE_SIZE << dic->log_cluster_size); return -EIO; } @@ -619,7 +619,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc) const struct f2fs_compress_ops *cops = f2fs_cops[fi->i_compress_algorithm]; unsigned int max_len, new_nr_cpages; - struct page **new_cpages; u32 chksum = 0; int i, ret; @@ -634,6 +633,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) max_len = COMPRESS_HEADER_SIZE + cc->clen; cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE); + cc->valid_nr_cpages = cc->nr_cpages; cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages); if (!cc->cpages) { @@ -684,13 +684,6 @@ static int f2fs_compress_pages(struct compress_ctx *cc) new_nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE); - /* Now we're going to cut unnecessary tail pages */ - new_cpages = page_array_alloc(cc->inode, new_nr_cpages); - if (!new_cpages) { - ret = -ENOMEM; - goto out_vunmap_cbuf; - } - /* zero out any unused part of the last page */ memset(&cc->cbuf->cdata[cc->clen], 0, (new_nr_cpages * PAGE_SIZE) - @@ -700,10 +693,8 @@ static int f2fs_compress_pages(struct compress_ctx *cc) vm_unmap_ram(cc->rbuf, cc->cluster_size); for (i = 0; i < cc->nr_cpages; i++) { - if (i < new_nr_cpages) { - new_cpages[i] = cc->cpages[i]; + if (i < new_nr_cpages) continue; - } f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } @@ -711,9 +702,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc) if (cops->destroy_compress_ctx) cops->destroy_compress_ctx(cc); - page_array_free(cc->inode, cc->cpages, cc->nr_cpages); - cc->cpages = new_cpages; - cc->nr_cpages = new_nr_cpages; + cc->valid_nr_cpages = new_nr_cpages; trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx, cc->clen, ret); @@ -881,6 +870,25 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index) return is_page_in_cluster(cc, index); } +bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec, + int index, int nr_pages) +{ + unsigned long pgidx; + int i; + + if (nr_pages - index < cc->cluster_size) + return false; + + pgidx = pvec->pages[index]->index; + + for (i = 1; i < cc->cluster_size; i++) { + if (pvec->pages[index + i]->index != pgidx + i) + return false; + } + + return true; +} + static bool cluster_has_invalid_data(struct compress_ctx *cc) { loff_t i_size = i_size_read(cc->inode); @@ -1257,7 +1265,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, * checkpoint. This can only happen to quota writes which can cause * the below discard race condition. */ - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); } else if (!f2fs_trylock_op(sbi)) { goto out_free; } @@ -1276,7 +1284,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, psize = (loff_t)(cc->rpages[last_index]->index + 1) << PAGE_SHIFT; - err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); if (err) goto out_put_dnode; @@ -1288,14 +1296,14 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, cic->magic = F2FS_COMPRESSED_PAGE_MAGIC; cic->inode = inode; - atomic_set(&cic->pending_pages, cc->nr_cpages); + atomic_set(&cic->pending_pages, cc->valid_nr_cpages); cic->rpages = page_array_alloc(cc->inode, cc->cluster_size); if (!cic->rpages) goto out_put_cic; cic->nr_rpages = cc->cluster_size; - for (i = 0; i < cc->nr_cpages; i++) { + for (i = 0; i < cc->valid_nr_cpages; i++) { f2fs_set_compressed_page(cc->cpages[i], inode, cc->rpages[i + 1]->index, cic); fio.compressed_page = cc->cpages[i]; @@ -1340,7 +1348,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc, if (fio.compr_blocks && __is_valid_data_blkaddr(blkaddr)) fio.compr_blocks++; - if (i > cc->nr_cpages) { + if (i > cc->valid_nr_cpages) { if (__is_valid_data_blkaddr(blkaddr)) { f2fs_invalidate_blocks(sbi, blkaddr); f2fs_update_data_blkaddr(&dn, NEW_ADDR); @@ -1365,8 +1373,8 @@ unlock_continue: if (fio.compr_blocks) f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false); - f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true); - add_compr_block_stat(inode, cc->nr_cpages); + f2fs_i_compr_blocks_update(inode, cc->valid_nr_cpages, true); + add_compr_block_stat(inode, cc->valid_nr_cpages); set_inode_flag(cc->inode, FI_APPEND_WRITE); if (cc->cluster_idx == 0) @@ -1374,7 +1382,7 @@ unlock_continue: f2fs_put_dnode(&dn); if (IS_NOQUOTA(inode)) - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); else f2fs_unlock_op(sbi); @@ -1400,13 +1408,11 @@ out_put_dnode: f2fs_put_dnode(&dn); out_unlock_op: if (IS_NOQUOTA(inode)) - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); else f2fs_unlock_op(sbi); out_free: - for (i = 0; i < cc->nr_cpages; i++) { - if (!cc->cpages[i]) - continue; + for (i = 0; i < cc->valid_nr_cpages; i++) { f2fs_compress_free_page(cc->cpages[i]); cc->cpages[i] = NULL; } @@ -1448,25 +1454,38 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, enum iostat_type io_type) { struct address_space *mapping = cc->inode->i_mapping; - int _submitted, compr_blocks, ret; - int i = -1, err = 0; + int _submitted, compr_blocks, ret, i; compr_blocks = f2fs_compressed_blocks(cc); - if (compr_blocks < 0) { - err = compr_blocks; - goto out_err; + + for (i = 0; i < cc->cluster_size; i++) { + if (!cc->rpages[i]) + continue; + + redirty_page_for_writepage(wbc, cc->rpages[i]); + unlock_page(cc->rpages[i]); } + if (compr_blocks < 0) + return compr_blocks; + for (i = 0; i < cc->cluster_size; i++) { if (!cc->rpages[i]) continue; retry_write: + lock_page(cc->rpages[i]); + if (cc->rpages[i]->mapping != mapping) { +continue_unlock: unlock_page(cc->rpages[i]); continue; } - BUG_ON(!PageLocked(cc->rpages[i])); + if (!PageDirty(cc->rpages[i])) + goto continue_unlock; + + if (!clear_page_dirty_for_io(cc->rpages[i])) + goto continue_unlock; ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, NULL, NULL, wbc, io_type, @@ -1481,26 +1500,15 @@ retry_write: * avoid deadlock caused by cluster update race * from foreground operation. */ - if (IS_NOQUOTA(cc->inode)) { - err = 0; - goto out_err; - } + if (IS_NOQUOTA(cc->inode)) + return 0; ret = 0; cond_resched(); congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); - lock_page(cc->rpages[i]); - - if (!PageDirty(cc->rpages[i])) { - unlock_page(cc->rpages[i]); - continue; - } - - clear_page_dirty_for_io(cc->rpages[i]); goto retry_write; } - err = ret; - goto out_err; + return ret; } *submitted += _submitted; @@ -1509,14 +1517,6 @@ retry_write: f2fs_balance_fs(F2FS_M_SB(mapping), true); return 0; -out_err: - for (++i; i < cc->cluster_size; i++) { - if (!cc->rpages[i]) - continue; - redirty_page_for_writepage(wbc, cc->rpages[i]); - unlock_page(cc->rpages[i]); - } - return err; } int f2fs_write_multi_pages(struct compress_ctx *cc, @@ -1530,6 +1530,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc, if (cluster_may_compress(cc)) { err = f2fs_compress_pages(cc); if (err == -EAGAIN) { + add_compr_block_stat(cc->inode, cc->cluster_size); goto write; } else if (err) { f2fs_put_rpages_wbc(cc, wbc, true, 1); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 4e67e335464236a961cf460afc0376acc2aa1344..5c49817dbcc97899999c3568ad96422960761a82 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -605,7 +606,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - down_write(&io->io_rwsem); + f2fs_down_write(&io->io_rwsem); /* change META to META_FLUSH in the checkpoint procedure */ if (type >= META_FLUSH) { @@ -616,7 +617,7 @@ static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi, io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } __submit_merged_bio(io); - up_write(&io->io_rwsem); + f2fs_up_write(&io->io_rwsem); } static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, @@ -631,9 +632,9 @@ static void __submit_merged_write_cond(struct f2fs_sb_info *sbi, enum page_type btype = PAGE_TYPE_OF_BIO(type); struct f2fs_bio_info *io = sbi->write_io[btype] + temp; - down_read(&io->io_rwsem); + f2fs_down_read(&io->io_rwsem); ret = __has_merged_page(io->bio, inode, page, ino); - up_read(&io->io_rwsem); + f2fs_up_read(&io->io_rwsem); } if (ret) __f2fs_submit_merged_write(sbi, type, temp); @@ -757,9 +758,9 @@ static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio, if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE) f2fs_bug_on(sbi, 1); - down_write(&io->bio_list_lock); + f2fs_down_write(&io->bio_list_lock); list_add_tail(&be->list, &io->bio_list); - up_write(&io->bio_list_lock); + f2fs_up_write(&io->bio_list_lock); } static void del_bio_entry(struct bio_entry *be) @@ -781,7 +782,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, struct list_head *head = &io->bio_list; struct bio_entry *be; - down_write(&io->bio_list_lock); + f2fs_down_write(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (be->bio != *bio) continue; @@ -805,7 +806,7 @@ static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio, __submit_bio(sbi, *bio, DATA); break; } - up_write(&io->bio_list_lock); + f2fs_up_write(&io->bio_list_lock); } if (ret) { @@ -831,7 +832,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, if (list_empty(head)) continue; - down_read(&io->bio_list_lock); + f2fs_down_read(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (target) found = (target == be->bio); @@ -841,14 +842,14 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, if (found) break; } - up_read(&io->bio_list_lock); + f2fs_up_read(&io->bio_list_lock); if (!found) continue; found = false; - down_write(&io->bio_list_lock); + f2fs_down_write(&io->bio_list_lock); list_for_each_entry(be, head, list) { if (target) found = (target == be->bio); @@ -861,7 +862,7 @@ void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi, break; } } - up_write(&io->bio_list_lock); + f2fs_up_write(&io->bio_list_lock); } if (found) @@ -921,7 +922,7 @@ void f2fs_submit_page_write(struct f2fs_io_info *fio) f2fs_bug_on(sbi, is_read_io(fio->op)); - down_write(&io->io_rwsem); + f2fs_down_write(&io->io_rwsem); next: if (fio->in_list) { spin_lock(&io->io_lock); @@ -988,7 +989,7 @@ out: if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || !f2fs_is_checkpoint_ready(sbi)) __submit_merged_bio(io); - up_write(&io->io_rwsem); + f2fs_up_write(&io->io_rwsem); } static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, @@ -1001,8 +1002,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr, unsigned int post_read_steps = 0; bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL, - min_t(int, nr_pages, BIO_MAX_PAGES), - &f2fs_bioset); + bio_max_segs(nr_pages), &f2fs_bioset); if (!bio) return ERR_PTR(-ENOMEM); @@ -1370,7 +1370,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type) if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC))) return -EPERM; - err = f2fs_get_node_info(sbi, dn->nid, &ni); + err = f2fs_get_node_info(sbi, dn->nid, &ni, false); if (err) return err; @@ -1392,68 +1392,16 @@ alloc: f2fs_invalidate_compress_page(sbi, old_blkaddr); } f2fs_update_data_blkaddr(dn, dn->data_blkaddr); - - /* - * i_size will be updated by direct_IO. Otherwise, we'll get stale - * data from unwritten block via dio_read. - */ return 0; } -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) -{ - struct inode *inode = file_inode(iocb->ki_filp); - struct f2fs_map_blocks map; - int flag; - int err = 0; - bool direct_io = iocb->ki_flags & IOCB_DIRECT; - - map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); - map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); - if (map.m_len > map.m_lblk) - map.m_len -= map.m_lblk; - else - map.m_len = 0; - - map.m_next_pgofs = NULL; - map.m_next_extent = NULL; - map.m_seg_type = NO_CHECK_TYPE; - map.m_may_create = true; - - if (direct_io) { - map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint); - flag = f2fs_force_buffered_io(inode, iocb, from) ? - F2FS_GET_BLOCK_PRE_AIO : - F2FS_GET_BLOCK_PRE_DIO; - goto map_blocks; - } - if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) { - err = f2fs_convert_inline_inode(inode); - if (err) - return err; - } - if (f2fs_has_inline_data(inode)) - return err; - - flag = F2FS_GET_BLOCK_PRE_AIO; - -map_blocks: - err = f2fs_map_blocks(inode, &map, 1, flag); - if (map.m_len > 0 && err == -ENOSPC) { - if (!direct_io) - set_inode_flag(inode, FI_NO_PREALLOC); - err = 0; - } - return err; -} - void f2fs_do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) { if (flag == F2FS_GET_BLOCK_PRE_AIO) { if (lock) - down_read(&sbi->node_change); + f2fs_down_read(&sbi->node_change); else - up_read(&sbi->node_change); + f2fs_up_read(&sbi->node_change); } else { if (lock) f2fs_lock_op(sbi); @@ -1481,10 +1429,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, struct extent_info ei = {0, }; block_t blkaddr; unsigned int start_pgofs; + int bidx = 0; if (!maxblocks) return 0; + map->m_bdev = inode->i_sb->s_bdev; + map->m_multidev_dio = + f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag); + map->m_len = 0; map->m_flags = 0; @@ -1507,6 +1460,21 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, if (flag == F2FS_GET_BLOCK_DIO) f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); + + if (map->m_multidev_dio) { + block_t blk_addr = map->m_pblk; + + bidx = f2fs_target_device_index(sbi, map->m_pblk); + + map->m_bdev = FDEV(bidx).bdev; + map->m_pblk -= FDEV(bidx).start_blk; + map->m_len = min(map->m_len, + FDEV(bidx).end_blk + 1 - map->m_pblk); + + if (map->m_may_create) + f2fs_update_device_state(sbi, inode->i_ino, + blk_addr, map->m_len); + } goto out; } @@ -1586,8 +1554,11 @@ next_block: flag != F2FS_GET_BLOCK_DIO); err = __allocate_data_block(&dn, map->m_seg_type); - if (!err) + if (!err) { + if (flag == F2FS_GET_BLOCK_PRE_DIO) + file_need_truncate(inode); set_inode_flag(inode, FI_APPEND_WRITE); + } } if (err) goto sync_out; @@ -1625,6 +1596,9 @@ next_block: if (flag == F2FS_GET_BLOCK_PRE_AIO) goto skip; + if (map->m_multidev_dio) + bidx = f2fs_target_device_index(sbi, blkaddr); + if (map->m_len == 0) { /* preallocated unwritten block should be mapped for fiemap. */ if (blkaddr == NEW_ADDR) @@ -1633,10 +1607,15 @@ next_block: map->m_pblk = blkaddr; map->m_len = 1; + + if (map->m_multidev_dio) + map->m_bdev = FDEV(bidx).bdev; } else if ((map->m_pblk != NEW_ADDR && blkaddr == (map->m_pblk + ofs)) || (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) || flag == F2FS_GET_BLOCK_PRE_DIO) { + if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev) + goto sync_out; ofs++; map->m_len++; } else { @@ -1689,10 +1668,32 @@ skip: sync_out: - /* for hardware encryption, but to avoid potential issue in future */ - if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) + if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) { + /* + * for hardware encryption, but to avoid potential issue + * in future + */ f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); + invalidate_mapping_pages(META_MAPPING(sbi), + map->m_pblk, map->m_pblk); + + if (map->m_multidev_dio) { + block_t blk_addr = map->m_pblk; + + bidx = f2fs_target_device_index(sbi, map->m_pblk); + + map->m_bdev = FDEV(bidx).bdev; + map->m_pblk -= FDEV(bidx).start_blk; + + if (map->m_may_create) + f2fs_update_device_state(sbi, inode->i_ino, + blk_addr, map->m_len); + + f2fs_bug_on(sbi, blk_addr + map->m_len > + FDEV(bidx).end_blk + 1); + } + } if (flag == F2FS_GET_BLOCK_PRECACHE) { if (map->m_flags & F2FS_MAP_MAPPED) { @@ -1712,7 +1713,7 @@ unlock_out: f2fs_balance_fs(sbi, dn.node_changed); } out: - trace_f2fs_map_blocks(inode, map, err); + trace_f2fs_map_blocks(inode, map, create, flag, err); return err; } @@ -1752,47 +1753,6 @@ static inline u64 blks_to_bytes(struct inode *inode, u64 blks) return (blks << inode->i_blkbits); } -static int __get_data_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh, int create, int flag, - pgoff_t *next_pgofs, int seg_type, bool may_write) -{ - struct f2fs_map_blocks map; - int err; - - map.m_lblk = iblock; - map.m_len = bytes_to_blks(inode, bh->b_size); - map.m_next_pgofs = next_pgofs; - map.m_next_extent = NULL; - map.m_seg_type = seg_type; - map.m_may_create = may_write; - - err = f2fs_map_blocks(inode, &map, create, flag); - if (!err) { - map_bh(bh, inode->i_sb, map.m_pblk); - bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; - bh->b_size = blks_to_bytes(inode, map.m_len); - } - return err; -} - -static int get_data_block_dio_write(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DIO, NULL, - f2fs_rw_hint_to_seg_type(inode->i_write_hint), - true); -} - -static int get_data_block_dio(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - return __get_data_block(inode, iblock, bh_result, create, - F2FS_GET_BLOCK_DIO, NULL, - f2fs_rw_hint_to_seg_type(inode->i_write_hint), - false); -} - static int f2fs_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) { @@ -1812,7 +1772,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, if (!page) return -ENOMEM; - err = f2fs_get_node_info(sbi, inode->i_ino, &ni); + err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { f2fs_put_page(page, 1); return err; @@ -1844,7 +1804,7 @@ static int f2fs_xattr_fiemap(struct inode *inode, if (!page) return -ENOMEM; - err = f2fs_get_node_info(sbi, xnid, &ni); + err = f2fs_get_node_info(sbi, xnid, &ni, false); if (err) { f2fs_put_page(page, 1); return err; @@ -2528,6 +2488,9 @@ static inline bool check_inplace_update_policy(struct inode *inode, struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int policy = SM_I(sbi)->ipu_policy; + if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) && + is_inode_flag_set(inode, FI_OPU_WRITE)) + return false; if (policy & (0x1 << F2FS_IPU_FORCE)) return true; if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi)) @@ -2580,6 +2543,11 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + /* The below cases were checked when setting it. */ + if (f2fs_is_pinned_file(inode)) + return false; + if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + return true; if (f2fs_lfs_mode(sbi)) return true; if (S_ISDIR(inode->i_mode)) @@ -2588,13 +2556,14 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) return true; if (f2fs_is_atomic_file(inode)) return true; - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) - return true; /* swap file is migrating in aligned write mode */ if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) return true; + if (is_inode_flag_set(inode, FI_OPU_WRITE)) + return true; + if (fio) { if (page_private_gcing(fio->page)) return true; @@ -2701,7 +2670,7 @@ got_it: fio->need_lock = LOCK_REQ; } - err = f2fs_get_node_info(fio->sbi, dn.nid, &ni); + err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false); if (err) goto out_writepage; @@ -2814,13 +2783,13 @@ write: * the below discard race condition. */ if (IS_NOQUOTA(inode)) - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); fio.need_lock = LOCK_DONE; err = f2fs_do_write_data_page(&fio); if (IS_NOQUOTA(inode)) - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); goto done; } @@ -2950,6 +2919,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, .rpages = NULL, .nr_rpages = 0, .cpages = NULL, + .valid_nr_cpages = 0, .rbuf = NULL, .cbuf = NULL, .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size, @@ -3005,6 +2975,10 @@ readd: need_readd = false; #ifdef CONFIG_F2FS_FS_COMPRESSION if (f2fs_compressed_file(inode)) { + void *fsdata = NULL; + struct page *pagep; + int ret2; + ret = f2fs_init_compress_ctx(&cc); if (ret) { done = 1; @@ -3023,27 +2997,23 @@ readd: if (unlikely(f2fs_cp_error(sbi))) goto lock_page; - if (f2fs_cluster_is_empty(&cc)) { - void *fsdata = NULL; - struct page *pagep; - int ret2; + if (!f2fs_cluster_is_empty(&cc)) + goto lock_page; - ret2 = f2fs_prepare_compress_overwrite( + ret2 = f2fs_prepare_compress_overwrite( inode, &pagep, page->index, &fsdata); - if (ret2 < 0) { - ret = ret2; - done = 1; - break; - } else if (ret2 && - !f2fs_compress_write_end(inode, - fsdata, page->index, - 1)) { - retry = 1; - break; - } - } else { - goto lock_page; + if (ret2 < 0) { + ret = ret2; + done = 1; + break; + } else if (ret2 && + (!f2fs_compress_write_end(inode, + fsdata, page->index, 1) || + !f2fs_all_cluster_page_loaded(&cc, + &pvec, i, nr_pages))) { + retry = 1; + break; } } #endif @@ -3218,8 +3188,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping, f2fs_available_free_memory(sbi, DIRTY_DENTS)) goto skip_write; - /* skip writing during file defragment */ - if (is_inode_flag_set(inode, FI_DO_DEFRAG)) + /* skip writing in file defragment preparing stage */ + if (is_inode_flag_set(inode, FI_SKIP_WRITES)) goto skip_write; trace_f2fs_writepages(mapping->host, wbc, DATA); @@ -3227,8 +3197,12 @@ static int __f2fs_write_data_pages(struct address_space *mapping, /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[DATA]); - else if (atomic_read(&sbi->wb_sync_req[DATA])) + else if (atomic_read(&sbi->wb_sync_req[DATA])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } if (__should_serialize_io(inode, wbc)) { mutex_lock(&sbi->writepages); @@ -3268,7 +3242,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, FS_CP_DATA_IO : FS_DATA_IO); } -static void f2fs_write_failed(struct inode *inode, loff_t to) +void f2fs_write_failed(struct inode *inode, loff_t to) { loff_t i_size = i_size_read(inode); @@ -3277,14 +3251,14 @@ static void f2fs_write_failed(struct inode *inode, loff_t to) /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ if (to > i_size && !f2fs_verity_in_progress(inode)) { - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, i_size); f2fs_truncate_blocks(inode, i_size, true); - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -3302,12 +3276,10 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, int flag; /* - * we already allocated all the blocks, so we don't need to get - * the block addresses when there is no need to fill the page. + * If a whole page is being written and we already preallocated all the + * blocks, then there is no need to get a block address now. */ - if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE && - !is_inode_flag_set(inode, FI_NO_PREALLOC) && - !f2fs_verity_in_progress(inode)) + if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL)) return 0; /* f2fs_lock_op avoids race between write CP and convert_inline_page */ @@ -3387,7 +3359,13 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, block_t blkaddr = NULL_ADDR; int err = 0; - if (trace_android_fs_datawrite_start_enabled()) { + /* + * Should avoid quota operations which can make deadlock: + * kswapd -> f2fs_evict_inode -> dquot_drop -> + * f2fs_dquot_commit -> f2fs_write_begin -> + * d_obtain_alias -> __d_alloc -> kmem_cache_alloc(GFP_KERNEL) + */ + if (trace_android_fs_datawrite_start_enabled() && !IS_NOQUOTA(inode)) { char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; path = android_fstrace_get_pathname(pathbuf, @@ -3429,7 +3407,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, *fsdata = NULL; - if (len == PAGE_SIZE) + if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode))) goto repeat; ret = f2fs_prepare_compress_overwrite(inode, pagep, @@ -3569,188 +3547,6 @@ unlock_out: return copied; } -static int check_direct_IO(struct inode *inode, struct iov_iter *iter, - loff_t offset) -{ - unsigned i_blkbits = READ_ONCE(inode->i_blkbits); - unsigned blkbits = i_blkbits; - unsigned blocksize_mask = (1 << blkbits) - 1; - unsigned long align = offset | iov_iter_alignment(iter); - struct block_device *bdev = inode->i_sb->s_bdev; - - if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode)) - return 1; - - if (align & blocksize_mask) { - if (bdev) - blkbits = blksize_bits(bdev_logical_block_size(bdev)); - blocksize_mask = (1 << blkbits) - 1; - if (align & blocksize_mask) - return -EINVAL; - return 1; - } - return 0; -} - -static void f2fs_dio_end_io(struct bio *bio) -{ - struct f2fs_private_dio *dio = bio->bi_private; - - dec_page_count(F2FS_I_SB(dio->inode), - dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ); - - bio->bi_private = dio->orig_private; - bio->bi_end_io = dio->orig_end_io; - - kfree(dio); - - bio_endio(bio); -} - -static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode, - loff_t file_offset) -{ - struct f2fs_private_dio *dio; - bool write = (bio_op(bio) == REQ_OP_WRITE); - - dio = f2fs_kzalloc(F2FS_I_SB(inode), - sizeof(struct f2fs_private_dio), GFP_NOFS); - if (!dio) - goto out; - - dio->inode = inode; - dio->orig_end_io = bio->bi_end_io; - dio->orig_private = bio->bi_private; - dio->write = write; - - bio->bi_end_io = f2fs_dio_end_io; - bio->bi_private = dio; - - inc_page_count(F2FS_I_SB(inode), - write ? F2FS_DIO_WRITE : F2FS_DIO_READ); - - submit_bio(bio); - return; -out: - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); -} - -static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) -{ - struct address_space *mapping = iocb->ki_filp->f_mapping; - struct inode *inode = mapping->host; - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct f2fs_inode_info *fi = F2FS_I(inode); - size_t count = iov_iter_count(iter); - loff_t offset = iocb->ki_pos; - int rw = iov_iter_rw(iter); - int err; - enum rw_hint hint = iocb->ki_hint; - int whint_mode = F2FS_OPTION(sbi).whint_mode; - bool do_opu; - - err = check_direct_IO(inode, iter, offset); - if (err) - return err < 0 ? err : 0; - - if (f2fs_force_buffered_io(inode, iocb, iter)) - return 0; - - do_opu = rw == WRITE && f2fs_lfs_mode(sbi); - - trace_f2fs_direct_IO_enter(inode, offset, count, rw); - - if (trace_android_fs_dataread_start_enabled() && - (rw == READ)) { - char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; - - path = android_fstrace_get_pathname(pathbuf, - MAX_TRACE_PATHBUF_LEN, - inode); - trace_android_fs_dataread_start(inode, offset, - count, current->pid, path, - current->comm); - } - if (trace_android_fs_datawrite_start_enabled() && - (rw == WRITE)) { - char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; - - path = android_fstrace_get_pathname(pathbuf, - MAX_TRACE_PATHBUF_LEN, - inode); - trace_android_fs_datawrite_start(inode, offset, count, - current->pid, path, - current->comm); - } - - if (rw == WRITE && whint_mode == WHINT_MODE_OFF) - iocb->ki_hint = WRITE_LIFE_NOT_SET; - - if (iocb->ki_flags & IOCB_NOWAIT) { - if (!down_read_trylock(&fi->i_gc_rwsem[rw])) { - iocb->ki_hint = hint; - err = -EAGAIN; - goto out; - } - if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) { - up_read(&fi->i_gc_rwsem[rw]); - iocb->ki_hint = hint; - err = -EAGAIN; - goto out; - } - } else { - down_read(&fi->i_gc_rwsem[rw]); - if (do_opu) - down_read(&fi->i_gc_rwsem[READ]); - } - - err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, - iter, rw == WRITE ? get_data_block_dio_write : - get_data_block_dio, NULL, f2fs_dio_submit_bio, - rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES : - DIO_SKIP_HOLES); - - if (do_opu) - up_read(&fi->i_gc_rwsem[READ]); - - up_read(&fi->i_gc_rwsem[rw]); - - if (rw == WRITE) { - if (whint_mode == WHINT_MODE_OFF) - iocb->ki_hint = hint; - if (err > 0) { - f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, - err); - if (!do_opu) - set_inode_flag(inode, FI_UPDATE_WRITE); - } else if (err == -EIOCBQUEUED) { - f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO, - count - iov_iter_count(iter)); - } else if (err < 0) { - f2fs_write_failed(inode, offset + count); - } - } else { - if (err > 0) - f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err); - else if (err == -EIOCBQUEUED) - f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO, - count - iov_iter_count(iter)); - } - -out: - if (trace_android_fs_dataread_start_enabled() && - (rw == READ)) - trace_android_fs_dataread_end(inode, offset, count); - if (trace_android_fs_datawrite_start_enabled() && - (rw == WRITE)) - trace_android_fs_datawrite_end(inode, offset, count); - - trace_f2fs_direct_IO_exit(inode, offset, count, rw, err); - - return err; -} - void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length) { @@ -3774,12 +3570,9 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, clear_page_private_gcing(page); - if (test_opt(sbi, COMPRESS_CACHE)) { - if (f2fs_compressed_file(inode)) - f2fs_invalidate_compress_pages(sbi, inode->i_ino); - if (inode->i_ino == F2FS_COMPRESS_INO(sbi)) - clear_page_private_data(page); - } + if (test_opt(sbi, COMPRESS_CACHE) && + inode->i_ino == F2FS_COMPRESS_INO(sbi)) + clear_page_private_data(page); if (page_private_atomic(page)) return f2fs_drop_inmem_page(inode, page); @@ -3799,12 +3592,9 @@ int f2fs_release_page(struct page *page, gfp_t wait) return 0; if (test_opt(F2FS_P_SB(page), COMPRESS_CACHE)) { - struct f2fs_sb_info *sbi = F2FS_P_SB(page); struct inode *inode = page->mapping->host; - if (f2fs_compressed_file(inode)) - f2fs_invalidate_compress_pages(sbi, inode->i_ino); - if (inode->i_ino == F2FS_COMPRESS_INO(sbi)) + if (inode->i_ino == F2FS_COMPRESS_INO(F2FS_I_SB(inode))) clear_page_private_data(page); } @@ -3986,19 +3776,20 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, unsigned int end_sec = secidx + blkcnt / blk_per_sec; int ret = 0; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); set_inode_flag(inode, FI_ALIGNED_WRITE); + set_inode_flag(inode, FI_OPU_WRITE); for (; secidx < end_sec; secidx++) { - down_write(&sbi->pin_sem); + f2fs_down_write(&sbi->pin_sem); f2fs_lock_op(sbi); f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); f2fs_unlock_op(sbi); - set_inode_flag(inode, FI_DO_DEFRAG); + set_inode_flag(inode, FI_SKIP_WRITES); for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { struct page *page; @@ -4006,7 +3797,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, page = f2fs_get_lock_data_page(inode, blkidx, true); if (IS_ERR(page)) { - up_write(&sbi->pin_sem); + f2fs_up_write(&sbi->pin_sem); ret = PTR_ERR(page); goto done; } @@ -4015,22 +3806,23 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, f2fs_put_page(page, 1); } - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); ret = filemap_fdatawrite(inode->i_mapping); - up_write(&sbi->pin_sem); + f2fs_up_write(&sbi->pin_sem); if (ret) break; } done: - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); + clear_inode_flag(inode, FI_OPU_WRITE); clear_inode_flag(inode, FI_ALIGNED_WRITE); - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } @@ -4206,7 +3998,7 @@ const struct address_space_operations f2fs_dblock_aops = { .set_page_dirty = f2fs_set_data_page_dirty, .invalidatepage = f2fs_invalidate_page, .releasepage = f2fs_release_page, - .direct_IO = f2fs_direct_IO, + .direct_IO = noop_direct_IO, .bmap = f2fs_bmap, .swap_activate = f2fs_swap_activate, .swap_deactivate = f2fs_swap_deactivate, @@ -4286,3 +4078,65 @@ void f2fs_destroy_bio_entry_cache(void) { kmem_cache_destroy(bio_entry_slab); } + +static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, + struct iomap *srcmap) +{ + struct f2fs_map_blocks map = {}; + pgoff_t next_pgofs = 0; + int err; + + map.m_lblk = bytes_to_blks(inode, offset); + map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1; + map.m_next_pgofs = &next_pgofs; + map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); + if (flags & IOMAP_WRITE) + map.m_may_create = true; + + err = f2fs_map_blocks(inode, &map, flags & IOMAP_WRITE, + F2FS_GET_BLOCK_DIO); + if (err) + return err; + + iomap->offset = blks_to_bytes(inode, map.m_lblk); + + /* + * When inline encryption is enabled, sometimes I/O to an encrypted file + * has to be broken up to guarantee DUN contiguity. Handle this by + * limiting the length of the mapping returned. + */ + map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len); + + if (map.m_flags & (F2FS_MAP_MAPPED | F2FS_MAP_UNWRITTEN)) { + iomap->length = blks_to_bytes(inode, map.m_len); + if (map.m_flags & F2FS_MAP_MAPPED) { + iomap->type = IOMAP_MAPPED; + iomap->flags |= IOMAP_F_MERGED; + } else { + iomap->type = IOMAP_UNWRITTEN; + } + if (WARN_ON_ONCE(!__is_valid_data_blkaddr(map.m_pblk))) + return -EINVAL; + + iomap->bdev = map.m_bdev; + iomap->addr = blks_to_bytes(inode, map.m_pblk); + } else { + iomap->length = blks_to_bytes(inode, next_pgofs) - + iomap->offset; + iomap->type = IOMAP_HOLE; + iomap->addr = IOMAP_NULL_ADDR; + } + + if (map.m_flags & F2FS_MAP_NEW) + iomap->flags |= IOMAP_F_NEW; + if ((inode->i_state & I_DIRTY_DATASYNC) || + offset + length > i_size_read(inode)) + iomap->flags |= IOMAP_F_DIRTY; + + return 0; +} + +const struct iomap_ops f2fs_iomap_ops = { + .iomap_begin = f2fs_iomap_begin, +}; diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8c50518475a99a32e7ac9c4f724a982c745ad46b..fcdf253cd211e1f6ff058418a4130366283d886b 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -21,7 +21,7 @@ #include "gc.h" static LIST_HEAD(f2fs_stat_list); -static DEFINE_MUTEX(f2fs_stat_mutex); +static DEFINE_RAW_SPINLOCK(f2fs_stat_lock); #ifdef CONFIG_DEBUG_FS static struct dentry *f2fs_debugfs_root; #endif @@ -338,14 +338,16 @@ static char *s_flag[] = { [SBI_QUOTA_SKIP_FLUSH] = " quota_skip_flush", [SBI_QUOTA_NEED_REPAIR] = " quota_need_repair", [SBI_IS_RESIZEFS] = " resizefs", + [SBI_IS_FREEZING] = " freezefs", }; static int stat_show(struct seq_file *s, void *v) { struct f2fs_stat_info *si; int i = 0, j = 0; + unsigned long flags; - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_for_each_entry(si, &f2fs_stat_list, stat_list) { update_general_status(si->sbi); @@ -474,12 +476,14 @@ static int stat_show(struct seq_file *s, void *v) si->node_segs, si->bg_node_segs); seq_printf(s, " - Reclaimed segs : Normal (%d), Idle CB (%d), " "Idle Greedy (%d), Idle AT (%d), " - "Urgent High (%d), Urgent Low (%d)\n", + "Urgent High (%d), Urgent Mid (%d), " + "Urgent Low (%d)\n", si->sbi->gc_reclaimed_segs[GC_NORMAL], si->sbi->gc_reclaimed_segs[GC_IDLE_CB], si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY], si->sbi->gc_reclaimed_segs[GC_IDLE_AT], si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH], + si->sbi->gc_reclaimed_segs[GC_URGENT_MID], si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]); seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks, si->bg_data_blks + si->bg_node_blks); @@ -532,6 +536,9 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_meta, si->meta_pages); seq_printf(s, " - imeta: %4d\n", si->ndirty_imeta); + seq_printf(s, " - fsync mark: %4lld\n", + percpu_counter_sum_positive( + &si->sbi->rf_node_block_count)); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n", @@ -573,7 +580,7 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - paged : %llu KB\n", si->page_mem >> 10); } - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); return 0; } @@ -584,6 +591,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); struct f2fs_stat_info *si; + unsigned long flags; int i; si = f2fs_kzalloc(sbi, sizeof(struct f2fs_stat_info), GFP_KERNEL); @@ -619,9 +627,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->max_aw_cnt, 0); atomic_set(&sbi->max_vw_cnt, 0); - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_add_tail(&si->stat_list, &f2fs_stat_list); - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); return 0; } @@ -629,10 +637,11 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); + unsigned long flags; - mutex_lock(&f2fs_stat_mutex); + raw_spin_lock_irqsave(&f2fs_stat_lock, flags); list_del(&si->stat_list); - mutex_unlock(&f2fs_stat_mutex); + raw_spin_unlock_irqrestore(&f2fs_stat_lock, flags); kfree(si); } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 11bef1b7f2510a483596cce276e00a2a213fc364..4316fbfc503495550c45177ae75cc70bc262e325 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -768,7 +768,7 @@ add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA, true, true); if (inode) { - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); page = f2fs_init_inode_metadata(inode, dir, fname, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -795,7 +795,7 @@ add_dentry: f2fs_update_parent_metadata(dir, inode, current_depth); fail: if (inode) - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); f2fs_put_page(dentry_page, 1); @@ -860,7 +860,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) struct page *page; int err = 0; - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); page = f2fs_init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -871,7 +871,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) clear_inode_flag(inode, FI_NEW_INODE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); fail: - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); return err; } @@ -879,7 +879,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); if (S_ISDIR(inode->i_mode)) f2fs_i_links_write(dir, false); @@ -890,7 +890,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode) f2fs_i_links_write(inode, false); f2fs_i_size_write(inode, 0); } - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); if (inode->i_nlink == 0) f2fs_add_orphan_inode(inode); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 47807da5fa6d2a52acfcbe560ed046ee5188d06b..6b5b3e293ae1f5dfd8b0159ba5f88a863846f50b 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -55,6 +55,8 @@ enum { FAULT_DISCARD, FAULT_WRITE_IO, FAULT_SLAB_ALLOC, + FAULT_DQUOT_INIT, + FAULT_LOCK_OP, FAULT_MAX, }; @@ -119,6 +121,20 @@ typedef u32 nid_t; #define COMPRESS_EXT_NUM 16 +/* + * An implementation of an rwsem that is explicitly unfair to readers. This + * prevents priority inversion when a low-priority reader acquires the read lock + * while sleeping on the write lock but the write lock is needed by + * higher-priority clients. + */ + +struct f2fs_rwsem { + struct rw_semaphore internal_rwsem; +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + wait_queue_head_t read_waiters; +#endif +}; + struct f2fs_mount_info { unsigned int opt; int write_io_size_bits; /* Write IO size bits */ @@ -382,6 +398,10 @@ struct discard_cmd_control { struct mutex cmd_lock; unsigned int nr_discards; /* # of discards in the list */ unsigned int max_discards; /* max. discards to be issued */ + unsigned int max_discard_request; /* max. discard request per round */ + unsigned int min_discard_issue_time; /* min. interval between discard issue */ + unsigned int mid_discard_issue_time; /* mid. interval between discard issue */ + unsigned int max_discard_issue_time; /* max. interval between discard issue */ unsigned int discard_granularity; /* discard granularity */ unsigned int undiscard_blks; /* # of undiscard blocks */ unsigned int next_pos; /* next discard position */ @@ -557,16 +577,25 @@ enum { /* maximum retry quota flush count */ #define DEFAULT_RETRY_QUOTA_FLUSH_COUNT 8 +/* maximum retry of EIO'ed meta page */ +#define MAX_RETRY_META_PAGE_EIO 100 + #define F2FS_LINK_MAX 0xffffffff /* maximum link count per file */ #define MAX_DIR_RA_PAGES 4 /* maximum ra pages of dir */ +/* dirty segments threshold for triggering CP */ +#define DEFAULT_DIRTY_THRESHOLD 4 + /* for in-memory extent cache entry */ #define F2FS_MIN_EXTENT_LEN 64 /* minimum extent length */ /* number of extent info in extent cache we try to shrink */ #define EXTENT_CACHE_SHRINK_NUMBER 128 +#define RECOVERY_MAX_RA_BLOCKS BIO_MAX_PAGES +#define RECOVERY_MIN_RA_BLOCKS 1 + struct rb_entry { struct rb_node rb_node; /* rb node located in rb-tree */ union { @@ -617,6 +646,7 @@ struct extent_tree { F2FS_MAP_UNWRITTEN) struct f2fs_map_blocks { + struct block_device *m_bdev; /* for multi-device dio */ block_t m_pblk; block_t m_lblk; unsigned int m_len; @@ -625,6 +655,7 @@ struct f2fs_map_blocks { pgoff_t *m_next_extent; /* point to next possible extent */ int m_seg_type; bool m_may_create; /* indicate it is from write path */ + bool m_multidev_dio; /* indicate it allows multi-device dio */ }; /* for flag in get_data_block */ @@ -648,6 +679,7 @@ enum { #define FADVISE_KEEP_SIZE_BIT 0x10 #define FADVISE_HOT_BIT 0x20 #define FADVISE_VERITY_BIT 0x40 +#define FADVISE_TRUNC_BIT 0x80 #define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT) @@ -675,6 +707,10 @@ enum { #define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT) #define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT) +#define file_should_truncate(inode) is_file(inode, FADVISE_TRUNC_BIT) +#define file_need_truncate(inode) set_file(inode, FADVISE_TRUNC_BIT) +#define file_dont_truncate(inode) clear_file(inode, FADVISE_TRUNC_BIT) + #define DEF_DIR_LEVEL 0 enum { @@ -707,9 +743,10 @@ enum { FI_DROP_CACHE, /* drop dirty page cache */ FI_DATA_EXIST, /* indicate data exists */ FI_INLINE_DOTS, /* indicate inline dot dentries */ - FI_DO_DEFRAG, /* indicate defragment is running */ + FI_SKIP_WRITES, /* should skip data page writeback */ + FI_OPU_WRITE, /* used for opu per file */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ - FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ + FI_PREALLOCATED_ALL, /* all blocks for write were preallocated */ FI_HOT_DATA, /* indicate file is hot */ FI_EXTRA_ATTR, /* indicate file has extra attribute */ FI_PROJ_INHERIT, /* indicate file inherits projectid */ @@ -738,7 +775,7 @@ struct f2fs_inode_info { /* Use below internally in f2fs*/ unsigned long flags[BITS_TO_LONGS(FI_MAX)]; /* use to pass per-file flags */ - struct rw_semaphore i_sem; /* protect fi info */ + struct f2fs_rwsem i_sem; /* protect fi info */ atomic_t dirty_pages; /* # of dirty pages */ f2fs_hash_t chash; /* hash value of given file name */ unsigned int clevel; /* maximum level of given file name */ @@ -763,9 +800,9 @@ struct f2fs_inode_info { struct extent_tree *extent_tree; /* cached extent_tree entry */ /* avoid racing between foreground op and gc */ - struct rw_semaphore i_gc_rwsem[2]; - struct rw_semaphore i_mmap_sem; - struct rw_semaphore i_xattr_sem; /* avoid racing between reading and changing EAs */ + struct f2fs_rwsem i_gc_rwsem[2]; + struct f2fs_rwsem i_mmap_sem; + struct f2fs_rwsem i_xattr_sem; /* avoid racing between reading and changing EAs */ int i_extra_isize; /* size of extra space located in i_addr */ kprojid_t i_projid; /* id for project quota */ @@ -884,6 +921,7 @@ struct f2fs_nm_info { nid_t max_nid; /* maximum possible node ids */ nid_t available_nids; /* # of available node ids */ nid_t next_scan_nid; /* the next nid to be scanned */ + nid_t max_rf_node_blocks; /* max # of nodes for recovery */ unsigned int ram_thresh; /* control the memory footprint */ unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */ unsigned int dirty_nats_ratio; /* control dirty nats ratio threshold */ @@ -891,7 +929,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct rw_semaphore nat_tree_lock; /* protect nat entry tree */ + struct f2fs_rwsem nat_tree_lock; /* protect nat entry tree */ struct list_head nat_entries; /* cached nat entry list (clean) */ spinlock_t nat_list_lock; /* protect clean nat entry list */ unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */ @@ -1004,7 +1042,7 @@ struct f2fs_sm_info { struct dirty_seglist_info *dirty_info; /* dirty segment information */ struct curseg_info *curseg_array; /* active segment information */ - struct rw_semaphore curseg_lock; /* for preventing curseg change */ + struct f2fs_rwsem curseg_lock; /* for preventing curseg change */ block_t seg0_blkaddr; /* block address of 0'th segment */ block_t main_blkaddr; /* start block address of main area */ @@ -1013,6 +1051,7 @@ struct f2fs_sm_info { unsigned int segment_count; /* total # of segments */ unsigned int main_segments; /* # of segments in main area */ unsigned int reserved_segments; /* # of reserved segments */ + unsigned int additional_reserved_segments;/* reserved segs for IO align feature */ unsigned int ovp_segments; /* # of overprovision segments */ /* a threshold to reclaim prefree segments */ @@ -1187,11 +1226,11 @@ struct f2fs_bio_info { struct bio *bio; /* bios to merge */ sector_t last_block_in_bio; /* last block number */ struct f2fs_io_info fio; /* store buffered io info. */ - struct rw_semaphore io_rwsem; /* blocking op for bio */ + struct f2fs_rwsem io_rwsem; /* blocking op for bio */ spinlock_t io_lock; /* serialize DATA/NODE IOs */ struct list_head io_list; /* track fios */ struct list_head bio_list; /* bio entry list head */ - struct rw_semaphore bio_list_lock; /* lock to protect bio entry list */ + struct f2fs_rwsem bio_list_lock; /* lock to protect bio entry list */ }; #define FDEV(i) (sbi->devs[i]) @@ -1253,6 +1292,7 @@ enum { SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */ SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */ SBI_IS_RESIZEFS, /* resizefs is in process */ + SBI_IS_FREEZING, /* freezefs is in process */ }; enum { @@ -1272,6 +1312,7 @@ enum { GC_IDLE_AT, GC_URGENT_HIGH, GC_URGENT_LOW, + GC_URGENT_MID, MAX_GC_MODE, }; @@ -1285,8 +1326,10 @@ enum { }; enum { - FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ - FS_MODE_LFS, /* use lfs allocation only */ + FS_MODE_ADAPTIVE, /* use both lfs/ssr allocation */ + FS_MODE_LFS, /* use lfs allocation only */ + FS_MODE_FRAGMENT_SEG, /* segment fragmentation mode */ + FS_MODE_FRAGMENT_BLK, /* block fragmentation mode */ }; enum { @@ -1479,6 +1522,7 @@ struct compress_ctx { unsigned int nr_rpages; /* total page number in rpages */ struct page **cpages; /* pages store compressed data in cluster */ unsigned int nr_cpages; /* total page number in cpages */ + unsigned int valid_nr_cpages; /* valid page number in cpages */ void *rbuf; /* virtual mapped address on rpages */ struct compress_data *cbuf; /* virtual mapped address on cpages */ size_t rlen; /* valid data length in rbuf */ @@ -1554,7 +1598,7 @@ struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ - struct rw_semaphore sb_lock; /* lock for raw super block */ + struct f2fs_rwsem sb_lock; /* lock for raw super block */ int valid_super_block; /* valid super block no */ unsigned long s_flag; /* flags for sbi */ struct mutex writepages; /* mutex for writepages() */ @@ -1574,18 +1618,20 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info *write_io[NR_PAGE_TYPE]; /* for write bios */ /* keep migration IO order for LFS mode */ - struct rw_semaphore io_order_lock; + struct f2fs_rwsem io_order_lock; mempool_t *write_io_dummy; /* Dummy pages */ + pgoff_t metapage_eio_ofs; /* EIO page offset */ + int metapage_eio_cnt; /* EIO count */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ int cur_cp_pack; /* remain current cp pack */ spinlock_t cp_lock; /* for flag in ckpt */ struct inode *meta_inode; /* cache meta blocks */ - struct rw_semaphore cp_global_sem; /* checkpoint procedure lock */ - struct rw_semaphore cp_rwsem; /* blocking FS operations */ - struct rw_semaphore node_write; /* locking node writes */ - struct rw_semaphore node_change; /* locking node change */ + struct f2fs_rwsem cp_global_sem; /* checkpoint procedure lock */ + struct f2fs_rwsem cp_rwsem; /* blocking FS operations */ + struct f2fs_rwsem node_write; /* locking node writes */ + struct f2fs_rwsem node_change; /* locking node change */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ @@ -1645,12 +1691,14 @@ struct f2fs_sb_info { block_t unusable_block_count; /* # of blocks saved by last cp */ unsigned int nquota_files; /* # of quota sysfile */ - struct rw_semaphore quota_sem; /* blocking cp for flags */ + struct f2fs_rwsem quota_sem; /* blocking cp for flags */ /* # of pages, see count_type */ atomic_t nr_pages[NR_COUNT_TYPE]; /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; + /* # of node block writes as roll forward recovery */ + struct percpu_counter rf_node_block_count; /* writeback control */ atomic_t wb_sync_req[META]; /* count # of WB_SYNC threads */ @@ -1661,7 +1709,7 @@ struct f2fs_sb_info { struct f2fs_mount_info mount_opt; /* mount options */ /* for cleaning operations */ - struct rw_semaphore gc_lock; /* + struct f2fs_rwsem gc_lock; /* * semaphore for GC, avoid * race between GC and GC or CP */ @@ -1670,6 +1718,9 @@ struct f2fs_sb_info { unsigned int cur_victim_sec; /* current victim section num */ unsigned int gc_mode; /* current GC state */ unsigned int next_victim_seg[2]; /* next segment in victim section */ + spinlock_t gc_urgent_high_lock; + bool gc_urgent_high_limited; /* indicates having limited trial count */ + unsigned int gc_urgent_high_remaining; /* remaining trial count for GC_URGENT_HIGH */ /* for skip statistic */ unsigned int atomic_files; /* # of opened atomic file */ @@ -1678,7 +1729,7 @@ struct f2fs_sb_info { /* threshold for gc trials on pinned files */ u64 gc_pin_file_threshold; - struct rw_semaphore pin_sem; + struct f2fs_rwsem pin_sem; /* maximum # of trials to find a victim segment for SSR and GC */ unsigned int max_victim_search; @@ -1729,12 +1780,15 @@ struct f2fs_sb_info { /* For shrinker support */ struct list_head s_list; + struct mutex umount_mutex; + unsigned int shrinker_run_no; + + /* For multi devices */ int s_ndevs; /* number of devices */ struct f2fs_dev_info *devs; /* for device list */ unsigned int dirty_device; /* for checkpoint data flush */ spinlock_t dev_lock; /* protect dirty_device */ - struct mutex umount_mutex; - unsigned int shrinker_run_no; + bool aligned_blksize; /* all devices has the same logical blksize */ /* For write statistics */ u64 sectors_written_start; @@ -1757,6 +1811,9 @@ struct f2fs_sb_info { unsigned long seq_file_ra_mul; /* multiplier for ra_pages of seq. files in fadvise */ + int max_fragment_chunk; /* max chunk size for block fragmentation mode */ + int max_fragment_hole; /* max hole size for block fragmentation mode */ + #ifdef CONFIG_F2FS_FS_COMPRESSION struct kmem_cache *page_array_slab; /* page array entry */ unsigned int page_array_slab_size; /* default page array slab size */ @@ -1788,13 +1845,6 @@ struct f2fs_sb_info { #endif }; -struct f2fs_private_dio { - struct inode *inode; - void *orig_private; - bio_end_io_t *orig_end_io; - bool write; -}; - #ifdef CONFIG_F2FS_FAULT_INJECTION #define f2fs_show_injection_info(sbi, type) \ printk_ratelimited("%sF2FS-fs (%s) : inject %s in %s of %pS\n", \ @@ -2073,29 +2123,105 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) spin_unlock_irqrestore(&sbi->cp_lock, flags); } +#define init_f2fs_rwsem(sem) \ +do { \ + static struct lock_class_key __key; \ + \ + __init_f2fs_rwsem((sem), #sem, &__key); \ +} while (0) + +static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem, + const char *sem_name, struct lock_class_key *key) +{ + __init_rwsem(&sem->internal_rwsem, sem_name, key); +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + init_waitqueue_head(&sem->read_waiters); +#endif +} + +static inline int f2fs_rwsem_is_locked(struct f2fs_rwsem *sem) +{ + return rwsem_is_locked(&sem->internal_rwsem); +} + +static inline int f2fs_rwsem_is_contended(struct f2fs_rwsem *sem) +{ + return rwsem_is_contended(&sem->internal_rwsem); +} + +static inline void f2fs_down_read(struct f2fs_rwsem *sem) +{ +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + wait_event(sem->read_waiters, down_read_trylock(&sem->internal_rwsem)); +#else + down_read(&sem->internal_rwsem); +#endif +} + +static inline int f2fs_down_read_trylock(struct f2fs_rwsem *sem) +{ + return down_read_trylock(&sem->internal_rwsem); +} + +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline void f2fs_down_read_nested(struct f2fs_rwsem *sem, int subclass) +{ + down_read_nested(&sem->internal_rwsem, subclass); +} +#else +#define f2fs_down_read_nested(sem, subclass) f2fs_down_read(sem) +#endif + +static inline void f2fs_up_read(struct f2fs_rwsem *sem) +{ + up_read(&sem->internal_rwsem); +} + +static inline void f2fs_down_write(struct f2fs_rwsem *sem) +{ + down_write(&sem->internal_rwsem); +} + +static inline int f2fs_down_write_trylock(struct f2fs_rwsem *sem) +{ + return down_write_trylock(&sem->internal_rwsem); +} + +static inline void f2fs_up_write(struct f2fs_rwsem *sem) +{ + up_write(&sem->internal_rwsem); +#ifdef CONFIG_F2FS_UNFAIR_RWSEM + wake_up_all(&sem->read_waiters); +#endif +} + static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - down_read(&sbi->cp_rwsem); + f2fs_down_read(&sbi->cp_rwsem); } static inline int f2fs_trylock_op(struct f2fs_sb_info *sbi) { - return down_read_trylock(&sbi->cp_rwsem); + if (time_to_inject(sbi, FAULT_LOCK_OP)) { + f2fs_show_injection_info(sbi, FAULT_LOCK_OP); + return 0; + } + return f2fs_down_read_trylock(&sbi->cp_rwsem); } static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - up_read(&sbi->cp_rwsem); + f2fs_up_read(&sbi->cp_rwsem); } static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - down_write(&sbi->cp_rwsem); + f2fs_down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - up_write(&sbi->cp_rwsem); + f2fs_up_write(&sbi->cp_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) @@ -2185,6 +2311,11 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, true)) avail_user_block_count -= F2FS_OPTION(sbi).root_reserved_blocks; + + if (F2FS_IO_ALIGNED(sbi)) + avail_user_block_count -= sbi->blocks_per_seg * + SM_I(sbi)->additional_reserved_segments; + if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) { if (avail_user_block_count > sbi->unusable_block_count) avail_user_block_count -= sbi->unusable_block_count; @@ -2431,6 +2562,11 @@ static inline int inc_valid_node_count(struct f2fs_sb_info *sbi, if (!__allow_reserved_blocks(sbi, inode, false)) valid_block_count += F2FS_OPTION(sbi).root_reserved_blocks; + + if (F2FS_IO_ALIGNED(sbi)) + valid_block_count += sbi->blocks_per_seg * + SM_I(sbi)->additional_reserved_segments; + user_block_count = sbi->user_block_count; if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) user_block_count -= sbi->unusable_block_count; @@ -2648,6 +2784,9 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type) if (is_inflight_io(sbi, type)) return false; + if (sbi->gc_mode == GC_URGENT_MID) + return true; + if (sbi->gc_mode == GC_URGENT_LOW && (type == DISCARD_TIME || type == GC_TIME)) return true; @@ -3103,12 +3242,16 @@ static inline int is_file(struct inode *inode, int type) static inline void set_file(struct inode *inode, int type) { + if (is_file(inode, type)) + return; F2FS_I(inode)->i_advise |= type; f2fs_mark_inode_dirty_sync(inode, true); } static inline void clear_file(struct inode *inode, int type) { + if (!is_file(inode, type)) + return; F2FS_I(inode)->i_advise &= ~type; f2fs_mark_inode_dirty_sync(inode, true); } @@ -3360,6 +3503,7 @@ static inline int f2fs_add_link(struct dentry *dentry, struct inode *inode) */ int f2fs_inode_dirtied(struct inode *inode, bool sync); void f2fs_inode_synced(struct inode *inode); +int f2fs_dquot_initialize(struct inode *inode); int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly); int f2fs_quota_sync(struct super_block *sb, int type); loff_t max_file_blocks(struct inode *inode); @@ -3388,7 +3532,7 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid); bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino); int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, - struct node_info *ni); + struct node_info *ni, bool checkpoint_context); pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs); int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode); int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from); @@ -3489,6 +3633,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, block_t old_blkaddr, block_t *new_blkaddr, struct f2fs_summary *sum, int type, struct f2fs_io_info *fio); +void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, + block_t blkaddr, unsigned int blkcnt); void f2fs_wait_on_page_writeback(struct page *page, enum page_type type, bool ordered, bool locked); void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr); @@ -3513,6 +3659,16 @@ unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi, unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi, unsigned int segno); +#define DEF_FRAGMENT_SIZE 4 +#define MIN_FRAGMENT_SIZE 1 +#define MAX_FRAGMENT_SIZE 512 + +static inline bool f2fs_need_rand_seg(struct f2fs_sb_info *sbi) +{ + return F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG || + F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK; +} + /* * checkpoint.c */ @@ -3525,7 +3681,8 @@ bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type); int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type, bool sync); -void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index); +void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index, + unsigned int ra_blocks); long f2fs_sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, long nr_to_write, enum iostat_type io_type); void f2fs_add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type); @@ -3584,7 +3741,6 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr); int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count); int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); -int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write); @@ -3607,6 +3763,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, struct writeback_control *wbc, enum iostat_type io_type, int compr_blocks, bool allow_balance); +void f2fs_write_failed(struct inode *inode, loff_t to); void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length); int f2fs_release_page(struct page *page, gfp_t wait); @@ -3620,6 +3777,7 @@ int f2fs_init_post_read_processing(void); void f2fs_destroy_post_read_processing(void); int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi); void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi); +extern const struct iomap_ops f2fs_iomap_ops; /* * gc.c @@ -4024,6 +4182,8 @@ void f2fs_end_read_compressed_page(struct page *page, bool failed, block_t blkaddr); bool f2fs_cluster_is_empty(struct compress_ctx *cc); bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index); +bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec, + int index, int nr_pages); bool f2fs_sanity_check_cluster(struct dnode_of_data *dn); void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page); int f2fs_write_multi_pages(struct compress_ctx *cc, @@ -4149,8 +4309,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode) if (!f2fs_compressed_file(inode)) return true; - if (S_ISREG(inode->i_mode) && - (get_dirty_pages(inode) || atomic_read(&fi->i_compr_blocks))) + if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode)) return false; fi->i_flags &= ~F2FS_COMPR_FL; @@ -4299,6 +4458,16 @@ static inline int block_unaligned_IO(struct inode *inode, return align & blocksize_mask; } +static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi, + int flag) +{ + if (!f2fs_is_multi_device(sbi)) + return false; + if (flag != F2FS_GET_BLOCK_DIO) + return false; + return sbi->aligned_blksize; +} + static inline bool f2fs_force_buffered_io(struct inode *inode, struct kiocb *iocb, struct iov_iter *iter) { @@ -4311,7 +4480,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode, return true; if (f2fs_compressed_file(inode)) return true; - if (f2fs_is_multi_device(sbi)) + + /* disallow direct IO if any of devices has unaligned blksize */ + if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize) return true; /* * for blkzoned device, fallback direct IO to buffered IO, so diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index e6474e40d1830dd00679e6f95b68777d2d3cd87f..387b653e1ad213c97b8bd3ecb9c2a3a1aada6620 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -32,6 +33,7 @@ #include "gc.h" #include "iostat.h" #include +#include #include static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) @@ -39,9 +41,9 @@ static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf) struct inode *inode = file_inode(vmf->vma->vm_file); vm_fault_t ret; - down_read(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_read(&F2FS_I(inode)->i_mmap_sem); ret = filemap_fault(vmf); - up_read(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_read(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_update_iostat(F2FS_I_SB(inode), APP_MAPPED_READ_IO, @@ -102,7 +104,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); file_update_time(vmf->vma->vm_file); - down_read(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_read(&F2FS_I(inode)->i_mmap_sem); lock_page(page); if (unlikely(page->mapping != inode->i_mapping || page_offset(page) > i_size_read(inode) || @@ -160,7 +162,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) trace_f2fs_vm_page_mkwrite(page, DATA); out_sem: - up_read(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_read(&F2FS_I(inode)->i_mmap_sem); sb_end_pagefault(inode->i_sb); err: @@ -241,13 +243,13 @@ static void try_to_fix_pino(struct inode *inode) struct f2fs_inode_info *fi = F2FS_I(inode); nid_t pino; - down_write(&fi->i_sem); + f2fs_down_write(&fi->i_sem); if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { f2fs_i_pino_write(inode, pino); file_got_pino(inode); } - up_write(&fi->i_sem); + f2fs_up_write(&fi->i_sem); } static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end, @@ -322,9 +324,9 @@ go_write: * Both of fdatasync() and fsync() are able to be recovered from * sudden-power-off. */ - down_read(&F2FS_I(inode)->i_sem); + f2fs_down_read(&F2FS_I(inode)->i_sem); cp_reason = need_do_checkpoint(inode); - up_read(&F2FS_I(inode)->i_sem); + f2fs_up_read(&F2FS_I(inode)->i_sem); if (cp_reason) { /* all the dirty node pages should be flushed for POR */ @@ -791,7 +793,7 @@ int f2fs_truncate(struct inode *inode) return -EIO; } - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) return err; @@ -816,7 +818,7 @@ int f2fs_getattr(const struct path *path, struct kstat *stat, { struct inode *inode = d_inode(path->dentry); struct f2fs_inode_info *fi = F2FS_I(inode); - struct f2fs_inode *ri; + struct f2fs_inode *ri = NULL; unsigned int flags; if (f2fs_has_extra_attr(inode) && @@ -919,7 +921,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (is_quota_modification(inode, attr)) { - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) return err; } @@ -960,8 +962,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; } - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); truncate_setsize(inode, attr->ia_size); @@ -971,8 +973,8 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) * do not trim all blocks after i_size if target size is * larger than i_size. */ - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (err) return err; @@ -1112,8 +1114,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) blk_start = (loff_t)pg_start << PAGE_SHIFT; blk_end = (loff_t)pg_end << PAGE_SHIFT; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache_range(inode, blk_start, blk_end - 1); @@ -1121,8 +1123,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) ret = f2fs_truncate_hole(inode, pg_start, pg_end); f2fs_unlock_op(sbi); - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); } } @@ -1233,7 +1235,7 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, if (ret) return ret; - ret = f2fs_get_node_info(sbi, dn.nid, &ni); + ret = f2fs_get_node_info(sbi, dn.nid, &ni, false); if (ret) { f2fs_put_dnode(&dn); return ret; @@ -1355,8 +1357,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); f2fs_lock_op(sbi); f2fs_drop_extent_tree(inode); @@ -1364,8 +1366,8 @@ static int f2fs_do_collapse(struct inode *inode, loff_t offset, loff_t len) ret = __exchange_data_block(inode, inode, end, start, nrpages - end, true); f2fs_unlock_op(sbi); - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); return ret; } @@ -1395,13 +1397,13 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len) return ret; /* write out all moved pages, if possible */ - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); new_size = i_size_read(inode) - len; ret = f2fs_truncate_blocks(inode, new_size, true); - up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); return ret; @@ -1500,8 +1502,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, unsigned int end_offset; pgoff_t end; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache_range(inode, (loff_t)index << PAGE_SHIFT, @@ -1513,8 +1515,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, ret = f2fs_get_dnode_of_data(&dn, index, ALLOC_NODE); if (ret) { f2fs_unlock_op(sbi); - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } @@ -1525,8 +1527,8 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_balance_fs(sbi, dn.node_changed); @@ -1582,9 +1584,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) f2fs_balance_fs(sbi, true); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); ret = f2fs_truncate_blocks(inode, i_size_read(inode), true); - up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); if (ret) return ret; @@ -1599,8 +1601,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); /* avoid gc operation during block exchange */ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); truncate_pagecache(inode, offset); while (!ret && idx > pg_start) { @@ -1616,14 +1618,14 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) idx + delta, nr, false); f2fs_unlock_op(sbi); } - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); /* write out all moved pages, if possible */ - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX); truncate_pagecache(inode, offset); - up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); if (!ret) f2fs_i_size_write(inode, new_size); @@ -1673,13 +1675,13 @@ static int expand_inode_data(struct inode *inode, loff_t offset, next_alloc: if (has_not_enough_free_secs(sbi, 0, GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err && err != -ENODATA && err != -EAGAIN) goto out_err; } - down_write(&sbi->pin_sem); + f2fs_down_write(&sbi->pin_sem); f2fs_lock_op(sbi); f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); @@ -1687,8 +1689,9 @@ next_alloc: map.m_seg_type = CURSEG_COLD_DATA_PINNED; err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO); + file_dont_truncate(inode); - up_write(&sbi->pin_sem); + f2fs_up_write(&sbi->pin_sem); expanded += map.m_len; sec_len -= map.m_len; @@ -1748,7 +1751,11 @@ static long f2fs_fallocate(struct file *file, int mode, (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; - if (f2fs_compressed_file(inode) && + /* + * Pinned file should not support partial trucation since the block + * can be used by applications. + */ + if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; @@ -2060,7 +2067,10 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode_lock(inode); - f2fs_disable_compressed_file(inode); + if (!f2fs_disable_compressed_file(inode)) { + ret = -EINVAL; + goto out; + } if (f2fs_is_atomic_file(inode)) { if (is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) @@ -2072,7 +2082,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (ret) goto out; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); /* * Should wait end_io to count F2FS_WB_CP_DATA correctly by @@ -2083,7 +2093,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) { - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); goto out; } @@ -2096,7 +2106,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) /* add inode in inmem_list first and set atomic_file */ set_inode_flag(inode, FI_ATOMIC_FILE); clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); F2FS_I(inode)->inmem_task = current; @@ -2403,7 +2413,7 @@ static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg) if (err) return err; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); if (uuid_is_nonzero(sbi->raw_super->encrypt_pw_salt)) goto got_it; @@ -2422,7 +2432,7 @@ got_it: 16)) err = -EFAULT; out_err: - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); mnt_drop_write_file(filp); return err; } @@ -2499,12 +2509,12 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) return ret; if (!sync) { - if (!down_write_trylock(&sbi->gc_lock)) { + if (!f2fs_down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO); @@ -2535,12 +2545,12 @@ static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range) do_more: if (!range->sync) { - if (!down_write_trylock(&sbi->gc_lock)) { + if (!f2fs_down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } } else { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); } ret = f2fs_gc(sbi, range->sync, true, false, @@ -2611,10 +2621,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, bool fragmented = false; int err; - /* if in-place-update policy is enabled, don't waste time here */ - if (f2fs_should_update_inplace(inode, NULL)) - return -EINVAL; - pg_start = range->start >> PAGE_SHIFT; pg_end = (range->start + range->len) >> PAGE_SHIFT; @@ -2622,6 +2628,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, inode_lock(inode); + /* if in-place-update policy is enabled, don't waste time here */ + set_inode_flag(inode, FI_OPU_WRITE); + if (f2fs_should_update_inplace(inode, NULL)) { + err = -EINVAL; + goto out; + } + /* writeback all dirty pages in the range */ err = filemap_write_and_wait_range(inode->i_mapping, range->start, range->start + range->len - 1); @@ -2703,7 +2716,7 @@ do_map: goto check; } - set_inode_flag(inode, FI_DO_DEFRAG); + set_inode_flag(inode, FI_SKIP_WRITES); idx = map.m_lblk; while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { @@ -2728,15 +2741,16 @@ check: if (map.m_lblk < pg_end && cnt < blk_per_seg) goto do_map; - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); err = filemap_fdatawrite(inode->i_mapping); if (err) goto out; } clear_out: - clear_inode_flag(inode, FI_DO_DEFRAG); + clear_inode_flag(inode, FI_SKIP_WRITES); out: + clear_inode_flag(inode, FI_OPU_WRITE); inode_unlock(inode); if (!err) range->len = (u64)total << PAGE_SHIFT; @@ -2872,10 +2886,10 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_balance_fs(sbi, true); - down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); if (src != dst) { ret = -EBUSY; - if (!down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) + if (!f2fs_down_write_trylock(&F2FS_I(dst)->i_gc_rwsem[WRITE])) goto out_src; } @@ -2893,9 +2907,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_unlock_op(sbi); if (src != dst) - up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(dst)->i_gc_rwsem[WRITE]); out_src: - up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(src)->i_gc_rwsem[WRITE]); out_unlock: if (src != dst) inode_unlock(dst); @@ -2990,7 +3004,7 @@ static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) end_segno = min(start_segno + range.segments, dev_end_segno); while (start_segno < end_segno) { - if (!down_write_trylock(&sbi->gc_lock)) { + if (!f2fs_down_write_trylock(&sbi->gc_lock)) { ret = -EBUSY; goto out; } @@ -3043,7 +3057,7 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *ipage; + struct f2fs_inode *ri = NULL; kprojid_t kprojid; int err; @@ -3067,19 +3081,10 @@ static int f2fs_ioc_setproject(struct file *filp, __u32 projid) if (IS_NOQUOTA(inode)) return err; - ipage = f2fs_get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); + if (!F2FS_FITS_IN_INODE(ri, fi->i_extra_isize, i_projid)) + return -EOVERFLOW; - if (!F2FS_FITS_IN_INODE(F2FS_INODE(ipage), fi->i_extra_isize, - i_projid)) { - err = -EOVERFLOW; - f2fs_put_page(ipage, 1); - return err; - } - f2fs_put_page(ipage, 1); - - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) return err; @@ -3270,17 +3275,17 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) inode_lock(inode); - if (f2fs_should_update_outplace(inode, NULL)) { - ret = -EINVAL; - goto out; - } - if (!pin) { clear_inode_flag(inode, FI_PIN_FILE); f2fs_i_gc_failures_write(inode, 0); goto done; } + if (f2fs_should_update_outplace(inode, NULL)) { + ret = -EINVAL; + goto out; + } + if (f2fs_pin_file_control(inode, false)) { ret = -EAGAIN; goto out; @@ -3336,9 +3341,9 @@ int f2fs_precache_extents(struct inode *inode) while (map.m_lblk < end) { map.m_len = end - map.m_lblk; - down_write(&fi->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_PRECACHE); - up_write(&fi->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); if (err) return err; @@ -3415,11 +3420,11 @@ static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg) if (!vbuf) return -ENOMEM; - down_read(&sbi->sb_lock); + f2fs_down_read(&sbi->sb_lock); count = utf16s_to_utf8s(sbi->raw_super->volume_name, ARRAY_SIZE(sbi->raw_super->volume_name), UTF16_LITTLE_ENDIAN, vbuf, MAX_VOLUME_NAME); - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); if (copy_to_user((char __user *)arg, vbuf, min(FSLABEL_MAX, count))) @@ -3447,7 +3452,7 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) if (err) goto out; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); memset(sbi->raw_super->volume_name, 0, sizeof(sbi->raw_super->volume_name)); @@ -3457,7 +3462,7 @@ static int f2fs_ioc_setfslabel(struct file *filp, unsigned long arg) err = f2fs_commit_super(sbi, false); - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); mnt_drop_write_file(filp); out: @@ -3583,8 +3588,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (!atomic_read(&F2FS_I(inode)->i_compr_blocks)) goto out; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3619,8 +3624,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) released_blocks += ret; } - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); out: inode_unlock(inode); @@ -3736,8 +3741,8 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) goto unlock_inode; } - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE); @@ -3772,8 +3777,8 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) reserved_blocks += ret; } - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (ret >= 0) { clear_inode_flag(inode, FI_COMPRESS_RELEASED); @@ -3891,8 +3896,8 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) if (ret) goto err; - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); ret = filemap_write_and_wait_range(mapping, range.start, to_end ? LLONG_MAX : end_addr - 1); @@ -3979,8 +3984,8 @@ static int f2fs_sec_trim_file(struct file *filp, unsigned long arg) ret = f2fs_secure_erase(prev_bdev, inode, prev_index, prev_block, len, range.flags); out: - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); err: inode_unlock(inode); file_end_write(filp); @@ -4353,133 +4358,467 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return __f2fs_ioctl(filp, cmd, arg); } -static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) +/* + * Return %true if the given read or write request should use direct I/O, or + * %false if it should use buffered I/O. + */ +static bool f2fs_should_use_dio(struct inode *inode, struct kiocb *iocb, + struct iov_iter *iter) +{ + unsigned int align; + + if (!(iocb->ki_flags & IOCB_DIRECT)) + return false; + + if (f2fs_force_buffered_io(inode, iocb, iter)) + return false; + + /* + * Direct I/O not aligned to the disk's logical_block_size will be + * attempted, but will fail with -EINVAL. + * + * f2fs additionally requires that direct I/O be aligned to the + * filesystem block size, which is often a stricter requirement. + * However, f2fs traditionally falls back to buffered I/O on requests + * that are logical_block_size-aligned but not fs-block aligned. + * + * The below logic implements this behavior. + */ + align = iocb->ki_pos | iov_iter_alignment(iter); + if (!IS_ALIGNED(align, i_blocksize(inode)) && + IS_ALIGNED(align, bdev_logical_block_size(inode->i_sb->s_bdev))) + return false; + + return true; +} + +static int f2fs_dio_read_end_io(struct kiocb *iocb, ssize_t size, int error, + unsigned int flags) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); + + dec_page_count(sbi, F2FS_DIO_READ); + if (error) + return error; + f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, size); + return 0; +} + +static const struct iomap_dio_ops f2fs_iomap_dio_read_ops = { + .end_io = f2fs_dio_read_end_io, +}; + +static ssize_t f2fs_dio_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); - int ret; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct f2fs_inode_info *fi = F2FS_I(inode); + const loff_t pos = iocb->ki_pos; + const size_t count = iov_iter_count(to); + struct iomap_dio *dio; + ssize_t ret; + + if (count == 0) + return 0; /* skip atime update */ + + trace_f2fs_direct_IO_enter(inode, iocb, count, READ); + if (trace_android_fs_dataread_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_dataread_start(inode, pos, + count, current->pid, path, + current->comm); + } + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { + ret = -EAGAIN; + goto out; + } + } else { + f2fs_down_read(&fi->i_gc_rwsem[READ]); + } + + /* + * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of + * the higher-level function iomap_dio_rw() in order to ensure that the + * F2FS_DIO_READ counter will be decremented correctly in all cases. + */ + inc_page_count(sbi, F2FS_DIO_READ); + dio = __iomap_dio_rw(iocb, to, &f2fs_iomap_ops, + &f2fs_iomap_dio_read_ops, is_sync_kiocb(iocb)); + if (IS_ERR_OR_NULL(dio)) { + ret = PTR_ERR_OR_ZERO(dio); + if (ret != -EIOCBQUEUED) + dec_page_count(sbi, F2FS_DIO_READ); + } else { + ret = iomap_dio_complete(dio); + } + + f2fs_up_read(&fi->i_gc_rwsem[READ]); + + file_accessed(file); +out: + if (trace_android_fs_dataread_start_enabled()) + trace_android_fs_dataread_end(inode, pos, count); + trace_f2fs_direct_IO_exit(inode, pos, count, READ, ret); + return ret; +} + +static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct inode *inode = file_inode(iocb->ki_filp); + ssize_t ret = 0; if (!f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; - ret = generic_file_read_iter(iocb, iter); + if (f2fs_should_use_dio(inode, iocb, to)) + return f2fs_dio_read_iter(iocb, to); + ret = generic_file_buffered_read(iocb, to, ret); if (ret > 0) - f2fs_update_iostat(F2FS_I_SB(inode), APP_READ_IO, ret); - + f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_READ_IO, ret); return ret; } -static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +static ssize_t f2fs_write_checks(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + ssize_t count; + int err; + + if (IS_IMMUTABLE(inode)) + return -EPERM; + + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) + return -EPERM; + + count = generic_write_checks(iocb, from); + if (count <= 0) + return count; + + err = file_modified(file); + if (err) + return err; + return count; +} + +/* + * Preallocate blocks for a write request, if it is possible and helpful to do + * so. Returns a positive number if blocks may have been preallocated, 0 if no + * blocks were preallocated, or a negative errno value if something went + * seriously wrong. Also sets FI_PREALLOCATED_ALL on the inode if *all* the + * requested blocks (not just some of them) have been allocated. + */ +static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, + bool dio) +{ + struct inode *inode = file_inode(iocb->ki_filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + const loff_t pos = iocb->ki_pos; + const size_t count = iov_iter_count(iter); + struct f2fs_map_blocks map = {}; + int flag; + int ret; + + /* If it will be an out-of-place direct write, don't bother. */ + if (dio && f2fs_lfs_mode(sbi)) + return 0; + /* + * Don't preallocate holes aligned to DIO_SKIP_HOLES which turns into + * buffered IO, if DIO meets any holes. + */ + if (dio && i_size_read(inode) && + (F2FS_BYTES_TO_BLK(pos) < F2FS_BLK_ALIGN(i_size_read(inode)))) + return 0; + + /* No-wait I/O can't allocate blocks. */ + if (iocb->ki_flags & IOCB_NOWAIT) + return 0; + + /* If it will be a short write, don't bother. */ + if (iov_iter_fault_in_readable(iter, count)) + return 0; + + if (f2fs_has_inline_data(inode)) { + /* If the data will fit inline, don't bother. */ + if (pos + count <= MAX_INLINE_DATA(inode)) + return 0; + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } + + /* Do not preallocate blocks that will be written partially in 4KB. */ + map.m_lblk = F2FS_BLK_ALIGN(pos); + map.m_len = F2FS_BYTES_TO_BLK(pos + count); + if (map.m_len > map.m_lblk) + map.m_len -= map.m_lblk; + else + map.m_len = 0; + map.m_may_create = true; + if (dio) { + map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint); + flag = F2FS_GET_BLOCK_PRE_DIO; + } else { + map.m_seg_type = NO_CHECK_TYPE; + flag = F2FS_GET_BLOCK_PRE_AIO; + } + + ret = f2fs_map_blocks(inode, &map, 1, flag); + /* -ENOSPC|-EDQUOT are fine to report the number of allocated blocks. */ + if (ret < 0 && !((ret == -ENOSPC || ret == -EDQUOT) && map.m_len > 0)) + return ret; + if (ret == 0) + set_inode_flag(inode, FI_PREALLOCATED_ALL); + return map.m_len; +} + +static ssize_t f2fs_buffered_write_iter(struct kiocb *iocb, + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); ssize_t ret; - if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { - ret = -EIO; - goto out; + if (iocb->ki_flags & IOCB_NOWAIT) + return -EOPNOTSUPP; + + current->backing_dev_info = inode_to_bdi(inode); + ret = generic_perform_write(file, from, iocb->ki_pos); + current->backing_dev_info = NULL; + + if (ret > 0) { + iocb->ki_pos += ret; + f2fs_update_iostat(F2FS_I_SB(inode), APP_BUFFERED_IO, ret); } + return ret; +} - if (!f2fs_is_compress_backend_ready(inode)) { - ret = -EOPNOTSUPP; - goto out; +static int f2fs_dio_write_end_io(struct kiocb *iocb, ssize_t size, int error, + unsigned int flags) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(iocb->ki_filp)); + + dec_page_count(sbi, F2FS_DIO_WRITE); + if (error) + return error; + f2fs_update_iostat(sbi, APP_DIRECT_IO, size); + return 0; +} + +static const struct iomap_dio_ops f2fs_iomap_dio_write_ops = { + .end_io = f2fs_dio_write_end_io, +}; + +static ssize_t f2fs_dio_write_iter(struct kiocb *iocb, struct iov_iter *from, + bool *may_need_sync) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + const bool do_opu = f2fs_lfs_mode(sbi); + const int whint_mode = F2FS_OPTION(sbi).whint_mode; + const loff_t pos = iocb->ki_pos; + const ssize_t count = iov_iter_count(from); + const enum rw_hint hint = iocb->ki_hint; + unsigned int dio_flags; + struct iomap_dio *dio; + ssize_t ret; + + trace_f2fs_direct_IO_enter(inode, iocb, count, WRITE); + if (trace_android_fs_datawrite_start_enabled()) { + char *path, pathbuf[MAX_TRACE_PATHBUF_LEN]; + + path = android_fstrace_get_pathname(pathbuf, + MAX_TRACE_PATHBUF_LEN, + inode); + trace_android_fs_datawrite_start(inode, pos, count, + current->pid, path, + current->comm); } if (iocb->ki_flags & IOCB_NOWAIT) { - if (!inode_trylock(inode)) { + /* f2fs_convert_inline_inode() and block allocation can block */ + if (f2fs_has_inline_data(inode) || + !f2fs_overwrite_io(inode, pos, count)) { + ret = -EAGAIN; + goto out; + } + + if (!f2fs_down_read_trylock(&fi->i_gc_rwsem[WRITE])) { + ret = -EAGAIN; + goto out; + } + if (do_opu && !f2fs_down_read_trylock(&fi->i_gc_rwsem[READ])) { + f2fs_up_read(&fi->i_gc_rwsem[WRITE]); ret = -EAGAIN; goto out; } } else { - inode_lock(inode); - } + ret = f2fs_convert_inline_inode(inode); + if (ret) + goto out; - if (unlikely(IS_IMMUTABLE(inode))) { - ret = -EPERM; - goto unlock; + f2fs_down_read(&fi->i_gc_rwsem[WRITE]); + if (do_opu) + f2fs_down_read(&fi->i_gc_rwsem[READ]); } + if (whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = WRITE_LIFE_NOT_SET; - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { - ret = -EPERM; - goto unlock; + /* + * We have to use __iomap_dio_rw() and iomap_dio_complete() instead of + * the higher-level function iomap_dio_rw() in order to ensure that the + * F2FS_DIO_WRITE counter will be decremented correctly in all cases. + */ + inc_page_count(sbi, F2FS_DIO_WRITE); + dio_flags = 0; + if (pos + count > inode->i_size) + dio_flags = 1; /* IOMAP_DIO_FORCE_WAIT */ + dio = __iomap_dio_rw(iocb, from, &f2fs_iomap_ops, + &f2fs_iomap_dio_write_ops, + dio_flags || is_sync_kiocb(iocb)); + if (IS_ERR_OR_NULL(dio)) { + ret = PTR_ERR_OR_ZERO(dio); + if (ret == -ENOTBLK) + ret = 0; + if (ret != -EIOCBQUEUED) + dec_page_count(sbi, F2FS_DIO_WRITE); + } else { + ret = iomap_dio_complete(dio); } - ret = generic_write_checks(iocb, from); - if (ret > 0) { - bool preallocated = false; - size_t target_size = 0; - int err; - - if (iov_iter_fault_in_readable(from, iov_iter_count(from))) - set_inode_flag(inode, FI_NO_PREALLOC); - - if ((iocb->ki_flags & IOCB_NOWAIT)) { - if (!f2fs_overwrite_io(inode, iocb->ki_pos, - iov_iter_count(from)) || - f2fs_has_inline_data(inode) || - f2fs_force_buffered_io(inode, iocb, from)) { - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - ret = -EAGAIN; + if (whint_mode == WHINT_MODE_OFF) + iocb->ki_hint = hint; + if (do_opu) + f2fs_up_read(&fi->i_gc_rwsem[READ]); + f2fs_up_read(&fi->i_gc_rwsem[WRITE]); + + if (ret < 0) + goto out; + if (pos + ret > inode->i_size) + f2fs_i_size_write(inode, pos + ret); + if (!do_opu) + set_inode_flag(inode, FI_UPDATE_WRITE); + + if (iov_iter_count(from)) { + ssize_t ret2; + loff_t bufio_start_pos = iocb->ki_pos; + + /* + * The direct write was partial, so we need to fall back to a + * buffered write for the remainder. + */ + + ret2 = f2fs_buffered_write_iter(iocb, from); + if (iov_iter_count(from)) + f2fs_write_failed(inode, iocb->ki_pos); + if (ret2 < 0) + goto out; + + /* + * Ensure that the pagecache pages are written to disk and + * invalidated to preserve the expected O_DIRECT semantics. + */ + if (ret2 > 0) { + loff_t bufio_end_pos = bufio_start_pos + ret2 - 1; + + ret += ret2; + + ret2 = filemap_write_and_wait_range(file->f_mapping, + bufio_start_pos, + bufio_end_pos); + if (ret2 < 0) goto out; - } - goto write; + invalidate_mapping_pages(file->f_mapping, + bufio_start_pos >> PAGE_SHIFT, + bufio_end_pos >> PAGE_SHIFT); } + } else { + /* iomap_dio_rw() already handled the generic_write_sync(). */ + *may_need_sync = false; + } +out: + if (trace_android_fs_datawrite_start_enabled()) + trace_android_fs_datawrite_end(inode, pos, count); + trace_f2fs_direct_IO_exit(inode, pos, count, WRITE, ret); + return ret; +} - if (is_inode_flag_set(inode, FI_NO_PREALLOC)) - goto write; +static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct inode *inode = file_inode(iocb->ki_filp); + const loff_t orig_pos = iocb->ki_pos; + const size_t orig_count = iov_iter_count(from); + loff_t target_size; + bool dio; + bool may_need_sync = true; + int preallocated; + ssize_t ret; - if (iocb->ki_flags & IOCB_DIRECT) { - /* - * Convert inline data for Direct I/O before entering - * f2fs_direct_IO(). - */ - err = f2fs_convert_inline_inode(inode); - if (err) - goto out_err; - /* - * If force_buffere_io() is true, we have to allocate - * blocks all the time, since f2fs_direct_IO will fall - * back to buffered IO. - */ - if (!f2fs_force_buffered_io(inode, iocb, from) && - f2fs_lfs_mode(F2FS_I_SB(inode))) - goto write; - } - preallocated = true; - target_size = iocb->ki_pos + iov_iter_count(from); + if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) { + ret = -EIO; + goto out; + } - err = f2fs_preallocate_blocks(iocb, from); - if (err) { -out_err: - clear_inode_flag(inode, FI_NO_PREALLOC); - inode_unlock(inode); - ret = err; + if (!f2fs_is_compress_backend_ready(inode)) { + ret = -EOPNOTSUPP; + goto out; + } + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock(inode)) { + ret = -EAGAIN; goto out; } -write: - ret = __generic_file_write_iter(iocb, from); - clear_inode_flag(inode, FI_NO_PREALLOC); - - /* if we couldn't write data, we should deallocate blocks. */ - if (preallocated && i_size_read(inode) < target_size) { - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - down_write(&F2FS_I(inode)->i_mmap_sem); - f2fs_truncate(inode); - up_write(&F2FS_I(inode)->i_mmap_sem); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); - } + } else { + inode_lock(inode); + } + + ret = f2fs_write_checks(iocb, from); + if (ret <= 0) + goto out_unlock; - if (ret > 0) - f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); + /* Determine whether we will do a direct write or a buffered write. */ + dio = f2fs_should_use_dio(inode, iocb, from); + + /* Possibly preallocate the blocks for the write. */ + target_size = iocb->ki_pos + iov_iter_count(from); + preallocated = f2fs_preallocate_blocks(iocb, from, dio); + if (preallocated < 0) + ret = preallocated; + else + /* Do the actual write. */ + ret = dio ? + f2fs_dio_write_iter(iocb, from, &may_need_sync): + f2fs_buffered_write_iter(iocb, from); + + /* Don't leave any preallocated blocks around past i_size. */ + if (preallocated && i_size_read(inode) < target_size) { + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_mmap_sem); + if (!f2fs_truncate(inode)) + file_dont_truncate(inode); + f2fs_up_write(&F2FS_I(inode)->i_mmap_sem); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + } else { + file_dont_truncate(inode); } -unlock: + + clear_inode_flag(inode, FI_PREALLOCATED_ALL); +out_unlock: inode_unlock(inode); out: - trace_f2fs_file_write_iter(inode, iocb->ki_pos, - iov_iter_count(from), ret); - if (ret > 0) + trace_f2fs_file_write_iter(inode, orig_pos, orig_count, ret); + if (ret > 0 && may_need_sync) ret = generic_write_sync(iocb, ret); return ret; } @@ -4487,12 +4826,12 @@ out: static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, int advice) { - struct inode *inode; struct address_space *mapping; struct backing_dev_info *bdi; + struct inode *inode = file_inode(filp); + int err; if (advice == POSIX_FADV_SEQUENTIAL) { - inode = file_inode(filp); if (S_ISFIFO(inode->i_mode)) return -ESPIPE; @@ -4509,7 +4848,13 @@ static int f2fs_file_fadvise(struct file *filp, loff_t offset, loff_t len, return 0; } - return generic_fadvise(filp, offset, len, advice); + err = generic_fadvise(filp, offset, len, advice); + if (!err && advice == POSIX_FADV_DONTNEED && + test_opt(F2FS_I_SB(inode), COMPRESS_CACHE) && + f2fs_compressed_file(inode)) + f2fs_invalidate_compress_pages(F2FS_I_SB(inode), inode->i_ino); + + return err; } #ifdef CONFIG_COMPAT diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 77391e3b7d68f86fc2be4ec0b4a191b4b48e0fc1..79dc38eacb1938435afb7f118d99789649b2a929 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" @@ -91,22 +92,37 @@ static int gc_thread_func(void *data) * So, I'd like to wait some time to collect dirty segments. */ if (sbi->gc_mode == GC_URGENT_HIGH) { + spin_lock(&sbi->gc_urgent_high_lock); + if (sbi->gc_urgent_high_limited) { + if (!sbi->gc_urgent_high_remaining) { + sbi->gc_urgent_high_limited = false; + spin_unlock(&sbi->gc_urgent_high_lock); + sbi->gc_mode = GC_NORMAL; + continue; + } + sbi->gc_urgent_high_remaining--; + } + spin_unlock(&sbi->gc_urgent_high_lock); + } + + if (sbi->gc_mode == GC_URGENT_HIGH || + sbi->gc_mode == GC_URGENT_MID) { wait_ms = gc_th->urgent_sleep_time; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); goto do_gc; } if (foreground) { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); goto do_gc; - } else if (!down_write_trylock(&sbi->gc_lock)) { + } else if (!f2fs_down_write_trylock(&sbi->gc_lock)) { stat_other_skip_bggc_count(sbi); goto next; } if (!is_idle(sbi, GC_TIME)) { increase_sleep_time(gc_th, &wait_ms); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); stat_io_skip_bggc_count(sbi); goto next; } @@ -257,7 +273,9 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, p->max_search = sbi->max_victim_search; /* let's select beginning hot/small space first in no_heap mode*/ - if (test_opt(sbi, NOHEAP) && + if (f2fs_need_rand_seg(sbi)) + p->offset = prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec); + else if (test_opt(sbi, NOHEAP) && (type == CURSEG_HOT_DATA || IS_NODESEG(type))) p->offset = 0; else @@ -944,7 +962,7 @@ next_step: continue; } - if (f2fs_get_node_info(sbi, nid, &ni)) { + if (f2fs_get_node_info(sbi, nid, &ni, false)) { f2fs_put_page(node_page, 1); continue; } @@ -1012,7 +1030,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (IS_ERR(node_page)) return false; - if (f2fs_get_node_info(sbi, nid, dni)) { + if (f2fs_get_node_info(sbi, nid, dni, false)) { f2fs_put_page(node_page, 1); return false; } @@ -1023,6 +1041,11 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, set_sbi_flag(sbi, SBI_NEED_FSCK); } + if (f2fs_check_nid_range(sbi, dni->ino)) { + f2fs_put_page(node_page, 1); + return false; + } + *nofs = ofs_of_node(node_page); source_blkaddr = data_blkaddr(NULL, node_page, ofs_in_node); f2fs_put_page(node_page, 1); @@ -1036,7 +1059,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, if (!test_and_set_bit(segno, SIT_I(sbi)->invalid_segmap)) { f2fs_err(sbi, "mismatched blkaddr %u (source_blkaddr %u) in seg %u", blkaddr, source_blkaddr, segno); - f2fs_bug_on(sbi, 1); + set_sbi_flag(sbi, SBI_NEED_FSCK); } } #endif @@ -1203,7 +1226,7 @@ static int move_data_block(struct inode *inode, block_t bidx, f2fs_wait_on_block_writeback(inode, dn.data_blkaddr); - err = f2fs_get_node_info(fio.sbi, dn.nid, &ni); + err = f2fs_get_node_info(fio.sbi, dn.nid, &ni, false); if (err) goto put_out; @@ -1212,7 +1235,7 @@ static int move_data_block(struct inode *inode, block_t bidx, fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr; if (lfs_mode) - down_write(&fio.sbi->io_order_lock); + f2fs_down_write(&fio.sbi->io_order_lock); mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi), fio.old_blkaddr, false); @@ -1298,7 +1321,7 @@ recover_block: true, true, true); up_out: if (lfs_mode) - up_write(&fio.sbi->io_order_lock); + f2fs_up_write(&fio.sbi->io_order_lock); put_out: f2fs_put_dnode(&dn); out: @@ -1454,10 +1477,11 @@ next_step: if (phase == 3) { inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode) || is_bad_inode(inode)) + if (IS_ERR(inode) || is_bad_inode(inode) || + special_file(inode->i_mode)) continue; - if (!down_write_trylock( + if (!f2fs_down_write_trylock( &F2FS_I(inode)->i_gc_rwsem[WRITE])) { iput(inode); sbi->skipped_gc_rwsem++; @@ -1470,7 +1494,7 @@ next_step: if (f2fs_post_read_required(inode)) { int err = ra_data_block(inode, start_bidx); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (err) { iput(inode); continue; @@ -1481,7 +1505,7 @@ next_step: data_page = f2fs_get_read_data_page(inode, start_bidx, REQ_RAHEAD, true); - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); if (IS_ERR(data_page)) { iput(inode); continue; @@ -1500,14 +1524,14 @@ next_step: int err; if (S_ISREG(inode->i_mode)) { - if (!down_write_trylock(&fi->i_gc_rwsem[READ])) { + if (!f2fs_down_write_trylock(&fi->i_gc_rwsem[READ])) { sbi->skipped_gc_rwsem++; continue; } - if (!down_write_trylock( + if (!f2fs_down_write_trylock( &fi->i_gc_rwsem[WRITE])) { sbi->skipped_gc_rwsem++; - up_write(&fi->i_gc_rwsem[READ]); + f2fs_up_write(&fi->i_gc_rwsem[READ]); continue; } locked = true; @@ -1530,8 +1554,8 @@ next_step: submitted++; if (locked) { - up_write(&fi->i_gc_rwsem[WRITE]); - up_write(&fi->i_gc_rwsem[READ]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[READ]); } stat_inc_data_blk_count(sbi, 1, gc_type); @@ -1789,7 +1813,7 @@ stop: reserved_segments(sbi), prefree_segments(sbi)); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); put_gc_inode(&gc_list); @@ -1918,7 +1942,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) long long block_count; int segs = secs * sbi->segs_per_sec; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); section_count = le32_to_cpu(raw_sb->section_count); segment_count = le32_to_cpu(raw_sb->segment_count); @@ -1939,7 +1963,7 @@ static void update_sb_metadata(struct f2fs_sb_info *sbi, int secs) cpu_to_le32(dev_segs + segs); } - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); } static void update_fs_metadata(struct f2fs_sb_info *sbi, int secs) @@ -2013,7 +2037,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) secs = div_u64(shrunk_blocks, BLKS_PER_SEC(sbi)); /* stop other GC */ - if (!down_write_trylock(&sbi->gc_lock)) + if (!f2fs_down_write_trylock(&sbi->gc_lock)) return -EAGAIN; /* stop CP to protect MAIN_SEC in free_segment_range */ @@ -2033,15 +2057,15 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count) out_unlock: f2fs_unlock_op(sbi); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); if (err) return err; set_sbi_flag(sbi, SBI_IS_RESIZEFS); freeze_super(sbi->sb); - down_write(&sbi->gc_lock); - down_write(&sbi->cp_global_sem); + f2fs_down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->cp_global_sem); spin_lock(&sbi->stat_lock); if (shrunk_blocks + valid_user_blocks(sbi) + @@ -2086,8 +2110,8 @@ recover_out: spin_unlock(&sbi->stat_lock); } out_err: - up_write(&sbi->cp_global_sem); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->gc_lock); thaw_super(sbi->sb); clear_sbi_flag(sbi, SBI_IS_RESIZEFS); return err; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 2311c76ffc37e3723fb8fdaeeb68bdfebb69143c..a0386ba9d98de7401c2940b94bc80836deb9db7e 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -149,7 +149,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) if (err) return err; - err = f2fs_get_node_info(fio.sbi, dn->nid, &ni); + err = f2fs_get_node_info(fio.sbi, dn->nid, &ni, false); if (err) { f2fs_truncate_data_blocks_range(dn, 1); f2fs_put_dnode(dn); @@ -210,7 +210,7 @@ int f2fs_convert_inline_inode(struct inode *inode) f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) return 0; - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) return err; @@ -647,7 +647,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, } if (inode) { - down_write(&F2FS_I(inode)->i_sem); + f2fs_down_write(&F2FS_I(inode)->i_sem); page = f2fs_init_inode_metadata(inode, dir, fname, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -676,7 +676,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct f2fs_filename *fname, f2fs_update_parent_metadata(dir, inode, 0); fail: if (inode) - up_write(&F2FS_I(inode)->i_sem); + f2fs_up_write(&F2FS_I(inode)->i_sem); out: f2fs_put_page(ipage, 1); return err; @@ -804,7 +804,7 @@ int f2fs_inline_data_fiemap(struct inode *inode, ilen = start + len; ilen -= start; - err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni); + err = f2fs_get_node_info(F2FS_I_SB(inode), inode->i_ino, &ni, false); if (err) goto out; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 1213f15ffd68c24c07a34862cb9f4ce5d084f56b..5fffc22689645c09d5f2d53fe4180b60aecc2d58 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -516,6 +516,11 @@ make_now: } else if (ino == F2FS_COMPRESS_INO(sbi)) { #ifdef CONFIG_F2FS_FS_COMPRESSION inode->i_mapping->a_ops = &f2fs_compress_aops; + /* + * generic_error_remove_page only truncates pages of regular + * inode + */ + inode->i_mode |= S_IFREG; #endif mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS | __GFP_HIGHMEM | __GFP_MOVABLE); @@ -544,6 +549,15 @@ make_now: goto bad_inode; } f2fs_set_inode_flags(inode); + + if (file_should_truncate(inode) && + !is_sbi_flag_set(sbi, SBI_POR_DOING)) { + ret = f2fs_truncate(inode); + if (ret) + goto bad_inode; + file_dont_truncate(inode); + } + unlock_new_inode(inode); trace_f2fs_iget(inode); return inode; @@ -738,7 +752,8 @@ void f2fs_evict_inode(struct inode *inode) trace_f2fs_evict_inode(inode); truncate_inode_pages_final(&inode->i_data); - if (test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode)) + if ((inode->i_nlink || is_bad_inode(inode)) && + test_opt(sbi, COMPRESS_CACHE) && f2fs_compressed_file(inode)) f2fs_invalidate_compress_pages(sbi, inode->i_ino); if (inode->i_ino == F2FS_NODE_INO(sbi) || @@ -754,7 +769,7 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) { err = 0; set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); @@ -764,7 +779,8 @@ void f2fs_evict_inode(struct inode *inode) f2fs_remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); f2fs_remove_ino_entry(sbi, inode->i_ino, FLUSH_INO); - sb_start_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_start_intwrite(inode->i_sb); set_inode_flag(inode, FI_NO_ALLOC); i_size_write(inode, 0); retry: @@ -795,7 +811,8 @@ retry: if (dquot_initialize_needed(inode)) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); } - sb_end_intwrite(inode->i_sb); + if (!is_sbi_flag_set(sbi, SBI_IS_FREEZING)) + sb_end_intwrite(inode->i_sb); no_delete: dquot_drop(inode); @@ -868,9 +885,10 @@ void f2fs_handle_failed_inode(struct inode *inode) * so we can prevent losing this orphan when encoutering checkpoint * and following suddenly power-off. */ - err = f2fs_get_node_info(sbi, inode->i_ino, &ni); + err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false); if (err) { set_sbi_flag(sbi, SBI_NEED_FSCK); + set_inode_flag(inode, FI_FREE_NID); f2fs_warn(sbi, "May loss orphan inode, run fsck to fix."); goto out; } diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c index cdcf54ae0db8f1e1180592eceb3d1d08cb354c35..be599f31d3c483533396d5d95ff33a2c51f1532e 100644 --- a/fs/f2fs/iostat.c +++ b/fs/f2fs/iostat.c @@ -92,7 +92,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi) struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE]; struct iostat_lat_info *io_lat = sbi->iostat_io_lat; - spin_lock_irq(&sbi->iostat_lat_lock); + spin_lock_bh(&sbi->iostat_lat_lock); for (idx = 0; idx < MAX_IO_TYPE; idx++) { for (io = 0; io < NR_PAGE_TYPE; io++) { cnt = io_lat->bio_cnt[idx][io]; @@ -106,7 +106,7 @@ static inline void __record_iostat_latency(struct f2fs_sb_info *sbi) io_lat->bio_cnt[idx][io] = 0; } } - spin_unlock_irq(&sbi->iostat_lat_lock); + spin_unlock_bh(&sbi->iostat_lat_lock); trace_f2fs_iostat_latency(sbi, iostat_lat); } @@ -120,9 +120,9 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi) return; /* Need double check under the lock */ - spin_lock(&sbi->iostat_lock); + spin_lock_bh(&sbi->iostat_lock); if (time_is_after_jiffies(sbi->iostat_next_period)) { - spin_unlock(&sbi->iostat_lock); + spin_unlock_bh(&sbi->iostat_lock); return; } sbi->iostat_next_period = jiffies + @@ -133,7 +133,7 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi) sbi->prev_rw_iostat[i]; sbi->prev_rw_iostat[i] = sbi->rw_iostat[i]; } - spin_unlock(&sbi->iostat_lock); + spin_unlock_bh(&sbi->iostat_lock); trace_f2fs_iostat(sbi, iostat_diff); @@ -145,16 +145,16 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi) struct iostat_lat_info *io_lat = sbi->iostat_io_lat; int i; - spin_lock(&sbi->iostat_lock); + spin_lock_bh(&sbi->iostat_lock); for (i = 0; i < NR_IO_TYPE; i++) { sbi->rw_iostat[i] = 0; sbi->prev_rw_iostat[i] = 0; } - spin_unlock(&sbi->iostat_lock); + spin_unlock_bh(&sbi->iostat_lock); - spin_lock_irq(&sbi->iostat_lat_lock); + spin_lock_bh(&sbi->iostat_lat_lock); memset(io_lat, 0, sizeof(struct iostat_lat_info)); - spin_unlock_irq(&sbi->iostat_lat_lock); + spin_unlock_bh(&sbi->iostat_lat_lock); } void f2fs_update_iostat(struct f2fs_sb_info *sbi, @@ -163,19 +163,16 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi, if (!sbi->iostat_enable) return; - spin_lock(&sbi->iostat_lock); + spin_lock_bh(&sbi->iostat_lock); sbi->rw_iostat[type] += io_bytes; - if (type == APP_WRITE_IO || type == APP_DIRECT_IO) - sbi->rw_iostat[APP_BUFFERED_IO] = - sbi->rw_iostat[APP_WRITE_IO] - - sbi->rw_iostat[APP_DIRECT_IO]; + if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO) + sbi->rw_iostat[APP_WRITE_IO] += io_bytes; - if (type == APP_READ_IO || type == APP_DIRECT_READ_IO) - sbi->rw_iostat[APP_BUFFERED_READ_IO] = - sbi->rw_iostat[APP_READ_IO] - - sbi->rw_iostat[APP_DIRECT_READ_IO]; - spin_unlock(&sbi->iostat_lock); + if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO) + sbi->rw_iostat[APP_READ_IO] += io_bytes; + + spin_unlock_bh(&sbi->iostat_lock); f2fs_record_iostat(sbi); } @@ -185,7 +182,6 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx, { unsigned long ts_diff; unsigned int iotype = iostat_ctx->type; - unsigned long flags; struct f2fs_sb_info *sbi = iostat_ctx->sbi; struct iostat_lat_info *io_lat = sbi->iostat_io_lat; int idx; @@ -206,12 +202,12 @@ static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx, idx = WRITE_ASYNC_IO; } - spin_lock_irqsave(&sbi->iostat_lat_lock, flags); + spin_lock_bh(&sbi->iostat_lat_lock); io_lat->sum_lat[idx][iotype] += ts_diff; io_lat->bio_cnt[idx][iotype]++; if (ts_diff > io_lat->peak_lat[idx][iotype]) io_lat->peak_lat[idx][iotype] = ts_diff; - spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags); + spin_unlock_bh(&sbi->iostat_lat_lock); } void iostat_update_and_unbind_ctx(struct bio *bio, int rw) diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 848c11960af721c6a7a033d5e8441f95d684fb17..0de98abd72829053c6daa651e0311fc5c790afca 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -74,7 +74,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) if (err) goto fail_drop; - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) goto fail_drop; @@ -196,7 +196,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * __u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list; int i, cold_count, hot_count; - down_read(&sbi->sb_lock); + f2fs_down_read(&sbi->sb_lock); cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; @@ -206,7 +206,7 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode * break; } - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); if (i == cold_count + hot_count) return; @@ -299,19 +299,19 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode, (!ext_cnt && !noext_cnt)) return; - down_read(&sbi->sb_lock); + f2fs_down_read(&sbi->sb_lock); cold_count = le32_to_cpu(sbi->raw_super->extension_count); hot_count = sbi->raw_super->hot_ext_count; for (i = cold_count; i < cold_count + hot_count; i++) { if (is_extension_exist(name, extlist[i], false)) { - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); return; } } - up_read(&sbi->sb_lock); + f2fs_up_read(&sbi->sb_lock); for (i = 0; i < noext_cnt; i++) { if (is_extension_exist(name, noext[i], false)) { @@ -345,7 +345,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - err = dquot_initialize(dir); + err = f2fs_dquot_initialize(dir); if (err) return err; @@ -404,7 +404,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, F2FS_I(old_dentry->d_inode)->i_projid))) return -EXDEV; - err = dquot_initialize(dir); + err = f2fs_dquot_initialize(dir); if (err) return err; @@ -461,7 +461,7 @@ static int __recover_dot_dentries(struct inode *dir, nid_t pino) return 0; } - err = dquot_initialize(dir); + err = f2fs_dquot_initialize(dir); if (err) return err; @@ -599,10 +599,10 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) goto fail; } - err = dquot_initialize(dir); + err = f2fs_dquot_initialize(dir); if (err) goto fail; - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) goto fail; @@ -676,7 +676,7 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, if (err) return err; - err = dquot_initialize(dir); + err = f2fs_dquot_initialize(dir); if (err) return err; @@ -746,7 +746,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (unlikely(f2fs_cp_error(sbi))) return -EIO; - err = dquot_initialize(dir); + err = f2fs_dquot_initialize(dir); if (err) return err; @@ -803,7 +803,7 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - err = dquot_initialize(dir); + err = f2fs_dquot_initialize(dir); if (err) return err; @@ -841,7 +841,7 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, struct inode *inode; int err; - err = dquot_initialize(dir); + err = f2fs_dquot_initialize(dir); if (err) return err; @@ -964,16 +964,16 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, return err; } - err = dquot_initialize(old_dir); + err = f2fs_dquot_initialize(old_dir); if (err) goto out; - err = dquot_initialize(new_dir); + err = f2fs_dquot_initialize(new_dir); if (err) goto out; if (new_inode) { - err = dquot_initialize(new_inode); + err = f2fs_dquot_initialize(new_inode); if (err) goto out; } @@ -1022,11 +1022,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, new_page = NULL; new_inode->i_ctime = current_time(new_inode); - down_write(&F2FS_I(new_inode)->i_sem); + f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (old_dir_entry) f2fs_i_links_write(new_inode, false); f2fs_i_links_write(new_inode, false); - up_write(&F2FS_I(new_inode)->i_sem); + f2fs_up_write(&F2FS_I(new_inode)->i_sem); if (!new_inode->i_nlink) f2fs_add_orphan_inode(new_inode); @@ -1047,13 +1047,13 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_i_links_write(new_dir, true); } - down_write(&F2FS_I(old_inode)->i_sem); + f2fs_down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry || whiteout) file_lost_pino(old_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(old_inode, new_dir->i_ino); - up_write(&F2FS_I(old_inode)->i_sem); + f2fs_up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); f2fs_mark_inode_dirty_sync(old_inode, false); @@ -1106,8 +1106,7 @@ out_dir: out_old: f2fs_put_page(old_page, 0); out: - if (whiteout) - iput(whiteout); + iput(whiteout); return err; } @@ -1137,11 +1136,11 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, F2FS_I(new_dentry->d_inode)->i_projid))) return -EXDEV; - err = dquot_initialize(old_dir); + err = f2fs_dquot_initialize(old_dir); if (err) goto out; - err = dquot_initialize(new_dir); + err = f2fs_dquot_initialize(new_dir); if (err) goto out; @@ -1213,38 +1212,38 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, /* update directory entry info of old dir inode */ f2fs_set_link(old_dir, old_entry, old_page, new_inode); - down_write(&F2FS_I(old_inode)->i_sem); + f2fs_down_write(&F2FS_I(old_inode)->i_sem); if (!old_dir_entry) file_lost_pino(old_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(old_inode, new_dir->i_ino); - up_write(&F2FS_I(old_inode)->i_sem); + f2fs_up_write(&F2FS_I(old_inode)->i_sem); old_dir->i_ctime = current_time(old_dir); if (old_nlink) { - down_write(&F2FS_I(old_dir)->i_sem); + f2fs_down_write(&F2FS_I(old_dir)->i_sem); f2fs_i_links_write(old_dir, old_nlink > 0); - up_write(&F2FS_I(old_dir)->i_sem); + f2fs_up_write(&F2FS_I(old_dir)->i_sem); } f2fs_mark_inode_dirty_sync(old_dir, false); /* update directory entry info of new dir inode */ f2fs_set_link(new_dir, new_entry, new_page, old_inode); - down_write(&F2FS_I(new_inode)->i_sem); + f2fs_down_write(&F2FS_I(new_inode)->i_sem); if (!new_dir_entry) file_lost_pino(new_inode); else /* adjust dir's i_pino to pass fsck check */ f2fs_i_pino_write(new_inode, old_dir->i_ino); - up_write(&F2FS_I(new_inode)->i_sem); + f2fs_up_write(&F2FS_I(new_inode)->i_sem); new_dir->i_ctime = current_time(new_dir); if (new_nlink) { - down_write(&F2FS_I(new_dir)->i_sem); + f2fs_down_write(&F2FS_I(new_dir)->i_sem); f2fs_i_links_write(new_dir, new_nlink > 0); - up_write(&F2FS_I(new_dir)->i_sem); + f2fs_up_write(&F2FS_I(new_dir)->i_sem); } f2fs_mark_inode_dirty_sync(new_dir, false); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 897b8377ea8bfaaadfcefb741f0645bf40b02326..ba0766a56dfaabd3ec5434dfb3597bb53e3ef39c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -382,14 +382,14 @@ int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool need = false; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) need = true; } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return need; } @@ -399,11 +399,11 @@ bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -413,13 +413,13 @@ bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return need_update; } @@ -430,11 +430,15 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *new, *e; + /* Let's mitigate lock contention of nat_tree_lock during checkpoint */ + if (f2fs_rwsem_is_locked(&sbi->cp_global_sem)) + return; + new = __alloc_nat_entry(sbi, nid, false); if (!new) return; - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) e = __init_nat_entry(nm_i, new, ne, false); @@ -443,7 +447,7 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, nat_get_blkaddr(e) != le32_to_cpu(ne->block_addr) || nat_get_version(e) != ne->version); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); if (e != new) __free_nat_entry(new); } @@ -455,7 +459,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct nat_entry *e; struct nat_entry *new = __alloc_nat_entry(sbi, ni->nid, true); - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = __init_nat_entry(nm_i, new, NULL, true); @@ -504,7 +508,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); } int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -512,7 +516,7 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; - if (!down_write_trylock(&nm_i->nat_tree_lock)) + if (!f2fs_down_write_trylock(&nm_i->nat_tree_lock)) return 0; spin_lock(&nm_i->nat_list_lock); @@ -534,12 +538,12 @@ int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) } spin_unlock(&nm_i->nat_list_lock); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, - struct node_info *ni) + struct node_info *ni, bool checkpoint_context) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -556,13 +560,13 @@ int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid, ni->nid = nid; retry: /* Check nat cache */ - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return 0; } @@ -572,10 +576,11 @@ retry: * nat_tree_lock. Therefore, we should retry, if we failed to grab here * while not bothering checkpoint. */ - if (!rwsem_is_locked(&sbi->cp_global_sem)) { + if (!f2fs_rwsem_is_locked(&sbi->cp_global_sem) || checkpoint_context) { down_read(&curseg->journal_rwsem); - } else if (!down_read_trylock(&curseg->journal_rwsem)) { - up_read(&nm_i->nat_tree_lock); + } else if (f2fs_rwsem_is_contended(&nm_i->nat_tree_lock) || + !down_read_trylock(&curseg->journal_rwsem)) { + f2fs_up_read(&nm_i->nat_tree_lock); goto retry; } @@ -584,15 +589,15 @@ retry: ne = nat_in_journal(journal, i); node_info_from_raw_nat(ni, &ne); } - up_read(&curseg->journal_rwsem); + up_read(&curseg->journal_rwsem); if (i >= 0) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); goto cache; } /* Fill node_info from nat page */ index = current_nat_addr(sbi, nid); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); page = f2fs_get_meta_page(sbi, index); if (IS_ERR(page)) @@ -887,7 +892,7 @@ static int truncate_node(struct dnode_of_data *dn) int err; pgoff_t index; - err = f2fs_get_node_info(sbi, dn->nid, &ni); + err = f2fs_get_node_info(sbi, dn->nid, &ni, false); if (err) return err; @@ -1286,7 +1291,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) goto fail; #ifdef CONFIG_F2FS_CHECK_FS - err = f2fs_get_node_info(sbi, dn->nid, &new_ni); + err = f2fs_get_node_info(sbi, dn->nid, &new_ni, false); if (err) { dec_valid_node_count(sbi, dn->inode, !ofs); goto fail; @@ -1348,7 +1353,7 @@ static int read_node_page(struct page *page, int op_flags) return LOCKED_PAGE; } - err = f2fs_get_node_info(sbi, page->index, &ni); + err = f2fs_get_node_info(sbi, page->index, &ni, false); if (err) return err; @@ -1443,6 +1448,7 @@ page_hit: nid, nid_of_node(page), ino_of_node(page), ofs_of_node(page), cpver_of_node(page), next_blkaddr_of_node(page)); + set_sbi_flag(sbi, SBI_NEED_FSCK); err = -EINVAL; out_err: ClearPageUptodate(page); @@ -1599,21 +1605,21 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, nid = nid_of_node(page); f2fs_bug_on(sbi, page->index != nid); - if (f2fs_get_node_info(sbi, nid, &ni)) + if (f2fs_get_node_info(sbi, nid, &ni, !do_balance)) goto redirty_out; if (wbc->for_reclaim) { - if (!down_read_trylock(&sbi->node_write)) + if (!f2fs_down_read_trylock(&sbi->node_write)) goto redirty_out; } else { - down_read(&sbi->node_write); + f2fs_down_read(&sbi->node_write); } /* This page is already truncated */ if (unlikely(ni.blk_addr == NULL_ADDR)) { ClearPageUptodate(page); dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); unlock_page(page); return 0; } @@ -1621,7 +1627,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, if (__is_valid_data_blkaddr(ni.blk_addr) && !f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC_ENHANCE)) { - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); goto redirty_out; } @@ -1642,7 +1648,7 @@ static int __write_node_page(struct page *page, bool atomic, bool *submitted, f2fs_do_write_node_page(nid, &fio); set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); - up_read(&sbi->node_write); + f2fs_up_read(&sbi->node_write); if (wbc->for_reclaim) { f2fs_submit_merged_write_cond(sbi, NULL, page, 0, NODE); @@ -1776,6 +1782,7 @@ continue_unlock: if (!atomic || page == last_page) { set_fsync_mark(page, 1); + percpu_counter_inc(&sbi->rf_node_block_count); if (IS_INODE(page)) { if (is_inode_flag_set(inode, FI_DIRTY_INODE)) @@ -2105,8 +2112,12 @@ static int f2fs_write_node_pages(struct address_space *mapping, if (wbc->sync_mode == WB_SYNC_ALL) atomic_inc(&sbi->wb_sync_req[NODE]); - else if (atomic_read(&sbi->wb_sync_req[NODE])) + else if (atomic_read(&sbi->wb_sync_req[NODE])) { + /* to avoid potential deadlock */ + if (current->plug) + blk_finish_plug(current->plug); goto skip_write; + } trace_f2fs_writepages(mapping->host, wbc, NODE); @@ -2219,14 +2230,14 @@ bool f2fs_nat_bitmap_enabled(struct f2fs_sb_info *sbi) unsigned int i; bool ret = true; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) { ret = false; break; } } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); return ret; } @@ -2409,7 +2420,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) unsigned int i, idx; nid_t nid; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (i = 0; i < nm_i->nat_blocks; i++) { if (!test_bit_le(i, nm_i->nat_block_bitmap)) @@ -2432,7 +2443,7 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi) out: scan_curseg_cache(sbi); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); } static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, @@ -2467,7 +2478,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); while (1) { if (!test_bit_le(NAT_BLOCK_OFFSET(nid), @@ -2482,7 +2493,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, } if (ret) { - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); f2fs_err(sbi, "NAT is corrupt, run fsck to fix it"); return ret; } @@ -2502,7 +2513,7 @@ static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi, /* find free nids from current sum_pages */ scan_curseg_cache(sbi); - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -2700,7 +2711,7 @@ int f2fs_recover_xattr_data(struct inode *inode, struct page *page) goto recover_xnid; /* 1: invalidate the previous xattr nid */ - err = f2fs_get_node_info(sbi, prev_xnid, &ni); + err = f2fs_get_node_info(sbi, prev_xnid, &ni, false); if (err) return err; @@ -2740,7 +2751,7 @@ int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) struct page *ipage; int err; - err = f2fs_get_node_info(sbi, ino, &old_ni); + err = f2fs_get_node_info(sbi, ino, &old_ni, false); if (err) return err; @@ -2817,7 +2828,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi, sum_entry = &sum->entries[0]; for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { - nrpages = min(last_offset - i, BIO_MAX_PAGES); + nrpages = bio_max_segs(last_offset - i); /* readahead node pages */ f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true); @@ -2947,7 +2958,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs; - down_read(&nm_i->nat_tree_lock); + f2fs_down_read(&nm_i->nat_tree_lock); for (nat_ofs = 0; nat_ofs < nm_i->nat_blocks; nat_ofs++) { unsigned int valid = 0, nid_ofs = 0; @@ -2967,7 +2978,7 @@ void f2fs_enable_nat_bits(struct f2fs_sb_info *sbi) __update_nat_bits(nm_i, nat_ofs, valid); } - up_read(&nm_i->nat_tree_lock); + f2fs_up_read(&nm_i->nat_tree_lock); } static int __flush_nat_entry_set(struct f2fs_sb_info *sbi, @@ -3065,15 +3076,15 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * nat_cnt[DIRTY_NAT]. */ if (cpc->reason & CP_UMOUNT) { - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); remove_nats_in_journal(sbi); - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); } if (!nm_i->nat_cnt[DIRTY_NAT]) return 0; - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); /* * if there are no enough space in journal to store dirty nat @@ -3102,7 +3113,7 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) break; } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); /* Allow dirty nats by node block allocation in write_begin */ return err; @@ -3212,6 +3223,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; + nm_i->max_rf_node_blocks = DEF_RF_NODE_BLOCKS; INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); @@ -3222,7 +3234,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->nid_list_lock); - init_rwsem(&nm_i->nat_tree_lock); + init_f2fs_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -3328,7 +3340,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->nid_list_lock); /* destroy nat cache */ - down_write(&nm_i->nat_tree_lock); + f2fs_down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -3358,7 +3370,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi) kmem_cache_free(nat_entry_set_slab, setvec[idx]); } } - up_write(&nm_i->nat_tree_lock); + f2fs_up_write(&nm_i->nat_tree_lock); kvfree(nm_i->nat_block_bitmap); if (nm_i->free_nid_bitmap) { diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index ff14a6e5ac1c9ed51dfc95f4c76194ef4019e052..4c1d34bfea7811e3bbb20dd5fc57d34d5802fcec 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -31,6 +31,9 @@ /* control total # of nats */ #define DEF_NAT_CACHE_THRESHOLD 100000 +/* control total # of node writes used for roll-fowrad recovery */ +#define DEF_RF_NODE_BLOCKS 0 + /* vector size for gang look-up from nat cache that consists of radix tree */ #define NATVEC_SIZE 64 #define SETVEC_SIZE 32 @@ -138,11 +141,6 @@ static inline bool excess_cached_nats(struct f2fs_sb_info *sbi) return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD; } -static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi) -{ - return get_pages(sbi, F2FS_DIRTY_NODES) >= sbi->blocks_per_seg * 8; -} - enum mem_type { FREE_NIDS, /* indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 04655511d7f514593d388ef88bd6ac4be17b4988..329ae992ee0eb8b3a08ee071358e3b007ac356f5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -55,6 +55,10 @@ bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi) if (sbi->last_valid_block_count + nalloc > sbi->user_block_count) return false; + if (NM_I(sbi)->max_rf_node_blocks && + percpu_counter_sum_positive(&sbi->rf_node_block_count) >= + NM_I(sbi)->max_rf_node_blocks) + return false; return true; } @@ -81,7 +85,7 @@ static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, if (IS_ERR(inode)) return ERR_CAST(inode); - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) goto err_out; @@ -203,7 +207,7 @@ retry: goto out_put; } - err = dquot_initialize(einode); + err = f2fs_dquot_initialize(einode); if (err) { iput(einode); goto out_put; @@ -342,6 +346,19 @@ static int recover_inode(struct inode *inode, struct page *page) return 0; } +static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi, + unsigned int ra_blocks, unsigned int blkaddr, + unsigned int next_blkaddr) +{ + if (blkaddr + 1 == next_blkaddr) + ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS, + ra_blocks * 2); + else if (next_blkaddr % sbi->blocks_per_seg) + ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS, + ra_blocks / 2); + return ra_blocks; +} + static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, bool check_only) { @@ -349,6 +366,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, struct page *page = NULL; block_t blkaddr; unsigned int loop_cnt = 0; + unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg - valid_user_blocks(sbi); int err = 0; @@ -423,11 +441,14 @@ next: break; } + ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, + next_blkaddr_of_node(page)); + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); - f2fs_ra_meta_pages_cond(sbi, blkaddr); + f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); } return err; } @@ -508,7 +529,7 @@ got_it: if (IS_ERR(inode)) return PTR_ERR(inode); - ret = dquot_initialize(inode); + ret = f2fs_dquot_initialize(inode); if (ret) { iput(inode); return ret; @@ -595,7 +616,7 @@ retry_dn: f2fs_wait_on_page_writeback(dn.node_page, NODE, true, true); - err = f2fs_get_node_info(sbi, dn.nid, &ni); + err = f2fs_get_node_info(sbi, dn.nid, &ni, false); if (err) goto err; @@ -704,6 +725,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, struct page *page = NULL; int err = 0; block_t blkaddr; + unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS; /* get node pages in the current segment */ curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); @@ -715,8 +737,6 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR)) break; - f2fs_ra_meta_pages_cond(sbi, blkaddr); - page = f2fs_get_tmp_page(sbi, blkaddr); if (IS_ERR(page)) { err = PTR_ERR(page); @@ -759,9 +779,14 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, if (entry->blkaddr == blkaddr) list_move_tail(&entry->list, tmp_inode_list); next: + ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr, + next_blkaddr_of_node(page)); + /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); + + f2fs_ra_meta_pages_cond(sbi, blkaddr, ra_blocks); } if (!err) f2fs_allocate_new_segments(sbi); @@ -787,8 +812,6 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) } #ifdef CONFIG_QUOTA - /* Needed for iput() to work correctly and not trash data */ - sbi->sb->s_flags |= SB_ACTIVE; /* Turn on quotas so that they are updated correctly */ quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY); #endif @@ -798,7 +821,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) INIT_LIST_HEAD(&dir_list); /* prevent checkpoint */ - down_write(&sbi->cp_global_sem); + f2fs_down_write(&sbi->cp_global_sem); /* step #1: find fsynced inode numbers */ err = find_fsync_dnodes(sbi, &inode_list, check_only); @@ -816,10 +839,8 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list); if (!err) f2fs_bug_on(sbi, !list_empty(&inode_list)); - else { - /* restore s_flags to let iput() trash data */ - sbi->sb->s_flags = s_flags; - } + else + f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE); skip: fix_curseg_write_pointer = !check_only || list_empty(&inode_list); @@ -849,7 +870,7 @@ skip: if (!err) clear_sbi_flag(sbi, SBI_POR_DOING); - up_write(&sbi->cp_global_sem); + f2fs_up_write(&sbi->cp_global_sem); /* let's drop all the directory inodes for clean checkpoint */ destroy_fsync_dnodes(&dir_list, err); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3f6a58a46e8bd59713a97de31c1e16fe84338a04..e5e4956ead314b4a8e7b06e7523794393b1d6e31 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "f2fs.h" #include "segment.h" @@ -253,7 +254,7 @@ retry: goto next; } - err = f2fs_get_node_info(sbi, dn.nid, &ni); + err = f2fs_get_node_info(sbi, dn.nid, &ni, false); if (err) { f2fs_put_dnode(&dn); return err; @@ -473,7 +474,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) f2fs_balance_fs(sbi, true); - down_write(&fi->i_gc_rwsem[WRITE]); + f2fs_down_write(&fi->i_gc_rwsem[WRITE]); f2fs_lock_op(sbi); set_inode_flag(inode, FI_ATOMIC_COMMIT); @@ -485,7 +486,7 @@ int f2fs_commit_inmem_pages(struct inode *inode) clear_inode_flag(inode, FI_ATOMIC_COMMIT); f2fs_unlock_op(sbi); - up_write(&fi->i_gc_rwsem[WRITE]); + f2fs_up_write(&fi->i_gc_rwsem[WRITE]); return err; } @@ -523,12 +524,31 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) io_schedule(); finish_wait(&sbi->gc_thread->fggc_wq, &wait); } else { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); f2fs_gc(sbi, false, false, false, NULL_SEGNO); } } } +static inline bool excess_dirty_threshold(struct f2fs_sb_info *sbi) +{ + int factor = f2fs_rwsem_is_locked(&sbi->cp_rwsem) ? 3 : 2; + unsigned int dents = get_pages(sbi, F2FS_DIRTY_DENTS); + unsigned int qdata = get_pages(sbi, F2FS_DIRTY_QDATA); + unsigned int nodes = get_pages(sbi, F2FS_DIRTY_NODES); + unsigned int meta = get_pages(sbi, F2FS_DIRTY_META); + unsigned int imeta = get_pages(sbi, F2FS_DIRTY_IMETA); + unsigned int threshold = sbi->blocks_per_seg * factor * + DEFAULT_DIRTY_THRESHOLD; + unsigned int global_threshold = threshold * 3 / 2; + + if (dents >= threshold || qdata >= threshold || + nodes >= threshold || meta >= threshold || + imeta >= threshold) + return true; + return dents + qdata + nodes + meta + imeta > global_threshold; +} + void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) { if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) @@ -547,13 +567,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) else f2fs_build_free_nids(sbi, false, false); - if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) || - excess_prefree_segs(sbi)) + if (excess_dirty_nats(sbi) || excess_dirty_threshold(sbi) || + excess_prefree_segs(sbi) || !f2fs_space_for_roll_forward(sbi)) goto do_sync; /* there is background inflight IO or foreground operation recently */ if (is_inflight_io(sbi, REQ_TIME) || - (!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem))) + (!f2fs_time_over(sbi, REQ_TIME) && f2fs_rwsem_is_locked(&sbi->cp_rwsem))) return; /* exceed periodical checkpoint timeout threshold */ @@ -561,7 +581,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg) goto do_sync; /* checkpoint is the only way to shrink partial cached entries */ - if (f2fs_available_free_memory(sbi, NAT_ENTRIES) || + if (f2fs_available_free_memory(sbi, NAT_ENTRIES) && f2fs_available_free_memory(sbi, INO_ENTRIES)) return; @@ -1139,14 +1159,14 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->ordered = false; dpolicy->granularity = granularity; - dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST; + dpolicy->max_requests = dcc->max_discard_request; dpolicy->io_aware_gran = MAX_PLIST_NUM; dpolicy->timeout = false; if (discard_type == DPOLICY_BG) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->min_interval = dcc->min_discard_issue_time; + dpolicy->mid_interval = dcc->mid_discard_issue_time; + dpolicy->max_interval = dcc->max_discard_issue_time; dpolicy->io_aware = true; dpolicy->sync = false; dpolicy->ordered = true; @@ -1154,12 +1174,12 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi, dpolicy->granularity = 1; if (atomic_read(&dcc->discard_cmd_cnt)) dpolicy->max_interval = - DEF_MIN_DISCARD_ISSUE_TIME; + dcc->min_discard_issue_time; } } else if (discard_type == DPOLICY_FORCE) { - dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME; - dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME; - dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME; + dpolicy->min_interval = dcc->min_discard_issue_time; + dpolicy->mid_interval = dcc->mid_discard_issue_time; + dpolicy->max_interval = dcc->max_discard_issue_time; dpolicy->io_aware = false; } else if (discard_type == DPOLICY_FSTRIM) { dpolicy->io_aware = false; @@ -1764,7 +1784,7 @@ static int issue_discard_thread(void *data) struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; struct discard_policy dpolicy; - unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME; + unsigned int wait_ms = dcc->min_discard_issue_time; int issued; set_freezable(); @@ -2163,6 +2183,10 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; + dcc->max_discard_request = DEF_MAX_DISCARD_REQUEST; + dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME; + dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME; + dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME; dcc->undiscard_blks = 0; dcc->next_pos = 0; dcc->root = RB_ROOT_CACHED; @@ -2630,6 +2654,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) unsigned short seg_type = curseg->seg_type; sanity_check_seg_type(sbi, seg_type); + if (f2fs_need_rand_seg(sbi)) + return prandom_u32() % (MAIN_SECS(sbi) * sbi->segs_per_sec); /* if segs_per_sec is large than 1, we need to keep original policy. */ if (__is_large_section(sbi)) @@ -2681,6 +2707,9 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) curseg->next_segno = segno; reset_curseg(sbi, type, 1); curseg->alloc_type = LFS; + if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) + curseg->fragment_remained_chunk = + prandom_u32() % sbi->max_fragment_chunk + 1; } static int __next_free_blkoff(struct f2fs_sb_info *sbi, @@ -2707,12 +2736,22 @@ static int __next_free_blkoff(struct f2fs_sb_info *sbi, static void __refresh_next_blkoff(struct f2fs_sb_info *sbi, struct curseg_info *seg) { - if (seg->alloc_type == SSR) + if (seg->alloc_type == SSR) { seg->next_blkoff = __next_free_blkoff(sbi, seg->segno, seg->next_blkoff + 1); - else + } else { seg->next_blkoff++; + if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) { + /* To allocate block chunks in different sizes, use random number */ + if (--seg->fragment_remained_chunk <= 0) { + seg->fragment_remained_chunk = + prandom_u32() % sbi->max_fragment_chunk + 1; + seg->next_blkoff += + prandom_u32() % sbi->max_fragment_hole + 1; + } + } + } } bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno) @@ -2789,7 +2828,7 @@ static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) if (!sbi->am.atgc_enabled) return; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); @@ -2799,7 +2838,7 @@ static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi) up_write(&SIT_I(sbi)->sentry_lock); mutex_unlock(&curseg->curseg_mutex); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi) @@ -2950,7 +2989,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&SIT_I(sbi)->sentry_lock); @@ -2974,7 +3013,7 @@ unlock: type, segno, curseg->segno); mutex_unlock(&curseg->curseg_mutex); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, @@ -3006,23 +3045,23 @@ static void __allocate_new_section(struct f2fs_sb_info *sbi, void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) { - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); __allocate_new_section(sbi, type, force); up_write(&SIT_I(sbi)->sentry_lock); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) { int i; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) __allocate_new_segment(sbi, i, false, false); up_write(&SIT_I(sbi)->sentry_lock); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } static const struct segment_allocation default_salloc_ops = { @@ -3160,9 +3199,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (sbi->discard_blks == 0) goto out; - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_write_checkpoint(sbi, &cpc); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); if (err) goto out; @@ -3399,7 +3438,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, bool from_gc = (type == CURSEG_ALL_DATA_ATGC); struct seg_entry *se = NULL; - down_read(&SM_I(sbi)->curseg_lock); + f2fs_down_read(&SM_I(sbi)->curseg_lock); mutex_lock(&curseg->curseg_mutex); down_write(&sit_i->sentry_lock); @@ -3482,27 +3521,33 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, mutex_unlock(&curseg->curseg_mutex); - up_read(&SM_I(sbi)->curseg_lock); + f2fs_up_read(&SM_I(sbi)->curseg_lock); } -static void update_device_state(struct f2fs_io_info *fio) +void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino, + block_t blkaddr, unsigned int blkcnt) { - struct f2fs_sb_info *sbi = fio->sbi; - unsigned int devidx; - if (!f2fs_is_multi_device(sbi)) return; - devidx = f2fs_target_device_index(sbi, fio->new_blkaddr); + while (1) { + unsigned int devidx = f2fs_target_device_index(sbi, blkaddr); + unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1; - /* update device state for fsync */ - f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO); + /* update device state for fsync */ + f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO); - /* update device state for checkpoint */ - if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { - spin_lock(&sbi->dev_lock); - f2fs_set_bit(devidx, (char *)&sbi->dirty_device); - spin_unlock(&sbi->dev_lock); + /* update device state for checkpoint */ + if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) { + spin_lock(&sbi->dev_lock); + f2fs_set_bit(devidx, (char *)&sbi->dirty_device); + spin_unlock(&sbi->dev_lock); + } + + if (blkcnt <= blks) + break; + blkcnt -= blks; + blkaddr += blks; } } @@ -3512,7 +3557,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio) bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA); if (keep_order) - down_read(&fio->sbi->io_order_lock); + f2fs_down_read(&fio->sbi->io_order_lock); reallocate: f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr, &fio->new_blkaddr, sum, type, fio); @@ -3529,10 +3574,10 @@ reallocate: goto reallocate; } - update_device_state(fio); + f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1); if (keep_order) - up_read(&fio->sbi->io_order_lock); + f2fs_up_read(&fio->sbi->io_order_lock); } void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page, @@ -3611,6 +3656,9 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) goto drop_bio; } + invalidate_mapping_pages(META_MAPPING(sbi), + fio->new_blkaddr, fio->new_blkaddr); + stat_inc_inplace_blocks(fio->sbi); if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE))) @@ -3618,7 +3666,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio) else err = f2fs_submit_page_bio(fio); if (!err) { - update_device_state(fio); + f2fs_update_device_state(fio->sbi, fio->ino, + fio->new_blkaddr, 1); f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE); } @@ -3663,7 +3712,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, se = get_seg_entry(sbi, segno); type = se->type; - down_write(&SM_I(sbi)->curseg_lock); + f2fs_down_write(&SM_I(sbi)->curseg_lock); if (!recover_curseg) { /* for recovery flow */ @@ -3732,7 +3781,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, up_write(&sit_i->sentry_lock); mutex_unlock(&curseg->curseg_mutex); - up_write(&SM_I(sbi)->curseg_lock); + f2fs_up_write(&SM_I(sbi)->curseg_lock); } void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn, @@ -4747,6 +4796,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi) sanity_check_seg_type(sbi, curseg->seg_type); + if (curseg->alloc_type != LFS && curseg->alloc_type != SSR) { + f2fs_err(sbi, + "Current segment has invalid alloc_type:%d", + curseg->alloc_type); + return -EFSCORRUPTED; + } + if (f2fs_test_bit(blkofs, se->cur_valid_map)) goto out; @@ -5216,7 +5272,7 @@ int f2fs_build_segment_manager(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sm_info->sit_entry_set); - init_rwsem(&sm_info->curseg_lock); + init_f2fs_rwsem(&sm_info->curseg_lock); if (!f2fs_readonly(sbi->sb)) { err = f2fs_create_flush_cmd_control(sbi); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index c030b4436617919d9edbc12e3462955f5e73369c..aeb008f2377be13eb8878220a978fbe9d500dac9 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -314,6 +314,7 @@ struct curseg_info { unsigned short next_blkoff; /* next block offset to write */ unsigned int zone; /* current zone number */ unsigned int next_segno; /* preallocated segment */ + int fragment_remained_chunk; /* remained block size in a chunk for block fragmentation mode */ bool inited; /* indicate inmem log is inited */ }; @@ -537,7 +538,8 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi) static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi) { - return SM_I(sbi)->reserved_segments; + return SM_I(sbi)->reserved_segments + + SM_I(sbi)->additional_reserved_segments; } static inline unsigned int free_sections(struct f2fs_sb_info *sbi) @@ -649,7 +651,9 @@ static inline int utilization(struct f2fs_sb_info *sbi) * pages over min_fsync_blocks. (=default option) * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests. * F2FS_IPU_NOCACHE - disable IPU bio cache. - * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode) + * F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has + * FI_OPU_WRITE flag. + * F2FS_IPU_DISABLE - disable IPU. (=default option in LFS mode) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 @@ -665,6 +669,7 @@ enum { F2FS_IPU_FSYNC, F2FS_IPU_ASYNC, F2FS_IPU_NOCACHE, + F2FS_IPU_HONOR_OPU_WRITE, }; static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fa732d012e99b7d2b261a94cdb219dfec407cfbe..3d13fe053b308e1b8f1c51ef9357c569eb820e3d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -58,6 +58,8 @@ const char *f2fs_fault_name[FAULT_MAX] = { [FAULT_DISCARD] = "discard error", [FAULT_WRITE_IO] = "write IO error", [FAULT_SLAB_ALLOC] = "slab alloc", + [FAULT_DQUOT_INIT] = "dquot initialize", + [FAULT_LOCK_OP] = "lock_op", }; void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate, @@ -327,6 +329,46 @@ static inline void limit_reserve_root(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).s_resgid)); } +static inline int adjust_reserved_segment(struct f2fs_sb_info *sbi) +{ + unsigned int sec_blks = sbi->blocks_per_seg * sbi->segs_per_sec; + unsigned int avg_vblocks; + unsigned int wanted_reserved_segments; + block_t avail_user_block_count; + + if (!F2FS_IO_ALIGNED(sbi)) + return 0; + + /* average valid block count in section in worst case */ + avg_vblocks = sec_blks / F2FS_IO_SIZE(sbi); + + /* + * we need enough free space when migrating one section in worst case + */ + wanted_reserved_segments = (F2FS_IO_SIZE(sbi) / avg_vblocks) * + reserved_segments(sbi); + wanted_reserved_segments -= reserved_segments(sbi); + + avail_user_block_count = sbi->user_block_count - + sbi->current_reserved_blocks - + F2FS_OPTION(sbi).root_reserved_blocks; + + if (wanted_reserved_segments * sbi->blocks_per_seg > + avail_user_block_count) { + f2fs_err(sbi, "IO align feature can't grab additional reserved segment: %u, available segments: %u", + wanted_reserved_segments, + avail_user_block_count >> sbi->log_blocks_per_seg); + return -ENOSPC; + } + + SM_I(sbi)->additional_reserved_segments = wanted_reserved_segments; + + f2fs_info(sbi, "IO align feature needs additional reserved segment: %u", + wanted_reserved_segments); + + return 0; +} + static inline void adjust_unusable_cap_perc(struct f2fs_sb_info *sbi) { if (!F2FS_OPTION(sbi).unusable_cap_perc) @@ -817,6 +859,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount) F2FS_OPTION(sbi).fs_mode = FS_MODE_ADAPTIVE; } else if (!strcmp(name, "lfs")) { F2FS_OPTION(sbi).fs_mode = FS_MODE_LFS; + } else if (!strcmp(name, "fragment:segment")) { + F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_SEG; + } else if (!strcmp(name, "fragment:block")) { + F2FS_OPTION(sbi).fs_mode = FS_MODE_FRAGMENT_BLK; } else { kfree(name); return -EINVAL; @@ -1292,7 +1338,7 @@ default_check: /* Not pass down write hints if the number of active logs is lesser * than NR_CURSEG_PERSIST_TYPE. */ - if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE) + if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_PERSIST_TYPE) F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF; if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) { @@ -1316,17 +1362,17 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) /* Initialize f2fs-specific inode info */ atomic_set(&fi->dirty_pages, 0); atomic_set(&fi->i_compr_blocks, 0); - init_rwsem(&fi->i_sem); + init_f2fs_rwsem(&fi->i_sem); spin_lock_init(&fi->i_size_lock); INIT_LIST_HEAD(&fi->dirty_list); INIT_LIST_HEAD(&fi->gdirty_list); INIT_LIST_HEAD(&fi->inmem_ilist); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); - init_rwsem(&fi->i_gc_rwsem[READ]); - init_rwsem(&fi->i_gc_rwsem[WRITE]); - init_rwsem(&fi->i_mmap_sem); - init_rwsem(&fi->i_xattr_sem); + init_f2fs_rwsem(&fi->i_gc_rwsem[READ]); + init_f2fs_rwsem(&fi->i_gc_rwsem[WRITE]); + init_f2fs_rwsem(&fi->i_mmap_sem); + init_f2fs_rwsem(&fi->i_xattr_sem); /* Will be used by directory only */ fi->i_dir_level = F2FS_SB(sb)->dir_level; @@ -1466,8 +1512,9 @@ static void f2fs_free_inode(struct inode *inode) static void destroy_percpu_info(struct f2fs_sb_info *sbi) { - percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); + percpu_counter_destroy(&sbi->rf_node_block_count); + percpu_counter_destroy(&sbi->alloc_valid_block_count); } static void destroy_device_list(struct f2fs_sb_info *sbi) @@ -1627,11 +1674,15 @@ static int f2fs_freeze(struct super_block *sb) /* ensure no checkpoint required */ if (!llist_empty(&F2FS_SB(sb)->cprc_info.issue_list)) return -EINVAL; + + /* to avoid deadlock on f2fs_evict_inode->SB_FREEZE_FS */ + set_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } static int f2fs_unfreeze(struct super_block *sb) { + clear_sbi_flag(F2FS_SB(sb), SBI_IS_FREEZING); return 0; } @@ -1900,6 +1951,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, "adaptive"); else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS) seq_puts(seq, "lfs"); + else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_SEG) + seq_puts(seq, "fragment:segment"); + else if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) + seq_puts(seq, "fragment:block"); seq_printf(seq, ",active_logs=%u", F2FS_OPTION(sbi).active_logs); if (test_opt(sbi, RESERVE_ROOT)) seq_printf(seq, ",reserve_root=%u,resuid=%u,resgid=%u", @@ -2036,6 +2091,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) { unsigned int s_flags = sbi->sb->s_flags; struct cp_control cpc; + unsigned int gc_mode; int err = 0; int ret; block_t unusable; @@ -2048,8 +2104,11 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) f2fs_update_time(sbi, DISABLE_TIME); + gc_mode = sbi->gc_mode; + sbi->gc_mode = GC_URGENT_HIGH; + while (!f2fs_time_over(sbi, DISABLE_TIME)) { - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); err = f2fs_gc(sbi, true, false, false, NULL_SEGNO); if (err == -ENODATA) { err = 0; @@ -2071,7 +2130,7 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) goto restore_flag; } - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); cpc.reason = CP_PAUSE; set_sbi_flag(sbi, SBI_CP_DISABLED); err = f2fs_write_checkpoint(sbi, &cpc); @@ -2083,8 +2142,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) spin_unlock(&sbi->stat_lock); out_unlock: - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); restore_flag: + sbi->gc_mode = gc_mode; sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */ return err; } @@ -2103,12 +2163,12 @@ static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) if (unlikely(retry < 0)) f2fs_warn(sbi, "checkpoint=enable has some unwritten data."); - down_write(&sbi->gc_lock); + f2fs_down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); clear_sbi_flag(sbi, SBI_CP_DISABLED); set_sbi_flag(sbi, SBI_IS_DIRTY); - up_write(&sbi->gc_lock); + f2fs_up_write(&sbi->gc_lock); f2fs_sync_fs(sbi->sb, 1); } @@ -2495,6 +2555,16 @@ retry: return len - towrite; } +int f2fs_dquot_initialize(struct inode *inode) +{ + if (time_to_inject(F2FS_I_SB(inode), FAULT_DQUOT_INIT)) { + f2fs_show_injection_info(F2FS_I_SB(inode), FAULT_DQUOT_INIT); + return -ESRCH; + } + + return dquot_initialize(inode); +} + static struct dquot **f2fs_get_dquots(struct inode *inode) { return F2FS_I(inode)->i_dquot; @@ -2640,7 +2710,7 @@ int f2fs_quota_sync(struct super_block *sb, int type) struct f2fs_sb_info *sbi = F2FS_SB(sb); struct quota_info *dqopt = sb_dqopt(sb); int cnt; - int ret; + int ret = 0; /* * Now when everything is written we can discard the pagecache so @@ -2651,26 +2721,26 @@ int f2fs_quota_sync(struct super_block *sb, int type) if (type != -1 && cnt != type) continue; - if (!sb_has_quota_active(sb, type)) - return 0; + if (!sb_has_quota_active(sb, cnt)) + continue; inode_lock(dqopt->files[cnt]); /* * do_quotactl * f2fs_quota_sync - * down_read(quota_sem) + * f2fs_down_read(quota_sem) * dquot_writeback_dquots() * f2fs_dquot_commit * block_operation - * down_read(quota_sem) + * f2fs_down_read(quota_sem) */ f2fs_lock_op(sbi); - down_read(&sbi->quota_sem); + f2fs_down_read(&sbi->quota_sem); ret = f2fs_quota_sync_file(sbi, cnt); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); f2fs_unlock_op(sbi); inode_unlock(dqopt->files[cnt]); @@ -2795,11 +2865,11 @@ static int f2fs_dquot_commit(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING); + f2fs_down_read_nested(&sbi->quota_sem, SINGLE_DEPTH_NESTING); ret = dquot_commit(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); return ret; } @@ -2808,11 +2878,11 @@ static int f2fs_dquot_acquire(struct dquot *dquot) struct f2fs_sb_info *sbi = F2FS_SB(dquot->dq_sb); int ret; - down_read(&sbi->quota_sem); + f2fs_down_read(&sbi->quota_sem); ret = dquot_acquire(dquot); if (ret < 0) set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR); - up_read(&sbi->quota_sem); + f2fs_up_read(&sbi->quota_sem); return ret; } @@ -2879,6 +2949,11 @@ static const struct quotactl_ops f2fs_quotactl_ops = { .get_nextdqblk = dquot_get_next_dqblk, }; #else +int f2fs_dquot_initialize(struct inode *inode) +{ + return 0; +} + int f2fs_quota_sync(struct super_block *sb, int type) { return 0; @@ -2980,7 +3055,6 @@ static const struct fscrypt_operations f2fs_cryptops = { .set_context = f2fs_set_context, .get_dummy_policy = f2fs_get_dummy_policy, .empty_dir = f2fs_empty_dir, - .max_namelen = F2FS_NAME_LEN, .has_stable_inodes = f2fs_has_stable_inodes, .get_ino_and_lblk_bits = f2fs_get_ino_and_lblk_bits, .get_num_devices = f2fs_get_num_devices, @@ -3491,7 +3565,7 @@ skip_cross: NR_CURSEG_PERSIST_TYPE + nat_bits_blocks >= blocks_per_seg)) { f2fs_warn(sbi, "Insane cp_payload: %u, nat_bits_blocks: %u)", cp_payload, nat_bits_blocks); - return -EFSCORRUPTED; + return 1; } if (unlikely(f2fs_cp_error(sbi))) { @@ -3522,11 +3596,15 @@ static void init_sb_info(struct f2fs_sb_info *sbi) F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino); F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino); sbi->cur_victim_sec = NULL_SECNO; + sbi->gc_mode = GC_NORMAL; sbi->next_victim_seg[BG_GC] = NULL_SEGNO; sbi->next_victim_seg[FG_GC] = NULL_SEGNO; sbi->max_victim_search = DEF_MAX_VICTIM_SEARCH; sbi->migration_granularity = sbi->segs_per_sec; sbi->seq_file_ra_mul = MIN_RA_MUL; + sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE; + sbi->max_fragment_hole = DEF_FRAGMENT_SIZE; + spin_lock_init(&sbi->gc_urgent_high_lock); sbi->dir_level = DEF_DIR_LEVEL; sbi->interval_time[CP_TIME] = DEF_CP_INTERVAL; @@ -3546,14 +3624,14 @@ static void init_sb_info(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); - init_rwsem(&sbi->io_order_lock); + init_f2fs_rwsem(&sbi->io_order_lock); spin_lock_init(&sbi->cp_lock); sbi->dirty_device = 0; spin_lock_init(&sbi->dev_lock); - init_rwsem(&sbi->sb_lock); - init_rwsem(&sbi->pin_sem); + init_f2fs_rwsem(&sbi->sb_lock); + init_f2fs_rwsem(&sbi->pin_sem); } static int init_percpu_info(struct f2fs_sb_info *sbi) @@ -3564,11 +3642,20 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) if (err) return err; + err = percpu_counter_init(&sbi->rf_node_block_count, 0, GFP_KERNEL); + if (err) + goto err_valid_block; + err = percpu_counter_init(&sbi->total_valid_inode_count, 0, GFP_KERNEL); if (err) - percpu_counter_destroy(&sbi->alloc_valid_block_count); + goto err_node_block; + return 0; +err_node_block: + percpu_counter_destroy(&sbi->rf_node_block_count); +err_valid_block: + percpu_counter_destroy(&sbi->alloc_valid_block_count); return err; } @@ -3751,6 +3838,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) { struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); unsigned int max_devices = MAX_DEVICES; + unsigned int logical_blksize; int i; /* Initialize single device information */ @@ -3771,6 +3859,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) if (!sbi->devs) return -ENOMEM; + logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev); + sbi->aligned_blksize = true; + for (i = 0; i < max_devices; i++) { if (i > 0 && !RDEV(i).path[0]) @@ -3807,6 +3898,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi) /* to release errored devices */ sbi->s_ndevs = i + 1; + if (logical_blksize != bdev_logical_block_size(FDEV(i).bdev)) + sbi->aligned_blksize = false; + #ifdef CONFIG_BLK_DEV_ZONED if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM && !f2fs_sb_has_blkzoned(sbi)) { @@ -3888,7 +3982,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; if (f2fs_block_unit_discard(sbi)) sm_i->dcc_info->discard_granularity = 1; - sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; + sm_i->ipu_policy = 1 << F2FS_IPU_FORCE | + 1 << F2FS_IPU_HONOR_OPU_WRITE; } sbi->readdir_ra = 1; @@ -3998,11 +4093,11 @@ try_onemore: /* init f2fs-specific super block info */ sbi->valid_super_block = valid_super_block; - init_rwsem(&sbi->gc_lock); + init_f2fs_rwsem(&sbi->gc_lock); mutex_init(&sbi->writepages); - init_rwsem(&sbi->cp_global_sem); - init_rwsem(&sbi->node_write); - init_rwsem(&sbi->node_change); + init_f2fs_rwsem(&sbi->cp_global_sem); + init_f2fs_rwsem(&sbi->node_write); + init_f2fs_rwsem(&sbi->node_change); /* disallow all the data/node/meta page writes */ set_sbi_flag(sbi, SBI_POR_DOING); @@ -4023,18 +4118,18 @@ try_onemore: } for (j = HOT; j < n; j++) { - init_rwsem(&sbi->write_io[i][j].io_rwsem); + init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem); sbi->write_io[i][j].sbi = sbi; sbi->write_io[i][j].bio = NULL; spin_lock_init(&sbi->write_io[i][j].io_lock); INIT_LIST_HEAD(&sbi->write_io[i][j].io_list); INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list); - init_rwsem(&sbi->write_io[i][j].bio_list_lock); + init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock); } } - init_rwsem(&sbi->cp_rwsem); - init_rwsem(&sbi->quota_sem); + init_f2fs_rwsem(&sbi->cp_rwsem); + init_f2fs_rwsem(&sbi->quota_sem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); @@ -4152,6 +4247,10 @@ try_onemore: goto free_nm; } + err = adjust_reserved_segment(sbi); + if (err) + goto free_nm; + /* For write statistics */ sbi->sectors_written_start = f2fs_get_sectors_written(sbi); @@ -4356,6 +4455,8 @@ free_node_inode: free_stats: f2fs_destroy_stats(sbi); free_nm: + /* stop discard thread before destroying node manager */ + f2fs_stop_discard_thread(sbi); f2fs_destroy_node_manager(sbi); free_sm: f2fs_destroy_segment_manager(sbi); @@ -4605,5 +4706,6 @@ module_exit(exit_f2fs_fs) MODULE_AUTHOR("Samsung Electronics's Praesto Team"); MODULE_DESCRIPTION("Flash Friendly File System"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_SOFTDEP("pre: crc32"); diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c index a1a3e0f6d65827a5f047e457abe5378589baf425..f240f55933940b1ed659c95e5c50656b7b5ef38f 100644 --- a/fs/f2fs/sysfs.c +++ b/fs/f2fs/sysfs.c @@ -41,6 +41,16 @@ enum { ATGC_INFO, /* struct atgc_management */ }; +static const char *gc_mode_names[MAX_GC_MODE] = { + "GC_NORMAL", + "GC_IDLE_CB", + "GC_IDLE_GREEDY", + "GC_IDLE_AT", + "GC_URGENT_HIGH", + "GC_URGENT_LOW", + "GC_URGENT_MID" +}; + struct f2fs_attr { struct attribute attr; ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *); @@ -118,6 +128,15 @@ static ssize_t sb_status_show(struct f2fs_attr *a, return sprintf(buf, "%lx\n", sbi->s_flag); } +static ssize_t pending_discard_show(struct f2fs_attr *a, + struct f2fs_sb_info *sbi, char *buf) +{ + if (!SM_I(sbi)->dcc_info) + return -EINVAL; + return sprintf(buf, "%llu\n", (unsigned long long)atomic_read( + &SM_I(sbi)->dcc_info->discard_cmd_cnt)); +} + static ssize_t features_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { @@ -196,7 +215,7 @@ static ssize_t encoding_show(struct f2fs_attr *a, struct super_block *sb = sbi->sb; if (f2fs_sb_has_casefold(sbi)) - return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n", + return sysfs_emit(buf, "%s (%d.%d.%d)\n", sb->s_encoding->charset, (sb->s_encoding->version >> 16) & 0xff, (sb->s_encoding->version >> 8) & 0xff, @@ -245,7 +264,7 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a, static ssize_t main_blkaddr_show(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf) { - return snprintf(buf, PAGE_SIZE, "%llu\n", + return sysfs_emit(buf, "%llu\n", (unsigned long long)MAIN_BLKADDR(sbi)); } @@ -308,8 +327,13 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a, return sysfs_emit(buf, "%u\n", sbi->compr_new_inode); #endif + if (!strcmp(a->attr.name, "gc_urgent")) + return sysfs_emit(buf, "%s\n", + gc_mode_names[sbi->gc_mode]); + if (!strcmp(a->attr.name, "gc_segment_mode")) - return sysfs_emit(buf, "%u\n", sbi->gc_segment_mode); + return sysfs_emit(buf, "%s\n", + gc_mode_names[sbi->gc_segment_mode]); if (!strcmp(a->attr.name, "gc_reclaimed_segments")) { return sysfs_emit(buf, "%u\n", @@ -355,7 +379,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a, if (!strlen(name) || strlen(name) >= F2FS_EXTENSION_LEN) return -EINVAL; - down_write(&sbi->sb_lock); + f2fs_down_write(&sbi->sb_lock); ret = f2fs_update_extension_list(sbi, name, hot, set); if (ret) @@ -365,7 +389,7 @@ static ssize_t __sbi_store(struct f2fs_attr *a, if (ret) f2fs_update_extension_list(sbi, name, hot, !set); out: - up_write(&sbi->sb_lock); + f2fs_up_write(&sbi->sb_lock); return ret ? ret : count; } @@ -415,7 +439,9 @@ out: if (a->struct_type == RESERVED_BLOCKS) { spin_lock(&sbi->stat_lock); if (t > (unsigned long)(sbi->user_block_count - - F2FS_OPTION(sbi).root_reserved_blocks)) { + F2FS_OPTION(sbi).root_reserved_blocks - + sbi->blocks_per_seg * + SM_I(sbi)->additional_reserved_segments)) { spin_unlock(&sbi->stat_lock); return -EINVAL; } @@ -458,6 +484,13 @@ out: } } else if (t == 2) { sbi->gc_mode = GC_URGENT_LOW; + } else if (t == 3) { + sbi->gc_mode = GC_URGENT_MID; + if (sbi->gc_thread) { + sbi->gc_thread->gc_wake = 1; + wake_up_interruptible_all( + &sbi->gc_thread->gc_wait_queue_head); + } } else { return -EINVAL; } @@ -471,13 +504,22 @@ out: } else if (t == GC_IDLE_AT) { if (!sbi->am.atgc_enabled) return -EINVAL; - sbi->gc_mode = GC_AT; + sbi->gc_mode = GC_IDLE_AT; } else { sbi->gc_mode = GC_NORMAL; } return count; } + if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) { + spin_lock(&sbi->gc_urgent_high_lock); + sbi->gc_urgent_high_limited = t != 0; + sbi->gc_urgent_high_remaining = t; + spin_unlock(&sbi->gc_urgent_high_lock); + + return count; + } + #ifdef CONFIG_F2FS_IOSTAT if (!strcmp(a->attr.name, "iostat_enable")) { sbi->iostat_enable = !!t; @@ -551,6 +593,22 @@ out: return count; } + if (!strcmp(a->attr.name, "max_fragment_chunk")) { + if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE) + sbi->max_fragment_chunk = t; + else + return -EINVAL; + return count; + } + + if (!strcmp(a->attr.name, "max_fragment_hole")) { + if (t >= MIN_FRAGMENT_SIZE && t <= MAX_FRAGMENT_SIZE) + sbi->max_fragment_hole = t; + else + return -EINVAL; + return count; + } + *ui = (unsigned int)t; return count; @@ -681,6 +739,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle, gc_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_request); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time); +F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time); F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity); F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); @@ -693,6 +755,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ssr_sections, min_ssr_sections); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); +F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, max_roll_forward_node_blocks, max_rf_node_blocks); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, migration_granularity, migration_granularity); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); @@ -717,6 +780,7 @@ F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type); #endif F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent_high_remaining, gc_urgent_high_remaining); F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio); F2FS_GENERAL_RO_ATTR(dirty_segments); F2FS_GENERAL_RO_ATTR(free_segments); @@ -728,6 +792,7 @@ F2FS_GENERAL_RO_ATTR(unusable); F2FS_GENERAL_RO_ATTR(encoding); F2FS_GENERAL_RO_ATTR(mounted_time_sec); F2FS_GENERAL_RO_ATTR(main_blkaddr); +F2FS_GENERAL_RO_ATTR(pending_discard); #ifdef CONFIG_F2FS_STAT_FS F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count); F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count); @@ -781,6 +846,8 @@ F2FS_RW_ATTR(ATGC_INFO, atgc_management, atgc_age_threshold, age_threshold); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, seq_file_ra_mul, seq_file_ra_mul); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_segment_mode, gc_segment_mode); F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_reclaimed_segments, gc_reclaimed_segs); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_chunk, max_fragment_chunk); +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_fragment_hole, max_fragment_hole); #define ATTR_LIST(name) (&f2fs_attr_##name.attr) static struct attribute *f2fs_attrs[] = { @@ -793,7 +860,12 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(reclaim_segments), ATTR_LIST(main_blkaddr), ATTR_LIST(max_small_discards), + ATTR_LIST(max_discard_request), + ATTR_LIST(min_discard_issue_time), + ATTR_LIST(mid_discard_issue_time), + ATTR_LIST(max_discard_issue_time), ATTR_LIST(discard_granularity), + ATTR_LIST(pending_discard), ATTR_LIST(batched_trim_sections), ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), @@ -807,6 +879,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ram_thresh), ATTR_LIST(ra_nid_pages), ATTR_LIST(dirty_nats_ratio), + ATTR_LIST(max_roll_forward_node_blocks), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), ATTR_LIST(discard_idle_interval), @@ -826,6 +899,7 @@ static struct attribute *f2fs_attrs[] = { #endif ATTR_LIST(data_io_flag), ATTR_LIST(node_io_flag), + ATTR_LIST(gc_urgent_high_remaining), ATTR_LIST(ckpt_thread_ioprio), ATTR_LIST(dirty_segments), ATTR_LIST(free_segments), @@ -859,6 +933,8 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(seq_file_ra_mul), ATTR_LIST(gc_segment_mode), ATTR_LIST(gc_reclaimed_segments), + ATTR_LIST(max_fragment_chunk), + ATTR_LIST(max_fragment_hole), NULL, }; ATTRIBUTE_GROUPS(f2fs); diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index 15ba36926fad7a17003f571f2876837adee5568d..5cbc463247872ace17dee48efd87c339731f0275 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -136,7 +136,7 @@ static int f2fs_begin_enable_verity(struct file *filp) * here and not rely on ->open() doing it. This must be done before * evicting the inline data. */ - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) return err; @@ -208,7 +208,7 @@ cleanup: * from re-instantiating cached pages we are truncating (since unlike * normal file accesses, garbage collection isn't limited by i_size). */ - down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); truncate_inode_pages(inode->i_mapping, inode->i_size); err2 = f2fs_truncate(inode); if (err2) { @@ -216,7 +216,7 @@ cleanup: err2); set_sbi_flag(sbi, SBI_NEED_FSCK); } - up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); + f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); clear_inode_flag(inode, FI_VERITY_IN_PROGRESS); return err ?: err2; } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 0e6c5af79767682be694946b36e13d336f75d6fa..8d1c8efb643c228743afbeaaf5fa4a71a215f4a6 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -224,15 +224,18 @@ static inline const struct xattr_handler *f2fs_xattr_handler(int index) } static struct f2fs_xattr_entry *__find_xattr(void *base_addr, - void *last_base_addr, int index, - size_t len, const char *name) + void *last_base_addr, void **last_addr, + int index, size_t len, const char *name) { struct f2fs_xattr_entry *entry; list_for_each_xattr(entry, base_addr) { if ((void *)(entry) + sizeof(__u32) > last_base_addr || - (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) + (void *)XATTR_NEXT_ENTRY(entry) > last_base_addr) { + if (last_addr) + *last_addr = entry; return NULL; + } if (entry->e_name_index != index) continue; @@ -252,19 +255,9 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode, unsigned int inline_size = inline_xattr_size(inode); void *max_addr = base_addr + inline_size; - list_for_each_xattr(entry, base_addr) { - if ((void *)entry + sizeof(__u32) > max_addr || - (void *)XATTR_NEXT_ENTRY(entry) > max_addr) { - *last_addr = entry; - return NULL; - } - if (entry->e_name_index != index) - continue; - if (entry->e_name_len != len) - continue; - if (!memcmp(entry->e_name, name, len)) - break; - } + entry = __find_xattr(base_addr, max_addr, last_addr, index, len, name); + if (!entry) + return NULL; /* inline xattr header or entry across max inline xattr size */ if (IS_XATTR_LAST_ENTRY(entry) && @@ -366,7 +359,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, else cur_addr = txattr_addr; - *xe = __find_xattr(cur_addr, last_txattr_addr, index, len, name); + *xe = __find_xattr(cur_addr, last_txattr_addr, NULL, index, len, name); if (!*xe) { f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", inode->i_ino); @@ -530,10 +523,10 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - down_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); error = lookup_all_xattrs(inode, ipage, index, len, name, &entry, &base_addr, &base_size, &is_inline); - up_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -567,9 +560,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) int error; size_t rest = buffer_size; - down_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_down_read(&F2FS_I(inode)->i_xattr_sem); error = read_all_xattrs(inode, NULL, &base_addr); - up_read(&F2FS_I(inode)->i_xattr_sem); + f2fs_up_read(&F2FS_I(inode)->i_xattr_sem); if (error) return error; @@ -657,7 +650,7 @@ static int __f2fs_setxattr(struct inode *inode, int index, last_base_addr = (void *)base_addr + XATTR_SIZE(inode); /* find entry with wanted name. */ - here = __find_xattr(base_addr, last_base_addr, index, len, name); + here = __find_xattr(base_addr, last_base_addr, NULL, index, len, name); if (!here) { f2fs_err(F2FS_I_SB(inode), "inode (%lu) has corrupted xattr", inode->i_ino); @@ -682,8 +675,17 @@ static int __f2fs_setxattr(struct inode *inode, int index, } last = here; - while (!IS_XATTR_LAST_ENTRY(last)) + while (!IS_XATTR_LAST_ENTRY(last)) { + if ((void *)(last) + sizeof(__u32) > last_base_addr || + (void *)XATTR_NEXT_ENTRY(last) > last_base_addr) { + f2fs_err(F2FS_I_SB(inode), "inode (%lu) has invalid last xattr entry, entry_size: %zu", + inode->i_ino, ENTRY_SIZE(last)); + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_FSCK); + error = -EFSCORRUPTED; + goto exit; + } last = XATTR_NEXT_ENTRY(last); + } newsize = XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + len + size); @@ -771,7 +773,7 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, if (!f2fs_is_checkpoint_ready(sbi)) return -ENOSPC; - err = dquot_initialize(inode); + err = f2fs_dquot_initialize(inode); if (err) return err; @@ -782,9 +784,9 @@ int f2fs_setxattr(struct inode *inode, int index, const char *name, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - down_write(&F2FS_I(inode)->i_xattr_sem); + f2fs_down_write(&F2FS_I(inode)->i_xattr_sem); err = __f2fs_setxattr(inode, index, name, value, size, ipage, flags); - up_write(&F2FS_I(inode)->i_xattr_sem); + f2fs_up_write(&F2FS_I(inode)->i_xattr_sem); f2fs_unlock_op(sbi); f2fs_update_time(sbi, REQ_TIME); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index bab9b202b496686b5f97447609db4b32f23cf900..577c73a55c68ddb3ff816ef57b1c1f9cd9f32399 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1979,3 +1979,4 @@ module_init(init_fat_fs) module_exit(exit_fat_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 9d062886fbc19d10dbb81cd2dc5f1cc19f5fbc24..8376577ba01495005d60e08c0fc7a4271ca7ceab 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -680,6 +680,7 @@ static void __exit exit_msdos_fs(void) } MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_AUTHOR("Werner Almesberger"); MODULE_DESCRIPTION("MS-DOS filesystem support"); diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 0cdd0fb9f742a09f9a32f0ada71dd25c888b72d4..01fab05dc7a1694c93a9cec8b2212377485e4fdd 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1077,6 +1077,7 @@ static void __exit exit_vfat_fs(void) } MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_DESCRIPTION("VFAT filesystem support"); MODULE_AUTHOR("Gordon Chaffee"); diff --git a/fs/file.c b/fs/file.c index 21c0893f2f1df8f2feda08913cdeceeaed7a0b89..dcb7918b13742e54d14f83cf6d51515e90c557cd 100644 --- a/fs/file.c +++ b/fs/file.c @@ -670,7 +670,49 @@ int __close_fd(struct files_struct *files, unsigned fd) return filp_close(file, files); } -EXPORT_SYMBOL(__close_fd); /* for ksys_close() */ +EXPORT_SYMBOL_NS(__close_fd, ANDROID_GKI_VFS_EXPORT_ONLY); /* for ksys_close() */ + +/** + * last_fd - return last valid index into fd table + * @cur_fds: files struct + * + * Context: Either rcu read lock or files_lock must be held. + * + * Returns: Last valid index into fdtable. + */ +static inline unsigned last_fd(struct fdtable *fdt) +{ + return fdt->max_fds - 1; +} + +static inline void __range_cloexec(struct files_struct *cur_fds, + unsigned int fd, unsigned int max_fd) +{ + struct fdtable *fdt; + + /* make sure we're using the correct maximum value */ + spin_lock(&cur_fds->file_lock); + fdt = files_fdtable(cur_fds); + max_fd = min(last_fd(fdt), max_fd); + if (fd <= max_fd) + bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1); + spin_unlock(&cur_fds->file_lock); +} + +static inline void __range_close(struct files_struct *cur_fds, unsigned int fd, + unsigned int max_fd) +{ + while (fd <= max_fd) { + struct file *file; + + file = pick_file(cur_fds, fd++); + if (!file) + continue; + + filp_close(file, cur_fds); + cond_resched(); + } +} /** * __close_range() - Close all file descriptors in a given range. @@ -687,7 +729,7 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) struct task_struct *me = current; struct files_struct *cur_fds = me->files, *fds = NULL; - if (flags & ~CLOSE_RANGE_UNSHARE) + if (flags & ~(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC)) return -EINVAL; if (fd > max_fd) @@ -708,8 +750,10 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) * If the requested range is greater than the current maximum, * we're closing everything so only copy all file descriptors * beneath the lowest file descriptor. + * If the caller requested all fds to be made cloexec copy all + * of the file descriptors since they still want to use them. */ - if (max_fd >= cur_max) + if (!(flags & CLOSE_RANGE_CLOEXEC) && (max_fd >= cur_max)) max_unshare_fds = fd; ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds); @@ -725,16 +769,11 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) } max_fd = min(max_fd, cur_max); - while (fd <= max_fd) { - struct file *file; - file = pick_file(cur_fds, fd++); - if (!file) - continue; - - filp_close(file, cur_fds); - cond_resched(); - } + if (flags & CLOSE_RANGE_CLOEXEC) + __range_cloexec(cur_fds, fd, max_fd); + else + __range_close(cur_fds, fd, max_fd); if (fds) { /* @@ -817,24 +856,68 @@ void do_close_on_exec(struct files_struct *files) spin_unlock(&files->file_lock); } +static inline struct file *__fget_files_rcu(struct files_struct *files, + unsigned int fd, fmode_t mask, unsigned int refs) +{ + for (;;) { + struct file *file; + struct fdtable *fdt = rcu_dereference_raw(files->fdt); + struct file __rcu **fdentry; + + if (unlikely(fd >= fdt->max_fds)) + return NULL; + + fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds); + file = rcu_dereference_raw(*fdentry); + if (unlikely(!file)) + return NULL; + + if (unlikely(file->f_mode & mask)) + return NULL; + + /* + * Ok, we have a file pointer. However, because we do + * this all locklessly under RCU, we may be racing with + * that file being closed. + * + * Such a race can take two forms: + * + * (a) the file ref already went down to zero, + * and get_file_rcu_many() fails. Just try + * again: + */ + if (unlikely(!get_file_rcu_many(file, refs))) + continue; + + /* + * (b) the file table entry has changed under us. + * Note that we don't need to re-check the 'fdt->fd' + * pointer having changed, because it always goes + * hand-in-hand with 'fdt'. + * + * If so, we need to put our refs and try again. + */ + if (unlikely(rcu_dereference_raw(files->fdt) != fdt) || + unlikely(rcu_dereference_raw(*fdentry) != file)) { + fput_many(file, refs); + continue; + } + + /* + * Ok, we have a ref to the file, and checked that it + * still exists. + */ + return file; + } +} + static struct file *__fget_files(struct files_struct *files, unsigned int fd, fmode_t mask, unsigned int refs) { struct file *file; rcu_read_lock(); -loop: - file = fcheck_files(files, fd); - if (file) { - /* File object ref couldn't be taken. - * dup2() atomicity guarantee is the reason - * we loop to catch the new file (or NULL pointer) - */ - if (file->f_mode & mask) - file = NULL; - else if (!get_file_rcu_many(file, refs)) - goto loop; - } + file = __fget_files_rcu(files, fd, mask, refs); rcu_read_unlock(); return file; diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index 578a5062706ee9db9aaa43005276ccefd0680cbc..88fe3f4704097ca4ca4b0759cdad5271a2d471c8 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -52,6 +52,7 @@ MODULE_AUTHOR("Christoph Hellwig, Krzysztof Blaszkowski"); MODULE_DESCRIPTION("Veritas Filesystem (VxFS) driver"); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); static struct kmem_cache *vxfs_inode_cachep; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index a0869194ab7390be901453abfc96b0df24c9c7e2..d51354221fe77c7543613b786123b573e39ee254 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2338,7 +2338,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) out_unlock_inode: spin_unlock(&inode->i_lock); } -EXPORT_SYMBOL(__mark_inode_dirty); +EXPORT_SYMBOL_NS(__mark_inode_dirty, ANDROID_GKI_VFS_EXPORT_ONLY); /* * The @s_sync_lock is used to serialise concurrent sync operations @@ -2504,7 +2504,7 @@ void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason) __writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true); up_read(&sb->s_umount); } -EXPORT_SYMBOL(try_to_writeback_inodes_sb); +EXPORT_SYMBOL_NS(try_to_writeback_inodes_sb, ANDROID_GKI_VFS_EXPORT_ONLY); /** * sync_inodes_sb - sync sb inode pages @@ -2571,7 +2571,7 @@ int write_inode_now(struct inode *inode, int sync) might_sleep(); return writeback_single_inode(inode, &wbc); } -EXPORT_SYMBOL(write_inode_now); +EXPORT_SYMBOL_NS(write_inode_now, ANDROID_GKI_VFS_EXPORT_ONLY); /** * sync_inode - write an inode and its pages to disk. @@ -2608,4 +2608,4 @@ int sync_inode_metadata(struct inode *inode, int wait) return sync_inode(inode, &wbc); } -EXPORT_SYMBOL(sync_inode_metadata); +EXPORT_SYMBOL_NS(sync_inode_metadata, ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/fs_context.c b/fs/fs_context.c index 2834d1afa6e8053e93c6e78122be8d58107a36c9..740322dff4a30a5acbaebbffe5f57064912e88da 100644 --- a/fs/fs_context.c +++ b/fs/fs_context.c @@ -231,7 +231,7 @@ static struct fs_context *alloc_fs_context(struct file_system_type *fs_type, struct fs_context *fc; int ret = -ENOMEM; - fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL); + fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL_ACCOUNT); if (!fc) return ERR_PTR(-ENOMEM); @@ -530,7 +530,7 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param) param->key); } - if (len > PAGE_SIZE - 2 - size) + if (size + len + 2 > PAGE_SIZE) return invalf(fc, "VFS: Legacy: Cumulative options too large"); if (strchr(param->key, ',') || (param->type == fs_value_is_string && @@ -631,7 +631,7 @@ const struct fs_context_operations legacy_fs_context_ops = { */ static int legacy_init_fs_context(struct fs_context *fc) { - fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL); + fc->fs_private = kzalloc(sizeof(struct legacy_fs_context), GFP_KERNEL_ACCOUNT); if (!fc->fs_private) return -ENOMEM; fc->ops = &legacy_fs_context_ops; diff --git a/fs/fs_types.c b/fs/fs_types.c index 78365e5dc08caa3aac60b2c0e11b6fc4597cdd42..a11a1d8c7811f6782fd06c4c22de14223d8ed7d7 100644 --- a/fs/fs_types.c +++ b/fs/fs_types.c @@ -41,7 +41,7 @@ unsigned char fs_ftype_to_dtype(unsigned int filetype) return fs_dtype_by_ftype[filetype]; } -EXPORT_SYMBOL_GPL(fs_ftype_to_dtype); +EXPORT_SYMBOL_NS_GPL(fs_ftype_to_dtype, ANDROID_GKI_VFS_EXPORT_ONLY); /* * dirent file type to fs on-disk file type conversion diff --git a/fs/fscache/Makefile b/fs/fscache/Makefile index 79e08e05ef84803437a4d1af3fa6589eba62a62b..9121382cf16da1e4d9a917ee1952a4ed58446924 100644 --- a/fs/fscache/Makefile +++ b/fs/fscache/Makefile @@ -3,6 +3,8 @@ # Makefile for general filesystem caching code # +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=ANDROID_GKI_VFS_EXPORT_ONLY + fscache-y := \ cache.o \ cookie.o \ diff --git a/fs/fuse/Kconfig b/fs/fuse/Kconfig index 40ce9a1c12e5d18cddd5d0f701ab0c9d07410fa6..d6bd90b5c38ae6e1423655b8b9fbe74ef55a350b 100644 --- a/fs/fuse/Kconfig +++ b/fs/fuse/Kconfig @@ -52,3 +52,11 @@ config FUSE_DAX If you want to allow mounting a Virtio Filesystem with the "dax" option, answer Y. + +config FUSE_BPF + bool "Adds BPF to fuse" + depends on FUSE_FS + depends on BPF + help + Extends FUSE by adding BPF to prefilter calls and potentially pass to a + backing file system diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 20ed23aa16fa6a10e82cce28ba432a4a34771617..7bd922ff8604fd67aa8d03dfced974add9bba609 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -7,8 +7,8 @@ obj-$(CONFIG_FUSE_FS) += fuse.o obj-$(CONFIG_CUSE) += cuse.o obj-$(CONFIG_VIRTIO_FS) += virtiofs.o -fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o -fuse-y += passthrough.o +fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o passthrough.o fuse-$(CONFIG_FUSE_DAX) += dax.o +fuse-$(CONFIG_FUSE_BPF) += backing.o virtiofs-y := virtio_fs.o diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c new file mode 100644 index 0000000000000000000000000000000000000000..b68f4f53497437bef982466563ef7d685bf8e9cc --- /dev/null +++ b/fs/fuse/backing.c @@ -0,0 +1,2455 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * FUSE-BPF: Filesystem in Userspace with BPF + * Copyright (c) 2021 Google LLC + */ + +#include "fuse_i.h" + +#include +#include +#include +#include + +#include "../internal.h" + +#define FUSE_BPF_IOCB_MASK (IOCB_APPEND | IOCB_DSYNC | IOCB_HIPRI | IOCB_NOWAIT | IOCB_SYNC) + +struct fuse_bpf_aio_req { + struct kiocb iocb; + refcount_t ref; + struct kiocb *iocb_orig; +}; + +static struct kmem_cache *fuse_bpf_aio_request_cachep; + +static void fuse_file_accessed(struct file *dst_file, struct file *src_file) +{ + struct inode *dst_inode; + struct inode *src_inode; + + if (dst_file->f_flags & O_NOATIME) + return; + + dst_inode = file_inode(dst_file); + src_inode = file_inode(src_file); + + if ((!timespec64_equal(&dst_inode->i_mtime, &src_inode->i_mtime) || + !timespec64_equal(&dst_inode->i_ctime, &src_inode->i_ctime))) { + dst_inode->i_mtime = src_inode->i_mtime; + dst_inode->i_ctime = src_inode->i_ctime; + } + + touch_atime(&dst_file->f_path); +} + +struct bpf_prog *fuse_get_bpf_prog(struct file *file) +{ + struct bpf_prog *bpf_prog = ERR_PTR(-EINVAL); + + if (!file || IS_ERR(file)) + return bpf_prog; + /** + * Two ways of getting a bpf prog from another task's fd, since + * bpf_prog_get_type_dev only works with an fd + * + * 1) Duplicate a little of the needed code. Requires access to + * bpf_prog_fops for validation, which is not exported for modules + * 2) Insert the bpf_file object into a fd from the current task + * Stupidly complex, but I think OK, as security checks are not run + * during the existence of the handle + * + * Best would be to upstream 1) into kernel/bpf/syscall.c and export it + * for use here. Failing that, we have to use 2, since fuse must be + * compilable as a module. + */ +#if 0 + if (file->f_op != &bpf_prog_fops) + goto out; + + bpf_prog = file->private_data; + if (bpf_prog->type == BPF_PROG_TYPE_FUSE) + bpf_prog_inc(bpf_prog); + else + bpf_prog = ERR_PTR(-EINVAL); + +#else + { + int task_fd = get_unused_fd_flags(file->f_flags); + + if (task_fd < 0) + goto out; + + fd_install(task_fd, file); + + bpf_prog = bpf_prog_get_type_dev(task_fd, BPF_PROG_TYPE_FUSE, + false); + + /* Close the fd, which also closes the file */ + __close_fd(current->files, task_fd); + file = NULL; + } +#endif + +out: + if (file) + fput(file); + return bpf_prog; +} + +int fuse_open_initialize(struct fuse_bpf_args *fa, struct fuse_open_io *foio, + struct inode *inode, struct file *file, bool isdir) +{ + foio->foi = (struct fuse_open_in) { + .flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY), + }; + + foio->foo = (struct fuse_open_out) {0}; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_fuse_inode(inode)->nodeid, + .opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN, + .in_numargs = 1, + .out_numargs = 1, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = sizeof(foio->foi), + .value = &foio->foi, + }, + .out_args[0] = (struct fuse_bpf_arg) { + .size = sizeof(foio->foo), + .value = &foio->foo, + }, + }; + + return 0; +} + +int fuse_open_backing(struct fuse_bpf_args *fa, + struct inode *inode, struct file *file, bool isdir) +{ + struct fuse_mount *fm = get_fuse_mount(inode); + const struct fuse_open_in *foi = fa->in_args[0].value; + struct fuse_file *ff; + int retval; + int mask; + struct fuse_dentry *fd = get_fuse_dentry(file->f_path.dentry); + struct file *backing_file; + + ff = fuse_file_alloc(fm); + if (!ff) + return -ENOMEM; + file->private_data = ff; + + switch (foi->flags & O_ACCMODE) { + case O_RDONLY: + mask = MAY_READ; + break; + + case O_WRONLY: + mask = MAY_WRITE; + break; + + case O_RDWR: + mask = MAY_READ | MAY_WRITE; + break; + + default: + return -EINVAL; + } + + retval = inode_permission(get_fuse_inode(inode)->backing_inode, mask); + if (retval) + return retval; + + backing_file = dentry_open(&fd->backing_path, + foi->flags, + current_cred()); + + if (IS_ERR(backing_file)) { + fuse_file_free(ff); + file->private_data = NULL; + return PTR_ERR(backing_file); + } + ff->backing_file = backing_file; + + return 0; +} + +void *fuse_open_finalize(struct fuse_bpf_args *fa, + struct inode *inode, struct file *file, bool isdir) +{ + struct fuse_file *ff = file->private_data; + struct fuse_open_out *foo = fa->out_args[0].value; + + if (ff) + ff->fh = foo->fh; + return 0; +} + +int fuse_create_open_initialize( + struct fuse_bpf_args *fa, struct fuse_create_open_io *fcoio, + struct inode *dir, struct dentry *entry, + struct file *file, unsigned int flags, umode_t mode) +{ + fcoio->fci = (struct fuse_create_in) { + .flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY), + .mode = mode, + }; + + fcoio->feo = (struct fuse_entry_out) {0}; + fcoio->foo = (struct fuse_open_out) {0}; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(dir), + .opcode = FUSE_CREATE, + .in_numargs = 2, + .out_numargs = 2, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = sizeof(fcoio->fci), + .value = &fcoio->fci, + }, + .in_args[1] = (struct fuse_bpf_in_arg) { + .size = entry->d_name.len + 1, + .value = entry->d_name.name, + }, + .out_args[0] = (struct fuse_bpf_arg) { + .size = sizeof(fcoio->feo), + .value = &fcoio->feo, + }, + .out_args[1] = (struct fuse_bpf_arg) { + .size = sizeof(fcoio->foo), + .value = &fcoio->foo, + }, + }; + + return 0; +} + +static int fuse_open_file_backing(struct inode *inode, struct file *file) +{ + struct fuse_mount *fm = get_fuse_mount(inode); + struct dentry *entry = file->f_path.dentry; + struct fuse_dentry *fuse_dentry = get_fuse_dentry(entry); + struct fuse_file *fuse_file; + struct file *backing_file; + + fuse_file = fuse_file_alloc(fm); + if (!fuse_file) + return -ENOMEM; + file->private_data = fuse_file; + + backing_file = dentry_open(&fuse_dentry->backing_path, file->f_flags, + current_cred()); + if (IS_ERR(backing_file)) { + fuse_file_free(fuse_file); + file->private_data = NULL; + return PTR_ERR(backing_file); + } + fuse_file->backing_file = backing_file; + + return 0; +} + +int fuse_create_open_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, + struct file *file, unsigned int flags, umode_t mode) +{ + struct fuse_inode *dir_fuse_inode = get_fuse_inode(dir); + struct fuse_dentry *dir_fuse_dentry = get_fuse_dentry(entry->d_parent); + struct dentry *backing_dentry = NULL; + struct inode *inode = NULL; + struct dentry *newent; + int err = 0; + const struct fuse_create_in *fci = fa->in_args[0].value; + + if (!dir_fuse_inode || !dir_fuse_dentry) + return -EIO; + + inode_lock_nested(dir_fuse_inode->backing_inode, I_MUTEX_PARENT); + backing_dentry = lookup_one_len(fa->in_args[1].value, + dir_fuse_dentry->backing_path.dentry, + strlen(fa->in_args[1].value)); + inode_unlock(dir_fuse_inode->backing_inode); + + if (IS_ERR(backing_dentry)) + return PTR_ERR(backing_dentry); + + if (d_really_is_positive(backing_dentry)) { + err = -EIO; + goto out; + } + + err = vfs_create(dir_fuse_inode->backing_inode, backing_dentry, + fci->mode, true); + if (err) + goto out; + + if (get_fuse_dentry(entry)->backing_path.dentry) + path_put(&get_fuse_dentry(entry)->backing_path); + get_fuse_dentry(entry)->backing_path = (struct path) { + .mnt = dir_fuse_dentry->backing_path.mnt, + .dentry = backing_dentry, + }; + path_get(&get_fuse_dentry(entry)->backing_path); + + inode = fuse_iget_backing(dir->i_sb, + get_fuse_dentry(entry)->backing_path.dentry->d_inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + + if (get_fuse_inode(inode)->bpf) + bpf_prog_put(get_fuse_inode(inode)->bpf); + get_fuse_inode(inode)->bpf = dir_fuse_inode->bpf; + if (get_fuse_inode(inode)->bpf) + bpf_prog_inc(dir_fuse_inode->bpf); + + newent = d_splice_alias(inode, entry); + if (IS_ERR(newent)) { + err = PTR_ERR(newent); + goto out; + } + + entry = newent ? newent : entry; + err = finish_open(file, entry, fuse_open_file_backing); + +out: + dput(backing_dentry); + return err; +} + +void *fuse_create_open_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, + struct file *file, unsigned int flags, umode_t mode) +{ + struct fuse_file *ff = file->private_data; + struct fuse_inode *fi = get_fuse_inode(file->f_inode); + struct fuse_entry_out *feo = fa->out_args[0].value; + struct fuse_open_out *foo = fa->out_args[1].value; + + if (fi) + fi->nodeid = feo->nodeid; + if (ff) + ff->fh = foo->fh; + return 0; +} + +int fuse_release_initialize(struct fuse_bpf_args *fa, struct fuse_release_in *fri, + struct inode *inode, struct file *file) +{ + struct fuse_file *fuse_file = file->private_data; + + /* Always put backing file whatever bpf/userspace says */ + fput(fuse_file->backing_file); + + *fri = (struct fuse_release_in) { + .fh = ((struct fuse_file *)(file->private_data))->fh, + }; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_fuse_inode(inode)->nodeid, + .opcode = FUSE_RELEASE, + .in_numargs = 1, + .in_args[0].size = sizeof(*fri), + .in_args[0].value = fri, + }; + + return 0; +} + +int fuse_releasedir_initialize(struct fuse_bpf_args *fa, + struct fuse_release_in *fri, + struct inode *inode, struct file *file) +{ + struct fuse_file *fuse_file = file->private_data; + + /* Always put backing file whatever bpf/userspace says */ + fput(fuse_file->backing_file); + + *fri = (struct fuse_release_in) { + .fh = ((struct fuse_file *)(file->private_data))->fh, + }; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_fuse_inode(inode)->nodeid, + .opcode = FUSE_RELEASEDIR, + .in_numargs = 1, + .in_args[0].size = sizeof(*fri), + .in_args[0].value = fri, + }; + + return 0; +} + +int fuse_release_backing(struct fuse_bpf_args *fa, + struct inode *inode, struct file *file) +{ + return 0; +} + +void *fuse_release_finalize(struct fuse_bpf_args *fa, + struct inode *inode, struct file *file) +{ + fuse_file_free(file->private_data); + return NULL; +} + +int fuse_flush_initialize(struct fuse_bpf_args *fa, struct fuse_flush_in *ffi, + struct file *file, fl_owner_t id) +{ + struct fuse_file *fuse_file = file->private_data; + + *ffi = (struct fuse_flush_in) { + .fh = fuse_file->fh, + }; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(file->f_inode), + .opcode = FUSE_FLUSH, + .in_numargs = 1, + .in_args[0].size = sizeof(*ffi), + .in_args[0].value = ffi, + .flags = FUSE_BPF_FORCE, + }; + + return 0; +} + +int fuse_flush_backing(struct fuse_bpf_args *fa, struct file *file, fl_owner_t id) +{ + struct fuse_file *fuse_file = file->private_data; + struct file *backing_file = fuse_file->backing_file; + + if (backing_file->f_op->flush) + return backing_file->f_op->flush(backing_file, id); + return 0; +} + +void *fuse_flush_finalize(struct fuse_bpf_args *fa, struct file *file, fl_owner_t id) +{ + return NULL; +} + +int fuse_lseek_initialize(struct fuse_bpf_args *fa, struct fuse_lseek_io *flio, + struct file *file, loff_t offset, int whence) +{ + struct fuse_file *fuse_file = file->private_data; + + flio->fli = (struct fuse_lseek_in) { + .fh = fuse_file->fh, + .offset = offset, + .whence = whence, + }; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(file->f_inode), + .opcode = FUSE_LSEEK, + .in_numargs = 1, + .in_args[0].size = sizeof(flio->fli), + .in_args[0].value = &flio->fli, + .out_numargs = 1, + .out_args[0].size = sizeof(flio->flo), + .out_args[0].value = &flio->flo, + }; + + return 0; +} + +int fuse_lseek_backing(struct fuse_bpf_args *fa, struct file *file, loff_t offset, int whence) +{ + const struct fuse_lseek_in *fli = fa->in_args[0].value; + struct fuse_lseek_out *flo = fa->out_args[0].value; + struct fuse_file *fuse_file = file->private_data; + struct file *backing_file = fuse_file->backing_file; + loff_t ret; + + /* TODO: Handle changing of the file handle */ + if (offset == 0) { + if (whence == SEEK_CUR) { + flo->offset = file->f_pos; + return flo->offset; + } + + if (whence == SEEK_SET) { + flo->offset = vfs_setpos(file, 0, 0); + return flo->offset; + } + } + + inode_lock(file->f_inode); + backing_file->f_pos = file->f_pos; + ret = vfs_llseek(backing_file, fli->offset, fli->whence); + flo->offset = ret; + inode_unlock(file->f_inode); + return ret; +} + +void *fuse_lseek_finalize(struct fuse_bpf_args *fa, struct file *file, loff_t offset, int whence) +{ + struct fuse_lseek_out *flo = fa->out_args[0].value; + + if (!fa->error_in) + file->f_pos = flo->offset; + return ERR_PTR(flo->offset); +} + +int fuse_copy_file_range_initialize(struct fuse_bpf_args *fa, struct fuse_copy_file_range_io *fcf, + struct file *file_in, loff_t pos_in, struct file *file_out, + loff_t pos_out, size_t len, unsigned int flags) +{ + struct fuse_file *fuse_file_in = file_in->private_data; + struct fuse_file *fuse_file_out = file_out->private_data; + + + fcf->fci = (struct fuse_copy_file_range_in) { + .fh_in = fuse_file_in->fh, + .off_in = pos_in, + .nodeid_out = fuse_file_out->nodeid, + .fh_out = fuse_file_out->fh, + .off_out = pos_out, + .len = len, + .flags = flags, + }; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(file_in->f_inode), + .opcode = FUSE_COPY_FILE_RANGE, + .in_numargs = 1, + .in_args[0].size = sizeof(fcf->fci), + .in_args[0].value = &fcf->fci, + .out_numargs = 1, + .out_args[0].size = sizeof(fcf->fwo), + .out_args[0].value = &fcf->fwo, + }; + + return 0; +} + +int fuse_copy_file_range_backing(struct fuse_bpf_args *fa, struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, size_t len, + unsigned int flags) +{ + const struct fuse_copy_file_range_in *fci = fa->in_args[0].value; + struct fuse_file *fuse_file_in = file_in->private_data; + struct file *backing_file_in = fuse_file_in->backing_file; + struct fuse_file *fuse_file_out = file_out->private_data; + struct file *backing_file_out = fuse_file_out->backing_file; + + /* TODO: Handle changing of in/out files */ + if (backing_file_out) + return vfs_copy_file_range(backing_file_in, fci->off_in, backing_file_out, + fci->off_out, fci->len, fci->flags); + else + return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, + flags); +} + +void *fuse_copy_file_range_finalize(struct fuse_bpf_args *fa, struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, size_t len, + unsigned int flags) +{ + return NULL; +} + +int fuse_fsync_initialize(struct fuse_bpf_args *fa, struct fuse_fsync_in *ffi, + struct file *file, loff_t start, loff_t end, int datasync) +{ + struct fuse_file *fuse_file = file->private_data; + + *ffi = (struct fuse_fsync_in) { + .fh = fuse_file->fh, + .fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0, + }; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_fuse_inode(file->f_inode)->nodeid, + .opcode = FUSE_FSYNC, + .in_numargs = 1, + .in_args[0].size = sizeof(*ffi), + .in_args[0].value = ffi, + .flags = FUSE_BPF_FORCE, + }; + + return 0; +} + +int fuse_fsync_backing(struct fuse_bpf_args *fa, + struct file *file, loff_t start, loff_t end, int datasync) +{ + struct fuse_file *fuse_file = file->private_data; + struct file *backing_file = fuse_file->backing_file; + const struct fuse_fsync_in *ffi = fa->in_args[0].value; + int new_datasync = (ffi->fsync_flags & FUSE_FSYNC_FDATASYNC) ? 1 : 0; + + return vfs_fsync(backing_file, new_datasync); +} + +void *fuse_fsync_finalize(struct fuse_bpf_args *fa, + struct file *file, loff_t start, loff_t end, int datasync) +{ + return NULL; +} + +int fuse_dir_fsync_initialize(struct fuse_bpf_args *fa, struct fuse_fsync_in *ffi, + struct file *file, loff_t start, loff_t end, int datasync) +{ + struct fuse_file *fuse_file = file->private_data; + + *ffi = (struct fuse_fsync_in) { + .fh = fuse_file->fh, + .fsync_flags = datasync ? FUSE_FSYNC_FDATASYNC : 0, + }; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_fuse_inode(file->f_inode)->nodeid, + .opcode = FUSE_FSYNCDIR, + .in_numargs = 1, + .in_args[0].size = sizeof(*ffi), + .in_args[0].value = ffi, + .flags = FUSE_BPF_FORCE, + }; + + return 0; +} + +int fuse_getxattr_initialize(struct fuse_bpf_args *fa, + struct fuse_getxattr_io *fgio, + struct dentry *dentry, const char *name, void *value, + size_t size) +{ + *fgio = (struct fuse_getxattr_io) { + .fgi.size = size, + }; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_fuse_inode(dentry->d_inode)->nodeid, + .opcode = FUSE_GETXATTR, + .in_numargs = 2, + .out_numargs = 1, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = sizeof(fgio->fgi), + .value = &fgio->fgi, + }, + .in_args[1] = (struct fuse_bpf_in_arg) { + .size = strlen(name) + 1, + .value = name, + }, + .flags = size ? FUSE_BPF_OUT_ARGVAR : 0, + .out_args[0].size = size ? size : sizeof(fgio->fgo), + .out_args[0].value = size ? value : &fgio->fgo, + }; + return 0; +} + +int fuse_getxattr_backing(struct fuse_bpf_args *fa, + struct dentry *dentry, const char *name, void *value, + size_t size) +{ + ssize_t ret = vfs_getxattr(get_fuse_dentry(dentry)->backing_path.dentry, + fa->in_args[1].value, value, size); + + if (fa->flags & FUSE_BPF_OUT_ARGVAR) + fa->out_args[0].size = ret; + else + ((struct fuse_getxattr_out *)fa->out_args[0].value)->size = ret; + + return 0; +} + +void *fuse_getxattr_finalize(struct fuse_bpf_args *fa, + struct dentry *dentry, const char *name, void *value, + size_t size) +{ + struct fuse_getxattr_out *fgo; + + if (fa->flags & FUSE_BPF_OUT_ARGVAR) + return ERR_PTR(fa->out_args[0].size); + + fgo = fa->out_args[0].value; + + return ERR_PTR(fgo->size); + +} + +int fuse_listxattr_initialize(struct fuse_bpf_args *fa, + struct fuse_getxattr_io *fgio, + struct dentry *dentry, char *list, size_t size) +{ + *fgio = (struct fuse_getxattr_io){ + .fgi.size = size, + }; + + *fa = (struct fuse_bpf_args){ + .nodeid = get_fuse_inode(dentry->d_inode)->nodeid, + .opcode = FUSE_LISTXATTR, + .in_numargs = 1, + .out_numargs = 1, + .in_args[0] = + (struct fuse_bpf_in_arg){ + .size = sizeof(fgio->fgi), + .value = &fgio->fgi, + }, + .flags = size ? FUSE_BPF_OUT_ARGVAR : 0, + .out_args[0].size = size ? size : sizeof(fgio->fgo), + .out_args[0].value = size ? (void *)list : &fgio->fgo, + }; + + return 0; +} + +int fuse_listxattr_backing(struct fuse_bpf_args *fa, struct dentry *dentry, + char *list, size_t size) +{ + ssize_t ret = + vfs_listxattr(get_fuse_dentry(dentry)->backing_path.dentry, + list, size); + + if (ret < 0) + return ret; + + if (fa->flags & FUSE_BPF_OUT_ARGVAR) + fa->out_args[0].size = ret; + else + ((struct fuse_getxattr_out *)fa->out_args[0].value)->size = ret; + + return ret; +} + +void *fuse_listxattr_finalize(struct fuse_bpf_args *fa, struct dentry *dentry, + char *list, size_t size) +{ + struct fuse_getxattr_out *fgo; + + if (fa->error_in) + return NULL; + + if (fa->flags & FUSE_BPF_OUT_ARGVAR) + return ERR_PTR(fa->out_args[0].size); + + fgo = fa->out_args[0].value; + return ERR_PTR(fgo->size); +} + +int fuse_setxattr_initialize(struct fuse_bpf_args *fa, + struct fuse_setxattr_in *fsxi, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + *fsxi = (struct fuse_setxattr_in) { + .size = size, + .flags = flags, + }; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_fuse_inode(dentry->d_inode)->nodeid, + .opcode = FUSE_SETXATTR, + .in_numargs = 3, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = sizeof(*fsxi), + .value = fsxi, + }, + .in_args[1] = (struct fuse_bpf_in_arg) { + .size = strlen(name) + 1, + .value = name, + }, + .in_args[2] = (struct fuse_bpf_in_arg) { + .size = size, + .value = value, + }, + }; + + return 0; +} + +int fuse_setxattr_backing(struct fuse_bpf_args *fa, struct dentry *dentry, + const char *name, const void *value, size_t size, + int flags) +{ + return vfs_setxattr(get_fuse_dentry(dentry)->backing_path.dentry, name, + value, size, flags); +} + +void *fuse_setxattr_finalize(struct fuse_bpf_args *fa, struct dentry *dentry, + const char *name, const void *value, size_t size, + int flags) +{ + return NULL; +} + +int fuse_removexattr_initialize(struct fuse_bpf_args *fa, + struct fuse_dummy_io *unused, + struct dentry *dentry, const char *name) +{ + *fa = (struct fuse_bpf_args) { + .nodeid = get_fuse_inode(dentry->d_inode)->nodeid, + .opcode = FUSE_REMOVEXATTR, + .in_numargs = 1, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = strlen(name) + 1, + .value = name, + }, + }; + + return 0; +} + +int fuse_removexattr_backing(struct fuse_bpf_args *fa, + struct dentry *dentry, const char *name) +{ + struct path *backing_path = + &get_fuse_dentry(dentry)->backing_path; + + /* TODO account for changes of the name by prefilter */ + return vfs_removexattr(backing_path->dentry, name); +} + +void *fuse_removexattr_finalize(struct fuse_bpf_args *fa, + struct dentry *dentry, const char *name) +{ + return NULL; +} + +static inline void fuse_bpf_aio_put(struct fuse_bpf_aio_req *aio_req) +{ + if (refcount_dec_and_test(&aio_req->ref)) + kmem_cache_free(fuse_bpf_aio_request_cachep, aio_req); +} + +static void fuse_bpf_aio_cleanup_handler(struct fuse_bpf_aio_req *aio_req) +{ + struct kiocb *iocb = &aio_req->iocb; + struct kiocb *iocb_orig = aio_req->iocb_orig; + + if (iocb->ki_flags & IOCB_WRITE) { + __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb, + SB_FREEZE_WRITE); + file_end_write(iocb->ki_filp); + fuse_copyattr(iocb_orig->ki_filp, iocb->ki_filp); + } + iocb_orig->ki_pos = iocb->ki_pos; + fuse_bpf_aio_put(aio_req); +} + +static void fuse_bpf_aio_rw_complete(struct kiocb *iocb, long res, long res2) +{ + struct fuse_bpf_aio_req *aio_req = + container_of(iocb, struct fuse_bpf_aio_req, iocb); + struct kiocb *iocb_orig = aio_req->iocb_orig; + + fuse_bpf_aio_cleanup_handler(aio_req); + iocb_orig->ki_complete(iocb_orig, res, res2); +} + + +int fuse_file_read_iter_initialize( + struct fuse_bpf_args *fa, struct fuse_file_read_iter_io *fri, + struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; + + fri->fri = (struct fuse_read_in) { + .fh = ff->fh, + .offset = iocb->ki_pos, + .size = to->count, + }; + + fri->frio = (struct fuse_read_iter_out) { + .ret = fri->fri.size, + }; + + /* TODO we can't assume 'to' is a kvec */ + /* TODO we also can't assume the vector has only one component */ + *fa = (struct fuse_bpf_args) { + .opcode = FUSE_READ, + .nodeid = ff->nodeid, + .in_numargs = 1, + .in_args[0].size = sizeof(fri->fri), + .in_args[0].value = &fri->fri, + .out_numargs = 1, + .out_args[0].size = sizeof(fri->frio), + .out_args[0].value = &fri->frio, + /* + * TODO Design this properly. + * Possible approach: do not pass buf to bpf + * If going to userland, do a deep copy + * For extra credit, do that to/from the vector, rather than + * making an extra copy in the kernel + */ + }; + + return 0; +} + +int fuse_file_read_iter_backing(struct fuse_bpf_args *fa, + struct kiocb *iocb, struct iov_iter *to) +{ + struct fuse_read_iter_out *frio = fa->out_args[0].value; + struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; + ssize_t ret; + + if (!iov_iter_count(to)) + return 0; + + if ((iocb->ki_flags & IOCB_DIRECT) && + (!ff->backing_file->f_mapping->a_ops || + !ff->backing_file->f_mapping->a_ops->direct_IO)) + return -EINVAL; + + /* TODO This just plain ignores any change to fuse_read_in */ + if (is_sync_kiocb(iocb)) { + ret = vfs_iter_read(ff->backing_file, to, &iocb->ki_pos, + iocb_to_rw_flags(iocb->ki_flags, FUSE_BPF_IOCB_MASK)); + } else { + struct fuse_bpf_aio_req *aio_req; + + ret = -ENOMEM; + aio_req = kmem_cache_zalloc(fuse_bpf_aio_request_cachep, GFP_KERNEL); + if (!aio_req) + goto out; + + aio_req->iocb_orig = iocb; + kiocb_clone(&aio_req->iocb, iocb, ff->backing_file); + aio_req->iocb.ki_complete = fuse_bpf_aio_rw_complete; + refcount_set(&aio_req->ref, 2); + ret = vfs_iocb_iter_read(ff->backing_file, &aio_req->iocb, to); + fuse_bpf_aio_put(aio_req); + if (ret != -EIOCBQUEUED) + fuse_bpf_aio_cleanup_handler(aio_req); + } + + frio->ret = ret; + + /* TODO Need to point value at the buffer for post-modification */ + +out: + fuse_file_accessed(file, ff->backing_file); + + return ret; +} + +void *fuse_file_read_iter_finalize(struct fuse_bpf_args *fa, + struct kiocb *iocb, struct iov_iter *to) +{ + struct fuse_read_iter_out *frio = fa->out_args[0].value; + + return ERR_PTR(frio->ret); +} + +int fuse_file_write_iter_initialize( + struct fuse_bpf_args *fa, struct fuse_file_write_iter_io *fwio, + struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; + + *fwio = (struct fuse_file_write_iter_io) { + .fwi.fh = ff->fh, + .fwi.offset = iocb->ki_pos, + .fwi.size = from->count, + }; + + /* TODO we can't assume 'from' is a kvec */ + *fa = (struct fuse_bpf_args) { + .opcode = FUSE_WRITE, + .nodeid = ff->nodeid, + .in_numargs = 2, + .in_args[0].size = sizeof(fwio->fwi), + .in_args[0].value = &fwio->fwi, + .in_args[1].size = fwio->fwi.size, + .in_args[1].value = from->kvec->iov_base, + .out_numargs = 1, + .out_args[0].size = sizeof(fwio->fwio), + .out_args[0].value = &fwio->fwio, + }; + + return 0; +} + +int fuse_file_write_iter_backing(struct fuse_bpf_args *fa, + struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct fuse_file *ff = file->private_data; + struct fuse_write_iter_out *fwio = fa->out_args[0].value; + ssize_t ret; + + if (!iov_iter_count(from)) + return 0; + + /* TODO This just plain ignores any change to fuse_write_in */ + /* TODO uint32_t seems smaller than ssize_t.... right? */ + inode_lock(file_inode(file)); + + fuse_copyattr(file, ff->backing_file); + + if (is_sync_kiocb(iocb)) { + file_start_write(ff->backing_file); + ret = vfs_iter_write(ff->backing_file, from, &iocb->ki_pos, + iocb_to_rw_flags(iocb->ki_flags, FUSE_BPF_IOCB_MASK)); + file_end_write(ff->backing_file); + + /* Must reflect change in size of backing file to upper file */ + if (ret > 0) + fuse_copyattr(file, ff->backing_file); + } else { + struct fuse_bpf_aio_req *aio_req; + + ret = -ENOMEM; + aio_req = kmem_cache_zalloc(fuse_bpf_aio_request_cachep, GFP_KERNEL); + if (!aio_req) + goto out; + + file_start_write(ff->backing_file); + __sb_writers_release(file_inode(ff->backing_file)->i_sb, SB_FREEZE_WRITE); + aio_req->iocb_orig = iocb; + kiocb_clone(&aio_req->iocb, iocb, ff->backing_file); + aio_req->iocb.ki_complete = fuse_bpf_aio_rw_complete; + refcount_set(&aio_req->ref, 2); + ret = vfs_iocb_iter_write(ff->backing_file, &aio_req->iocb, from); + fuse_bpf_aio_put(aio_req); + if (ret != -EIOCBQUEUED) + fuse_bpf_aio_cleanup_handler(aio_req); + } + +out: + inode_unlock(file_inode(file)); + fwio->ret = ret; + if (ret < 0) + return ret; + return 0; +} + +void *fuse_file_write_iter_finalize(struct fuse_bpf_args *fa, + struct kiocb *iocb, struct iov_iter *from) +{ + struct fuse_write_iter_out *fwio = fa->out_args[0].value; + + return ERR_PTR(fwio->ret); +} + +ssize_t fuse_backing_mmap(struct file *file, struct vm_area_struct *vma) +{ + int ret; + struct fuse_file *ff = file->private_data; + struct inode *fuse_inode = file_inode(file); + struct file *backing_file = ff->backing_file; + struct inode *backing_inode = file_inode(backing_file); + + if (!backing_file->f_op->mmap) + return -ENODEV; + + if (WARN_ON(file != vma->vm_file)) + return -EIO; + + vma->vm_file = get_file(backing_file); + + ret = call_mmap(vma->vm_file, vma); + + if (ret) + fput(backing_file); + else + fput(file); + + if (file->f_flags & O_NOATIME) + return ret; + + if ((!timespec64_equal(&fuse_inode->i_mtime, + &backing_inode->i_mtime) || + !timespec64_equal(&fuse_inode->i_ctime, + &backing_inode->i_ctime))) { + fuse_inode->i_mtime = backing_inode->i_mtime; + fuse_inode->i_ctime = backing_inode->i_ctime; + } + touch_atime(&file->f_path); + + return ret; +} + +int fuse_file_fallocate_initialize(struct fuse_bpf_args *fa, + struct fuse_fallocate_in *ffi, + struct file *file, int mode, loff_t offset, loff_t length) +{ + struct fuse_file *ff = file->private_data; + + *ffi = (struct fuse_fallocate_in) { + .fh = ff->fh, + .offset = offset, + .length = length, + .mode = mode + }; + + *fa = (struct fuse_bpf_args) { + .opcode = FUSE_FALLOCATE, + .nodeid = ff->nodeid, + .in_numargs = 1, + .in_args[0].size = sizeof(*ffi), + .in_args[0].value = ffi, + }; + + return 0; +} + +int fuse_file_fallocate_backing(struct fuse_bpf_args *fa, + struct file *file, int mode, loff_t offset, loff_t length) +{ + const struct fuse_fallocate_in *ffi = fa->in_args[0].value; + struct fuse_file *ff = file->private_data; + + return vfs_fallocate(ff->backing_file, ffi->mode, ffi->offset, + ffi->length); +} + +void *fuse_file_fallocate_finalize(struct fuse_bpf_args *fa, + struct file *file, int mode, loff_t offset, loff_t length) +{ + return NULL; +} + +/******************************************************************************* + * Directory operations after here * + ******************************************************************************/ + +int fuse_lookup_initialize(struct fuse_bpf_args *fa, struct fuse_lookup_io *fli, + struct inode *dir, struct dentry *entry, unsigned int flags) +{ + *fa = (struct fuse_bpf_args) { + .nodeid = get_fuse_inode(dir)->nodeid, + .opcode = FUSE_LOOKUP, + .in_numargs = 1, + .out_numargs = 2, + .flags = FUSE_BPF_OUT_ARGVAR, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = entry->d_name.len + 1, + .value = entry->d_name.name, + }, + .out_args[0] = (struct fuse_bpf_arg) { + .size = sizeof(fli->feo), + .value = &fli->feo, + }, + .out_args[1] = (struct fuse_bpf_arg) { + .size = sizeof(fli->feb.out), + .value = &fli->feb.out, + }, + }; + + return 0; +} + +int fuse_lookup_backing(struct fuse_bpf_args *fa, struct inode *dir, + struct dentry *entry, unsigned int flags) +{ + struct fuse_dentry *fuse_entry = get_fuse_dentry(entry); + struct fuse_dentry *dir_fuse_entry = get_fuse_dentry(entry->d_parent); + struct dentry *dir_backing_entry = dir_fuse_entry->backing_path.dentry; + struct inode *dir_backing_inode = dir_backing_entry->d_inode; + struct dentry *backing_entry; + + /* TODO this will not handle lookups over mount points */ + inode_lock_nested(dir_backing_inode, I_MUTEX_PARENT); + backing_entry = lookup_one_len(entry->d_name.name, dir_backing_entry, + strlen(entry->d_name.name)); + inode_unlock(dir_backing_inode); + + if (IS_ERR(backing_entry)) + return PTR_ERR(backing_entry); + + fuse_entry->backing_path = (struct path) { + .dentry = backing_entry, + .mnt = dir_fuse_entry->backing_path.mnt, + }; + + mntget(fuse_entry->backing_path.mnt); + return 0; +} + +struct dentry *fuse_lookup_finalize(struct fuse_bpf_args *fa, struct inode *dir, + struct dentry *entry, unsigned int flags) +{ + struct fuse_dentry *fd; + struct dentry *bd; + struct inode *inode, *backing_inode; + struct fuse_entry_out *feo = fa->out_args[0].value; + struct fuse_entry_bpf_out *febo = fa->out_args[1].value; + struct fuse_entry_bpf *feb = container_of(febo, struct fuse_entry_bpf, out); + + fd = get_fuse_dentry(entry); + if (!fd) + return ERR_PTR(-EIO); + bd = fd->backing_path.dentry; + if (!bd) + return ERR_PTR(-ENOENT); + backing_inode = bd->d_inode; + if (!backing_inode) + return 0; + + inode = fuse_iget_backing(dir->i_sb, backing_inode); + + if (IS_ERR(inode)) + return ERR_PTR(PTR_ERR(inode)); + + /* TODO Make sure this handles invalid handles */ + /* TODO Do we need the same code in revalidate */ + if (get_fuse_inode(inode)->bpf) { + bpf_prog_put(get_fuse_inode(inode)->bpf); + get_fuse_inode(inode)->bpf = NULL; + } + + switch (febo->bpf_action) { + case FUSE_ACTION_KEEP: + get_fuse_inode(inode)->bpf = get_fuse_inode(dir)->bpf; + if (get_fuse_inode(inode)->bpf) + bpf_prog_inc(get_fuse_inode(inode)->bpf); + break; + + case FUSE_ACTION_REMOVE: + get_fuse_inode(inode)->bpf = NULL; + break; + + case FUSE_ACTION_REPLACE: { + struct file *bpf_file = feb->bpf_file; + struct bpf_prog *bpf_prog = ERR_PTR(-EINVAL); + + if (bpf_file && !IS_ERR(bpf_file)) + bpf_prog = fuse_get_bpf_prog(bpf_file); + + if (IS_ERR(bpf_prog)) + return ERR_PTR(PTR_ERR(bpf_prog)); + + get_fuse_inode(inode)->bpf = bpf_prog; + break; + } + + default: + return ERR_PTR(-EIO); + } + + switch (febo->backing_action) { + case FUSE_ACTION_KEEP: + /* backing inode/path are added in fuse_lookup_backing */ + break; + + case FUSE_ACTION_REMOVE: + iput(get_fuse_inode(inode)->backing_inode); + get_fuse_inode(inode)->backing_inode = NULL; + path_put_init(&get_fuse_dentry(entry)->backing_path); + break; + + case FUSE_ACTION_REPLACE: { + struct fuse_conn *fc; + struct file *backing_file; + + fc = get_fuse_mount(dir)->fc; + backing_file = feb->backing_file; + if (!backing_file || IS_ERR(backing_file)) + return ERR_PTR(-EIO); + + iput(get_fuse_inode(inode)->backing_inode); + get_fuse_inode(inode)->backing_inode = + backing_file->f_inode; + ihold(get_fuse_inode(inode)->backing_inode); + + path_put(&get_fuse_dentry(entry)->backing_path); + get_fuse_dentry(entry)->backing_path = backing_file->f_path; + path_get(&get_fuse_dentry(entry)->backing_path); + + fput(backing_file); + break; + } + + default: + return ERR_PTR(-EIO); + } + + get_fuse_inode(inode)->nodeid = feo->nodeid; + + return d_splice_alias(inode, entry); +} + +int fuse_revalidate_backing(struct fuse_bpf_args *fa, struct inode *dir, + struct dentry *entry, unsigned int flags) +{ + struct fuse_dentry *fuse_dentry = get_fuse_dentry(entry); + struct dentry *backing_entry = fuse_dentry->backing_path.dentry; + + spin_lock(&backing_entry->d_lock); + if (d_unhashed(backing_entry)) { + spin_unlock(&backing_entry->d_lock); + return 0; + } + spin_unlock(&backing_entry->d_lock); + + if (unlikely(backing_entry->d_flags & DCACHE_OP_REVALIDATE)) + return backing_entry->d_op->d_revalidate(backing_entry, flags); + return 1; +} + +void *fuse_revalidate_finalize(struct fuse_bpf_args *fa, struct inode *dir, + struct dentry *entry, unsigned int flags) +{ + return 0; +} + +int fuse_canonical_path_initialize(struct fuse_bpf_args *fa, + struct fuse_dummy_io *fdi, + const struct path *path, + struct path *canonical_path) +{ + fa->opcode = FUSE_CANONICAL_PATH; + return 0; +} + +int fuse_canonical_path_backing(struct fuse_bpf_args *fa, const struct path *path, + struct path *canonical_path) +{ + get_fuse_backing_path(path->dentry, canonical_path); + return 0; +} + +void *fuse_canonical_path_finalize(struct fuse_bpf_args *fa, + const struct path *path, + struct path *canonical_path) +{ + return NULL; +} + +int fuse_mknod_initialize( + struct fuse_bpf_args *fa, struct fuse_mknod_in *fmi, + struct inode *dir, struct dentry *entry, umode_t mode, dev_t rdev) +{ + *fmi = (struct fuse_mknod_in) { + .mode = mode, + .rdev = new_encode_dev(rdev), + .umask = current_umask(), + }; + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(dir), + .opcode = FUSE_MKNOD, + .in_numargs = 2, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = sizeof(*fmi), + .value = fmi, + }, + .in_args[1] = (struct fuse_bpf_in_arg) { + .size = entry->d_name.len + 1, + .value = entry->d_name.name, + }, + }; + + return 0; +} + +int fuse_mknod_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, umode_t mode, dev_t rdev) +{ + int err = 0; + const struct fuse_mknod_in *fmi = fa->in_args[0].value; + struct inode *backing_inode = get_fuse_inode(dir)->backing_inode; + struct path backing_path = {}; + struct inode *inode = NULL; + + //TODO Actually deal with changing the backing entry in mknod + get_fuse_backing_path(entry, &backing_path); + if (!backing_path.dentry) + return -EBADF; + + inode_lock_nested(backing_inode, I_MUTEX_PARENT); + mode = fmi->mode; + if (!IS_POSIXACL(backing_inode)) + mode &= ~fmi->umask; + err = vfs_mknod(backing_inode, backing_path.dentry, + mode, new_decode_dev(fmi->rdev)); + inode_unlock(backing_inode); + if (err) + goto out; + if (d_really_is_negative(backing_path.dentry) || + unlikely(d_unhashed(backing_path.dentry))) { + err = -EINVAL; + /** + * TODO: overlayfs responds to this situation with a + * lookupOneLen. Should we do that too? + */ + goto out; + } + inode = fuse_iget_backing(dir->i_sb, backing_inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + d_instantiate(entry, inode); +out: + path_put(&backing_path); + return err; +} + +void *fuse_mknod_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, umode_t mode, dev_t rdev) +{ + return NULL; +} + +int fuse_mkdir_initialize( + struct fuse_bpf_args *fa, struct fuse_mkdir_in *fmi, + struct inode *dir, struct dentry *entry, umode_t mode) +{ + *fmi = (struct fuse_mkdir_in) { + .mode = mode, + .umask = current_umask(), + }; + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(dir), + .opcode = FUSE_MKDIR, + .in_numargs = 2, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = sizeof(*fmi), + .value = fmi, + }, + .in_args[1] = (struct fuse_bpf_in_arg) { + .size = entry->d_name.len + 1, + .value = entry->d_name.name, + }, + }; + + return 0; +} + +int fuse_mkdir_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, umode_t mode) +{ + int err = 0; + const struct fuse_mkdir_in *fmi = fa->in_args[0].value; + struct inode *backing_inode = get_fuse_inode(dir)->backing_inode; + struct path backing_path = {}; + struct inode *inode = NULL; + struct dentry *d; + + //TODO Actually deal with changing the backing entry in mkdir + get_fuse_backing_path(entry, &backing_path); + if (!backing_path.dentry) + return -EBADF; + + inode_lock_nested(backing_inode, I_MUTEX_PARENT); + mode = fmi->mode; + if (!IS_POSIXACL(backing_inode)) + mode &= ~fmi->umask; + err = vfs_mkdir(backing_inode, backing_path.dentry, mode); + if (err) + goto out; + if (d_really_is_negative(backing_path.dentry) || + unlikely(d_unhashed(backing_path.dentry))) { + d = lookup_one_len(entry->d_name.name, backing_path.dentry->d_parent, + entry->d_name.len); + if (IS_ERR(d)) { + err = PTR_ERR(d); + goto out; + } + dput(backing_path.dentry); + backing_path.dentry = d; + } + inode = fuse_iget_backing(dir->i_sb, backing_inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + d_instantiate(entry, inode); +out: + inode_unlock(backing_inode); + path_put(&backing_path); + return err; +} + +void *fuse_mkdir_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, umode_t mode) +{ + return NULL; +} + +int fuse_rmdir_initialize( + struct fuse_bpf_args *fa, struct fuse_dummy_io *dummy, + struct inode *dir, struct dentry *entry) +{ + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(dir), + .opcode = FUSE_RMDIR, + .in_numargs = 1, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = entry->d_name.len + 1, + .value = entry->d_name.name, + }, + }; + + return 0; +} + +int fuse_rmdir_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry) +{ + int err = 0; + struct path backing_path = {}; + struct dentry *backing_parent_dentry; + struct inode *backing_inode; + + /* TODO Actually deal with changing the backing entry in rmdir */ + get_fuse_backing_path(entry, &backing_path); + if (!backing_path.dentry) + return -EBADF; + + /* TODO Not sure if we should reverify like overlayfs, or get inode from d_parent */ + backing_parent_dentry = dget_parent(backing_path.dentry); + backing_inode = d_inode(backing_parent_dentry); + + inode_lock_nested(backing_inode, I_MUTEX_PARENT); + err = vfs_rmdir(backing_inode, backing_path.dentry); + inode_unlock(backing_inode); + + dput(backing_parent_dentry); + if (!err) + d_drop(entry); + path_put(&backing_path); + return err; +} + +void *fuse_rmdir_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry) +{ + return NULL; +} + +static int fuse_rename_backing_common( + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags) +{ + int err = 0; + struct path old_backing_path; + struct path new_backing_path; + struct dentry *old_backing_dir_dentry; + struct dentry *old_backing_dentry; + struct dentry *new_backing_dir_dentry; + struct dentry *new_backing_dentry; + struct dentry *trap = NULL; + struct inode *target_inode; + + //TODO Actually deal with changing anything that isn't a flag + get_fuse_backing_path(oldent, &old_backing_path); + if (!old_backing_path.dentry) + return -EBADF; + get_fuse_backing_path(newent, &new_backing_path); + if (!new_backing_path.dentry) { + /* + * TODO A file being moved from a backing path to another + * backing path which is not yet instrumented with FUSE-BPF. + * This may be slow and should be substituted with something + * more clever. + */ + err = -EXDEV; + goto put_old_path; + } + if (new_backing_path.mnt != old_backing_path.mnt) { + err = -EXDEV; + goto put_new_path; + } + old_backing_dentry = old_backing_path.dentry; + new_backing_dentry = new_backing_path.dentry; + old_backing_dir_dentry = dget_parent(old_backing_dentry); + new_backing_dir_dentry = dget_parent(new_backing_dentry); + target_inode = d_inode(newent); + + trap = lock_rename(old_backing_dir_dentry, new_backing_dir_dentry); + if (trap == old_backing_dentry) { + err = -EINVAL; + goto put_parents; + } + if (trap == new_backing_dentry) { + err = -ENOTEMPTY; + goto put_parents; + } + err = vfs_rename(d_inode(old_backing_dir_dentry), old_backing_dentry, + d_inode(new_backing_dir_dentry), new_backing_dentry, + NULL, flags); + if (err) + goto unlock; + if (target_inode) + fsstack_copy_attr_all(target_inode, + get_fuse_inode(target_inode)->backing_inode); + fsstack_copy_attr_all(d_inode(oldent), d_inode(old_backing_dentry)); +unlock: + unlock_rename(old_backing_dir_dentry, new_backing_dir_dentry); +put_parents: + dput(new_backing_dir_dentry); + dput(old_backing_dir_dentry); +put_new_path: + path_put(&new_backing_path); +put_old_path: + path_put(&old_backing_path); + return err; +} + +int fuse_rename2_initialize(struct fuse_bpf_args *fa, struct fuse_rename2_in *fri, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags) +{ + *fri = (struct fuse_rename2_in) { + .newdir = get_node_id(newdir), + .flags = flags, + }; + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(olddir), + .opcode = FUSE_RENAME2, + .in_numargs = 3, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = sizeof(*fri), + .value = fri, + }, + .in_args[1] = (struct fuse_bpf_in_arg) { + .size = oldent->d_name.len + 1, + .value = oldent->d_name.name, + }, + .in_args[2] = (struct fuse_bpf_in_arg) { + .size = newent->d_name.len + 1, + .value = newent->d_name.name, + }, + }; + + return 0; +} + +int fuse_rename2_backing(struct fuse_bpf_args *fa, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags) +{ + const struct fuse_rename2_in *fri = fa->in_args[0].value; + + /* TODO: deal with changing dirs/ents */ + return fuse_rename_backing_common(olddir, oldent, newdir, newent, fri->flags); +} + +void *fuse_rename2_finalize(struct fuse_bpf_args *fa, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags) +{ + return NULL; +} + +int fuse_rename_initialize(struct fuse_bpf_args *fa, struct fuse_rename_in *fri, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent) +{ + *fri = (struct fuse_rename_in) { + .newdir = get_node_id(newdir), + }; + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(olddir), + .opcode = FUSE_RENAME, + .in_numargs = 3, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = sizeof(*fri), + .value = fri, + }, + .in_args[1] = (struct fuse_bpf_in_arg) { + .size = oldent->d_name.len + 1, + .value = oldent->d_name.name, + }, + .in_args[2] = (struct fuse_bpf_in_arg) { + .size = newent->d_name.len + 1, + .value = newent->d_name.name, + }, + }; + + return 0; +} + +int fuse_rename_backing(struct fuse_bpf_args *fa, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent) +{ + /* TODO: deal with changing dirs/ents */ + return fuse_rename_backing_common(olddir, oldent, newdir, newent, 0); +} + +void *fuse_rename_finalize(struct fuse_bpf_args *fa, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent) +{ + return NULL; +} + +int fuse_unlink_initialize( + struct fuse_bpf_args *fa, struct fuse_dummy_io *dummy, + struct inode *dir, struct dentry *entry) +{ + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(dir), + .opcode = FUSE_UNLINK, + .in_numargs = 1, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = entry->d_name.len + 1, + .value = entry->d_name.name, + }, + }; + + return 0; +} + +int fuse_unlink_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry) +{ + int err = 0; + struct path backing_path = {}; + struct dentry *backing_parent_dentry; + struct inode *backing_inode; + + /* TODO Actually deal with changing the backing entry in unlink */ + get_fuse_backing_path(entry, &backing_path); + if (!backing_path.dentry) + return -EBADF; + + /* TODO Not sure if we should reverify like overlayfs, or get inode from d_parent */ + backing_parent_dentry = dget_parent(backing_path.dentry); + backing_inode = d_inode(backing_parent_dentry); + + inode_lock_nested(backing_inode, I_MUTEX_PARENT); + err = vfs_unlink(backing_inode, backing_path.dentry, NULL); + inode_unlock(backing_inode); + + dput(backing_parent_dentry); + if (!err) + d_drop(entry); + path_put(&backing_path); + return err; +} + +void *fuse_unlink_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry) +{ + return NULL; +} + +int fuse_link_initialize(struct fuse_bpf_args *fa, struct fuse_link_in *fli, + struct dentry *entry, struct inode *dir, + struct dentry *newent) +{ + struct inode *src_inode = entry->d_inode; + + *fli = (struct fuse_link_in){ + .oldnodeid = get_node_id(src_inode), + }; + + fa->opcode = FUSE_LINK; + fa->in_numargs = 2; + fa->in_args[0].size = sizeof(*fli); + fa->in_args[0].value = fli; + fa->in_args[1].size = newent->d_name.len + 1; + fa->in_args[1].value = newent->d_name.name; + + return 0; +} + +int fuse_link_backing(struct fuse_bpf_args *fa, struct dentry *entry, + struct inode *dir, struct dentry *newent) +{ + int err = 0; + struct path backing_old_path = {}; + struct path backing_new_path = {}; + struct dentry *backing_dir_dentry; + struct inode *fuse_new_inode = NULL; + struct inode *backing_dir_inode = get_fuse_inode(dir)->backing_inode; + + get_fuse_backing_path(entry, &backing_old_path); + if (!backing_old_path.dentry) + return -EBADF; + + get_fuse_backing_path(newent, &backing_new_path); + if (!backing_new_path.dentry) { + err = -EBADF; + goto err_dst_path; + } + + backing_dir_dentry = dget_parent(backing_new_path.dentry); + backing_dir_inode = d_inode(backing_dir_dentry); + + inode_lock_nested(backing_dir_inode, I_MUTEX_PARENT); + err = vfs_link(backing_old_path.dentry, backing_dir_inode, backing_new_path.dentry, NULL); + inode_unlock(backing_dir_inode); + if (err) + goto out; + + if (d_really_is_negative(backing_new_path.dentry) || + unlikely(d_unhashed(backing_new_path.dentry))) { + err = -EINVAL; + /** + * TODO: overlayfs responds to this situation with a + * lookupOneLen. Should we do that too? + */ + goto out; + } + + fuse_new_inode = fuse_iget_backing(dir->i_sb, backing_dir_inode); + if (IS_ERR(fuse_new_inode)) { + err = PTR_ERR(fuse_new_inode); + goto out; + } + d_instantiate(newent, fuse_new_inode); + +out: + dput(backing_dir_dentry); + path_put(&backing_new_path); +err_dst_path: + path_put(&backing_old_path); + return err; +} + +void *fuse_link_finalize(struct fuse_bpf_args *fa, struct dentry *entry, + struct inode *dir, struct dentry *newent) +{ + return NULL; +} + +int fuse_getattr_initialize(struct fuse_bpf_args *fa, struct fuse_getattr_io *fgio, + const struct dentry *entry, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + fgio->fgi = (struct fuse_getattr_in) { + .getattr_flags = flags, + .fh = -1, /* TODO is this OK? */ + }; + + fgio->fao = (struct fuse_attr_out) {0}; + + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(entry->d_inode), + .opcode = FUSE_GETATTR, + .in_numargs = 1, + .out_numargs = 1, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = sizeof(fgio->fgi), + .value = &fgio->fgi, + }, + .out_args[0] = (struct fuse_bpf_arg) { + .size = sizeof(fgio->fao), + .value = &fgio->fao, + }, + }; + + return 0; +} + +static void fuse_stat_to_attr(struct fuse_conn *fc, struct inode *inode, + struct kstat *stat, struct fuse_attr *attr) +{ + unsigned int blkbits; + + /* see the comment in fuse_change_attributes() */ + if (fc->writeback_cache && S_ISREG(inode->i_mode)) { + stat->size = i_size_read(inode); + stat->mtime.tv_sec = inode->i_mtime.tv_sec; + stat->mtime.tv_nsec = inode->i_mtime.tv_nsec; + stat->ctime.tv_sec = inode->i_ctime.tv_sec; + stat->ctime.tv_nsec = inode->i_ctime.tv_nsec; + } + + attr->ino = stat->ino; + attr->mode = (inode->i_mode & S_IFMT) | (stat->mode & 07777); + attr->nlink = stat->nlink; + attr->uid = from_kuid(fc->user_ns, stat->uid); + attr->gid = from_kgid(fc->user_ns, stat->gid); + attr->atime = stat->atime.tv_sec; + attr->atimensec = stat->atime.tv_nsec; + attr->mtime = stat->mtime.tv_sec; + attr->mtimensec = stat->mtime.tv_nsec; + attr->ctime = stat->ctime.tv_sec; + attr->ctimensec = stat->ctime.tv_nsec; + attr->size = stat->size; + attr->blocks = stat->blocks; + + if (stat->blksize != 0) + blkbits = ilog2(stat->blksize); + else + blkbits = inode->i_sb->s_blocksize_bits; + + attr->blksize = 1 << blkbits; +} + +int fuse_getattr_backing(struct fuse_bpf_args *fa, + const struct dentry *entry, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + struct path *backing_path = + &get_fuse_dentry(entry)->backing_path; + struct inode *backing_inode = backing_path->dentry->d_inode; + struct fuse_attr_out *fao = fa->out_args[0].value; + struct kstat tmp; + int err; + + if (!stat) + stat = &tmp; + + err = vfs_getattr(backing_path, stat, request_mask, flags); + + if (!err) + fuse_stat_to_attr(get_fuse_conn(entry->d_inode), + backing_inode, stat, &fao->attr); + + return err; +} + +void *fuse_getattr_finalize(struct fuse_bpf_args *fa, + const struct dentry *entry, struct kstat *stat, + u32 request_mask, unsigned int flags) +{ + struct fuse_attr_out *outarg = fa->out_args[0].value; + struct inode *inode = entry->d_inode; + u64 attr_version = fuse_get_attr_version(get_fuse_mount(inode)->fc); + int err = 0; + + /* TODO: Ensure this doesn't happen if we had an error getting attrs in + * backing. + */ + err = finalize_attr(inode, outarg, attr_version, stat); + return ERR_PTR(err); +} + +static void fattr_to_iattr(struct fuse_conn *fc, + const struct fuse_setattr_in *arg, + struct iattr *iattr) +{ + unsigned int fvalid = arg->valid; + + if (fvalid & FATTR_MODE) + iattr->ia_valid |= ATTR_MODE, iattr->ia_mode = arg->mode; + if (fvalid & FATTR_UID) { + iattr->ia_valid |= ATTR_UID; + iattr->ia_uid = make_kuid(fc->user_ns, arg->uid); + } + if (fvalid & FATTR_GID) { + iattr->ia_valid |= ATTR_GID; + iattr->ia_gid = make_kgid(fc->user_ns, arg->gid); + } + if (fvalid & FATTR_SIZE) + iattr->ia_valid |= ATTR_SIZE, iattr->ia_size = arg->size; + if (fvalid & FATTR_ATIME) { + iattr->ia_valid |= ATTR_ATIME; + iattr->ia_atime.tv_sec = arg->atime; + iattr->ia_atime.tv_nsec = arg->atimensec; + if (!(fvalid & FATTR_ATIME_NOW)) + iattr->ia_valid |= ATTR_ATIME_SET; + } + if (fvalid & FATTR_MTIME) { + iattr->ia_valid |= ATTR_MTIME; + iattr->ia_mtime.tv_sec = arg->mtime; + iattr->ia_mtime.tv_nsec = arg->mtimensec; + if (!(fvalid & FATTR_MTIME_NOW)) + iattr->ia_valid |= ATTR_MTIME_SET; + } + if (fvalid & FATTR_CTIME) { + iattr->ia_valid |= ATTR_CTIME; + iattr->ia_ctime.tv_sec = arg->ctime; + iattr->ia_ctime.tv_nsec = arg->ctimensec; + } +} + +int fuse_setattr_initialize(struct fuse_bpf_args *fa, struct fuse_setattr_io *fsio, + struct dentry *dentry, struct iattr *attr, struct file *file) +{ + struct fuse_conn *fc = get_fuse_conn(dentry->d_inode); + + *fsio = (struct fuse_setattr_io) {0}; + iattr_to_fattr(fc, attr, &fsio->fsi, true); + + *fa = (struct fuse_bpf_args) { + .opcode = FUSE_SETATTR, + .nodeid = get_node_id(dentry->d_inode), + .in_numargs = 1, + .in_args[0].size = sizeof(fsio->fsi), + .in_args[0].value = &fsio->fsi, + .out_numargs = 1, + .out_args[0].size = sizeof(fsio->fao), + .out_args[0].value = &fsio->fao, + }; + + return 0; +} + +int fuse_setattr_backing(struct fuse_bpf_args *fa, + struct dentry *dentry, struct iattr *attr, struct file *file) +{ + struct fuse_conn *fc = get_fuse_conn(dentry->d_inode); + const struct fuse_setattr_in *fsi = fa->in_args[0].value; + struct iattr new_attr = {0}; + struct path *backing_path = &get_fuse_dentry(dentry)->backing_path; + int res; + + fattr_to_iattr(fc, fsi, &new_attr); + /* TODO: Some info doesn't get saved by the attr->fattr->attr transition + * When we actually allow the bpf to change these, we may have to consider + * the extra flags more, or pass more info into the bpf. Until then we can + * keep everything except for ATTR_FILE, since we'd need a file on the + * lower fs. For what it's worth, neither f2fs nor ext4 make use of that + * even if it is present. + */ + new_attr.ia_valid = attr->ia_valid & ~ATTR_FILE; + inode_lock(d_inode(backing_path->dentry)); + res = notify_change(backing_path->dentry, &new_attr, NULL); + inode_unlock(d_inode(backing_path->dentry)); + + if (res == 0 && (new_attr.ia_valid & ATTR_SIZE)) + i_size_write(dentry->d_inode, new_attr.ia_size); + return res; +} + +void *fuse_setattr_finalize(struct fuse_bpf_args *fa, + struct dentry *dentry, struct iattr *attr, struct file *file) +{ + return NULL; +} + +int fuse_statfs_initialize( + struct fuse_bpf_args *fa, struct fuse_statfs_out *fso, + struct dentry *dentry, struct kstatfs *buf) +{ + *fso = (struct fuse_statfs_out) {0}; + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(d_inode(dentry)), + .opcode = FUSE_STATFS, + .out_numargs = 1, + .out_numargs = 1, + .out_args[0].size = sizeof(fso), + .out_args[0].value = fso, + }; + + return 0; +} + +int fuse_statfs_backing( + struct fuse_bpf_args *fa, + struct dentry *dentry, struct kstatfs *buf) +{ + int err = 0; + struct path backing_path; + struct fuse_statfs_out *fso = fa->out_args[0].value; + + get_fuse_backing_path(dentry, &backing_path); + if (!backing_path.dentry) + return -EBADF; + err = vfs_statfs(&backing_path, buf); + path_put(&backing_path); + buf->f_type = FUSE_SUPER_MAGIC; + + //TODO Provide postfilter opportunity to modify + if (!err) + convert_statfs_to_fuse(&fso->st, buf); + + return err; +} + +void *fuse_statfs_finalize( + struct fuse_bpf_args *fa, + struct dentry *dentry, struct kstatfs *buf) +{ + struct fuse_statfs_out *fso = fa->out_args[0].value; + + if (!fa->error_in) + convert_fuse_statfs(buf, &fso->st); + return NULL; +} + +int fuse_get_link_initialize(struct fuse_bpf_args *fa, struct fuse_dummy_io *unused, + struct inode *inode, struct dentry *dentry, + struct delayed_call *callback, const char **out) +{ + /* + * TODO + * If we want to handle changing these things, we'll need to copy + * the lower fs's data into our own buffer, and provide our own callback + * to free that buffer. + * + * Pre could change the name we're looking at + * postfilter can change the name we return + * + * We ought to only make that buffer if it's been requested, so leaving + * this unimplemented for the moment + */ + *fa = (struct fuse_bpf_args) { + .opcode = FUSE_READLINK, + .nodeid = get_node_id(inode), + .in_numargs = 1, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = dentry->d_name.len + 1, + .value = dentry->d_name.name, + }, + /* + * .out_argvar = 1, + * .out_numargs = 1, + * .out_args[0].size = , + * .out_args[0].value = , + */ + }; + + return 0; +} + +int fuse_get_link_backing(struct fuse_bpf_args *fa, + struct inode *inode, struct dentry *dentry, + struct delayed_call *callback, const char **out) +{ + struct path backing_path; + + if (!dentry) { + *out = ERR_PTR(-ECHILD); + return PTR_ERR(*out); + } + + get_fuse_backing_path(dentry, &backing_path); + if (!backing_path.dentry) { + *out = ERR_PTR(-ECHILD); + return PTR_ERR(*out); + } + + /* + * TODO: If we want to do our own thing, copy the data and then call the + * callback + */ + *out = vfs_get_link(backing_path.dentry, callback); + + path_put(&backing_path); + return 0; +} + +void *fuse_get_link_finalize(struct fuse_bpf_args *fa, + struct inode *inode, struct dentry *dentry, + struct delayed_call *callback, const char **out) +{ + return NULL; +} + +int fuse_symlink_initialize( + struct fuse_bpf_args *fa, struct fuse_dummy_io *unused, + struct inode *dir, struct dentry *entry, const char *link, int len) +{ + *fa = (struct fuse_bpf_args) { + .nodeid = get_node_id(dir), + .opcode = FUSE_SYMLINK, + .in_numargs = 2, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = entry->d_name.len + 1, + .value = entry->d_name.name, + }, + .in_args[1] = (struct fuse_bpf_in_arg) { + .size = len, + .value = link, + }, + }; + + return 0; +} + +int fuse_symlink_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, const char *link, int len) +{ + int err = 0; + struct inode *backing_inode = get_fuse_inode(dir)->backing_inode; + struct path backing_path = {}; + struct inode *inode = NULL; + + //TODO Actually deal with changing the backing entry in symlink + get_fuse_backing_path(entry, &backing_path); + if (!backing_path.dentry) + return -EBADF; + + inode_lock_nested(backing_inode, I_MUTEX_PARENT); + err = vfs_symlink(backing_inode, backing_path.dentry, link); + inode_unlock(backing_inode); + if (err) + goto out; + if (d_really_is_negative(backing_path.dentry) || + unlikely(d_unhashed(backing_path.dentry))) { + err = -EINVAL; + /** + * TODO: overlayfs responds to this situation with a + * lookupOneLen. Should we do that too? + */ + goto out; + } + inode = fuse_iget_backing(dir->i_sb, backing_inode); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + goto out; + } + d_instantiate(entry, inode); +out: + path_put(&backing_path); + return err; +} + +void *fuse_symlink_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, const char *link, int len) +{ + return NULL; +} + +int fuse_readdir_initialize(struct fuse_bpf_args *fa, struct fuse_read_io *frio, + struct file *file, struct dir_context *ctx, + bool *force_again, bool *allow_force, bool is_continued) +{ + struct fuse_file *ff = file->private_data; + u8 *page = (u8 *)__get_free_page(GFP_KERNEL); + + if (!page) + return -ENOMEM; + + *fa = (struct fuse_bpf_args) { + .nodeid = ff->nodeid, + .opcode = FUSE_READDIR, + .in_numargs = 1, + .flags = FUSE_BPF_OUT_ARGVAR, + .out_numargs = 2, + .in_args[0] = (struct fuse_bpf_in_arg) { + .size = sizeof(frio->fri), + .value = &frio->fri, + }, + .out_args[0] = (struct fuse_bpf_arg) { + .size = sizeof(frio->fro), + .value = &frio->fro, + }, + .out_args[1] = (struct fuse_bpf_arg) { + .size = PAGE_SIZE, + .value = page, + }, + }; + + frio->fri = (struct fuse_read_in) { + .fh = ff->fh, + .offset = ctx->pos, + .size = PAGE_SIZE, + }; + frio->fro = (struct fuse_read_out) { + .again = 0, + .offset = 0, + }; + *force_again = false; + *allow_force = true; + return 0; +} + +struct extfuse_ctx { + struct dir_context ctx; + u8 *addr; + size_t offset; +}; + +static int filldir(struct dir_context *ctx, const char *name, int namelen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct extfuse_ctx *ec = container_of(ctx, struct extfuse_ctx, ctx); + struct fuse_dirent *fd = (struct fuse_dirent *) (ec->addr + ec->offset); + + if (ec->offset + sizeof(struct fuse_dirent) + namelen > PAGE_SIZE) + return -ENOMEM; + + *fd = (struct fuse_dirent) { + .ino = ino, + .off = offset, + .namelen = namelen, + .type = d_type, + }; + + memcpy(fd->name, name, namelen); + ec->offset += FUSE_DIRENT_SIZE(fd); + + return 0; +} + +static int parse_dirfile(char *buf, size_t nbytes, struct dir_context *ctx) +{ + while (nbytes >= FUSE_NAME_OFFSET) { + struct fuse_dirent *dirent = (struct fuse_dirent *) buf; + size_t reclen = FUSE_DIRENT_SIZE(dirent); + + if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) + return -EIO; + if (reclen > nbytes) + break; + if (memchr(dirent->name, '/', dirent->namelen) != NULL) + return -EIO; + + ctx->pos = dirent->off; + if (!dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino, + dirent->type)) + break; + + buf += reclen; + nbytes -= reclen; + } + + return 0; +} + + +int fuse_readdir_backing(struct fuse_bpf_args *fa, + struct file *file, struct dir_context *ctx, + bool *force_again, bool *allow_force, bool is_continued) +{ + struct fuse_file *ff = file->private_data; + struct file *backing_dir = ff->backing_file; + struct fuse_read_out *fro = fa->out_args[0].value; + struct extfuse_ctx ec; + int err; + + ec = (struct extfuse_ctx) { + .ctx.actor = filldir, + .ctx.pos = ctx->pos, + .addr = fa->out_args[1].value, + }; + + if (!ec.addr) + return -ENOMEM; + + if (!is_continued) + backing_dir->f_pos = file->f_pos; + + err = iterate_dir(backing_dir, &ec.ctx); + if (ec.offset == 0) + *allow_force = false; + fa->out_args[1].size = ec.offset; + + fro->offset = ec.ctx.pos; + fro->again = false; + return err; +} + +void *fuse_readdir_finalize(struct fuse_bpf_args *fa, + struct file *file, struct dir_context *ctx, + bool *force_again, bool *allow_force, bool is_continued) +{ + struct fuse_read_out *fro = fa->out_args[0].value; + struct fuse_file *ff = file->private_data; + struct file *backing_dir = ff->backing_file; + int err = 0; + + err = parse_dirfile(fa->out_args[1].value, fa->out_args[1].size, ctx); + *force_again = !!fro->again; + if (*force_again && !*allow_force) + err = -EINVAL; + + ctx->pos = fro->offset; + backing_dir->f_pos = fro->offset; + + free_page((unsigned long) fa->out_args[1].value); + return ERR_PTR(err); +} + +int fuse_access_initialize(struct fuse_bpf_args *fa, struct fuse_access_in *fai, + struct inode *inode, int mask) +{ + *fai = (struct fuse_access_in) { + .mask = mask, + }; + + *fa = (struct fuse_bpf_args) { + .opcode = FUSE_ACCESS, + .nodeid = get_node_id(inode), + .in_numargs = 1, + .in_args[0].size = sizeof(*fai), + .in_args[0].value = fai, + }; + + return 0; +} + +int fuse_access_backing(struct fuse_bpf_args *fa, struct inode *inode, int mask) +{ + struct fuse_inode *fi = get_fuse_inode(inode); + const struct fuse_access_in *fai = fa->in_args[0].value; + + return inode_permission(/* For mainline: init_user_ns,*/ + fi->backing_inode, fai->mask); +} + +void *fuse_access_finalize(struct fuse_bpf_args *fa, struct inode *inode, int mask) +{ + return NULL; +} + +int __init fuse_bpf_init(void) +{ + fuse_bpf_aio_request_cachep = kmem_cache_create("fuse_bpf_aio_req", + sizeof(struct fuse_bpf_aio_req), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!fuse_bpf_aio_request_cachep) + return -ENOMEM; + + return 0; +} + +void __exit fuse_bpf_cleanup(void) +{ + kmem_cache_destroy(fuse_bpf_aio_request_cachep); +} + +ssize_t fuse_bpf_simple_request(struct fuse_mount *fm, struct fuse_bpf_args *bpf_args) +{ + int i; + ssize_t res; + struct fuse_args args = { + .nodeid = bpf_args->nodeid, + .opcode = bpf_args->opcode, + .error_in = bpf_args->error_in, + .in_numargs = bpf_args->in_numargs, + .out_numargs = bpf_args->out_numargs, + .force = !!(bpf_args->flags & FUSE_BPF_FORCE), + .out_argvar = !!(bpf_args->flags & FUSE_BPF_OUT_ARGVAR), + }; + + for (i = 0; i < args.in_numargs; ++i) + args.in_args[i] = (struct fuse_in_arg) { + .size = bpf_args->in_args[i].size, + .value = bpf_args->in_args[i].value, + }; + for (i = 0; i < args.out_numargs; ++i) + args.out_args[i] = (struct fuse_arg) { + .size = bpf_args->out_args[i].size, + .value = bpf_args->out_args[i].value, + }; + + res = fuse_simple_request(fm, &args); + + *bpf_args = (struct fuse_bpf_args) { + .nodeid = args.nodeid, + .opcode = args.opcode, + .error_in = args.error_in, + .in_numargs = args.in_numargs, + .out_numargs = args.out_numargs, + }; + if (args.force) + bpf_args->flags |= FUSE_BPF_FORCE; + if (args.out_argvar) + bpf_args->flags |= FUSE_BPF_OUT_ARGVAR; + for (i = 0; i < args.in_numargs; ++i) + bpf_args->in_args[i] = (struct fuse_bpf_in_arg) { + .size = args.in_args[i].size, + .value = args.in_args[i].value, + }; + for (i = 0; i < args.out_numargs; ++i) + bpf_args->out_args[i] = (struct fuse_bpf_arg) { + .size = args.out_args[i].size, + .value = args.out_args[i].value, + }; + return res; +} diff --git a/fs/fuse/control.c b/fs/fuse/control.c index cc7e94d73c6cca0883166b12d2a0f90972ebf6a9..a4b16bc59f7a31903ac6abbdf8a8b9cfce14ba9a 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -395,7 +395,7 @@ int __init fuse_ctl_init(void) return register_filesystem(&fuse_ctl_fs_type); } -void __exit fuse_ctl_cleanup(void) +void fuse_ctl_cleanup(void) { unregister_filesystem(&fuse_ctl_fs_type); } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 4089515fddbdc6130b9f0ed46fd24cbb629beca7..c13be2fa891f5fa894b783bb4616fb5bf4fd9439 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -208,10 +208,13 @@ static unsigned int fuse_req_hash(u64 unique) /** * A new request is available, wake fiq->waitq */ -static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq) +static void fuse_dev_wake_and_unlock(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock) { - wake_up(&fiq->waitq); + if (sync) + wake_up_sync(&fiq->waitq); + else + wake_up(&fiq->waitq); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); spin_unlock(&fiq->lock); } @@ -224,14 +227,14 @@ const struct fuse_iqueue_ops fuse_dev_fiq_ops = { EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); static void queue_request_and_unlock(struct fuse_iqueue *fiq, - struct fuse_req *req) + struct fuse_req *req, bool sync) __releases(fiq->lock) { req->in.h.len = sizeof(struct fuse_in_header) + fuse_len_args(req->args->in_numargs, (struct fuse_arg *) req->args->in_args); list_add_tail(&req->list, &fiq->pending); - fiq->ops->wake_pending_and_unlock(fiq); + fiq->ops->wake_pending_and_unlock(fiq, sync); } void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, @@ -239,6 +242,11 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, { struct fuse_iqueue *fiq = &fc->iq; + if (nodeid == 0) { + kfree(forget); + return; + } + forget->forget_one.nodeid = nodeid; forget->forget_one.nlookup = nlookup; @@ -246,7 +254,7 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, if (fiq->connected) { fiq->forget_list_tail->next = forget; fiq->forget_list_tail = forget; - fiq->ops->wake_forget_and_unlock(fiq); + fiq->ops->wake_forget_and_unlock(fiq, false); } else { kfree(forget); spin_unlock(&fiq->lock); @@ -266,7 +274,7 @@ static void flush_bg_queue(struct fuse_conn *fc) fc->active_background++; spin_lock(&fiq->lock); req->in.h.unique = fuse_get_unique(fiq); - queue_request_and_unlock(fiq, req); + queue_request_and_unlock(fiq, req, false); } } @@ -359,7 +367,7 @@ static int queue_interrupt(struct fuse_req *req) spin_unlock(&fiq->lock); return 0; } - fiq->ops->wake_interrupt_and_unlock(fiq); + fiq->ops->wake_interrupt_and_unlock(fiq, false); } else { spin_unlock(&fiq->lock); } @@ -426,7 +434,7 @@ static void __fuse_request_send(struct fuse_req *req) /* acquire extra reference, since request is still needed after fuse_request_end() */ __fuse_get_request(req); - queue_request_and_unlock(fiq, req); + queue_request_and_unlock(fiq, req, true); request_wait_answer(req); /* Pairs with smp_wmb() in fuse_request_end() */ @@ -480,6 +488,7 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) { req->in.h.opcode = args->opcode; req->in.h.nodeid = args->nodeid; + req->in.h.error_in = args->error_in; req->args = args; if (args->end) __set_bit(FR_ASYNC, &req->flags); @@ -601,7 +610,7 @@ static int fuse_simple_notify_reply(struct fuse_mount *fm, spin_lock(&fiq->lock); if (fiq->connected) { - queue_request_and_unlock(fiq, req); + queue_request_and_unlock(fiq, req, false); } else { err = -ENODEV; spin_unlock(&fiq->lock); @@ -684,11 +693,7 @@ static void fuse_copy_finish(struct fuse_copy_state *cs) flush_dcache_page(cs->pg); set_page_dirty_lock(cs->pg); } - /* - * The page could be GUP page(see iov_iter_get_pages in - * fuse_copy_fill) so use put_user_page to release it. - */ - put_user_page(cs->pg); + put_page(cs->pg); } cs->pg = NULL; } @@ -790,7 +795,8 @@ static int fuse_check_page(struct page *page) 1 << PG_active | 1 << PG_workingset | 1 << PG_reclaim | - 1 << PG_waiters))) { + 1 << PG_waiters | + LRU_GEN_MASK | LRU_REFS_MASK))) { dump_page(page, "fuse: trying to steal weird page"); return 1; } @@ -861,6 +867,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep) if (!(buf->flags & PIPE_BUF_FLAG_LRU)) lru_cache_add(newpage); + /* + * Release while we have extra ref on stolen page. Otherwise + * anon_pipe_buf_release() might think the page can be reused. + */ + pipe_buf_release(cs->pipe, buf); + err = 0; spin_lock(&cs->req->waitq.lock); if (test_bit(FR_ABORTED, &cs->req->flags)) @@ -944,7 +956,17 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, while (count) { if (cs->write && cs->pipebufs && page) { - return fuse_ref_page(cs, page, offset, count); + /* + * Can't control lifetime of pipe buffers, so always + * copy user pages. + */ + if (cs->req->args->user_pages) { + err = fuse_copy_fill(cs); + if (err) + return err; + } else { + return fuse_ref_page(cs, page, offset, count); + } } else if (!cs->len) { if (cs->move_pages && page && offset == 0 && count == PAGE_SIZE) { @@ -1931,6 +1953,19 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, kern_path(path, 0, req->args->canonical_path); } + if (!err && (req->in.h.opcode == FUSE_LOOKUP || + req->in.h.opcode == (FUSE_LOOKUP | FUSE_POSTFILTER)) && + req->args->out_args[1].size == sizeof(struct fuse_entry_bpf_out)) { + struct fuse_entry_bpf_out *febo = (struct fuse_entry_bpf_out *) + req->args->out_args[1].value; + struct fuse_entry_bpf *feb = container_of(febo, struct fuse_entry_bpf, out); + + if (febo->backing_action == FUSE_ACTION_REPLACE) + feb->backing_file = fget(febo->backing_fd); + if (febo->bpf_action == FUSE_ACTION_REPLACE) + feb->bpf_file = fget(febo->bpf_fd); + } + spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) @@ -2048,8 +2083,12 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe, pipe_lock(pipe); out_free: - for (idx = 0; idx < nbuf; idx++) - pipe_buf_release(pipe, &bufs[idx]); + for (idx = 0; idx < nbuf; idx++) { + struct pipe_buffer *buf = &bufs[idx]; + + if (buf->ops) + pipe_buf_release(pipe, buf); + } pipe_unlock(pipe); kvfree(bufs); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b5439324685beeba41a6030c9defdd6a498b5db1..541fad4362e3a8e427e9740ecc7c4535c7366ab2 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -8,8 +8,10 @@ #include "fuse_i.h" +#include #include #include +#include #include #include #include @@ -18,6 +20,8 @@ #include #include +#include "../internal.h" + static void fuse_advise_use_readdirplus(struct inode *dir) { struct fuse_inode *fi = get_fuse_inode(dir); @@ -25,7 +29,7 @@ static void fuse_advise_use_readdirplus(struct inode *dir) set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state); } -#if BITS_PER_LONG >= 64 +#if BITS_PER_LONG >= 64 && !defined(CONFIG_FUSE_BPF) static inline void __fuse_dentry_settime(struct dentry *entry, u64 time) { entry->d_fsdata = (void *) time; @@ -37,19 +41,15 @@ static inline u64 fuse_dentry_time(const struct dentry *entry) } #else -union fuse_dentry { - u64 time; - struct rcu_head rcu; -}; static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time) { - ((union fuse_dentry *) dentry->d_fsdata)->time = time; + ((struct fuse_dentry *) dentry->d_fsdata)->time = time; } static inline u64 fuse_dentry_time(const struct dentry *entry) { - return ((union fuse_dentry *) entry->d_fsdata)->time; + return ((struct fuse_dentry *) entry->d_fsdata)->time; } #endif @@ -74,26 +74,16 @@ static void fuse_dentry_settime(struct dentry *dentry, u64 time) __fuse_dentry_settime(dentry, time); } -/* - * FUSE caches dentries and attributes with separate timeout. The - * time in jiffies until the dentry/attributes are valid is stored in - * dentry->d_fsdata and fuse_inode->i_time respectively. - */ - -/* - * Calculate the time in jiffies until a dentry/attributes are valid - */ -static u64 time_to_jiffies(u64 sec, u32 nsec) +void fuse_init_dentry_root(struct dentry *root, struct file *backing_dir) { - if (sec || nsec) { - struct timespec64 ts = { - sec, - min_t(u32, nsec, NSEC_PER_SEC - 1) - }; +#ifdef CONFIG_FUSE_BPF + struct fuse_dentry *fuse_dentry = root->d_fsdata; - return get_jiffies_64() + timespec64_to_jiffies(&ts); - } else - return 0; + if (backing_dir) { + fuse_dentry->backing_path = backing_dir->f_path; + path_get(&fuse_dentry->backing_path); + } +#endif } /* @@ -106,11 +96,6 @@ void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o) time_to_jiffies(o->entry_valid, o->entry_valid_nsec)); } -static u64 attr_timeout(struct fuse_attr_out *o) -{ - return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); -} - u64 entry_attr_timeout(struct fuse_entry_out *o) { return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); @@ -171,7 +156,8 @@ static void fuse_invalidate_entry(struct dentry *entry) static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, u64 nodeid, const struct qstr *name, - struct fuse_entry_out *outarg) + struct fuse_entry_out *outarg, + struct fuse_entry_bpf_out *bpf_outarg) { memset(outarg, 0, sizeof(struct fuse_entry_out)); args->opcode = FUSE_LOOKUP; @@ -179,9 +165,12 @@ static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args, args->in_numargs = 1; args->in_args[0].size = name->len + 1; args->in_args[0].value = name->name; - args->out_numargs = 1; + args->out_argvar = true; + args->out_numargs = 2; args->out_args[0].size = sizeof(struct fuse_entry_out); args->out_args[0].value = outarg; + args->out_args[1].size = sizeof(struct fuse_entry_bpf_out); + args->out_args[1].value = bpf_outarg; } /* @@ -207,6 +196,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) || (flags & LOOKUP_REVAL)) { struct fuse_entry_out outarg; + struct fuse_entry_bpf bpf_arg; FUSE_ARGS(args); struct fuse_forget_link *forget; u64 attr_version; @@ -218,7 +208,20 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) ret = -ECHILD; if (flags & LOOKUP_RCU) goto out; - +#ifdef CONFIG_FUSE_BPF + { + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(entry->d_parent->d_inode, + struct fuse_lookup_io, + fuse_lookup_initialize, + fuse_revalidate_backing, + fuse_revalidate_finalize, + d_inode(entry->d_parent), entry, flags); + if (fer.ret) + return PTR_ERR(fer.result); + } +#endif fm = get_fuse_mount(inode); forget = fuse_alloc_forget(); @@ -229,10 +232,31 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) attr_version = fuse_get_attr_version(fm->fc); parent = dget_parent(entry); + fuse_lookup_init(fm->fc, &args, get_node_id(d_inode(parent)), - &entry->d_name, &outarg); + &entry->d_name, &outarg, &bpf_arg.out); ret = fuse_simple_request(fm, &args); dput(parent); + + /* + * TODO This doesn't seem sufficient, though we don't plan to + * change the backing file ever, so not sure what is correct + * here yet, especially as we can't return an error to user + */ + if (bpf_arg.out.backing_action == FUSE_ACTION_REPLACE) { + struct file *file = bpf_arg.backing_file; + + if (file && !IS_ERR(file)) + fput(file); + } + + if (bpf_arg.out.bpf_action == FUSE_ACTION_REPLACE) { + struct file *file = bpf_arg.bpf_file; + + if (file && !IS_ERR(file)) + fput(file); + } + /* Zero nodeid is same as -ENOENT */ if (!ret && !outarg.nodeid) ret = -ENOENT; @@ -280,17 +304,20 @@ invalid: goto out; } -#if BITS_PER_LONG < 64 +#if BITS_PER_LONG < 64 || defined(CONFIG_FUSE_BPF) static int fuse_dentry_init(struct dentry *dentry) { - dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry), + dentry->d_fsdata = kzalloc(sizeof(struct fuse_dentry), GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE); return dentry->d_fsdata ? 0 : -ENOMEM; } static void fuse_dentry_release(struct dentry *dentry) { - union fuse_dentry *fd = dentry->d_fsdata; + struct fuse_dentry *fd = dentry->d_fsdata; + + if (fd && fd->backing_path.dentry) + path_put(&fd->backing_path); kfree_rcu(fd, rcu); } @@ -405,6 +432,18 @@ static void fuse_dentry_canonical_path(const struct path *path, char *path_name; int err; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_dummy_io, + fuse_canonical_path_initialize, + fuse_canonical_path_backing, + fuse_canonical_path_finalize, path, + canonical_path); + if (fer.ret) + return; +#endif + path_name = (char *)get_zeroed_page(GFP_KERNEL); if (!path_name) goto default_path; @@ -431,7 +470,7 @@ default_path: const struct dentry_operations fuse_dentry_operations = { .d_revalidate = fuse_dentry_revalidate, .d_delete = fuse_dentry_delete, -#if BITS_PER_LONG < 64 +#if BITS_PER_LONG < 64 || defined(CONFIG_FUSE_BPF) .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, #endif @@ -440,7 +479,7 @@ const struct dentry_operations fuse_dentry_operations = { }; const struct dentry_operations fuse_root_dentry_operations = { -#if BITS_PER_LONG < 64 +#if BITS_PER_LONG < 64 || defined(CONFIG_FUSE_BPF) .d_init = fuse_dentry_init, .d_release = fuse_dentry_release, #endif @@ -459,10 +498,13 @@ bool fuse_invalid_attr(struct fuse_attr *attr) } int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, - struct fuse_entry_out *outarg, struct inode **inode) + struct fuse_entry_out *outarg, + struct dentry *entry, + struct inode **inode) { struct fuse_mount *fm = get_fuse_mount_super(sb); FUSE_ARGS(args); + struct fuse_entry_bpf bpf_arg = {0}; struct fuse_forget_link *forget; u64 attr_version; int err; @@ -480,23 +522,77 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name attr_version = fuse_get_attr_version(fm->fc); - fuse_lookup_init(fm->fc, &args, nodeid, name, outarg); + fuse_lookup_init(fm->fc, &args, nodeid, name, outarg, &bpf_arg.out); err = fuse_simple_request(fm, &args); - /* Zero nodeid is same as -ENOENT, but with valid timeout */ - if (err || !outarg->nodeid) - goto out_put_forget; - err = -EIO; - if (!outarg->nodeid) - goto out_put_forget; - if (fuse_invalid_attr(&outarg->attr)) - goto out_put_forget; - - *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, - &outarg->attr, entry_attr_timeout(outarg), - attr_version); +#ifdef CONFIG_FUSE_BPF + if (err == sizeof(bpf_arg.out)) { + /* TODO Make sure this handles invalid handles */ + /* TODO Do we need the same code in revalidate */ + struct file *backing_file; + struct inode *backing_inode; + + err = -ENOENT; + if (!entry) + goto out_queue_forget; + + err = -EINVAL; + if (bpf_arg.out.backing_action != FUSE_ACTION_REPLACE) + goto out_queue_forget; + + backing_file = bpf_arg.backing_file; + if (!backing_file || IS_ERR(backing_file)) + goto out_queue_forget; + + backing_inode = backing_file->f_inode; + *inode = fuse_iget_backing(sb, backing_inode); + if (!*inode) + goto bpf_arg_out; + + if (bpf_arg.out.bpf_action == FUSE_ACTION_REPLACE) { + struct file *bpf_file = bpf_arg.bpf_file; + struct bpf_prog *bpf_prog = ERR_PTR(-EINVAL); + + if (bpf_file && !IS_ERR(bpf_file)) + bpf_prog = fuse_get_bpf_prog(bpf_file);; + + if (IS_ERR(bpf_prog)) { + iput(*inode); + *inode = NULL; + err = PTR_ERR(bpf_prog); + goto bpf_arg_out; + } + get_fuse_inode(*inode)->bpf = bpf_prog; + } + + get_fuse_dentry(entry)->backing_path = backing_file->f_path; + path_get(&get_fuse_dentry(entry)->backing_path); + +bpf_arg_out: + fput(backing_file); + } else +#endif + { + /* Zero nodeid is same as -ENOENT, but with valid timeout */ + if (err || !outarg->nodeid) + goto out_put_forget; + + err = -EIO; + if (!outarg->nodeid) + goto out_put_forget; + if (fuse_invalid_attr(&outarg->attr)) + goto out_put_forget; + + *inode = fuse_iget(sb, outarg->nodeid, outarg->generation, + &outarg->attr, entry_attr_timeout(outarg), + attr_version); + } + err = -ENOMEM; - if (!*inode) { +#ifdef CONFIG_FUSE_BPF +out_queue_forget: +#endif + if (!*inode && outarg->nodeid) { fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1); goto out; } @@ -518,12 +614,23 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, bool outarg_valid = true; bool locked; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(dir, struct fuse_lookup_io, + fuse_lookup_initialize, fuse_lookup_backing, + fuse_lookup_finalize, + dir, entry, flags); + if (fer.ret) + return fer.result; +#endif + if (fuse_is_bad(dir)) return ERR_PTR(-EIO); locked = fuse_lock_inode(dir); err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name, - &outarg, &inode); + &outarg, entry, &inode); fuse_unlock_inode(dir, locked); if (err == -ENOENT) { outarg_valid = false; @@ -582,6 +689,20 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, /* Userspace expects S_IFREG in create mode */ BUG_ON((mode & S_IFMT) != S_IFREG); +#ifdef CONFIG_FUSE_BPF + { + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(dir, struct fuse_create_open_io, + fuse_create_open_initialize, + fuse_create_open_backing, + fuse_create_open_finalize, + dir, entry, file, flags, mode); + if (fer.ret) + return PTR_ERR(fer.result); + } +#endif + forget = fuse_alloc_forget(); err = -ENOMEM; if (!forget) @@ -774,6 +895,17 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(dir, struct fuse_mknod_in, + fuse_mknod_initialize, fuse_mknod_backing, + fuse_mknod_finalize, + dir, entry, mode, rdev); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (!fm->fc->dont_mask) mode &= ~current_umask(); @@ -802,6 +934,17 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode) struct fuse_mount *fm = get_fuse_mount(dir); FUSE_ARGS(args); +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(dir, struct fuse_mkdir_in, + fuse_mkdir_initialize, fuse_mkdir_backing, + fuse_mkdir_finalize, + dir, entry, mode); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (!fm->fc->dont_mask) mode &= ~current_umask(); @@ -824,6 +967,17 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, unsigned len = strlen(link) + 1; FUSE_ARGS(args); +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(dir, struct fuse_dummy_io, + fuse_symlink_initialize, fuse_symlink_backing, + fuse_symlink_finalize, + dir, entry, link, len); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + args.opcode = FUSE_SYMLINK; args.in_numargs = 2; args.in_args[0].size = entry->d_name.len + 1; @@ -833,11 +987,19 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry, return create_new_entry(fm, &args, dir, entry, S_IFLNK); } +void fuse_flush_time_update(struct inode *inode) +{ + int err = sync_inode_metadata(inode, 1); + + mapping_set_error(inode->i_mapping, err); +} + void fuse_update_ctime(struct inode *inode) { if (!IS_NOCMTIME(inode)) { inode->i_ctime = current_time(inode); mark_inode_dirty_sync(inode); + fuse_flush_time_update(inode); } } @@ -850,6 +1012,20 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) if (fuse_is_bad(dir)) return -EIO; +#ifdef CONFIG_FUSE_BPF + { + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(dir, struct fuse_dummy_io, + fuse_unlink_initialize, + fuse_unlink_backing, + fuse_unlink_finalize, + dir, entry); + if (fer.ret) + return PTR_ERR(fer.result); + } +#endif + args.opcode = FUSE_UNLINK; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -889,6 +1065,20 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry) if (fuse_is_bad(dir)) return -EIO; +#ifdef CONFIG_FUSE_BPF + { + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(dir, struct fuse_dummy_io, + fuse_rmdir_initialize, + fuse_rmdir_backing, + fuse_rmdir_finalize, + dir, entry); + if (fer.ret) + return PTR_ERR(fer.result); + } +#endif + args.opcode = FUSE_RMDIR; args.nodeid = get_node_id(dir); args.in_numargs = 1; @@ -974,6 +1164,18 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, return -EINVAL; if (flags) { +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(olddir, struct fuse_rename2_in, + fuse_rename2_initialize, fuse_rename2_backing, + fuse_rename2_finalize, + olddir, oldent, newdir, newent, flags); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + + /* TODO: how should this go with bpfs involved? */ if (fc->no_rename2 || fc->minor < 23) return -EINVAL; @@ -985,6 +1187,17 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent, err = -EINVAL; } } else { +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(olddir, struct fuse_rename_in, + fuse_rename_initialize, fuse_rename_backing, + fuse_rename_finalize, + olddir, oldent, newdir, newent); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + err = fuse_rename_common(olddir, oldent, newdir, newent, 0, FUSE_RENAME, sizeof(struct fuse_rename_in)); @@ -1002,6 +1215,16 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, struct fuse_mount *fm = get_fuse_mount(inode); FUSE_ARGS(args); +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_link_in, fuse_link_initialize, + fuse_link_backing, fuse_link_finalize, entry, + newdir, newent); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + memset(&inarg, 0, sizeof(inarg)); inarg.oldnodeid = get_node_id(inode); args.opcode = FUSE_LINK; @@ -1033,7 +1256,7 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, return err; } -static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, +void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, struct kstat *stat) { unsigned int blkbits; @@ -1102,23 +1325,13 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat, args.out_args[0].size = sizeof(outarg); args.out_args[0].value = &outarg; err = fuse_simple_request(fm, &args); - if (!err) { - if (fuse_invalid_attr(&outarg.attr) || - inode_wrong_type(inode, outarg.attr.mode)) { - fuse_make_bad(inode); - err = -EIO; - } else { - fuse_change_attributes(inode, &outarg.attr, - attr_timeout(&outarg), - attr_version); - if (stat) - fuse_fillattr(inode, &outarg.attr, stat); - } - } + if (!err) + err = finalize_attr(inode, &outarg, attr_version, stat); return err; } static int fuse_update_get_attr(struct inode *inode, struct file *file, + const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -1126,6 +1339,17 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, int err = 0; bool sync; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_getattr_io, + fuse_getattr_initialize, fuse_getattr_backing, + fuse_getattr_finalize, + path->dentry, stat, request_mask, flags); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (flags & AT_STATX_FORCE_SYNC) sync = true; else if (flags & AT_STATX_DONT_SYNC) @@ -1150,7 +1374,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file, int fuse_update_attributes(struct inode *inode, struct file *file) { /* Do *not* need to get atime for internal purposes */ - return fuse_update_get_attr(inode, file, NULL, + return fuse_update_get_attr(inode, file, &file->f_path, NULL, STATX_BASIC_STATS & ~STATX_ATIME, 0); } @@ -1166,7 +1390,7 @@ int fuse_reverse_inval_entry(struct fuse_conn *fc, u64 parent_nodeid, if (!parent) return -ENOENT; - inode_lock(parent); + inode_lock_nested(parent, I_MUTEX_PARENT); if (!S_ISDIR(parent->i_mode)) goto unlock; @@ -1259,6 +1483,16 @@ static int fuse_access(struct inode *inode, int mask) struct fuse_access_in inarg; int err; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_access_in, + fuse_access_initialize, fuse_access_backing, + fuse_access_finalize, inode, mask); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + BUG_ON(mask & MAY_NOT_BLOCK); if (fm->fc->no_access) @@ -1306,6 +1540,10 @@ static int fuse_permission(struct inode *inode, int mask) struct fuse_conn *fc = get_fuse_conn(inode); bool refreshed = false; int err = 0; + struct fuse_inode *fi = get_fuse_inode(inode); +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; +#endif if (fuse_is_bad(inode)) return -EIO; @@ -1313,12 +1551,19 @@ static int fuse_permission(struct inode *inode, int mask) if (!fuse_allow_current_process(fc)) return -EACCES; +#ifdef CONFIG_FUSE_BPF + fer = fuse_bpf_backing(inode, struct fuse_access_in, + fuse_access_initialize, fuse_access_backing, + fuse_access_finalize, inode, mask); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + /* * If attributes are needed, refresh them before proceeding */ if (fc->default_permissions || ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) { - struct fuse_inode *fi = get_fuse_inode(inode); u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID; if (perm_mask & READ_ONCE(fi->inval_mask) || @@ -1408,6 +1653,21 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode, if (fuse_is_bad(inode)) goto out_err; +#ifdef CONFIG_FUSE_BPF + { + struct fuse_err_ret fer; + const char *out = NULL; + + fer = fuse_bpf_backing(inode, struct fuse_dummy_io, + fuse_get_link_initialize, + fuse_get_link_backing, + fuse_get_link_finalize, + inode, dentry, callback, &out); + if (fer.ret) + return fer.result ?: out; + } +#endif + if (fc->cache_symlinks) return page_get_link(dentry, inode, callback); @@ -1441,8 +1701,18 @@ static int fuse_dir_open(struct inode *inode, struct file *file) static int fuse_dir_release(struct inode *inode, struct file *file) { - fuse_release_common(file, true); +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_release_in, + fuse_releasedir_initialize, fuse_release_backing, + fuse_release_finalize, + inode, file); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + fuse_release_common(file, true); return 0; } @@ -1456,6 +1726,19 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end, if (fuse_is_bad(inode)) return -EIO; +#ifdef CONFIG_FUSE_BPF + { + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_fsync_in, + fuse_dir_fsync_initialize, fuse_fsync_backing, + fuse_fsync_finalize, + file, start, end, datasync); + if (fer.ret) + return PTR_ERR(fer.result); + } +#endif + if (fc->no_fsyncdir) return 0; @@ -1494,58 +1777,6 @@ static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); } -static bool update_mtime(unsigned ivalid, bool trust_local_mtime) -{ - /* Always update if mtime is explicitly set */ - if (ivalid & ATTR_MTIME_SET) - return true; - - /* Or if kernel i_mtime is the official one */ - if (trust_local_mtime) - return true; - - /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ - if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) - return false; - - /* In all other cases update */ - return true; -} - -static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr, - struct fuse_setattr_in *arg, bool trust_local_cmtime) -{ - unsigned ivalid = iattr->ia_valid; - - if (ivalid & ATTR_MODE) - arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; - if (ivalid & ATTR_UID) - arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid); - if (ivalid & ATTR_GID) - arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid); - if (ivalid & ATTR_SIZE) - arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; - if (ivalid & ATTR_ATIME) { - arg->valid |= FATTR_ATIME; - arg->atime = iattr->ia_atime.tv_sec; - arg->atimensec = iattr->ia_atime.tv_nsec; - if (!(ivalid & ATTR_ATIME_SET)) - arg->valid |= FATTR_ATIME_NOW; - } - if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) { - arg->valid |= FATTR_MTIME; - arg->mtime = iattr->ia_mtime.tv_sec; - arg->mtimensec = iattr->ia_mtime.tv_nsec; - if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime) - arg->valid |= FATTR_MTIME_NOW; - } - if ((ivalid & ATTR_CTIME) && trust_local_cmtime) { - arg->valid |= FATTR_CTIME; - arg->ctime = iattr->ia_ctime.tv_sec; - arg->ctimensec = iattr->ia_ctime.tv_nsec; - } -} - /* * Prevent concurrent writepages on inode * @@ -1659,6 +1890,16 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr, bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode); bool fault_blocked = false; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_setattr_io, + fuse_setattr_initialize, fuse_setattr_backing, + fuse_setattr_finalize, dentry, attr, file); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (!fc->default_permissions) attr->ia_valid |= ATTR_FORCE; @@ -1822,11 +2063,22 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) * This should be done on write(), truncate() and chown(). */ if (!fc->handle_killpriv) { +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + /* * ia_mode calculation may have used stale i_mode. * Refresh and recalculate. */ - ret = fuse_do_getattr(inode, NULL, file); + fer = fuse_bpf_backing(inode, struct fuse_getattr_io, + fuse_getattr_initialize, fuse_getattr_backing, + fuse_getattr_finalize, + entry, NULL, 0, 0); + if (fer.ret) + ret = PTR_ERR(fer.result); + else +#endif + ret = fuse_do_getattr(inode, NULL, file); if (ret) return ret; @@ -1882,7 +2134,8 @@ static int fuse_getattr(const struct path *path, struct kstat *stat, return -EACCES; } - return fuse_update_get_attr(inode, NULL, stat, request_mask, flags); + return fuse_update_get_attr(inode, NULL, path, stat, request_mask, + flags); } static const struct inode_operations fuse_dir_inode_operations = { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 09610d29c21d577bda211586693773fb5d695a49..d492898b33f52454fbaed380200c45ce29572ab0 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -8,6 +8,7 @@ #include "fuse_i.h" +#include #include #include #include @@ -137,7 +138,11 @@ int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, struct fuse_file *ff; int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN; - ff = fuse_file_alloc(fm); + if (file->private_data) { + ff = file->private_data; + file->private_data = NULL; + } else + ff = fuse_file_alloc(fm); if (!ff) return -ENOMEM; @@ -236,6 +241,20 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) if (err) return err; +#ifdef CONFIG_FUSE_BPF + { + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_open_io, + fuse_open_initialize, + fuse_open_backing, + fuse_open_finalize, + inode, file, isdir); + if (fer.ret) + return PTR_ERR(fer.result); + } +#endif + if (is_wb_truncate || dax_truncate) { inode_lock(inode); fuse_set_nowrite(inode); @@ -334,6 +353,17 @@ static int fuse_release(struct inode *inode, struct file *file) { struct fuse_conn *fc = get_fuse_conn(inode); +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_release_in, + fuse_release_initialize, fuse_release_backing, + fuse_release_finalize, + inode, file); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + /* see fuse_vma_close() for !writeback_cache case */ if (fc->writeback_cache) write_inode_now(inode, 1); @@ -471,6 +501,17 @@ static int fuse_flush(struct file *file, fl_owner_t id) FUSE_ARGS(args); int err; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(file->f_inode, struct fuse_flush_in, + fuse_flush_initialize, fuse_flush_backing, + fuse_flush_finalize, + file, id); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (fuse_is_bad(inode)) return -EIO; @@ -543,6 +584,17 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end, struct fuse_conn *fc = get_fuse_conn(inode); int err; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_fsync_in, + fuse_fsync_initialize, fuse_fsync_backing, + fuse_fsync_finalize, + file, start, end, datasync); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (fuse_is_bad(inode)) return -EIO; @@ -1420,6 +1472,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii, (PAGE_SIZE - ret) & (PAGE_SIZE - 1); } + ap->args.user_pages = true; if (write) ap->args.in_pages = true; else @@ -1586,6 +1639,20 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) if (FUSE_IS_DAX(inode)) return fuse_dax_read_iter(iocb, to); +#ifdef CONFIG_FUSE_BPF + { + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_file_read_iter_io, + fuse_file_read_iter_initialize, + fuse_file_read_iter_backing, + fuse_file_read_iter_finalize, + iocb, to); + if (fer.ret) + return PTR_ERR(fer.result); + } +#endif + if (ff->passthrough.filp) return fuse_passthrough_read_iter(iocb, to); else if (!(ff->open_flags & FOPEN_DIRECT_IO)) @@ -1606,6 +1673,20 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (FUSE_IS_DAX(inode)) return fuse_dax_write_iter(iocb, from); +#ifdef CONFIG_FUSE_BPF + { + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_file_write_iter_io, + fuse_file_write_iter_initialize, + fuse_file_write_iter_backing, + fuse_file_write_iter_finalize, + iocb, from); + if (fer.ret) + return PTR_ERR(fer.result); + } +#endif + if (ff->passthrough.filp) return fuse_passthrough_write_iter(iocb, from); else if (!(ff->open_flags & FOPEN_DIRECT_IO)) @@ -1855,6 +1936,29 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc) struct fuse_file *ff; int err; + /** + * TODO - fully understand why this is necessary + * + * With fuse-bpf, fsstress fails if rename is enabled without this + * + * We are getting writes here on directory inodes, which do not have an + * initialized file list so crash. + * + * The question is why we are getting those writes + */ + if (!S_ISREG(inode->i_mode)) + return 0; + /* + * Inode is always written before the last reference is dropped and + * hence this should not be reached from reclaim. + * + * Writing back the inode from reclaim can deadlock if the request + * processing itself needs an allocation. Allocations triggering + * reclaim while serving a request can't be prevented, because it can + * involve any number of unrelated userspace processes. + */ + WARN_ON(wbc->for_reclaim); + ff = __fuse_write_file_get(fc, fi); err = fuse_flush_times(inode, ff); if (ff) @@ -2381,6 +2485,12 @@ static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) if (FUSE_IS_DAX(file_inode(file))) return fuse_dax_mmap(file, vma); +#ifdef CONFIG_FUSE_BPF + /* TODO - this is simply passthrough, not a proper BPF filter */ + if (ff->backing_file) + return fuse_backing_mmap(file, vma); +#endif + if (ff->passthrough.filp) return fuse_passthrough_mmap(file, vma); @@ -2629,6 +2739,17 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) { loff_t retval; struct inode *inode = file_inode(file); +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_lseek_io, + fuse_lseek_initialize, + fuse_lseek_backing, + fuse_lseek_finalize, + file, offset, whence); + if (fer.ret) + return PTR_ERR(fer.result); +#endif switch (whence) { case SEEK_SET: @@ -3249,7 +3370,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter) static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end) { - int err = filemap_write_and_wait_range(inode->i_mapping, start, -1); + int err = filemap_write_and_wait_range(inode->i_mapping, start, LLONG_MAX); if (!err) fuse_sync_writes(inode); @@ -3277,6 +3398,18 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, bool block_faults = FUSE_IS_DAX(inode) && lock_inode; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_fallocate_in, + fuse_file_fallocate_initialize, + fuse_file_fallocate_backing, + fuse_file_fallocate_finalize, + file, mode, offset, length); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) return -EOPNOTSUPP; @@ -3347,6 +3480,8 @@ out: if (lock_inode) inode_unlock(inode); + fuse_flush_time_update(inode); + return err; } @@ -3378,6 +3513,18 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in, bool is_unstable = (!fc->writeback_cache) && ((pos_out + len) > inode_out->i_size); +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(file_in->f_inode, struct fuse_copy_file_range_io, + fuse_copy_file_range_initialize, + fuse_copy_file_range_backing, + fuse_copy_file_range_finalize, + file_in, pos_in, file_out, pos_out, len, flags); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (fc->no_copy_file_range) return -EOPNOTSUPP; @@ -3456,6 +3603,8 @@ out: inode_unlock(inode_out); file_accessed(file_in); + fuse_flush_time_update(inode_out); + return err; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 63225ed36f7c3b6594c593313ef4a2465a96a078..00e0c53242e760dca2a31b01fd3334d8e091c4d5 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -13,6 +13,8 @@ # define pr_fmt(fmt) "fuse: " fmt #endif +#include +#include #include #include #include @@ -31,6 +33,9 @@ #include #include #include +#include + +#define FUSE_SUPER_MAGIC 0x65735546 /** Default max number of pages that can be used in a single read request */ #define FUSE_DEFAULT_MAX_PAGES_PER_REQ 32 @@ -63,11 +68,57 @@ struct fuse_forget_link { struct fuse_forget_link *next; }; +/** FUSE specific dentry data */ +#if BITS_PER_LONG < 64 || defined(CONFIG_FUSE_BPF) +struct fuse_dentry { + union { + u64 time; + struct rcu_head rcu; + }; + struct path backing_path; +}; + +static inline struct fuse_dentry *get_fuse_dentry(const struct dentry *entry) +{ + return entry->d_fsdata; +} +#endif + +#ifdef CONFIG_FUSE_BPF +static inline void get_fuse_backing_path(const struct dentry *d, + struct path *path) +{ + struct fuse_dentry *di = get_fuse_dentry(d); + + if (!di) { + *path = (struct path) {}; + return; + } + + *path = di->backing_path; + path_get(path); +} +#endif + /** FUSE inode */ struct fuse_inode { /** Inode data */ struct inode inode; +#ifdef CONFIG_FUSE_BPF + /** + * Backing inode, if this inode is from a backing file system. + * If this is set, nodeid is 0. + */ + struct inode *backing_inode; + + /** + * bpf_prog, run on all operations to determine whether to pass through + * or handle in place + */ + struct bpf_prog *bpf; +#endif + /** Unique ID, which identifies the inode between userspace * and kernel */ u64 nodeid; @@ -239,6 +290,14 @@ struct fuse_file { /** Container for data related to the passthrough functionality */ struct fuse_passthrough passthrough; +#ifdef CONFIG_FUSE_BPF + /** + * TODO: Reconcile with passthrough file + * backing file when in bpf mode + */ + struct file *backing_file; +#endif + /** RB node to be linked on fuse_conn->polled_files */ struct rb_node polled_node; @@ -270,19 +329,21 @@ struct fuse_page_desc { struct fuse_args { uint64_t nodeid; uint32_t opcode; + uint32_t error_in; unsigned short in_numargs; unsigned short out_numargs; - bool force:1; - bool noreply:1; - bool nocreds:1; - bool in_pages:1; - bool out_pages:1; - bool out_argvar:1; - bool page_zeroing:1; - bool page_replace:1; - bool may_block:1; - struct fuse_in_arg in_args[3]; - struct fuse_arg out_args[2]; + int force:1; + int noreply:1; + int nocreds:1; + int in_pages:1; + int out_pages:1; + int user_pages:1; + int out_argvar:1; + int page_zeroing:1; + int page_replace:1; + int may_block:1; + struct fuse_in_arg in_args[FUSE_MAX_IN_ARGS]; + struct fuse_arg out_args[FUSE_MAX_OUT_ARGS]; void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); /* Path used for completing d_canonical_path */ @@ -411,19 +472,19 @@ struct fuse_iqueue_ops { /** * Signal that a forget has been queued */ - void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq) + void (*wake_forget_and_unlock)(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock); /** * Signal that an INTERRUPT request has been queued */ - void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq) + void (*wake_interrupt_and_unlock)(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock); /** * Signal that a request has been queued */ - void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq) + void (*wake_pending_and_unlock)(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock); /** @@ -519,9 +580,12 @@ struct fuse_fs_context { bool no_force_umount:1; bool legacy_opts_show:1; bool dax:1; + bool no_daemon:1; unsigned int max_read; unsigned int blksize; const char *subtype; + struct bpf_prog *root_bpf; + struct file *root_dir; /* DAX device, may be NULL */ struct dax_device *dax_dev; @@ -765,6 +829,9 @@ struct fuse_conn { /** Passthrough mode for read/write IO */ unsigned int passthrough:1; + /** BPF Only, no Daemon running */ + unsigned int no_daemon:1; + /** The number of requests waiting for completion */ atomic_t num_waiting; @@ -911,12 +978,15 @@ extern const struct dentry_operations fuse_root_dentry_operations; /** * Get a filled in inode */ +struct inode *fuse_iget_backing(struct super_block *sb, + struct inode *backing_inode); struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version); int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, - struct fuse_entry_out *outarg, struct inode **inode); + struct fuse_entry_out *outarg, + struct dentry *entry, struct inode **inode); /** * Send FORGET command @@ -953,7 +1023,6 @@ struct fuse_io_args { void fuse_read_args_fill(struct fuse_io_args *ia, struct file *file, loff_t pos, size_t count, int opcode); - /** * Send OPEN or OPENDIR request */ @@ -1022,7 +1091,7 @@ int fuse_dev_init(void); void fuse_dev_cleanup(void); int fuse_ctl_init(void); -void __exit fuse_ctl_cleanup(void); +void fuse_ctl_cleanup(void); /** * Simple request sending that does request allocation and freeing @@ -1050,6 +1119,7 @@ void fuse_invalidate_entry_cache(struct dentry *entry); void fuse_invalidate_atime(struct inode *inode); u64 entry_attr_timeout(struct fuse_entry_out *o); +void fuse_init_dentry_root(struct dentry *root, struct file *backing_dir); void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o); /** @@ -1137,6 +1207,7 @@ int fuse_allow_current_process(struct fuse_conn *fc); u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id); +void fuse_flush_time_update(struct inode *inode); void fuse_update_ctime(struct inode *inode); int fuse_update_attributes(struct inode *inode, struct file *file); @@ -1256,6 +1327,7 @@ bool fuse_dax_check_alignment(struct fuse_conn *fc, unsigned int map_alignment); void fuse_dax_cancel_work(struct fuse_conn *fc); /* passthrough.c */ +void fuse_copyattr(struct file *dst_file, struct file *src_file); int fuse_passthrough_open(struct fuse_dev *fud, u32 lower_fd); int fuse_passthrough_setup(struct fuse_conn *fc, struct fuse_file *ff, struct fuse_open_out *openarg); @@ -1264,4 +1336,636 @@ ssize_t fuse_passthrough_read_iter(struct kiocb *iocb, struct iov_iter *to); ssize_t fuse_passthrough_write_iter(struct kiocb *iocb, struct iov_iter *from); ssize_t fuse_passthrough_mmap(struct file *file, struct vm_area_struct *vma); +/* backing.c */ + +struct bpf_prog *fuse_get_bpf_prog(struct file *file); + +/* + * Dummy io passed to fuse_bpf_backing when io operation needs no scratch space + */ +struct fuse_dummy_io { + int unused; +}; + +struct fuse_open_io { + struct fuse_open_in foi; + struct fuse_open_out foo; +}; + +int fuse_open_initialize(struct fuse_bpf_args *fa, struct fuse_open_io *foi, + struct inode *inode, struct file *file, bool isdir); +int fuse_open_backing(struct fuse_bpf_args *fa, + struct inode *inode, struct file *file, bool isdir); +void *fuse_open_finalize(struct fuse_bpf_args *fa, + struct inode *inode, struct file *file, bool isdir); + +struct fuse_create_open_io { + struct fuse_create_in fci; + struct fuse_entry_out feo; + struct fuse_open_out foo; +}; + +int fuse_create_open_initialize( + struct fuse_bpf_args *fa, struct fuse_create_open_io *fcoi, + struct inode *dir, struct dentry *entry, + struct file *file, unsigned int flags, umode_t mode); +int fuse_create_open_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, + struct file *file, unsigned int flags, umode_t mode); +void *fuse_create_open_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, + struct file *file, unsigned int flags, umode_t mode); + +int fuse_mknod_initialize( + struct fuse_bpf_args *fa, struct fuse_mknod_in *fmi, + struct inode *dir, struct dentry *entry, umode_t mode, dev_t rdev); +int fuse_mknod_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, umode_t mode, dev_t rdev); +void *fuse_mknod_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, umode_t mode, dev_t rdev); + +int fuse_mkdir_initialize( + struct fuse_bpf_args *fa, struct fuse_mkdir_in *fmi, + struct inode *dir, struct dentry *entry, umode_t mode); +int fuse_mkdir_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, umode_t mode); +void *fuse_mkdir_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, umode_t mode); + +int fuse_rmdir_initialize( + struct fuse_bpf_args *fa, struct fuse_dummy_io *fmi, + struct inode *dir, struct dentry *entry); +int fuse_rmdir_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry); +void *fuse_rmdir_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry); + +int fuse_rename2_initialize(struct fuse_bpf_args *fa, struct fuse_rename2_in *fri, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags); +int fuse_rename2_backing(struct fuse_bpf_args *fa, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags); +void *fuse_rename2_finalize(struct fuse_bpf_args *fa, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent, + unsigned int flags); + +int fuse_rename_initialize(struct fuse_bpf_args *fa, struct fuse_rename_in *fri, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent); +int fuse_rename_backing(struct fuse_bpf_args *fa, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent); +void *fuse_rename_finalize(struct fuse_bpf_args *fa, + struct inode *olddir, struct dentry *oldent, + struct inode *newdir, struct dentry *newent); + +int fuse_unlink_initialize( + struct fuse_bpf_args *fa, struct fuse_dummy_io *fmi, + struct inode *dir, struct dentry *entry); +int fuse_unlink_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry); +void *fuse_unlink_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry); + +int fuse_link_initialize(struct fuse_bpf_args *fa, struct fuse_link_in *fli, + struct dentry *entry, struct inode *dir, + struct dentry *newent); +int fuse_link_backing(struct fuse_bpf_args *fa, struct dentry *entry, + struct inode *dir, struct dentry *newent); +void *fuse_link_finalize(struct fuse_bpf_args *fa, struct dentry *entry, + struct inode *dir, struct dentry *newent); + +int fuse_release_initialize(struct fuse_bpf_args *fa, struct fuse_release_in *fri, + struct inode *inode, struct file *file); +int fuse_releasedir_initialize(struct fuse_bpf_args *fa, + struct fuse_release_in *fri, + struct inode *inode, struct file *file); +int fuse_release_backing(struct fuse_bpf_args *fa, + struct inode *inode, struct file *file); +void *fuse_release_finalize(struct fuse_bpf_args *fa, + struct inode *inode, struct file *file); + +int fuse_flush_initialize(struct fuse_bpf_args *fa, struct fuse_flush_in *ffi, + struct file *file, fl_owner_t id); +int fuse_flush_backing(struct fuse_bpf_args *fa, struct file *file, fl_owner_t id); +void *fuse_flush_finalize(struct fuse_bpf_args *fa, + struct file *file, fl_owner_t id); + +struct fuse_lseek_io { + struct fuse_lseek_in fli; + struct fuse_lseek_out flo; +}; + +int fuse_lseek_initialize(struct fuse_bpf_args *fa, struct fuse_lseek_io *fli, + struct file *file, loff_t offset, int whence); +int fuse_lseek_backing(struct fuse_bpf_args *fa, struct file *file, loff_t offset, int whence); +void *fuse_lseek_finalize(struct fuse_bpf_args *fa, struct file *file, loff_t offset, int whence); + +struct fuse_copy_file_range_io { + struct fuse_copy_file_range_in fci; + struct fuse_write_out fwo; +}; + +int fuse_copy_file_range_initialize(struct fuse_bpf_args *fa, + struct fuse_copy_file_range_io *fcf, + struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags); +int fuse_copy_file_range_backing(struct fuse_bpf_args *fa, + struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags); +void *fuse_copy_file_range_finalize(struct fuse_bpf_args *fa, + struct file *file_in, loff_t pos_in, + struct file *file_out, loff_t pos_out, + size_t len, unsigned int flags); + +int fuse_fsync_initialize(struct fuse_bpf_args *fa, struct fuse_fsync_in *ffi, + struct file *file, loff_t start, loff_t end, int datasync); +int fuse_fsync_backing(struct fuse_bpf_args *fa, + struct file *file, loff_t start, loff_t end, int datasync); +void *fuse_fsync_finalize(struct fuse_bpf_args *fa, + struct file *file, loff_t start, loff_t end, int datasync); +int fuse_dir_fsync_initialize(struct fuse_bpf_args *fa, struct fuse_fsync_in *ffi, + struct file *file, loff_t start, loff_t end, int datasync); + +struct fuse_getxattr_io { + struct fuse_getxattr_in fgi; + struct fuse_getxattr_out fgo; +}; + +int fuse_getxattr_initialize( + struct fuse_bpf_args *fa, struct fuse_getxattr_io *fgio, + struct dentry *dentry, const char *name, void *value, + size_t size); +int fuse_getxattr_backing( + struct fuse_bpf_args *fa, + struct dentry *dentry, const char *name, void *value, + size_t size); +void *fuse_getxattr_finalize( + struct fuse_bpf_args *fa, + struct dentry *dentry, const char *name, void *value, + size_t size); + +int fuse_listxattr_initialize(struct fuse_bpf_args *fa, + struct fuse_getxattr_io *fgio, + struct dentry *dentry, char *list, size_t size); +int fuse_listxattr_backing(struct fuse_bpf_args *fa, struct dentry *dentry, + char *list, size_t size); +void *fuse_listxattr_finalize(struct fuse_bpf_args *fa, struct dentry *dentry, + char *list, size_t size); + +int fuse_setxattr_initialize(struct fuse_bpf_args *fa, + struct fuse_setxattr_in *fsxi, + struct dentry *dentry, const char *name, + const void *value, size_t size, int flags); +int fuse_setxattr_backing(struct fuse_bpf_args *fa, struct dentry *dentry, + const char *name, const void *value, size_t size, + int flags); +void *fuse_setxattr_finalize(struct fuse_bpf_args *fa, struct dentry *dentry, + const char *name, const void *value, size_t size, + int flags); + +int fuse_removexattr_initialize(struct fuse_bpf_args *fa, + struct fuse_dummy_io *unused, + struct dentry *dentry, const char *name); +int fuse_removexattr_backing(struct fuse_bpf_args *fa, + struct dentry *dentry, const char *name); +void *fuse_removexattr_finalize(struct fuse_bpf_args *fa, + struct dentry *dentry, const char *name); + +struct fuse_read_iter_out { + uint64_t ret; +}; +struct fuse_file_read_iter_io { + struct fuse_read_in fri; + struct fuse_read_iter_out frio; +}; + +int fuse_file_read_iter_initialize( + struct fuse_bpf_args *fa, struct fuse_file_read_iter_io *fri, + struct kiocb *iocb, struct iov_iter *to); +int fuse_file_read_iter_backing(struct fuse_bpf_args *fa, + struct kiocb *iocb, struct iov_iter *to); +void *fuse_file_read_iter_finalize(struct fuse_bpf_args *fa, + struct kiocb *iocb, struct iov_iter *to); + +struct fuse_write_iter_out { + uint64_t ret; +}; +struct fuse_file_write_iter_io { + struct fuse_write_in fwi; + struct fuse_write_out fwo; + struct fuse_write_iter_out fwio; +}; + +int fuse_file_write_iter_initialize( + struct fuse_bpf_args *fa, struct fuse_file_write_iter_io *fwio, + struct kiocb *iocb, struct iov_iter *from); +int fuse_file_write_iter_backing(struct fuse_bpf_args *fa, + struct kiocb *iocb, struct iov_iter *from); +void *fuse_file_write_iter_finalize(struct fuse_bpf_args *fa, + struct kiocb *iocb, struct iov_iter *from); + +ssize_t fuse_backing_mmap(struct file *file, struct vm_area_struct *vma); + +int fuse_file_fallocate_initialize(struct fuse_bpf_args *fa, + struct fuse_fallocate_in *ffi, + struct file *file, int mode, loff_t offset, loff_t length); +int fuse_file_fallocate_backing(struct fuse_bpf_args *fa, + struct file *file, int mode, loff_t offset, loff_t length); +void *fuse_file_fallocate_finalize(struct fuse_bpf_args *fa, + struct file *file, int mode, loff_t offset, loff_t length); + +struct fuse_lookup_io { + struct fuse_entry_out feo; + struct fuse_entry_bpf feb; +}; + +int fuse_lookup_initialize(struct fuse_bpf_args *fa, struct fuse_lookup_io *feo, + struct inode *dir, struct dentry *entry, unsigned int flags); +int fuse_lookup_backing(struct fuse_bpf_args *fa, struct inode *dir, + struct dentry *entry, unsigned int flags); +struct dentry *fuse_lookup_finalize(struct fuse_bpf_args *fa, struct inode *dir, + struct dentry *entry, unsigned int flags); +int fuse_revalidate_backing(struct fuse_bpf_args *fa, struct inode *dir, + struct dentry *entry, unsigned int flags); +void *fuse_revalidate_finalize(struct fuse_bpf_args *fa, struct inode *dir, + struct dentry *entry, unsigned int flags); + +int fuse_canonical_path_initialize(struct fuse_bpf_args *fa, + struct fuse_dummy_io *fdi, + const struct path *path, + struct path *canonical_path); +int fuse_canonical_path_backing(struct fuse_bpf_args *fa, const struct path *path, + struct path *canonical_path); +void *fuse_canonical_path_finalize(struct fuse_bpf_args *fa, + const struct path *path, + struct path *canonical_path); + +struct fuse_getattr_io { + struct fuse_getattr_in fgi; + struct fuse_attr_out fao; +}; +int fuse_getattr_initialize(struct fuse_bpf_args *fa, struct fuse_getattr_io *fgio, + const struct dentry *entry, struct kstat *stat, + u32 request_mask, unsigned int flags); +int fuse_getattr_backing(struct fuse_bpf_args *fa, + const struct dentry *entry, struct kstat *stat, + u32 request_mask, unsigned int flags); +void *fuse_getattr_finalize(struct fuse_bpf_args *fa, + const struct dentry *entry, struct kstat *stat, + u32 request_mask, unsigned int flags); + +struct fuse_setattr_io { + struct fuse_setattr_in fsi; + struct fuse_attr_out fao; +}; + +int fuse_setattr_initialize(struct fuse_bpf_args *fa, struct fuse_setattr_io *fsi, + struct dentry *dentry, struct iattr *attr, struct file *file); +int fuse_setattr_backing(struct fuse_bpf_args *fa, + struct dentry *dentry, struct iattr *attr, struct file *file); +void *fuse_setattr_finalize(struct fuse_bpf_args *fa, + struct dentry *dentry, struct iattr *attr, struct file *file); + +int fuse_statfs_initialize(struct fuse_bpf_args *fa, struct fuse_statfs_out *fso, + struct dentry *dentry, struct kstatfs *buf); +int fuse_statfs_backing(struct fuse_bpf_args *fa, + struct dentry *dentry, struct kstatfs *buf); +void *fuse_statfs_finalize(struct fuse_bpf_args *fa, + struct dentry *dentry, struct kstatfs *buf); + +int fuse_get_link_initialize(struct fuse_bpf_args *fa, struct fuse_dummy_io *dummy, + struct inode *inode, struct dentry *dentry, + struct delayed_call *callback, const char **out); +int fuse_get_link_backing(struct fuse_bpf_args *fa, + struct inode *inode, struct dentry *dentry, + struct delayed_call *callback, const char **out); +void *fuse_get_link_finalize(struct fuse_bpf_args *fa, + struct inode *inode, struct dentry *dentry, + struct delayed_call *callback, const char **out); + +int fuse_symlink_initialize( + struct fuse_bpf_args *fa, struct fuse_dummy_io *unused, + struct inode *dir, struct dentry *entry, const char *link, int len); +int fuse_symlink_backing( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, const char *link, int len); +void *fuse_symlink_finalize( + struct fuse_bpf_args *fa, + struct inode *dir, struct dentry *entry, const char *link, int len); + +struct fuse_read_io { + struct fuse_read_in fri; + struct fuse_read_out fro; +}; + +int fuse_readdir_initialize(struct fuse_bpf_args *fa, struct fuse_read_io *frio, + struct file *file, struct dir_context *ctx, + bool *force_again, bool *allow_force, bool is_continued); +int fuse_readdir_backing(struct fuse_bpf_args *fa, + struct file *file, struct dir_context *ctx, + bool *force_again, bool *allow_force, bool is_continued); +void *fuse_readdir_finalize(struct fuse_bpf_args *fa, + struct file *file, struct dir_context *ctx, + bool *force_again, bool *allow_force, bool is_continued); + +int fuse_access_initialize(struct fuse_bpf_args *fa, struct fuse_access_in *fai, + struct inode *inode, int mask); +int fuse_access_backing(struct fuse_bpf_args *fa, struct inode *inode, int mask); +void *fuse_access_finalize(struct fuse_bpf_args *fa, struct inode *inode, int mask); + +/* + * FUSE caches dentries and attributes with separate timeout. The + * time in jiffies until the dentry/attributes are valid is stored in + * dentry->d_fsdata and fuse_inode->i_time respectively. + */ + +/* + * Calculate the time in jiffies until a dentry/attributes are valid + */ +static inline u64 time_to_jiffies(u64 sec, u32 nsec) +{ + if (sec || nsec) { + struct timespec64 ts = { + sec, + min_t(u32, nsec, NSEC_PER_SEC - 1) + }; + + return get_jiffies_64() + timespec64_to_jiffies(&ts); + } else + return 0; +} + +static inline u64 attr_timeout(struct fuse_attr_out *o) +{ + return time_to_jiffies(o->attr_valid, o->attr_valid_nsec); +} + +static inline bool update_mtime(unsigned ivalid, bool trust_local_mtime) +{ + /* Always update if mtime is explicitly set */ + if (ivalid & ATTR_MTIME_SET) + return true; + + /* Or if kernel i_mtime is the official one */ + if (trust_local_mtime) + return true; + + /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ + if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) + return false; + + /* In all other cases update */ + return true; +} + +void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, + struct kstat *stat); + +static inline void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr, + struct fuse_setattr_in *arg, bool trust_local_cmtime) +{ + unsigned ivalid = iattr->ia_valid; + + if (ivalid & ATTR_MODE) + arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; + if (ivalid & ATTR_UID) + arg->valid |= FATTR_UID, arg->uid = from_kuid(fc->user_ns, iattr->ia_uid); + if (ivalid & ATTR_GID) + arg->valid |= FATTR_GID, arg->gid = from_kgid(fc->user_ns, iattr->ia_gid); + if (ivalid & ATTR_SIZE) + arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; + if (ivalid & ATTR_ATIME) { + arg->valid |= FATTR_ATIME; + arg->atime = iattr->ia_atime.tv_sec; + arg->atimensec = iattr->ia_atime.tv_nsec; + if (!(ivalid & ATTR_ATIME_SET)) + arg->valid |= FATTR_ATIME_NOW; + } + if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) { + arg->valid |= FATTR_MTIME; + arg->mtime = iattr->ia_mtime.tv_sec; + arg->mtimensec = iattr->ia_mtime.tv_nsec; + if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime) + arg->valid |= FATTR_MTIME_NOW; + } + if ((ivalid & ATTR_CTIME) && trust_local_cmtime) { + arg->valid |= FATTR_CTIME; + arg->ctime = iattr->ia_ctime.tv_sec; + arg->ctimensec = iattr->ia_ctime.tv_nsec; + } +} + +static inline int finalize_attr(struct inode *inode, struct fuse_attr_out *outarg, + u64 attr_version, struct kstat *stat) +{ + int err = 0; + + if (fuse_invalid_attr(&outarg->attr) || + ((inode->i_mode ^ outarg->attr.mode) & S_IFMT)) { + fuse_make_bad(inode); + err = -EIO; + } else { + fuse_change_attributes(inode, &outarg->attr, + attr_timeout(outarg), + attr_version); + if (stat) + fuse_fillattr(inode, &outarg->attr, stat); + } + return err; +} + +static inline void convert_statfs_to_fuse(struct fuse_kstatfs *attr, struct kstatfs *stbuf) +{ + attr->bsize = stbuf->f_bsize; + attr->frsize = stbuf->f_frsize; + attr->blocks = stbuf->f_blocks; + attr->bfree = stbuf->f_bfree; + attr->bavail = stbuf->f_bavail; + attr->files = stbuf->f_files; + attr->ffree = stbuf->f_ffree; + attr->namelen = stbuf->f_namelen; + /* fsid is left zero */ +} + +static inline void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) +{ + stbuf->f_type = FUSE_SUPER_MAGIC; + stbuf->f_bsize = attr->bsize; + stbuf->f_frsize = attr->frsize; + stbuf->f_blocks = attr->blocks; + stbuf->f_bfree = attr->bfree; + stbuf->f_bavail = attr->bavail; + stbuf->f_files = attr->files; + stbuf->f_ffree = attr->ffree; + stbuf->f_namelen = attr->namelen; + /* fsid is left zero */ +} + +#ifdef CONFIG_FUSE_BPF +struct fuse_err_ret { + void *result; + bool ret; +}; + +int __init fuse_bpf_init(void); +void __exit fuse_bpf_cleanup(void); + +ssize_t fuse_bpf_simple_request(struct fuse_mount *fm, struct fuse_bpf_args *args); + +/* + * expression statement to wrap the backing filter logic + * struct inode *inode: inode with bpf and backing inode + * typedef io: (typically complex) type whose components fuse_args can point to. + * An instance of this type is created locally and passed to initialize + * void initialize(struct fuse_bpf_args *fa, io *in_out, args...): function that sets + * up fa and io based on args + * int backing(struct fuse_bpf_args *fa, args...): function that actually performs + * the backing io operation + * void *finalize(struct fuse_bpf_args *, args...): function that performs any final + * work needed to commit the backing io + */ +#define fuse_bpf_backing(inode, io, initialize, backing, finalize, \ + args...) \ +({ \ + struct fuse_err_ret fer = {0}; \ + int ext_flags; \ + struct fuse_inode *fuse_inode = get_fuse_inode(inode); \ + struct fuse_mount *fm = get_fuse_mount(inode); \ + io feo = {0}; \ + struct fuse_bpf_args fa = {0}, fa_backup = {0}; \ + bool locked; \ + ssize_t res; \ + void *err; \ + int i; \ + bool initialized = false; \ + \ + do { \ + if (!fuse_inode || !fuse_inode->backing_inode) \ + break; \ + \ + err = ERR_PTR(initialize(&fa, &feo, args)); \ + if (err) { \ + fer = (struct fuse_err_ret) { \ + err, \ + true, \ + }; \ + break; \ + } \ + initialized = true; \ + \ + fa_backup = fa; \ + fa.opcode |= FUSE_PREFILTER; \ + for (i = 0; i < fa.in_numargs; ++i) \ + fa.out_args[i] = (struct fuse_bpf_arg) { \ + .size = fa.in_args[i].size, \ + .value = (void *)fa.in_args[i].value, \ + }; \ + fa.out_numargs = fa.in_numargs; \ + \ + ext_flags = fuse_inode->bpf ? \ + BPF_PROG_RUN(fuse_inode->bpf, &fa) : \ + FUSE_BPF_BACKING; \ + if (ext_flags < 0) { \ + fer = (struct fuse_err_ret) { \ + ERR_PTR(ext_flags), \ + true, \ + }; \ + break; \ + } \ + \ + if (ext_flags & FUSE_BPF_USER_FILTER) { \ + locked = fuse_lock_inode(inode); \ + res = fuse_bpf_simple_request(fm, &fa); \ + fuse_unlock_inode(inode, locked); \ + if (res < 0) { \ + fer = (struct fuse_err_ret) { \ + ERR_PTR(res), \ + true, \ + }; \ + break; \ + } \ + } \ + \ + if (!(ext_flags & FUSE_BPF_BACKING)) \ + break; \ + \ + fa.opcode &= ~FUSE_PREFILTER; \ + for (i = 0; i < fa.in_numargs; ++i) \ + fa.in_args[i] = (struct fuse_bpf_in_arg) { \ + .size = fa.out_args[i].size, \ + .value = fa.out_args[i].value, \ + }; \ + for (i = 0; i < fa_backup.out_numargs; ++i) \ + fa.out_args[i] = (struct fuse_bpf_arg) { \ + .size = fa_backup.out_args[i].size, \ + .value = fa_backup.out_args[i].value, \ + }; \ + fa.out_numargs = fa_backup.out_numargs; \ + \ + fer = (struct fuse_err_ret) { \ + ERR_PTR(backing(&fa, args)), \ + true, \ + }; \ + if (IS_ERR(fer.result)) \ + fa.error_in = PTR_ERR(fer.result); \ + if (!(ext_flags & FUSE_BPF_POST_FILTER)) \ + break; \ + \ + fa.opcode |= FUSE_POSTFILTER; \ + for (i = 0; i < fa.out_numargs; ++i) \ + fa.in_args[fa.in_numargs++] = \ + (struct fuse_bpf_in_arg) { \ + .size = fa.out_args[i].size, \ + .value = fa.out_args[i].value, \ + }; \ + ext_flags = BPF_PROG_RUN(fuse_inode->bpf, &fa); \ + if (ext_flags < 0) { \ + fer = (struct fuse_err_ret) { \ + ERR_PTR(ext_flags), \ + true, \ + }; \ + break; \ + } \ + if (!(ext_flags & FUSE_BPF_USER_FILTER)) \ + break; \ + \ + fa.out_args[0].size = fa_backup.out_args[0].size; \ + fa.out_args[1].size = fa_backup.out_args[1].size; \ + fa.out_numargs = fa_backup.out_numargs; \ + locked = fuse_lock_inode(inode); \ + res = fuse_bpf_simple_request(fm, &fa); \ + fuse_unlock_inode(inode, locked); \ + if (res < 0) { \ + fer.result = ERR_PTR(res); \ + break; \ + } \ + } while (false); \ + \ + if (initialized && fer.ret) { \ + err = finalize(&fa, args); \ + if (err) \ + fer.result = err; \ + } \ + \ + fer; \ +}) +#endif /* CONFIG_FUSE_BPF */ + #endif /* _FS_FUSE_I_H */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index b740088b1abb282a9e93196c2d3d0e9771494933..fa692f9e4ef344d723971a9c00a277c160e7837e 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -27,6 +27,7 @@ MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Filesystem in Userspace"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; @@ -50,8 +51,6 @@ MODULE_PARM_DESC(max_user_congthresh, "Global limit for the maximum congestion threshold an " "unprivileged user can set"); -#define FUSE_SUPER_MAGIC 0x65735546 - #define FUSE_DEFAULT_BLKSIZE 512 /** Maximum number of outstanding background requests */ @@ -79,6 +78,10 @@ static struct inode *fuse_alloc_inode(struct super_block *sb) fi->i_time = 0; fi->inval_mask = 0; +#ifdef CONFIG_FUSE_BPF + fi->backing_inode = NULL; + fi->bpf = NULL; +#endif fi->nodeid = 0; fi->nlookup = 0; fi->attr_version = 0; @@ -119,6 +122,16 @@ static void fuse_evict_inode(struct inode *inode) { struct fuse_inode *fi = get_fuse_inode(inode); + /* Will write inode on close/munmap and in all other dirtiers */ + WARN_ON(inode->i_state & I_DIRTY_INODE); + +#ifdef CONFIG_FUSE_BPF + iput(fi->backing_inode); + if (fi->bpf) + bpf_prog_put(fi->bpf); + fi->bpf = NULL; +#endif + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (inode->i_sb->s_flags & SB_ACTIVE) { @@ -161,6 +174,28 @@ static ino_t fuse_squash_ino(u64 ino64) return ino; } +static void fuse_fill_attr_from_inode(struct fuse_attr *attr, + const struct inode *inode) +{ + *attr = (struct fuse_attr){ + .ino = inode->i_ino, + .size = inode->i_size, + .blocks = inode->i_blocks, + .atime = inode->i_atime.tv_sec, + .mtime = inode->i_mtime.tv_sec, + .ctime = inode->i_ctime.tv_sec, + .atimensec = inode->i_atime.tv_nsec, + .mtimensec = inode->i_mtime.tv_nsec, + .ctimensec = inode->i_ctime.tv_nsec, + .mode = inode->i_mode, + .nlink = inode->i_nlink, + .uid = inode->i_uid.val, + .gid = inode->i_gid.val, + .rdev = inode->i_rdev, + .blksize = 1u << inode->i_blkbits, + }; +} + void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u64 attr_valid) { @@ -279,28 +314,79 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { fuse_init_common(inode); - init_special_inode(inode, inode->i_mode, - new_decode_dev(attr->rdev)); + init_special_inode(inode, inode->i_mode, attr->rdev); } else BUG(); } +struct fuse_inode_identifier { + u64 nodeid; + struct inode *backing_inode; +}; + static int fuse_inode_eq(struct inode *inode, void *_nodeidp) { - u64 nodeid = *(u64 *) _nodeidp; - if (get_node_id(inode) == nodeid) - return 1; - else - return 0; + struct fuse_inode_identifier *fii = + (struct fuse_inode_identifier *) _nodeidp; + struct fuse_inode *fi = get_fuse_inode(inode); + + return fii->nodeid == fi->nodeid +#ifdef CONFIG_FUSE_BPF + && fii->backing_inode == fi->backing_inode +#endif + ; } static int fuse_inode_set(struct inode *inode, void *_nodeidp) { - u64 nodeid = *(u64 *) _nodeidp; - get_fuse_inode(inode)->nodeid = nodeid; + struct fuse_inode_identifier *fii = + (struct fuse_inode_identifier *) _nodeidp; + struct fuse_inode *fi = get_fuse_inode(inode); + + fi->nodeid = fii->nodeid; +#ifdef CONFIG_FUSE_BPF + fi->backing_inode = fii->backing_inode; + if (fi->backing_inode) + ihold(fi->backing_inode); +#endif + return 0; } +struct inode *fuse_iget_backing(struct super_block *sb, + struct inode *backing_inode) +{ + struct inode *inode; + struct fuse_inode *fi; + struct fuse_conn *fc = get_fuse_conn_super(sb); + struct fuse_inode_identifier fii = { + .backing_inode = backing_inode, + }; + struct fuse_attr attr; + + fuse_fill_attr_from_inode(&attr, backing_inode); + inode = iget5_locked(sb, (unsigned long) backing_inode, fuse_inode_eq, + fuse_inode_set, &fii); + if (!inode) + return NULL; + + if ((inode->i_state & I_NEW)) { + inode->i_flags |= S_NOATIME; + if (!fc->writeback_cache) + inode->i_flags |= S_NOCMTIME; + fuse_init_common(inode); + unlock_new_inode(inode); + } + + fi = get_fuse_inode(inode); + fuse_init_inode(inode, &attr); + spin_lock(&fi->lock); + fi->nlookup++; + spin_unlock(&fi->lock); + + return inode; +} + struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int generation, struct fuse_attr *attr, u64 attr_valid, u64 attr_version) @@ -308,6 +394,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, struct inode *inode; struct fuse_inode *fi; struct fuse_conn *fc = get_fuse_conn_super(sb); + struct fuse_inode_identifier fii = { + .nodeid = nodeid, + }; /* * Auto mount points get their node id from the submount root, which is @@ -329,7 +418,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, } retry: - inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid); + inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &fii); if (!inode) return NULL; @@ -361,13 +450,16 @@ struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, { struct fuse_mount *fm_iter; struct inode *inode; + struct fuse_inode_identifier fii = { + .nodeid = nodeid, + }; WARN_ON(!rwsem_is_locked(&fc->killsb)); list_for_each_entry(fm_iter, &fc->mounts, fc_entry) { if (!fm_iter->sb) continue; - inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &nodeid); + inode = ilookup5(fm_iter->sb, nodeid, fuse_inode_eq, &fii); if (inode) { if (fm) *fm = fm_iter; @@ -455,20 +547,6 @@ static void fuse_put_super(struct super_block *sb) fuse_mount_put(fm); } -static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr) -{ - stbuf->f_type = FUSE_SUPER_MAGIC; - stbuf->f_bsize = attr->bsize; - stbuf->f_frsize = attr->frsize; - stbuf->f_blocks = attr->blocks; - stbuf->f_bfree = attr->bfree; - stbuf->f_bavail = attr->bavail; - stbuf->f_files = attr->files; - stbuf->f_ffree = attr->ffree; - stbuf->f_namelen = attr->namelen; - /* fsid is left zero */ -} - static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; @@ -476,12 +554,24 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf) FUSE_ARGS(args); struct fuse_statfs_out outarg; int err; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; +#endif if (!fuse_allow_current_process(fm->fc)) { buf->f_type = FUSE_SUPER_MAGIC; return 0; } +#ifdef CONFIG_FUSE_BPF + fer = fuse_bpf_backing(dentry->d_inode, struct fuse_statfs_out, + fuse_statfs_initialize, fuse_statfs_backing, + fuse_statfs_finalize, + dentry, buf); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + memset(&outarg, 0, sizeof(outarg)); args.in_numargs = 0; args.opcode = FUSE_STATFS; @@ -506,6 +596,9 @@ enum { OPT_ALLOW_OTHER, OPT_MAX_READ, OPT_BLKSIZE, + OPT_ROOT_BPF, + OPT_ROOT_DIR, + OPT_NO_DAEMON, OPT_ERR }; @@ -520,6 +613,9 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = { fsparam_u32 ("max_read", OPT_MAX_READ), fsparam_u32 ("blksize", OPT_BLKSIZE), fsparam_string ("subtype", OPT_SUBTYPE), + fsparam_u32 ("root_bpf", OPT_ROOT_BPF), + fsparam_u32 ("root_dir", OPT_ROOT_DIR), + fsparam_flag ("no_daemon", OPT_NO_DAEMON), {} }; @@ -603,6 +699,26 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->blksize = result.uint_32; break; + case OPT_ROOT_BPF: + ctx->root_bpf = bpf_prog_get_type_dev(result.uint_32, + BPF_PROG_TYPE_FUSE, false); + if (IS_ERR(ctx->root_bpf)) { + ctx->root_bpf = NULL; + return invalfc(fc, "Unable to open bpf program"); + } + break; + + case OPT_ROOT_DIR: + ctx->root_dir = fget(result.uint_32); + if (!ctx->root_dir) + return invalfc(fc, "Unable to open root directory"); + break; + + case OPT_NO_DAEMON: + ctx->no_daemon = true; + ctx->fd_present = true; + break; + default: return -EINVAL; } @@ -615,6 +731,10 @@ static void fuse_free_fc(struct fs_context *fc) struct fuse_fs_context *ctx = fc->fs_private; if (ctx) { + if (ctx->root_dir) + fput(ctx->root_dir); + if (ctx->root_bpf) + bpf_prog_put(ctx->root_bpf); kfree(ctx->subtype); kfree(ctx); } @@ -751,15 +871,34 @@ struct fuse_mount *fuse_mount_get(struct fuse_mount *fm) } EXPORT_SYMBOL_GPL(fuse_mount_get); -static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode) +static struct inode *fuse_get_root_inode(struct super_block *sb, + unsigned int mode, + struct bpf_prog *root_bpf, + struct file *backing_fd) { struct fuse_attr attr; - memset(&attr, 0, sizeof(attr)); + struct inode *inode; + memset(&attr, 0, sizeof(attr)); attr.mode = mode; attr.ino = FUSE_ROOT_ID; attr.nlink = 1; - return fuse_iget(sb, 1, 0, &attr, 0, 0); + inode = fuse_iget(sb, 1, 0, &attr, 0, 0); + if (!inode) + return NULL; + +#ifdef CONFIG_FUSE_BPF + get_fuse_inode(inode)->bpf = root_bpf; + if (root_bpf) + bpf_prog_inc(root_bpf); + + if (backing_fd) { + get_fuse_inode(inode)->backing_inode = backing_fd->f_inode; + ihold(backing_fd->f_inode); + } +#endif + + return inode; } struct fuse_inode_handle { @@ -774,11 +913,14 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, struct inode *inode; struct dentry *entry; int err = -ESTALE; + struct fuse_inode_identifier fii = { + .nodeid = handle->nodeid, + }; if (handle->nodeid == 0) goto out_err; - inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid); + inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &fii); if (!inode) { struct fuse_entry_out outarg; const struct qstr name = QSTR_INIT(".", 1); @@ -787,7 +929,7 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, goto out_err; err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg, - &inode); + NULL, &inode); if (err && err != -ENOENT) goto out_err; if (err || !inode) { @@ -888,7 +1030,7 @@ static struct dentry *fuse_get_parent(struct dentry *child) return ERR_PTR(-ESTALE); err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), - &name, &outarg, &inode); + &name, &outarg, NULL, &inode); if (err) { if (err == -ENOENT) return ERR_PTR(-ESTALE); @@ -1129,7 +1271,7 @@ void fuse_send_init(struct fuse_mount *fm) ia->args.nocreds = true; ia->args.end = process_init_reply; - if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0) + if (unlikely(fm->fc->no_daemon) || fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0) process_init_reply(fm, &ia->args, -ENOTCONN); } EXPORT_SYMBOL_GPL(fuse_send_init); @@ -1253,28 +1395,6 @@ void fuse_dev_free(struct fuse_dev *fud) } EXPORT_SYMBOL_GPL(fuse_dev_free); -static void fuse_fill_attr_from_inode(struct fuse_attr *attr, - const struct fuse_inode *fi) -{ - *attr = (struct fuse_attr){ - .ino = fi->inode.i_ino, - .size = fi->inode.i_size, - .blocks = fi->inode.i_blocks, - .atime = fi->inode.i_atime.tv_sec, - .mtime = fi->inode.i_mtime.tv_sec, - .ctime = fi->inode.i_ctime.tv_sec, - .atimensec = fi->inode.i_atime.tv_nsec, - .mtimensec = fi->inode.i_mtime.tv_nsec, - .ctimensec = fi->inode.i_ctime.tv_nsec, - .mode = fi->inode.i_mode, - .nlink = fi->inode.i_nlink, - .uid = fi->inode.i_uid.val, - .gid = fi->inode.i_gid.val, - .rdev = fi->inode.i_rdev, - .blksize = 1u << fi->inode.i_blkbits, - }; -} - static void fuse_sb_defaults(struct super_block *sb) { sb->s_magic = FUSE_SUPER_MAGIC; @@ -1318,7 +1438,7 @@ int fuse_fill_super_submount(struct super_block *sb, if (parent_sb->s_subtype && !sb->s_subtype) return -ENOMEM; - fuse_fill_attr_from_inode(&root_attr, parent_fi); + fuse_fill_attr_from_inode(&root_attr, &parent_fi->inode); root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0); /* * This inode is just a duplicate, so it is not looked up and @@ -1395,13 +1515,16 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) fc->destroy = ctx->destroy; fc->no_control = ctx->no_control; fc->no_force_umount = ctx->no_force_umount; + fc->no_daemon = ctx->no_daemon; err = -ENOMEM; - root = fuse_get_root_inode(sb, ctx->rootmode); + root = fuse_get_root_inode(sb, ctx->rootmode, ctx->root_bpf, + ctx->root_dir); sb->s_d_op = &fuse_root_dentry_operations; root_dentry = d_make_root(root); if (!root_dentry) goto err_dev_free; + fuse_init_dentry_root(root_dentry, ctx->root_dir); /* Root dentry doesn't have .d_revalidate */ sb->s_d_op = &fuse_dentry_operations; @@ -1444,18 +1567,20 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) struct fuse_mount *fm; err = -EINVAL; - file = fget(ctx->fd); - if (!file) - goto err; + if (!ctx->no_daemon) { + file = fget(ctx->fd); + if (!file) + goto err; - /* - * Require mount to happen from the same user namespace which - * opened /dev/fuse to prevent potential attacks. - */ - if ((file->f_op != &fuse_dev_operations) || - (file->f_cred->user_ns != sb->s_user_ns)) - goto err_fput; - ctx->fudptr = &file->private_data; + /* + * Require mount to happen from the same user namespace which + * opened /dev/fuse to prevent potential attacks. + */ + if ((file->f_op != &fuse_dev_operations) || + (file->f_cred->user_ns != sb->s_user_ns)) + goto err_fput; + ctx->fudptr = &file->private_data; + } fc = kmalloc(sizeof(*fc), GFP_KERNEL); err = -ENOMEM; @@ -1481,7 +1606,8 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) * memory barrier for file->private_data to be visible on all * CPUs after this */ - fput(file); + if (!ctx->no_daemon) + fput(file); fuse_send_init(get_fuse_mount_super(sb)); return 0; @@ -1489,7 +1615,8 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc) fuse_mount_put(fm); sb->s_fs_info = NULL; err_fput: - fput(file); + if (!ctx->no_daemon) + fput(file); err: return err; } @@ -1697,6 +1824,26 @@ static void fuse_fs_cleanup(void) static struct kobject *fuse_kobj; +/* TODO Remove this once BPF_PROG_TYPE_FUSE is upstreamed */ +static ssize_t bpf_prog_type_fuse_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buff) +{ + return sysfs_emit(buff, "%d\n", BPF_PROG_TYPE_FUSE); +} + +static struct kobj_attribute bpf_prog_type_fuse_attr = + __ATTR_RO(bpf_prog_type_fuse); + +static struct attribute *bpf_attributes[] = { + &bpf_prog_type_fuse_attr.attr, + NULL, +}; + +static const struct attribute_group bpf_attr_group = { + .attrs = bpf_attributes, +}; +/* TODO remove to here */ + static int fuse_sysfs_init(void) { int err; @@ -1711,8 +1858,15 @@ static int fuse_sysfs_init(void) if (err) goto out_fuse_unregister; + /* TODO Remove when BPF_PROG_TYPE_FUSE is upstreamed */ + err = sysfs_create_group(fuse_kobj, &bpf_attr_group); + if (err) + goto out_fuse_remove_mount_point; + return 0; + out_fuse_remove_mount_point: + sysfs_remove_mount_point(fuse_kobj, "connections"); out_fuse_unregister: kobject_put(fuse_kobj); out_err: @@ -1749,11 +1903,21 @@ static int __init fuse_init(void) if (res) goto err_sysfs_cleanup; +#ifdef CONFIG_FUSE_BPF + res = fuse_bpf_init(); + if (res) + goto err_ctl_cleanup; +#endif + sanitize_global_limit(&max_user_bgreq); sanitize_global_limit(&max_user_congthresh); return 0; +#ifdef CONFIG_FUSE_BPF + err_ctl_cleanup: + fuse_ctl_cleanup(); +#endif err_sysfs_cleanup: fuse_sysfs_cleanup(); err_dev_cleanup: @@ -1771,6 +1935,9 @@ static void __exit fuse_exit(void) fuse_ctl_cleanup(); fuse_sysfs_cleanup(); fuse_fs_cleanup(); +#ifdef CONFIG_FUSE_BPF + fuse_bpf_cleanup(); +#endif fuse_dev_cleanup(); } diff --git a/fs/fuse/passthrough.c b/fs/fuse/passthrough.c index 83125de0bfdc633ad99a24c3bd924b429e935325..6031e72f3b5aff30ec32f6b2784c7eed252dece6 100644 --- a/fs/fuse/passthrough.c +++ b/fs/fuse/passthrough.c @@ -34,7 +34,7 @@ static void fuse_file_accessed(struct file *dst_file, struct file *src_file) touch_atime(&dst_file->f_path); } -static void fuse_copyattr(struct file *dst_file, struct file *src_file) +void fuse_copyattr(struct file *dst_file, struct file *src_file) { struct inode *dst = file_inode(dst_file); struct inode *src = file_inode(src_file); diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c index bc267832310c7c8c26703b3a13ec0c535f1a5d44..e802426d42a9d48c68bb7777a4e3910c4d445e42 100644 --- a/fs/fuse/readdir.c +++ b/fs/fuse/readdir.c @@ -571,6 +571,25 @@ int fuse_readdir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); int err; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + bool force_again, allow_force; + bool is_continued = false; + +again: + fer = fuse_bpf_backing(inode, struct fuse_read_io, + fuse_readdir_initialize, fuse_readdir_backing, + fuse_readdir_finalize, + file, ctx, &force_again, &allow_force, is_continued); + if (force_again && !IS_ERR(fer.result)) { + is_continued = true; + goto again; + } + + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (fuse_is_bad(inode)) return -EIO; diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index b9cfb1165ff422c36e22e9c1c2e5e9595c65c02f..90a574ba92cf99aeff71c0e8e791cd805eb2aaba 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -971,7 +971,7 @@ static struct virtio_driver virtio_fs_driver = { #endif }; -static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq) +static void virtio_fs_wake_forget_and_unlock(struct fuse_iqueue *fiq, bool sync) __releases(fiq->lock) { struct fuse_forget_link *link; @@ -1006,7 +1006,8 @@ __releases(fiq->lock) kfree(link); } -static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq) +static void virtio_fs_wake_interrupt_and_unlock(struct fuse_iqueue *fiq, + bool sync) __releases(fiq->lock) { /* @@ -1221,7 +1222,8 @@ out: return ret; } -static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq) +static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq, + bool sync) __releases(fiq->lock) { unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */ diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c index aab4c9cf8483642ead3beefd00ee06007637310f..e9edbbcfcd5b2f13b65a6e819759bff2adfaf63f 100644 --- a/fs/fuse/xattr.c +++ b/fs/fuse/xattr.c @@ -113,6 +113,17 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size) struct fuse_getxattr_out outarg; ssize_t ret; +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_getxattr_io, + fuse_listxattr_initialize, + fuse_listxattr_backing, fuse_listxattr_finalize, + entry, list, size); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (fuse_is_bad(inode)) return -EIO; @@ -181,6 +192,17 @@ static int fuse_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size, int flags) { +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + fer = fuse_bpf_backing(inode, struct fuse_getxattr_io, + fuse_getxattr_initialize, fuse_getxattr_backing, + fuse_getxattr_finalize, + dentry, name, value, size); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (fuse_is_bad(inode)) return -EIO; @@ -192,6 +214,24 @@ static int fuse_xattr_set(const struct xattr_handler *handler, const char *name, const void *value, size_t size, int flags) { +#ifdef CONFIG_FUSE_BPF + struct fuse_err_ret fer; + + if (value) + fer = fuse_bpf_backing(inode, struct fuse_setxattr_in, + fuse_setxattr_initialize, fuse_setxattr_backing, + fuse_setxattr_finalize, dentry, name, value, + size, flags); + else + fer = fuse_bpf_backing(inode, struct fuse_dummy_io, + fuse_removexattr_initialize, + fuse_removexattr_backing, + fuse_removexattr_finalize, + dentry, name); + if (fer.ret) + return PTR_ERR(fer.result); +#endif + if (fuse_is_bad(inode)) return -EIO; diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index a1f9dde33058f763c5b512ed0b2b25c222008ce6..b34c02985d9d2ea671f31afb6ebda29196f9fb0e 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -940,7 +940,7 @@ do_alloc: else if (height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); else - iomap->length = size - pos; + iomap->length = size - iomap->offset; } else if (flags & IOMAP_WRITE) { u64 alloc_size; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 03c3407c8e26fb08ccb522cfbb0a659fe4194971..dd052101e2266dccac176ede0d91ee3d4a44e786 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -1885,10 +1885,10 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp) do { rhashtable_walk_start(&iter); - while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) - if (gl->gl_name.ln_sbd == sdp && - lockref_get_not_dead(&gl->gl_lockref)) + while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl)) { + if (gl->gl_name.ln_sbd == sdp) examiner(gl); + } rhashtable_walk_stop(&iter); } while (cond_resched(), gl == ERR_PTR(-EAGAIN)); @@ -1911,7 +1911,7 @@ bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay) void gfs2_cancel_delete_work(struct gfs2_glock *gl) { - if (cancel_delayed_work_sync(&gl->gl_delete)) { + if (cancel_delayed_work(&gl->gl_delete)) { clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); gfs2_glock_put(gl); } @@ -1930,7 +1930,6 @@ static void flush_delete_work(struct gfs2_glock *gl) &gl->gl_delete, 0); } } - gfs2_glock_queue_work(gl, 0); } void gfs2_flush_delete_work(struct gfs2_sbd *sdp) @@ -1947,10 +1946,10 @@ void gfs2_flush_delete_work(struct gfs2_sbd *sdp) static void thaw_glock(struct gfs2_glock *gl) { - if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) { - gfs2_glock_put(gl); + if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) + return; + if (!lockref_get_not_dead(&gl->gl_lockref)) return; - } set_bit(GLF_REPLY_PENDING, &gl->gl_flags); gfs2_glock_queue_work(gl, 0); } @@ -1966,9 +1965,12 @@ static void clear_glock(struct gfs2_glock *gl) gfs2_glock_remove_from_lru(gl); spin_lock(&gl->gl_lockref.lock); - if (gl->gl_state != LM_ST_UNLOCKED) - handle_callback(gl, LM_ST_UNLOCKED, 0, false); - __gfs2_glock_queue_work(gl, 0); + if (!__lockref_is_dead(&gl->gl_lockref)) { + gl->gl_lockref.count++; + if (gl->gl_state != LM_ST_UNLOCKED) + handle_callback(gl, LM_ST_UNLOCKED, 0, false); + __gfs2_glock_queue_work(gl, 0); + } spin_unlock(&gl->gl_lockref.lock); } diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index c7393ee9cf68369f588a52f5bea3e62290b6921c..81925fd2985fd5e24bfdeef163f3f9ab04ee067c 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -260,6 +260,7 @@ static void __exit exit_gfs2_fs(void) MODULE_DESCRIPTION("Global File System"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(init_gfs2_fs); module_exit(exit_gfs2_fs); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 6a355e1347d7f9a7ed51675c79ab5051f21f86ad..d2b7ecbd1b1503a89b0116e66cd08986e0da258d 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1438,13 +1438,6 @@ out: gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); - if (ip->i_gl) { - glock_clear_object(ip->i_gl, ip); - wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); - gfs2_glock_add_to_lru(ip->i_gl); - gfs2_glock_put_eventually(ip->i_gl); - ip->i_gl = NULL; - } if (gfs2_holder_initialized(&ip->i_iopen_gh)) { struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; @@ -1457,6 +1450,13 @@ out: gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_put_eventually(gl); } + if (ip->i_gl) { + glock_clear_object(ip->i_gl, ip); + wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE); + gfs2_glock_add_to_lru(ip->i_gl); + gfs2_glock_put_eventually(ip->i_gl); + ip->i_gl = NULL; + } } static struct inode *gfs2_alloc_inode(struct super_block *sb) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 12d9bae393631c5f41938c1a8b0cf1653fe92376..6432d65a08727a5214b0207d6579c9ef92d50c81 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -29,6 +29,7 @@ static struct kmem_cache *hfs_inode_cachep; MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); static int hfs_sync_fs(struct super_block *sb, int wait) { diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 807119ae5adf7370622c033dcd714608eea318f1..2b0031c6daeab0e305d46784ea92b8f295fe47a0 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -617,6 +617,7 @@ out: MODULE_AUTHOR("Brad Boyer"); MODULE_DESCRIPTION("Extended Macintosh Filesystem"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); static struct kmem_cache *hfsplus_inode_cachep; diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index a7dbfc89202270e205c5fa1ee13a83c4d0e6824e..d7598d70f329c708ab269616b59d61d7fdf83328 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -791,3 +791,4 @@ static void __exit exit_hpfs_fs(void) module_init(init_hpfs_fs) module_exit(exit_hpfs_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/incfs/data_mgmt.h b/fs/incfs/data_mgmt.h index 84db0382765db96afb6a8560ba0c03421d1909ea..2227913674a4ff0547243b3add36f51cc9f4c983 100644 --- a/fs/incfs/data_mgmt.h +++ b/fs/incfs/data_mgmt.h @@ -131,8 +131,14 @@ struct mount_info { struct path mi_backing_dir_path; struct dentry *mi_index_dir; + /* For stacking mounts, if true, this indicates if the index dir needs + * to be freed for this SB otherwise it was created by lower level SB */ + bool mi_index_free; struct dentry *mi_incomplete_dir; + /* For stacking mounts, if true, this indicates if the incomplete dir + * needs to be freed for this SB. Similar to mi_index_free */ + bool mi_incomplete_free; const struct cred *mi_owner; diff --git a/fs/incfs/main.c b/fs/incfs/main.c index 23347acac8bf7da179bfd0735b2c6d6fd640b4ac..213faa5e911761d7d917458b0906f75692e856ed 100644 --- a/fs/incfs/main.c +++ b/fs/incfs/main.c @@ -44,5 +44,6 @@ module_init(init_incfs_module); module_exit(cleanup_incfs_module); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_AUTHOR("Eugene Zemtsov "); MODULE_DESCRIPTION("Incremental File System"); diff --git a/fs/incfs/pseudo_files.c b/fs/incfs/pseudo_files.c index 1a664f1478d5a08920567348408ff035da651c20..1b9bf000f5986f0106a4e3bb352d3115ca694378 100644 --- a/fs/incfs/pseudo_files.c +++ b/fs/incfs/pseudo_files.c @@ -147,8 +147,12 @@ static long ioctl_permit_fill(struct file *f, void __user *arg) return -EFAULT; file = fget(permit_fill.file_descriptor); - if (IS_ERR(file)) + if (IS_ERR_OR_NULL(file)) { + if (!file) + return -ENOENT; + return PTR_ERR(file); + } if (file->f_op != &incfs_file_ops) { error = -EPERM; diff --git a/fs/incfs/vfs.c b/fs/incfs/vfs.c index 0650ee90f8b1e49e948ef29c79855d2ce253492c..e97b843a4da53c13bc9bb3f2acc243fb7c34d680 100644 --- a/fs/incfs/vfs.c +++ b/fs/incfs/vfs.c @@ -437,7 +437,8 @@ static int incfs_init_dentry(struct dentry *dentry, struct path *path) } static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, - const char *name) + const char *name, + bool *created) { struct dentry *index_dentry; struct inode *backing_inode = d_inode(backing_dir); @@ -450,6 +451,7 @@ static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, return index_dentry; } else if (d_really_is_positive(index_dentry)) { /* Index already exists. */ + *created = false; return index_dentry; } @@ -458,8 +460,10 @@ static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, err = vfs_mkdir(backing_inode, index_dentry, 0777); inode_unlock(backing_inode); - if (err) + if (err) { + dput(index_dentry); return ERR_PTR(err); + } if (!d_really_is_positive(index_dentry) || unlikely(d_unhashed(index_dentry))) { @@ -467,6 +471,7 @@ static struct dentry *open_or_create_special_dir(struct dentry *backing_dir, return ERR_PTR(-EINVAL); } + *created = true; return index_dentry; } @@ -1745,6 +1750,7 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, struct super_block *src_fs_sb = NULL; struct inode *root_inode = NULL; struct super_block *sb = sget(type, NULL, set_anon_super, flags, NULL); + bool dir_created = false; int error = 0; if (IS_ERR(sb)) @@ -1761,17 +1767,23 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, BUILD_BUG_ON(PAGE_SIZE != INCFS_DATA_FILE_BLOCK_SIZE); + if (!dev_name) { + pr_err("incfs: Backing dir is not set, filesystem can't be mounted.\n"); + error = -ENOENT; + goto err_deactivate; + } + error = parse_options(&options, (char *)data); if (error != 0) { pr_err("incfs: Options parsing error. %d\n", error); - goto err; + goto err_deactivate; } sb->s_bdi->ra_pages = options.readahead_pages; if (!dev_name) { pr_err("incfs: Backing dir is not set, filesystem can't be mounted.\n"); error = -ENOENT; - goto err; + goto err_free_opts; } error = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, @@ -1780,69 +1792,81 @@ struct dentry *incfs_mount_fs(struct file_system_type *type, int flags, !d_really_is_positive(backing_dir_path.dentry)) { pr_err("incfs: Error accessing: %s.\n", dev_name); - goto err; + goto err_free_opts; } src_fs_sb = backing_dir_path.dentry->d_sb; sb->s_maxbytes = src_fs_sb->s_maxbytes; + sb->s_stack_depth = src_fs_sb->s_stack_depth + 1; - mi = incfs_alloc_mount_info(sb, &options, &backing_dir_path); + if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) { + error = -EINVAL; + goto err_put_path; + } + mi = incfs_alloc_mount_info(sb, &options, &backing_dir_path); if (IS_ERR_OR_NULL(mi)) { error = PTR_ERR(mi); pr_err("incfs: Error allocating mount info. %d\n", error); - mi = NULL; - goto err; + goto err_put_path; } + sb->s_fs_info = mi; + mi->mi_backing_dir_path = backing_dir_path; index_dir = open_or_create_special_dir(backing_dir_path.dentry, - INCFS_INDEX_NAME); + INCFS_INDEX_NAME, &dir_created); if (IS_ERR_OR_NULL(index_dir)) { error = PTR_ERR(index_dir); pr_err("incfs: Can't find or create .index dir in %s\n", dev_name); /* No need to null index_dir since we don't put it */ - goto err; + goto err_put_path; } + mi->mi_index_dir = index_dir; + mi->mi_index_free = dir_created; incomplete_dir = open_or_create_special_dir(backing_dir_path.dentry, - INCFS_INCOMPLETE_NAME); + INCFS_INCOMPLETE_NAME, + &dir_created); if (IS_ERR_OR_NULL(incomplete_dir)) { error = PTR_ERR(incomplete_dir); pr_err("incfs: Can't find or create .incomplete dir in %s\n", dev_name); /* No need to null incomplete_dir since we don't put it */ - goto err; + goto err_put_path; } mi->mi_incomplete_dir = incomplete_dir; + mi->mi_incomplete_free = dir_created; - sb->s_fs_info = mi; root_inode = fetch_regular_inode(sb, backing_dir_path.dentry); if (IS_ERR(root_inode)) { error = PTR_ERR(root_inode); - goto err; + goto err_put_path; } sb->s_root = d_make_root(root_inode); if (!sb->s_root) { error = -ENOMEM; - goto err; + goto err_put_path; } error = incfs_init_dentry(sb->s_root, &backing_dir_path); if (error) - goto err; + goto err_put_path; - path_put(&backing_dir_path); + mi->mi_backing_dir_path = backing_dir_path; sb->s_flags |= SB_ACTIVE; pr_debug("incfs: mount\n"); + free_options(&options); return dget(sb->s_root); -err: - sb->s_fs_info = NULL; + +err_put_path: path_put(&backing_dir_path); - incfs_free_mount_info(mi); - deactivate_locked_super(sb); +err_free_opts: free_options(&options); +err_deactivate: + deactivate_locked_super(sb); + pr_err("incfs: mount failed %d\n", error); return ERR_PTR(error); } @@ -1877,10 +1901,26 @@ out: void incfs_kill_sb(struct super_block *sb) { struct mount_info *mi = sb->s_fs_info; + struct inode *dinode = NULL; pr_debug("incfs: unmount\n"); - generic_shutdown_super(sb); - incfs_free_mount_info(mi); + + if (mi) { + if (mi->mi_backing_dir_path.dentry) + dinode = d_inode(mi->mi_backing_dir_path.dentry); + + if (dinode) { + if (mi->mi_index_dir && mi->mi_index_free) + vfs_rmdir(dinode, mi->mi_index_dir); + + if (mi->mi_incomplete_dir && mi->mi_incomplete_free) + vfs_rmdir(dinode, mi->mi_incomplete_dir); + } + + incfs_free_mount_info(mi); + sb->s_fs_info = NULL; + } + kill_anon_super(sb); } static int show_options(struct seq_file *m, struct dentry *root) diff --git a/fs/inode.c b/fs/inode.c index 5eea9912a0b9d89362d0f662898d1c0e8b7aa263..9246236bcdcafba83c4413d81cbcafca0c39dd57 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -307,7 +307,7 @@ void drop_nlink(struct inode *inode) if (!inode->i_nlink) atomic_long_inc(&inode->i_sb->s_remove_count); } -EXPORT_SYMBOL(drop_nlink); +EXPORT_SYMBOL_NS(drop_nlink, ANDROID_GKI_VFS_EXPORT_ONLY); /** * clear_nlink - directly zero an inode's link count @@ -346,7 +346,7 @@ void set_nlink(struct inode *inode, unsigned int nlink) inode->__i_nlink = nlink; } } -EXPORT_SYMBOL(set_nlink); +EXPORT_SYMBOL_NS(set_nlink, ANDROID_GKI_VFS_EXPORT_ONLY); /** * inc_nlink - directly increment an inode's link count @@ -399,7 +399,7 @@ void inode_init_once(struct inode *inode) __address_space_init_once(&inode->i_data); i_size_ordered_init(inode); } -EXPORT_SYMBOL(inode_init_once); +EXPORT_SYMBOL_NS(inode_init_once, ANDROID_GKI_VFS_EXPORT_ONLY); static void init_once(void *foo) { @@ -423,7 +423,7 @@ void ihold(struct inode *inode) { WARN_ON(atomic_inc_return(&inode->i_count) < 2); } -EXPORT_SYMBOL(ihold); +EXPORT_SYMBOL_NS(ihold, ANDROID_GKI_VFS_EXPORT_ONLY); static void inode_lru_list_add(struct inode *inode) { @@ -503,7 +503,7 @@ void __insert_inode_hash(struct inode *inode, unsigned long hashval) spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } -EXPORT_SYMBOL(__insert_inode_hash); +EXPORT_SYMBOL_NS(__insert_inode_hash, ANDROID_GKI_VFS_EXPORT_ONLY); /** * __remove_inode_hash - remove an inode from the hash @@ -519,7 +519,7 @@ void __remove_inode_hash(struct inode *inode) spin_unlock(&inode->i_lock); spin_unlock(&inode_hash_lock); } -EXPORT_SYMBOL(__remove_inode_hash); +EXPORT_SYMBOL_NS(__remove_inode_hash, ANDROID_GKI_VFS_EXPORT_ONLY); void clear_inode(struct inode *inode) { @@ -539,7 +539,7 @@ void clear_inode(struct inode *inode) /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } -EXPORT_SYMBOL(clear_inode); +EXPORT_SYMBOL_NS(clear_inode, ANDROID_GKI_VFS_EXPORT_ONLY); /* * Free the inode passed in, removing it from the lists it is still connected @@ -1001,7 +1001,7 @@ void unlock_new_inode(struct inode *inode) wake_up_bit(&inode->i_state, __I_NEW); spin_unlock(&inode->i_lock); } -EXPORT_SYMBOL(unlock_new_inode); +EXPORT_SYMBOL_NS(unlock_new_inode, ANDROID_GKI_VFS_EXPORT_ONLY); void discard_new_inode(struct inode *inode) { @@ -1158,7 +1158,7 @@ struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, } return inode; } -EXPORT_SYMBOL(iget5_locked); +EXPORT_SYMBOL_NS(iget5_locked, ANDROID_GKI_VFS_EXPORT_ONLY); /** * iget_locked - obtain an inode from a mounted file system @@ -1290,7 +1290,7 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) return res; } -EXPORT_SYMBOL(iunique); +EXPORT_SYMBOL_NS(iunique, ANDROID_GKI_VFS_EXPORT_ONLY); struct inode *igrab(struct inode *inode) { @@ -1373,7 +1373,7 @@ again: } return inode; } -EXPORT_SYMBOL(ilookup5); +EXPORT_SYMBOL_NS(ilookup5, ANDROID_GKI_VFS_EXPORT_ONLY); /** * ilookup - search for an inode in the inode cache @@ -1772,12 +1772,13 @@ EXPORT_SYMBOL(generic_update_time); * This does the actual work of updating an inodes time or version. Must have * had called mnt_want_write() before calling this. */ -static int update_time(struct inode *inode, struct timespec64 *time, int flags) +int inode_update_time(struct inode *inode, struct timespec64 *time, int flags) { if (inode->i_op->update_time) return inode->i_op->update_time(inode, time, flags); return generic_update_time(inode, time, flags); } +EXPORT_SYMBOL(inode_update_time); /** * touch_atime - update the access time @@ -1847,12 +1848,12 @@ void touch_atime(const struct path *path) * of the fs read only, e.g. subvolumes in Btrfs. */ now = current_time(inode); - update_time(inode, &now, S_ATIME); + inode_update_time(inode, &now, S_ATIME); __mnt_drop_write(mnt); skip_update: sb_end_write(inode->i_sb); } -EXPORT_SYMBOL(touch_atime); +EXPORT_SYMBOL_NS(touch_atime, ANDROID_GKI_VFS_EXPORT_ONLY); /* * The logic we want is @@ -1948,7 +1949,7 @@ int file_remove_privs(struct file *file) return error; } -EXPORT_SYMBOL(file_remove_privs); +EXPORT_SYMBOL_NS(file_remove_privs, ANDROID_GKI_VFS_EXPORT_ONLY); /** * file_update_time - update mtime and ctime time @@ -1991,7 +1992,7 @@ int file_update_time(struct file *file) if (__mnt_want_write_file(file)) return 0; - ret = update_time(inode, &now, sync_it); + ret = inode_update_time(inode, &now, sync_it); __mnt_drop_write_file(file); return ret; @@ -2129,7 +2130,7 @@ void init_special_inode(struct inode *inode, umode_t mode, dev_t rdev) " inode %s:%lu\n", mode, inode->i_sb->s_id, inode->i_ino); } -EXPORT_SYMBOL(init_special_inode); +EXPORT_SYMBOL_NS(init_special_inode, ANDROID_GKI_VFS_EXPORT_ONLY); /** * inode_init_owner - Init uid,gid,mode for new inode according to posix standards @@ -2155,7 +2156,7 @@ void inode_init_owner(struct inode *inode, const struct inode *dir, inode->i_gid = current_fsgid(); inode->i_mode = mode; } -EXPORT_SYMBOL(inode_init_owner); +EXPORT_SYMBOL_NS(inode_init_owner, ANDROID_GKI_VFS_EXPORT_ONLY); /** * inode_owner_or_capable - check current task permissions to inode @@ -2209,7 +2210,7 @@ void inode_dio_wait(struct inode *inode) if (atomic_read(&inode->i_dio_count)) __inode_dio_wait(inode); } -EXPORT_SYMBOL(inode_dio_wait); +EXPORT_SYMBOL_NS(inode_dio_wait, ANDROID_GKI_VFS_EXPORT_ONLY); /* * inode_set_flags - atomically set some inode flags @@ -2233,7 +2234,7 @@ void inode_set_flags(struct inode *inode, unsigned int flags, WARN_ON_ONCE(flags & ~mask); set_mask_bits(&inode->i_flags, mask, flags); } -EXPORT_SYMBOL(inode_set_flags); +EXPORT_SYMBOL_NS(inode_set_flags, ANDROID_GKI_VFS_EXPORT_ONLY); void inode_nohighmem(struct inode *inode) { @@ -2270,7 +2271,7 @@ struct timespec64 timestamp_truncate(struct timespec64 t, struct inode *inode) WARN(1, "invalid file time granularity: %u", gran); return t; } -EXPORT_SYMBOL(timestamp_truncate); +EXPORT_SYMBOL_NS(timestamp_truncate, ANDROID_GKI_VFS_EXPORT_ONLY); /** * current_time - Return FS time diff --git a/fs/io_uring.c b/fs/io_uring.c index 26753d0cb4312092a6ca6e1182453fc89c7bbd02..fd188b97215119183b7fac8268f0f644b3cf5095 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1009,6 +1009,18 @@ static inline bool __io_match_files(struct io_kiocb *req, req->work.identity->files == files; } +static void io_refs_resurrect(struct percpu_ref *ref, struct completion *compl) +{ + bool got = percpu_ref_tryget(ref); + + /* already at zero, wait for ->release() */ + if (!got) + wait_for_completion(compl); + percpu_ref_resurrect(ref); + if (got) + percpu_ref_put(ref); +} + static bool io_match_task(struct io_kiocb *head, struct task_struct *task, struct files_struct *files) @@ -2075,7 +2087,9 @@ static void io_req_task_cancel(struct callback_head *cb) struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); struct io_ring_ctx *ctx = req->ctx; + mutex_lock(&ctx->uring_lock); __io_req_task_cancel(req, -ECANCELED); + mutex_unlock(&ctx->uring_lock); percpu_ref_put(&ctx->refs); } @@ -4056,6 +4070,7 @@ static int io_add_buffers(struct io_provide_buf *pbuf, struct io_buffer **head) } else { list_add_tail(&buf->list, &(*head)->list); } + cond_resched(); } return i ? i : -ENOMEM; @@ -5559,7 +5574,7 @@ static int io_timeout_remove_prep(struct io_kiocb *req, return -EINVAL; if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) return -EINVAL; - if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags | + if (sqe->ioprio || sqe->buf_index || sqe->len || sqe->timeout_flags || sqe->splice_fd_in) return -EINVAL; @@ -8757,9 +8772,10 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, io_cancel_defer_files(ctx, task, files); io_cqring_overflow_flush(ctx, true, task, files); - io_uring_cancel_files(ctx, task, files); if (!files) __io_uring_cancel_task_requests(ctx, task); + else + io_uring_cancel_files(ctx, task, files); if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { atomic_dec(&task->io_uring->in_idle); @@ -9753,12 +9769,11 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, if (ret < 0) break; } while (1); - mutex_lock(&ctx->uring_lock); if (ret) { - percpu_ref_resurrect(&ctx->refs); - goto out_quiesce; + io_refs_resurrect(&ctx->refs, &ctx->ref_comp); + return ret; } } @@ -9851,7 +9866,6 @@ out: if (io_register_op_must_quiesce(opcode)) { /* bring the ctx back to life */ percpu_ref_reinit(&ctx->refs); -out_quiesce: reinit_completion(&ctx->ref_comp); } return ret; diff --git a/fs/ioctl.c b/fs/ioctl.c index 4e6cc0a7d69c9f27e3b900b2dad72f7904cc90c4..32d8bd3f958b6cd50988f17ce602b064a00488c3 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -146,7 +146,7 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, return 1; return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; } -EXPORT_SYMBOL(fiemap_fill_next_extent); +EXPORT_SYMBOL_NS(fiemap_fill_next_extent, ANDROID_GKI_VFS_EXPORT_ONLY); /** * fiemap_prep - check validity of requested flags for fiemap @@ -191,7 +191,7 @@ int fiemap_prep(struct inode *inode, struct fiemap_extent_info *fieinfo, ret = filemap_write_and_wait(inode->i_mapping); return ret; } -EXPORT_SYMBOL(fiemap_prep); +EXPORT_SYMBOL_NS(fiemap_prep, ANDROID_GKI_VFS_EXPORT_ONLY); static int ioctl_fiemap(struct file *filp, struct fiemap __user *ufiemap) { diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index cd9f7baa5bb7b872c045816de96db4135c4f9c88..a2bd8fe45f4b6cbeb17ba5a0b2077dd43ac02347 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -205,24 +205,32 @@ struct iomap_readpage_ctx { struct readahead_control *rac; }; -static void -iomap_read_inline_data(struct inode *inode, struct page *page, +static int iomap_read_inline_data(struct inode *inode, struct page *page, struct iomap *iomap) { - size_t size = i_size_read(inode); + size_t size = i_size_read(inode) - iomap->offset; void *addr; if (PageUptodate(page)) - return; + return 0; - BUG_ON(page->index); - BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data)); + /* inline data must start page aligned in the file */ + if (WARN_ON_ONCE(offset_in_page(iomap->offset))) + return -EIO; + if (WARN_ON_ONCE(size > PAGE_SIZE - + offset_in_page(iomap->inline_data))) + return -EIO; + if (WARN_ON_ONCE(size > iomap->length)) + return -EIO; + if (WARN_ON_ONCE(page_has_private(page))) + return -EIO; addr = kmap_atomic(page); memcpy(addr, iomap->inline_data, size); memset(addr + size, 0, PAGE_SIZE - size); kunmap_atomic(addr); SetPageUptodate(page); + return 0; } static inline bool iomap_block_needs_zeroing(struct inode *inode, @@ -239,19 +247,22 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, { struct iomap_readpage_ctx *ctx = data; struct page *page = ctx->cur_page; - struct iomap_page *iop = iomap_page_create(inode, page); + struct iomap_page *iop; bool same_page = false, is_contig = false; loff_t orig_pos = pos; unsigned poff, plen; sector_t sector; if (iomap->type == IOMAP_INLINE) { - WARN_ON_ONCE(pos); - iomap_read_inline_data(inode, page, iomap); + int ret = iomap_read_inline_data(inode, page, iomap); + + if (ret) + return ret; return PAGE_SIZE; } /* zero post-eof blocks as the page may be mapped */ + iop = iomap_page_create(inode, page); iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen); if (plen == 0) goto done; @@ -278,14 +289,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (!is_contig || bio_full(ctx->bio, plen)) { gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL); gfp_t orig_gfp = gfp; - int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); if (ctx->bio) submit_bio(ctx->bio); if (ctx->rac) /* same as readahead_gfp_mask */ gfp |= __GFP_NORETRY | __GFP_NOWARN; - ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs)); + ctx->bio = bio_alloc(gfp, bio_max_segs(nr_vecs)); /* * If the bio_alloc fails, try it again for a single page to * avoid having to deal with partial page reads. This emulates @@ -587,6 +598,15 @@ __iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, int flags, return 0; } +static int iomap_write_begin_inline(struct inode *inode, + struct page *page, struct iomap *srcmap) +{ + /* needs more work for the tailpacking case; disable for now */ + if (WARN_ON_ONCE(srcmap->offset != 0)) + return -EIO; + return iomap_read_inline_data(inode, page, srcmap); +} + static int iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, struct page **pagep, struct iomap *iomap, struct iomap *srcmap) @@ -616,7 +636,7 @@ iomap_write_begin(struct inode *inode, loff_t pos, unsigned len, unsigned flags, } if (srcmap->type == IOMAP_INLINE) - iomap_read_inline_data(inode, page, srcmap); + status = iomap_write_begin_inline(inode, page, srcmap); else if (iomap->flags & IOMAP_F_BUFFER_HEAD) status = __block_write_begin_int(page, pos, len, NULL, srcmap); else @@ -694,11 +714,11 @@ static size_t iomap_write_end_inline(struct inode *inode, struct page *page, void *addr; WARN_ON_ONCE(!PageUptodate(page)); - BUG_ON(pos + copied > PAGE_SIZE - offset_in_page(iomap->inline_data)); + BUG_ON(!iomap_inline_data_valid(iomap)); flush_dcache_page(page); addr = kmap_atomic(page); - memcpy(iomap->inline_data + pos, addr + pos, copied); + memcpy(iomap_inline_data(iomap, pos), addr + pos, copied); kunmap_atomic(addr); mark_inode_dirty(inode); diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index b4240cc3c9f9a563afaa1a6c5c52a71abb27cf27..fff29a3d34f42fdeb0fb108c36a1a8aaa8ea6b00 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -354,23 +354,25 @@ iomap_dio_inline_actor(struct inode *inode, loff_t pos, loff_t length, struct iomap_dio *dio, struct iomap *iomap) { struct iov_iter *iter = dio->submit.iter; + void *inline_data = iomap_inline_data(iomap, pos); size_t copied; - BUG_ON(pos + length > PAGE_SIZE - offset_in_page(iomap->inline_data)); + if (WARN_ON_ONCE(!iomap_inline_data_valid(iomap))) + return -EIO; if (dio->flags & IOMAP_DIO_WRITE) { loff_t size = inode->i_size; if (pos > size) - memset(iomap->inline_data + size, 0, pos - size); - copied = copy_from_iter(iomap->inline_data + pos, length, iter); + memset(iomap_inline_data(iomap, size), 0, pos - size); + copied = copy_from_iter(inline_data, length, iter); if (copied) { if (pos + copied > size) i_size_write(inode, pos + copied); mark_inode_dirty(inode); } } else { - copied = copy_to_iter(iomap->inline_data + pos, length, iter); + copied = copy_to_iter(inline_data, length, iter); } dio->size += copied; return copied; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 35675a1065be841b51868ff32ab94f21953f7f74..ecbc8ef0da176c2f611afae1d73a58b4a7f8e11c 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1321,6 +1321,8 @@ static int isofs_read_inode(struct inode *inode, int relocated) de = (struct iso_directory_record *) (bh->b_data + offset); de_len = *(unsigned char *) de; + if (de_len < sizeof(struct iso_directory_record)) + goto fail; if (offset + de_len > bufsize) { int frag1 = bufsize - offset; @@ -1610,3 +1612,4 @@ static void __exit exit_iso9660_fs(void) module_init(init_iso9660_fs) module_exit(exit_iso9660_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile index 126b4da6c7de875be834cc96b63179a26ac0214c..b64f93331643ba4fd25be7e89ccfc71e0c0eacfa 100644 --- a/fs/jbd2/Makefile +++ b/fs/jbd2/Makefile @@ -3,6 +3,8 @@ # Makefile for the linux journaling routines. # +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=ANDROID_GKI_VFS_EXPORT_ONLY + obj-$(CONFIG_JBD2) += jbd2.o jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 188f79d76988174c764c7dd41a9a163efd4245fb..099e4319851d8067cf27c0de2a8bb40b268fc2b2 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2795,6 +2795,7 @@ struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) jbd_unlock_bh_journal_head(bh); return jh; } +EXPORT_SYMBOL(jbd2_journal_grab_journal_head); static void __journal_remove_journal_head(struct buffer_head *bh) { @@ -2847,6 +2848,7 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) jbd_unlock_bh_journal_head(bh); } } +EXPORT_SYMBOL(jbd2_journal_put_journal_head); /* * Initialize jbd inode head @@ -3012,6 +3014,7 @@ static void __exit journal_exit(void) } MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(journal_init); module_exit(journal_exit); diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 4fc8cd698d1a4ac0b4f13b694a0c63c214c276ce..bd7d58d27bfc63574918095b14f14fcf13cf1b7c 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -136,20 +136,15 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page *pg; struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); pgoff_t index = pos >> PAGE_SHIFT; uint32_t pageofs = index << PAGE_SHIFT; int ret = 0; - pg = grab_cache_page_write_begin(mapping, index, flags); - if (!pg) - return -ENOMEM; - *pagep = pg; - jffs2_dbg(1, "%s()\n", __func__); if (pageofs > inode->i_size) { /* Make new hole frag from old EOF to new page */ - struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; uint32_t alloc_len; @@ -160,7 +155,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); if (ret) - goto out_page; + goto out_err; mutex_lock(&f->sem); memset(&ri, 0, sizeof(ri)); @@ -190,7 +185,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, ret = PTR_ERR(fn); jffs2_complete_reservation(c); mutex_unlock(&f->sem); - goto out_page; + goto out_err; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); if (f->metadata) { @@ -205,13 +200,26 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); mutex_unlock(&f->sem); - goto out_page; + goto out_err; } jffs2_complete_reservation(c); inode->i_size = pageofs; mutex_unlock(&f->sem); } + /* + * While getting a page and reading data in, lock c->alloc_sem until + * the page is Uptodate. Otherwise GC task may attempt to read the same + * page in read_cache_page(), which causes a deadlock. + */ + mutex_lock(&c->alloc_sem); + pg = grab_cache_page_write_begin(mapping, index, flags); + if (!pg) { + ret = -ENOMEM; + goto release_sem; + } + *pagep = pg; + /* * Read in the page if it wasn't already present. Cannot optimize away * the whole page write case until jffs2_write_end can handle the @@ -221,15 +229,17 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, mutex_lock(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); mutex_unlock(&f->sem); - if (ret) - goto out_page; + if (ret) { + unlock_page(pg); + put_page(pg); + goto release_sem; + } } jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); - return ret; -out_page: - unlock_page(pg); - put_page(pg); +release_sem: + mutex_unlock(&c->alloc_sem); +out_err: return ret; } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 81ca58c10b728cc6a7c4e132a80c503c0dde1df3..16dcc359fd35934e7135c0114dcbdfda38ca94bb 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -439,3 +439,4 @@ MODULE_DESCRIPTION("The Journalling Flash File System, v2"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); // Actually dual-licensed, but it doesn't matter for // the sake of this tag. It's Free Software. +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 5d7d7170c03c0241b86c240a6f507d2a2b14b1e9..aa4ff7bcaff23dd8931de14fbd629467819772e0 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -81,14 +81,14 @@ int jfs_mount(struct super_block *sb) * (initialize mount inode from the superblock) */ if ((rc = chkSuper(sb))) { - goto errout20; + goto out; } ipaimap = diReadSpecial(sb, AGGREGATE_I, 0); if (ipaimap == NULL) { jfs_err("jfs_mount: Failed to read AGGREGATE_I"); rc = -EIO; - goto errout20; + goto out; } sbi->ipaimap = ipaimap; @@ -99,7 +99,7 @@ int jfs_mount(struct super_block *sb) */ if ((rc = diMount(ipaimap))) { jfs_err("jfs_mount: diMount(ipaimap) failed w/rc = %d", rc); - goto errout21; + goto err_ipaimap; } /* @@ -108,7 +108,7 @@ int jfs_mount(struct super_block *sb) ipbmap = diReadSpecial(sb, BMAP_I, 0); if (ipbmap == NULL) { rc = -EIO; - goto errout22; + goto err_umount_ipaimap; } jfs_info("jfs_mount: ipbmap:0x%p", ipbmap); @@ -120,7 +120,7 @@ int jfs_mount(struct super_block *sb) */ if ((rc = dbMount(ipbmap))) { jfs_err("jfs_mount: dbMount failed w/rc = %d", rc); - goto errout22; + goto err_ipbmap; } /* @@ -139,7 +139,7 @@ int jfs_mount(struct super_block *sb) if (!ipaimap2) { jfs_err("jfs_mount: Failed to read AGGREGATE_I"); rc = -EIO; - goto errout35; + goto err_umount_ipbmap; } sbi->ipaimap2 = ipaimap2; @@ -151,7 +151,7 @@ int jfs_mount(struct super_block *sb) if ((rc = diMount(ipaimap2))) { jfs_err("jfs_mount: diMount(ipaimap2) failed, rc = %d", rc); - goto errout35; + goto err_ipaimap2; } } else /* Secondary aggregate inode table is not valid */ @@ -168,7 +168,7 @@ int jfs_mount(struct super_block *sb) jfs_err("jfs_mount: Failed to read FILESYSTEM_I"); /* open fileset secondary inode allocation map */ rc = -EIO; - goto errout40; + goto err_umount_ipaimap2; } jfs_info("jfs_mount: ipimap:0x%p", ipimap); @@ -178,41 +178,34 @@ int jfs_mount(struct super_block *sb) /* initialize fileset inode allocation map */ if ((rc = diMount(ipimap))) { jfs_err("jfs_mount: diMount failed w/rc = %d", rc); - goto errout41; + goto err_ipimap; } - goto out; + return rc; /* * unwind on error */ - errout41: /* close fileset inode allocation map inode */ +err_ipimap: + /* close fileset inode allocation map inode */ diFreeSpecial(ipimap); - - errout40: /* fileset closed */ - +err_umount_ipaimap2: /* close secondary aggregate inode allocation map */ - if (ipaimap2) { + if (ipaimap2) diUnmount(ipaimap2, 1); +err_ipaimap2: + /* close aggregate inodes */ + if (ipaimap2) diFreeSpecial(ipaimap2); - } - - errout35: - - /* close aggregate block allocation map */ +err_umount_ipbmap: /* close aggregate block allocation map */ dbUnmount(ipbmap, 1); +err_ipbmap: /* close aggregate inodes */ diFreeSpecial(ipbmap); - - errout22: /* close aggregate inode allocation map */ - +err_umount_ipaimap: /* close aggregate inode allocation map */ diUnmount(ipaimap, 1); - - errout21: /* close aggregate inodes */ +err_ipaimap: /* close aggregate inodes */ diFreeSpecial(ipaimap); - errout20: /* aggregate closed */ - - out: - +out: if (rc) jfs_err("Mount JFS Failure: %d", rc); diff --git a/fs/jfs/super.c b/fs/jfs/super.c index b2dc4d1f9dcc559117e0a5bcdd38676e873d3e53..8a02b9bdea19bfe6adf2eeb8d8a735b4bd682325 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -37,6 +37,7 @@ MODULE_DESCRIPTION("The Journaled Filesystem (JFS)"); MODULE_AUTHOR("Steve Best/Dave Kleikamp/Barry Arndt, IBM"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); static struct kmem_cache *jfs_inode_cachep; diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c index 90d255fbdd9b34d366bd6b6fe64f6c760595ea8f..c84d87f558cb62eeb312ad9f74abc59aa257f9a1 100644 --- a/fs/kernel_read_file.c +++ b/fs/kernel_read_file.c @@ -178,7 +178,7 @@ int kernel_read_file_from_fd(int fd, loff_t offset, void **buf, struct fd f = fdget(fd); int ret = -EBADF; - if (!f.file) + if (!f.file || !(f.file->f_mode & FMODE_READ)) goto out; ret = kernel_read_file(f.file, offset, buf, buf_size, file_size, id); diff --git a/fs/libfs.c b/fs/libfs.c index 1b4a215f7b749ef48392d605f3c737ef25ef3be6..40359a0b57e139ea1ebf7fe42001c1d48cad927b 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -225,7 +225,7 @@ ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t { return -EISDIR; } -EXPORT_SYMBOL(generic_read_dir); +EXPORT_SYMBOL_NS(generic_read_dir, ANDROID_GKI_VFS_EXPORT_ONLY); const struct file_operations simple_dir_operations = { .open = dcache_dir_open, diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 34f546404aa11385388f140c7a7a22d345f5fc31..8a9d4a28290d1207076df03bd2681fb1f6cae57d 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -719,4 +719,5 @@ static void __exit exit_minix_fs(void) module_init(init_minix_fs) module_exit(exit_minix_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/mpage.c b/fs/mpage.c index 6bdb8dc021ccd20584468f06da02404d8549f6a8..55d2bfa0629207dde7f992c4f301ffb98db131f5 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -340,9 +340,7 @@ alloc_new: goto out; } args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9), - min_t(int, args->nr_pages, - BIO_MAX_PAGES), - gfp); + bio_max_segs(args->nr_pages), gfp); if (args->bio == NULL) goto confused; } @@ -430,7 +428,7 @@ void mpage_readahead(struct readahead_control *rac, get_block_t get_block) if (args.bio) mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio); } -EXPORT_SYMBOL(mpage_readahead); +EXPORT_SYMBOL_NS(mpage_readahead, ANDROID_GKI_VFS_EXPORT_ONLY); /* * This isn't called much at all @@ -448,7 +446,7 @@ int mpage_readpage(struct page *page, get_block_t get_block) mpage_bio_submit(REQ_OP_READ, 0, args.bio); return 0; } -EXPORT_SYMBOL(mpage_readpage); +EXPORT_SYMBOL_NS(mpage_readpage, ANDROID_GKI_VFS_EXPORT_ONLY); /* * Writing is not so simple. diff --git a/fs/namei.c b/fs/namei.c index 6d8635536485d3bc1320538078b8d98d81b99bab..d7b609a6f6dc94459628c5eb8ce64726ddc34af3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2533,7 +2533,7 @@ int kern_path(const char *name, unsigned int flags, struct path *path) return filename_lookup(AT_FDCWD, getname_kernel(name), flags, path, NULL); } -EXPORT_SYMBOL(kern_path); +EXPORT_SYMBOL_NS(kern_path, ANDROID_GKI_VFS_EXPORT_ONLY); /** * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair @@ -2887,7 +2887,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, fsnotify_create(dir, dentry); return error; } -EXPORT_SYMBOL(vfs_create); +EXPORT_SYMBOL_NS(vfs_create, ANDROID_GKI_VFS_EXPORT_ONLY); int vfs_mkobj(struct dentry *dentry, umode_t mode, int (*f)(struct dentry *, umode_t, void *), @@ -3727,7 +3727,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fsnotify_mkdir(dir, dentry); return error; } -EXPORT_SYMBOL(vfs_mkdir); +EXPORT_SYMBOL_NS(vfs_mkdir, ANDROID_GKI_VFS_EXPORT_ONLY); static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode) { @@ -3793,16 +3793,15 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) dentry->d_inode->i_flags |= S_DEAD; dont_mount(dentry); detach_mounts(dentry); - fsnotify_rmdir(dir, dentry); out: inode_unlock(dentry->d_inode); dput(dentry); if (!error) - d_delete(dentry); + d_delete_notify(dir, dentry); return error; } -EXPORT_SYMBOL(vfs_rmdir); +EXPORT_SYMBOL_NS(vfs_rmdir, ANDROID_GKI_VFS_EXPORT_ONLY); long do_rmdir(int dfd, struct filename *name) { @@ -3909,7 +3908,6 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegate if (!error) { dont_mount(dentry); detach_mounts(dentry); - fsnotify_unlink(dir, dentry); } } } @@ -3917,14 +3915,16 @@ out: inode_unlock(target); /* We don't d_delete() NFS sillyrenamed files--they still exist. */ - if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { + if (!error && dentry->d_flags & DCACHE_NFSFS_RENAMED) { + fsnotify_unlink(dir, dentry); + } else if (!error) { fsnotify_link_count(target); - d_delete(dentry); + d_delete_notify(dir, dentry); } return error; } -EXPORT_SYMBOL(vfs_unlink); +EXPORT_SYMBOL_NS(vfs_unlink, ANDROID_GKI_VFS_EXPORT_ONLY); /* * Make sure that the actual truncation of the file will occur outside its @@ -4159,7 +4159,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de fsnotify_link(dir, inode, new_dentry); return error; } -EXPORT_SYMBOL(vfs_link); +EXPORT_SYMBOL_NS(vfs_link, ANDROID_GKI_VFS_EXPORT_ONLY); /* * Hardlinks are often used in delicate situations. We avoid @@ -4419,7 +4419,7 @@ out: return error; } -EXPORT_SYMBOL(vfs_rename); +EXPORT_SYMBOL_NS(vfs_rename, ANDROID_GKI_VFS_EXPORT_ONLY); static int do_renameat2(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, unsigned int flags) diff --git a/fs/namespace.c b/fs/namespace.c index 046b084136c51a0a36972c326f63d65bae5d4e7d..6d1f11a6b2904af12db512cf1488463e0d37c677 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -415,7 +415,7 @@ int mnt_want_write_file(struct file *file) sb_end_write(file_inode(file)->i_sb); return ret; } -EXPORT_SYMBOL_GPL(mnt_want_write_file); +EXPORT_SYMBOL_NS_GPL(mnt_want_write_file, ANDROID_GKI_VFS_EXPORT_ONLY); /** * __mnt_drop_write - give up write access to a mount @@ -457,7 +457,7 @@ void mnt_drop_write_file(struct file *file) __mnt_drop_write_file(file); sb_end_write(file_inode(file)->i_sb); } -EXPORT_SYMBOL(mnt_drop_write_file); +EXPORT_SYMBOL_NS(mnt_drop_write_file, ANDROID_GKI_VFS_EXPORT_ONLY); static int mnt_make_readonly(struct mount *mnt) { diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 08108b6d2fa107e3bbc0cc86668fea662db4f2eb..feab9fdc022fb18c9521b199c112fb669e1b6ebd 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -115,13 +115,13 @@ bl_submit_bio(struct bio *bio) return NULL; } -static struct bio * -bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector, +static struct bio *bl_alloc_init_bio(unsigned int npg, + struct block_device *bdev, sector_t disk_sector, bio_end_io_t end_io, struct parallel_io *par) { struct bio *bio; - npg = min(npg, BIO_MAX_PAGES); + npg = bio_max_segs(npg); bio = bio_alloc(GFP_NOIO, npg); if (!bio && (current->flags & PF_MEMALLOC)) { while (!bio && (npg /= 2)) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 6a2033131c068e82e9934c6baba20c243dfad62f..ccd4f245cae240b812c3a7e6d44e2de32de819c5 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -170,7 +170,7 @@ struct cb_devicenotifyitem { }; struct cb_devicenotifyargs { - int ndevs; + uint32_t ndevs; struct cb_devicenotifyitem *devs; }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index be546ece383f5af783f6bdbd5c75a696c04ec823..b44219ce60b869c5c6556ab6053ee5e47c12b170 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -353,7 +353,7 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, struct cb_process_state *cps) { struct cb_devicenotifyargs *args = argp; - int i; + uint32_t i; __be32 res = 0; struct nfs_client *clp = cps->clp; struct nfs_server *server = NULL; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 79ff172eb1c81ac0bc2b89740157b6163ca6aa8b..1725079a05276275c4b9d90d0af3bdc591079bc4 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -259,11 +259,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, void *argp) { struct cb_devicenotifyargs *args = argp; + uint32_t tmp, n, i; __be32 *p; __be32 status = 0; - u32 tmp; - int n, i; - args->ndevs = 0; /* Num of device notifications */ p = xdr_inline_decode(xdr, sizeof(uint32_t)); @@ -272,7 +270,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, goto out; } n = ntohl(*p++); - if (n <= 0) + if (n == 0) goto out; if (n > ULONG_MAX / sizeof(*args->devs)) { status = htonl(NFS4ERR_BADXDR); @@ -331,19 +329,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, dev->cbd_immediate = 0; } - args->ndevs++; - dprintk("%s: type %d layout 0x%x immediate %d\n", __func__, dev->cbd_notify_type, dev->cbd_layout_type, dev->cbd_immediate); } + args->ndevs = n; + dprintk("%s: ndevs %d\n", __func__, args->ndevs); + return 0; +err: + kfree(args->devs); out: + args->devs = NULL; + args->ndevs = 0; dprintk("%s: status %d ndevs %d\n", __func__, ntohl(status), args->ndevs); return status; -err: - kfree(args->devs); - goto out; } static __be32 decode_sessionid(struct xdr_stream *xdr, diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 723d425796cca39f07bda563fec4a1d5a3b5b33d..818ff8b1b99da7429cfef13b18c547ed23930221 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -177,6 +177,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) INIT_LIST_HEAD(&clp->cl_superblocks); clp->cl_rpcclient = ERR_PTR(-EINVAL); + clp->cl_flags = cl_init->init_flags; clp->cl_proto = cl_init->proto; clp->cl_nconnect = cl_init->nconnect; clp->cl_net = get_net(cl_init->net); @@ -426,7 +427,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) list_add_tail(&new->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); - new->cl_flags = cl_init->init_flags; return rpc_ops->init_client(new, cl_init); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c837675cd395ae13ac0258a074d2c12dd41eac52..2ad56ff4752c7f8a85f270539d0bdc43ad7bb393 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1061,13 +1061,12 @@ static bool nfs_verifier_is_delegated(struct dentry *dentry) static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf) { struct inode *inode = d_inode(dentry); + struct inode *dir = d_inode(dentry->d_parent); - if (!nfs_verifier_is_delegated(dentry) && - !nfs_verify_change_attribute(d_inode(dentry->d_parent), verf)) - goto out; + if (!nfs_verify_change_attribute(dir, verf)) + return; if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) nfs_set_verifier_delegated(&verf); -out: dentry->d_time = verf; } @@ -1778,6 +1777,24 @@ out: no_open: res = nfs_lookup(dir, dentry, lookup_flags); + if (!res) { + inode = d_inode(dentry); + if ((lookup_flags & LOOKUP_DIRECTORY) && inode && + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) + res = ERR_PTR(-ENOTDIR); + else if (inode && S_ISREG(inode->i_mode)) + res = ERR_PTR(-EOPENSTALE); + } else if (!IS_ERR(res)) { + inode = d_inode(res); + if ((lookup_flags & LOOKUP_DIRECTORY) && inode && + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) { + dput(res); + res = ERR_PTR(-ENOTDIR); + } else if (inode && S_ISREG(inode->i_mode)) { + dput(res); + res = ERR_PTR(-EOPENSTALE); + } + } if (switched) { d_lookup_done(dentry); if (!res) @@ -2175,6 +2192,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) trace_nfs_link_enter(inode, dir, dentry); d_drop(dentry); + if (S_ISREG(inode->i_mode)) + nfs_sync_inode(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { ihold(inode); @@ -2263,6 +2282,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } + if (S_ISREG(old_inode->i_mode)) + nfs_sync_inode(old_inode); task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); if (IS_ERR(task)) { error = PTR_ERR(task); @@ -2468,7 +2489,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, co return NULL; } -static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, bool may_block) +static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_access_entry *cache; @@ -2498,8 +2519,7 @@ static int nfs_access_get_cached_locked(struct inode *inode, const struct cred * spin_lock(&inode->i_lock); retry = false; } - res->cred = cache->cred; - res->mask = cache->mask; + *mask = cache->mask; list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru); err = 0; out: @@ -2511,7 +2531,7 @@ out_zap: return -ENOENT; } -static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res) +static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask) { /* Only check the most recently returned cache entry, * but do it without locking. @@ -2533,22 +2553,21 @@ static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cre goto out; if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS)) goto out; - res->cred = cache->cred; - res->mask = cache->mask; + *mask = cache->mask; err = 0; out: rcu_read_unlock(); return err; } -int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct -nfs_access_entry *res, bool may_block) +int nfs_access_get_cached(struct inode *inode, const struct cred *cred, + u32 *mask, bool may_block) { int status; - status = nfs_access_get_cached_rcu(inode, cred, res); + status = nfs_access_get_cached_rcu(inode, cred, mask); if (status != 0) - status = nfs_access_get_cached_locked(inode, cred, res, + status = nfs_access_get_cached_locked(inode, cred, mask, may_block); return status; @@ -2669,7 +2688,7 @@ static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask) trace_nfs_access_enter(inode); - status = nfs_access_get_cached(inode, cred, &cache, may_block); + status = nfs_access_get_cached(inode, cred, &cache.mask, may_block); if (status == 0) goto out_cached; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 2e894fec036b083b8431cf82883a7d06fe9b1e83..3c0335c15a730054b0c09f1d9801765610a1e9b7 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -620,7 +620,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data) nfs_unlock_and_release_request(req); } - if (atomic_dec_and_test(&cinfo.mds->rpcs_out)) + if (nfs_commit_end(cinfo.mds)) nfs_direct_write_complete(dreq); } diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index 3eda40a320a53ff40a37262391c2eb75741ca8f4..1f12297109b4186d46f54031606da22e10470527 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -378,10 +378,10 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, goto noconnect; ds = mirror->mirror_ds->ds; + if (READ_ONCE(ds->ds_clp)) + goto out; /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ smp_rmb(); - if (ds->ds_clp) - goto out; /* FIXME: For now we assume the server sent only one version of NFS * to use for the DS. diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 21addb78523d229666eff8cbd9a97247b160ba66..f17ad75d0afd845c668ce1ee4fc0ffaf926afdcc 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -195,6 +195,18 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags) } EXPORT_SYMBOL_GPL(nfs_check_cache_invalid); +#ifdef CONFIG_NFS_V4_2 +static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) +{ + return nfsi->xattr_cache != NULL; +} +#else +static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi) +{ + return false; +} +#endif + static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) { struct nfs_inode *nfsi = NFS_I(inode); @@ -210,6 +222,8 @@ static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags) } else if (flags & NFS_INO_REVAL_PAGECACHE) flags |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE; + if (!nfs_has_xattr_cache(nfsi)) + flags &= ~NFS_INO_INVALID_XATTR; if (inode->i_mapping->nrpages == 0) flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER); nfsi->cache_validity |= flags; @@ -807,12 +821,9 @@ int nfs_getattr(const struct path *path, struct kstat *stat, } /* Flush out writes to the server in order to update c/mtime. */ - if ((request_mask & (STATX_CTIME|STATX_MTIME)) && - S_ISREG(inode->i_mode)) { - err = filemap_write_and_wait(inode->i_mapping); - if (err) - goto out; - } + if ((request_mask & (STATX_CTIME | STATX_MTIME)) && + S_ISREG(inode->i_mode)) + filemap_write_and_wait(inode->i_mapping); /* * We may force a getattr if the user cares about atime. @@ -2326,6 +2337,7 @@ static void __exit exit_nfs_fs(void) /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch "); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_param(enable_ino64, bool, 0644); module_init(init_nfs_fs) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 4ebcd9dd153524d5cf93ef70ea4803d5ab92e02d..2587b1b8e2ef760ea482065c4d1ab091d8c65969 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -362,8 +362,9 @@ static ssize_t _nfs42_proc_copy(struct file *src, goto out; } - truncate_pagecache_range(dst_inode, pos_dst, - pos_dst + res->write_res.count); + WARN_ON_ONCE(invalidate_inode_pages2_range(dst_inode->i_mapping, + pos_dst >> PAGE_SHIFT, + (pos_dst + res->write_res.count - 1) >> PAGE_SHIFT)); spin_lock(&dst_inode->i_lock); NFS_I(dst_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED | NFS_INO_INVALID_SIZE | diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index c078f88552695d0d9baf4181164058bb2637de13..f2248d9d4db51a86e196526c69516332623a240a 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1396,8 +1396,7 @@ static int nfs4_xdr_dec_clone(struct rpc_rqst *rqstp, status = decode_clone(xdr); if (status) goto out; - status = decode_getfattr(xdr, res->dst_fattr, res->server); - + decode_getfattr(xdr, res->dst_fattr, res->server); out: res->rpc_status = status; return status; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 3e344bec3647b09889b113c6b568d6316b41140d..6d916563356ef372d594a482a4b2246888d5f090 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -281,7 +281,8 @@ struct rpc_clnt *nfs4_negotiate_security(struct rpc_clnt *, struct inode *, int nfs4_submount(struct fs_context *, struct nfs_server *); int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); - +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, + size_t salen, struct net *net, int port); /* nfs4proc.c */ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_async_handle_error(struct rpc_task *task, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 6d74f2e2de4615a14528f5696b7bdcc05cffcfc7..0e6437b08a3a502bdb3bbbb792294d59fb051b2a 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1330,8 +1330,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, } nfs_put_client(clp); - if (server->nfs_client->cl_hostname == NULL) + if (server->nfs_client->cl_hostname == NULL) { server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); + if (server->nfs_client->cl_hostname == NULL) + return -ENOMEM; + } nfs_server_insert_lists(server); return nfs_probe_destination(server); diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 8d8aba305ecca02d8776c3439de736c951edee05..f331866dd418247c970b14c7c3054ecde8674045 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -487,7 +487,7 @@ nfs_idmap_new(struct nfs_client *clp) err_destroy_pipe: rpc_destroy_pipe_data(idmap->idmap_pipe); err: - get_user_ns(idmap->user_ns); + put_user_ns(idmap->user_ns); kfree(idmap); return error; } diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 873342308dc0d9a131d93665b338e3eb1a49d6e9..3680c8da510c9787bcbae896a292a2cfec039469 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -164,16 +164,21 @@ static int nfs4_validate_fspath(struct dentry *dentry, return 0; } -static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen, struct net *net) +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, + size_t salen, struct net *net, int port) { ssize_t ret; ret = rpc_pton(net, string, len, sa, salen); if (ret == 0) { - ret = nfs_dns_resolve_name(net, string, len, sa, salen); - if (ret < 0) - ret = 0; + ret = rpc_uaddr2sockaddr(net, string, len, sa, salen); + if (ret == 0) { + ret = nfs_dns_resolve_name(net, string, len, sa, salen); + if (ret < 0) + ret = 0; + } + } else if (port) { + rpc_set_port(sa, port); } return ret; } @@ -328,7 +333,7 @@ static int try_location(struct fs_context *fc, nfs_parse_server_name(buf->data, buf->len, &ctx->nfs_server.address, sizeof(ctx->nfs_server._address), - fc->net_ns); + fc->net_ns, 0); if (ctx->nfs_server.addrlen == 0) continue; @@ -496,7 +501,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, continue; salen = nfs_parse_server_name(buf->data, buf->len, - sap, addr_bufsize, net); + sap, addr_bufsize, net, 0); if (salen == 0) continue; rpc_set_port(sap, NFS_PORT); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6d3295bf8e9fc2e3c00cd91c8aa3818f354d90be..a3612c184f887efbd8cc34ff07f761842dabf272 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1590,15 +1590,16 @@ static bool nfs_stateid_is_sequential(struct nfs4_state *state, { if (test_bit(NFS_OPEN_STATE, &state->flags)) { /* The common case - we're updating to a new sequence number */ - if (nfs4_stateid_match_other(stateid, &state->open_stateid) && - nfs4_stateid_is_next(&state->open_stateid, stateid)) { - return true; + if (nfs4_stateid_match_other(stateid, &state->open_stateid)) { + if (nfs4_stateid_is_next(&state->open_stateid, stateid)) + return true; + return false; } - } else { - /* This is the first OPEN in this generation */ - if (stateid->seqid == cpu_to_be32(1)) - return true; + /* The server returned a new stateid */ } + /* This is the first OPEN in this generation */ + if (stateid->seqid == cpu_to_be32(1)) + return true; return false; } @@ -7598,7 +7599,7 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, const char *key, const void *buf, size_t buflen, int flags) { - struct nfs_access_entry cache; + u32 mask; int ret; if (!nfs_server_capable(inode, NFS_CAP_XATTR)) @@ -7613,8 +7614,8 @@ static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler, * do a cached access check for the XA* flags to possibly avoid * doing an RPC and getting EACCES back. */ - if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { - if (!(cache.mask & NFS_ACCESS_XAWRITE)) + if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { + if (!(mask & NFS_ACCESS_XAWRITE)) return -EACCES; } @@ -7636,14 +7637,14 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler, const char *key, void *buf, size_t buflen, int flags) { - struct nfs_access_entry cache; + u32 mask; ssize_t ret; if (!nfs_server_capable(inode, NFS_CAP_XATTR)) return -EOPNOTSUPP; - if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { - if (!(cache.mask & NFS_ACCESS_XAREAD)) + if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { + if (!(mask & NFS_ACCESS_XAREAD)) return -EACCES; } @@ -7668,13 +7669,13 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len) ssize_t ret, size; char *buf; size_t buflen; - struct nfs_access_entry cache; + u32 mask; if (!nfs_server_capable(inode, NFS_CAP_XATTR)) return 0; - if (!nfs_access_get_cached(inode, current_cred(), &cache, true)) { - if (!(cache.mask & NFS_ACCESS_XALIST)) + if (!nfs_access_get_cached(inode, current_cred(), &mask, true)) { + if (!(mask & NFS_ACCESS_XALIST)) return 0; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4bf10792cb5b1049790ec0d0584f263fb3bac96c..cbeec29e9f21a69cc53a0833799954cd1a45fb11 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2104,6 +2104,9 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred } result = -NFS4ERR_NXIO; + if (!locations->nlocations) + goto out; + if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { dprintk("<-- %s: No fs_locations data, migration skipped\n", __func__); diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index d09bcfd7db89488eec9291c4e985e13953f079d3..b422e8a09d2510445331c20ea838a094a093e036 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -309,6 +309,7 @@ static void __exit exit_nfs_v4(void) } MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(init_nfs_v4); module_exit(exit_nfs_v4); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c16b93df1bc14222d7dd4393704f7d090cbd6d74..e2f0e3446e22a99ad6d06ec9af112fe696840595 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3680,8 +3680,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (unlikely(!p)) goto out_eio; n = be32_to_cpup(p); - if (n <= 0) - goto out_eio; for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { u32 m; struct nfs4_fs_location *loc; @@ -4184,10 +4182,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, } else printk(KERN_WARNING "%s: label too long (%u)!\n", __func__, len); + if (label && label->label) + dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n", + __func__, label->len, (char *)label->label, + label->len, label->pi, label->lfs); } - if (label && label->label) - dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, - (char *)label->label, label->len, label->pi, label->lfs); return status; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 132a345e937311885b6ef8e4ab8ed5fa8061454c..0212fe32e63aa285f871b80595ef7a339f36114d 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -515,7 +515,7 @@ pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, { struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; - if (!lseg || !fl_cinfo->ops->mark_request_commit) + if (!lseg || !fl_cinfo->ops || !fl_cinfo->ops->mark_request_commit) return false; fl_cinfo->ops->mark_request_commit(req, lseg, cinfo, ds_commit_idx); return true; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 251c4a3aef9a6806468b9624197e748610de2fc6..7b9d701bef016ebdb65ab220bb982a704ddaa95c 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -468,7 +468,6 @@ pnfs_bucket_alloc_ds_commits(struct list_head *list, goto out_error; data->ds_commit_index = i; list_add_tail(&data->list, list); - atomic_inc(&cinfo->mds->rpcs_out); nreq++; } mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); @@ -520,7 +519,6 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, data->ds_commit_index = -1; list_splice_init(mds_pages, &data->pages); list_add_tail(&data->list, &list); - atomic_inc(&cinfo->mds->rpcs_out); nreq++; } @@ -876,7 +874,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, } smp_wmb(); - ds->ds_clp = clp; + WRITE_ONCE(ds->ds_clp, clp); dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; @@ -949,7 +947,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, } smp_wmb(); - ds->ds_clp = clp; + WRITE_ONCE(ds->ds_clp, clp); dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 639c34fec04a8488dd3a5df5b1b1c9ba0f170753..bde4c362841f009cdb492d0b5787c14fbc100409 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1034,25 +1034,11 @@ nfs_scan_commit_list(struct list_head *src, struct list_head *dst, struct nfs_page *req, *tmp; int ret = 0; -restart: list_for_each_entry_safe(req, tmp, src, wb_list) { kref_get(&req->wb_kref); if (!nfs_lock_request(req)) { - int status; - - /* Prevent deadlock with nfs_lock_and_join_requests */ - if (!list_empty(dst)) { - nfs_release_request(req); - continue; - } - /* Ensure we make progress to prevent livelock */ - mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); - status = nfs_wait_on_request(req); nfs_release_request(req); - mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); - if (status < 0) - break; - goto restart; + continue; } nfs_request_remove_commit_list(req, cinfo); clear_bit(PG_COMMIT_TO_DS, &req->wb_flags); @@ -1663,10 +1649,13 @@ static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo) atomic_inc(&cinfo->rpcs_out); } -static void nfs_commit_end(struct nfs_mds_commit_info *cinfo) +bool nfs_commit_end(struct nfs_mds_commit_info *cinfo) { - if (atomic_dec_and_test(&cinfo->rpcs_out)) + if (atomic_dec_and_test(&cinfo->rpcs_out)) { wake_up_var(&cinfo->rpcs_out); + return true; + } + return false; } void nfs_commitdata_release(struct nfs_commit_data *data) @@ -1766,6 +1755,7 @@ void nfs_init_commit(struct nfs_commit_data *data, data->res.fattr = &data->fattr; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); + nfs_commit_begin(cinfo->mds); } EXPORT_SYMBOL_GPL(nfs_init_commit); @@ -1811,7 +1801,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how, /* Set up the argument struct */ nfs_init_commit(data, head, NULL, cinfo); - atomic_inc(&cinfo->mds->rpcs_out); return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), data->mds_ops, how, RPC_TASK_CRED_NOREF); } @@ -1924,6 +1913,7 @@ static int __nfs_commit_inode(struct inode *inode, int how, int may_wait = how & FLUSH_SYNC; int ret, nscan; + how &= ~FLUSH_SYNC; nfs_init_cinfo_from_inode(&cinfo, inode); nfs_commit_begin(cinfo.mds); for (;;) { diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index a633044b0dc1f7b9022a627c0debca97df9e9e4d..981a4e4c9a3cf31cff0987c940cf6e6188cb7431 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -183,6 +183,11 @@ nfsd3_proc_write(struct svc_rqst *rqstp) (unsigned long long) argp->offset, argp->stable? " stable" : ""); + resp->status = nfserr_fbig; + if (argp->offset > (u64)OFFSET_MAX || + argp->offset + argp->len > (u64)OFFSET_MAX) + return rpc_success; + fh_copy(&resp->fh, &argp->fh); resp->committed = argp->stable; nvecs = svc_fill_write_vector(rqstp, rqstp->rq_arg.pages, diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 00440337efc1fd42f9523a6d14e7315e91ecbc32..7850d141c7621d7a6382d3b2fdca382f05b9456e 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1008,8 +1008,9 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, unsigned long cnt; int nvecs; - if (write->wr_offset >= OFFSET_MAX) - return nfserr_inval; + if (write->wr_offset > (u64)OFFSET_MAX || + write->wr_offset + write->wr_buflen > (u64)OFFSET_MAX) + return nfserr_fbig; cnt = write->wr_buflen; trace_nfsd_write_start(rqstp, &cstate->current_fh, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 186fa2c2c6ba6a14d26f9d31bf6002793484f783..f9b730c43192db468f802480786a80d68ccefde2 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -2156,6 +2156,7 @@ static struct notifier_block nfsd4_cld_block = { int register_cld_notifier(void) { + WARN_ON(!nfsd_net_id); return rpc_pipefs_notifier_register(&nfsd4_cld_block); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0313390fa4b4454686b65e438e424be5cfc9bad2..d01d7929753efec03f8e942d0b61e320e12c85b9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1089,6 +1089,11 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) return 0; } +static bool delegation_hashed(struct nfs4_delegation *dp) +{ + return !(list_empty(&dp->dl_perfile)); +} + static bool unhash_delegation_locked(struct nfs4_delegation *dp) { @@ -1096,7 +1101,7 @@ unhash_delegation_locked(struct nfs4_delegation *dp) lockdep_assert_held(&state_lock); - if (list_empty(&dp->dl_perfile)) + if (!delegation_hashed(dp)) return false; dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID; @@ -3512,7 +3517,7 @@ static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_s } static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst, - struct nfsd4_session *session, u32 req) + struct nfsd4_session *session, u32 req, struct nfsd4_conn **conn) { struct nfs4_client *clp = session->se_client; struct svc_xprt *xpt = rqst->rq_xprt; @@ -3535,6 +3540,8 @@ static __be32 nfsd4_match_existing_connection(struct svc_rqst *rqst, else status = nfserr_inval; spin_unlock(&clp->cl_lock); + if (status == nfs_ok && conn) + *conn = c; return status; } @@ -3559,8 +3566,16 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp, status = nfserr_wrong_cred; if (!nfsd4_mach_creds_match(session->se_client, rqstp)) goto out; - status = nfsd4_match_existing_connection(rqstp, session, bcts->dir); - if (status == nfs_ok || status == nfserr_inval) + status = nfsd4_match_existing_connection(rqstp, session, + bcts->dir, &conn); + if (status == nfs_ok) { + if (bcts->dir == NFS4_CDFC4_FORE_OR_BOTH || + bcts->dir == NFS4_CDFC4_BACK) + conn->cn_flags |= NFS4_CDFC4_BACK; + nfsd4_probe_callback(session->se_client); + goto out; + } + if (status == nfserr_inval) goto out; status = nfsd4_map_bcts_dir(&bcts->dir); if (status) @@ -4032,8 +4047,10 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, status = nfserr_clid_inuse; if (client_has_state(old) && !same_creds(&unconf->cl_cred, - &old->cl_cred)) + &old->cl_cred)) { + old = NULL; goto out; + } status = mark_client_expired_locked(old); if (status) { old = NULL; @@ -4502,7 +4519,7 @@ static void nfsd4_cb_recall_prepare(struct nfsd4_callback *cb) * queued for a lease break. Don't queue it again. */ spin_lock(&state_lock); - if (dp->dl_time == 0) { + if (delegation_hashed(dp) && dp->dl_time == 0) { dp->dl_time = ktime_get_boottime_seconds(); list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru); } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 5f5169b9c2e9044589e294e5e589f32c6bdf0783..46f825cf53f4f271a0d1fcb8d31b98cbbf15c1ee 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3427,15 +3427,18 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, goto fail; cd->rd_maxcount -= entry_bytes; /* - * RFC 3530 14.2.24 describes rd_dircount as only a "hint", so - * let's always let through the first entry, at least: + * RFC 3530 14.2.24 describes rd_dircount as only a "hint", and + * notes that it could be zero. If it is zero, then the server + * should enforce only the rd_maxcount value. */ - if (!cd->rd_dircount) - goto fail; - name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8; - if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) - goto fail; - cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); + if (cd->rd_dircount) { + name_and_cookie = 4 + 4 * XDR_QUADLEN(namlen) + 8; + if (name_and_cookie > cd->rd_dircount && cd->cookie_offset) + goto fail; + cd->rd_dircount -= min(cd->rd_dircount, name_and_cookie); + if (!cd->rd_dircount) + cd->rd_maxcount = 0; + } cd->cookie_offset = cookie_offset; skip_entry: diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 0759e589ab52bf4a240ea2b2cc678adc3ee843d5..2796ecfdfd5d0e5728d45a28c0d8b278168d3e8b 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -792,7 +792,10 @@ out_close: svc_xprt_put(xprt); } out_err: - nfsd_destroy(net); + if (!list_empty(&nn->nfsd_serv->sv_permsocks)) + nn->nfsd_serv->sv_nrthreads--; + else + nfsd_destroy(net); return err; } @@ -1244,7 +1247,8 @@ static void nfsdfs_remove_file(struct inode *dir, struct dentry *dentry) clear_ncl(d_inode(dentry)); dget(dentry); ret = simple_unlink(dir, dentry); - d_delete(dentry); + d_drop(dentry); + fsnotify_unlink(dir, dentry); dput(dentry); WARN_ON_ONCE(ret); } @@ -1333,8 +1337,8 @@ void nfsd_client_rmdir(struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); WARN_ON_ONCE(ret); + d_drop(dentry); fsnotify_rmdir(dir, dentry); - d_delete(dentry); dput(dentry); inode_unlock(dir); } @@ -1522,12 +1526,9 @@ static int __init init_nfsd(void) int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = register_cld_notifier(); - if (retval) - return retval; retval = nfsd4_init_slabs(); if (retval) - goto out_unregister_notifier; + return retval; retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; @@ -1544,10 +1545,15 @@ static int __init init_nfsd(void) goto out_free_exports; retval = register_pernet_subsys(&nfsd_net_ops); if (retval < 0) + goto out_free_filesystem; + retval = register_cld_notifier(); + if (retval) goto out_free_all; return 0; out_free_all: unregister_pernet_subsys(&nfsd_net_ops); +out_free_filesystem: + unregister_filesystem(&nfsd_fs_type); out_free_exports: remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); @@ -1559,13 +1565,12 @@ out_free_stat: nfsd4_exit_pnfs(); out_free_slabs: nfsd4_free_slabs(); -out_unregister_notifier: - unregister_cld_notifier(); return retval; } static void __exit exit_nfsd(void) { + unregister_cld_notifier(); unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); @@ -1575,10 +1580,10 @@ static void __exit exit_nfsd(void) nfsd4_free_slabs(); nfsd4_exit_pnfs(); unregister_filesystem(&nfsd_fs_type); - unregister_cld_notifier(); } MODULE_AUTHOR("Olaf Kirch "); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(init_nfsd) module_exit(exit_nfsd) diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index c8ca73d69ad04f51bdedb325ceb4f43b44757fdb..a952f4a9b2a68012d4ff1e04b7190b946c8a2ce3 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -175,14 +175,14 @@ TRACE_EVENT(nfsd_export_update, DECLARE_EVENT_CLASS(nfsd_io_class, TP_PROTO(struct svc_rqst *rqstp, struct svc_fh *fhp, - loff_t offset, - unsigned long len), + u64 offset, + u32 len), TP_ARGS(rqstp, fhp, offset, len), TP_STRUCT__entry( __field(u32, xid) __field(u32, fh_hash) - __field(loff_t, offset) - __field(unsigned long, len) + __field(u64, offset) + __field(u32, len) ), TP_fast_assign( __entry->xid = be32_to_cpu(rqstp->rq_xid); @@ -190,7 +190,7 @@ DECLARE_EVENT_CLASS(nfsd_io_class, __entry->offset = offset; __entry->len = len; ), - TP_printk("xid=0x%08x fh_hash=0x%08x offset=%lld len=%lu", + TP_printk("xid=0x%08x fh_hash=0x%08x offset=%llu len=%u", __entry->xid, __entry->fh_hash, __entry->offset, __entry->len) ) @@ -199,8 +199,8 @@ DECLARE_EVENT_CLASS(nfsd_io_class, DEFINE_EVENT(nfsd_io_class, nfsd_##name, \ TP_PROTO(struct svc_rqst *rqstp, \ struct svc_fh *fhp, \ - loff_t offset, \ - unsigned long len), \ + u64 offset, \ + u32 len), \ TP_ARGS(rqstp, fhp, offset, len)) DEFINE_NFSD_IO_EVENT(read_start); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 4abd928b0bc839db191e75d4dff935bee6ab7e29..ab1a5e8467f21c618bce27db3df3d513562bb52f 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -53,6 +53,7 @@ MODULE_AUTHOR("NTT Corp."); MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); static struct kmem_cache *nilfs_inode_cachep; struct kmem_cache *nilfs_transaction_cachep; diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 52ccd34b1e792370597b77f7d228c4d79bf96600..06c368ce3aa22ef90a01bfd2809c8077b5c41515 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -541,8 +541,8 @@ struct nls_table *load_nls_default(void) } EXPORT_SYMBOL(unregister_nls); -EXPORT_SYMBOL(unload_nls); -EXPORT_SYMBOL(load_nls); -EXPORT_SYMBOL(load_nls_default); +EXPORT_SYMBOL_NS(unload_nls, ANDROID_GKI_VFS_EXPORT_ONLY); +EXPORT_SYMBOL_NS(load_nls, ANDROID_GKI_VFS_EXPORT_ONLY); +EXPORT_SYMBOL_NS(load_nls_default, ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_LICENSE("Dual BSD/GPL"); diff --git a/fs/nls/nls_euc-jp.c b/fs/nls/nls_euc-jp.c index 162b3f160353c6ab076644079c861a2b4f1fd736..498b8a435d7e17be106e74b0e706fbac8ff2b233 100644 --- a/fs/nls/nls_euc-jp.c +++ b/fs/nls/nls_euc-jp.c @@ -578,3 +578,4 @@ module_init(init_nls_euc_jp) module_exit(exit_nls_euc_jp) MODULE_LICENSE("Dual BSD/GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/nls/nls_koi8-ru.c b/fs/nls/nls_koi8-ru.c index a80a741a8676dc5a6ead28c9a584169050fc3c79..99ceec9085ecd5e72a75354d4d8d0b5ca2959213 100644 --- a/fs/nls/nls_koi8-ru.c +++ b/fs/nls/nls_koi8-ru.c @@ -80,3 +80,4 @@ module_init(init_nls_koi8_ru) module_exit(exit_nls_koi8_ru) MODULE_LICENSE("Dual BSD/GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 086b6bacbad176819cc0950a2f2bc0499eed49ec..18e014fa06480eae000d20fe7c16226b24b116a0 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -366,9 +366,6 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, if (fanotify_is_perm_event(event->mask)) FANOTIFY_PERM(event)->fd = fd; - if (f) - fd_install(fd, f); - /* Event info records order is: dir fid + name, child fid */ if (fanotify_event_dir_fh_len(event)) { info_type = info->name_len ? FAN_EVENT_INFO_TYPE_DFID_NAME : @@ -432,6 +429,9 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, count -= ret; } + if (f) + fd_install(fd, f); + return metadata.event_len; out_close_fd: diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 0d7e948cb29c93193bf783d8cfb9accf86707263..56edd625cf4d2d9a88bc712f45731c6eed5dc10d 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3186,6 +3186,7 @@ MODULE_AUTHOR("Anton Altaparmakov "); MODULE_DESCRIPTION("NTFS 1.2/3.x driver - Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc."); MODULE_VERSION(NTFS_VERSION); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); #ifdef DEBUG module_param(debug_msgs, bint, 0); MODULE_PARM_DESC(debug_msgs, "Enable debug messages."); diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 78710788c23703979419296f6d5a7b1071d8097f..a9a6276ff29bd5d3f0f28d187bedc754f3080854 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -7047,7 +7047,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct buffer_head *di_bh) { - int ret, i, has_data, num_pages = 0; + int ret, has_data, num_pages = 0; int need_free = 0; u32 bit_off, num; handle_t *handle; @@ -7056,26 +7056,17 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_alloc_context *data_ac = NULL; - struct page **pages = NULL; - loff_t end = osb->s_clustersize; + struct page *page = NULL; struct ocfs2_extent_tree et; int did_quota = 0; has_data = i_size_read(inode) ? 1 : 0; if (has_data) { - pages = kcalloc(ocfs2_pages_per_cluster(osb->sb), - sizeof(struct page *), GFP_NOFS); - if (pages == NULL) { - ret = -ENOMEM; - mlog_errno(ret); - return ret; - } - ret = ocfs2_reserve_clusters(osb, 1, &data_ac); if (ret) { mlog_errno(ret); - goto free_pages; + goto out; } } @@ -7095,7 +7086,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } if (has_data) { - unsigned int page_end; + unsigned int page_end = min_t(unsigned, PAGE_SIZE, + osb->s_clustersize); u64 phys; ret = dquot_alloc_space_nodirty(inode, @@ -7119,15 +7111,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, */ block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); - /* - * Non sparse file systems zero on extend, so no need - * to do that now. - */ - if (!ocfs2_sparse_alloc(osb) && - PAGE_SIZE < osb->s_clustersize) - end = PAGE_SIZE; - - ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages); + ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page, + &num_pages); if (ret) { mlog_errno(ret); need_free = 1; @@ -7138,20 +7123,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, * This should populate the 1st page for us and mark * it up to date. */ - ret = ocfs2_read_inline_data(inode, pages[0], di_bh); + ret = ocfs2_read_inline_data(inode, page, di_bh); if (ret) { mlog_errno(ret); need_free = 1; goto out_unlock; } - page_end = PAGE_SIZE; - if (PAGE_SIZE > osb->s_clustersize) - page_end = osb->s_clustersize; - - for (i = 0; i < num_pages; i++) - ocfs2_map_and_dirty_page(inode, handle, 0, page_end, - pages[i], i > 0, &phys); + ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0, + &phys); } spin_lock(&oi->ip_lock); @@ -7182,8 +7162,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } out_unlock: - if (pages) - ocfs2_unlock_and_free_pages(pages, num_pages); + if (page) + ocfs2_unlock_and_free_pages(&page, num_pages); out_commit: if (ret < 0 && did_quota) @@ -7207,8 +7187,6 @@ out_commit: out: if (data_ac) ocfs2_free_alloc_context(data_ac); -free_pages: - kfree(pages); return ret; } diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 583820ec63e2d80173bbd3cc332d311a03456102..5b3a8681f87ef55d8f6a9acc6d7e672ef8b1ec62 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -615,6 +615,7 @@ static void __exit exit_dlmfs_fs(void) MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_DESCRIPTION("OCFS2 DLM-Filesystem"); module_init(init_dlmfs_fs) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 919e552abf637692ff867a607e8a8df2ed30387d..1470b49adb2db3a244c3e8dacb2230cbb07ec2bc 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -478,10 +478,11 @@ int ocfs2_truncate_file(struct inode *inode, * greater than page size, so we have to truncate them * anyway. */ - unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); - truncate_inode_pages(inode->i_mapping, new_i_size); if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + unmap_mapping_range(inode->i_mapping, + new_i_size + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(inode->i_mapping, new_i_size); status = ocfs2_truncate_inline(inode, di_bh, new_i_size, i_size_read(inode), 1); if (status) @@ -500,6 +501,9 @@ int ocfs2_truncate_file(struct inode *inode, goto bail_unlock_sem; } + unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); + truncate_inode_pages(inode->i_mapping, new_i_size); + status = ocfs2_commit_truncate(osb, inode, di_bh); if (status < 0) { mlog_errno(status); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8c8cf7f4eb34e50a0203f833c72f81c077a52015..4f48003e4327138b67d30466649dc44f869ac584 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1258,10 +1258,10 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) return 0; - if (!buffer_jbd(bg_bh)) + jh = jbd2_journal_grab_journal_head(bg_bh); + if (!jh) return 1; - jh = bh2jh(bg_bh); spin_lock(&jh->b_state_lock); bg = (struct ocfs2_group_desc *) jh->b_committed_data; if (bg) @@ -1269,6 +1269,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, else ret = 1; spin_unlock(&jh->b_state_lock); + jbd2_journal_put_journal_head(jh); return ret; } diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 2febc76e9de70100f8fbc94f44bddaf5a3e3223a..f48369c0211149ea6a65b97ad57089b491a7cdaa 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -71,6 +71,7 @@ static struct dentry *ocfs2_debugfs_root; MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_DESCRIPTION("OCFS2 cluster file system"); struct mount_options @@ -2171,11 +2172,17 @@ static int ocfs2_initialize_super(struct super_block *sb, } if (ocfs2_clusterinfo_valid(osb)) { + /* + * ci_stack and ci_cluster in ocfs2_cluster_info may not be null + * terminated, so make sure no overflow happens here by using + * memcpy. Destination strings will always be null terminated + * because osb is allocated using kzalloc. + */ osb->osb_stackflags = OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; - strlcpy(osb->osb_cluster_stack, + memcpy(osb->osb_cluster_stack, OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, - OCFS2_STACK_LABEL_LEN + 1); + OCFS2_STACK_LABEL_LEN); if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) { mlog(ML_ERROR, "couldn't mount because of an invalid " @@ -2184,9 +2191,9 @@ static int ocfs2_initialize_super(struct super_block *sb, status = -EINVAL; goto bail; } - strlcpy(osb->osb_cluster_name, + memcpy(osb->osb_cluster_name, OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster, - OCFS2_CLUSTER_NAME_LEN + 1); + OCFS2_CLUSTER_NAME_LEN); } else { /* The empty string is identical with classic tools that * don't know about s_cluster_info. */ diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index ce93ccca86392badb8e7ecb490f0252e5342484d..27143a2cd962a9ba9dc8d407f335445ad29a9675 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -20,6 +20,7 @@ MODULE_AUTHOR("Bob Copeland "); MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); struct buffer_head *omfs_bread(struct super_block *sb, sector_t block) { diff --git a/fs/open.c b/fs/open.c index 78f32d483aefb4d180501005f3d1539be06b67f8..b9d55ca3763b652a227e6a9bc406b8352629f674 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1158,7 +1158,7 @@ struct file *filp_open(const char *filename, int flags, umode_t mode) } return file; } -EXPORT_SYMBOL(filp_open); +EXPORT_SYMBOL_NS(filp_open, ANDROID_GKI_VFS_EXPORT_ONLY); /* ANDROID: Allow drivers to open only block files from kernel mode */ struct file *filp_open_block(const char *filename, int flags, umode_t mode) @@ -1390,7 +1390,7 @@ int generic_file_open(struct inode * inode, struct file * filp) return 0; } -EXPORT_SYMBOL(generic_file_open); +EXPORT_SYMBOL_NS(generic_file_open, ANDROID_GKI_VFS_EXPORT_ONLY); /* * This is used by subsystems that don't want seekable diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c index fe484cf93e5cd50e749091e2c46cb22f63f3c346..8bbe9486e3a6228c40c46971d23c305127e2007b 100644 --- a/fs/orangefs/dcache.c +++ b/fs/orangefs/dcache.c @@ -26,8 +26,10 @@ static int orangefs_revalidate_lookup(struct dentry *dentry) gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__); new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP); - if (!new_op) + if (!new_op) { + ret = -ENOMEM; goto out_put_parent; + } new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW; new_op->upcall.req.lookup.parent_refn = parent->refn; diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index 538e839590ef5c1d489aea7f6d94a7e9d1be821b..b501dc07f92224474f2cf1a27194e492429a019f 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -176,7 +176,7 @@ orangefs_bufmap_free(struct orangefs_bufmap *bufmap) { kfree(bufmap->page_array); kfree(bufmap->desc_array); - kfree(bufmap->buffer_index_array); + bitmap_free(bufmap->buffer_index_array); kfree(bufmap); } @@ -226,8 +226,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) bufmap->desc_size = user_desc->size; bufmap->desc_shift = ilog2(bufmap->desc_size); - bufmap->buffer_index_array = - kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL); + bufmap->buffer_index_array = bitmap_zalloc(bufmap->desc_count, GFP_KERNEL); if (!bufmap->buffer_index_array) goto out_free_bufmap; @@ -250,7 +249,7 @@ orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc) out_free_desc_array: kfree(bufmap->desc_array); out_free_index_array: - kfree(bufmap->buffer_index_array); + bitmap_free(bufmap->buffer_index_array); out_free_bufmap: kfree(bufmap); out: diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 74a3d6337ef432c0a7b06bd9db53562970df38df..a76a6ba8b355314ab404e2a70371508445910a46 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -36,6 +36,7 @@ int orangefs_dcache_timeout_msecs = 50; int orangefs_getattr_timeout_msecs = 50; MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_AUTHOR("ORANGEFS Development Team"); MODULE_DESCRIPTION("The Linux Kernel VFS interface to ORANGEFS"); MODULE_PARM_DESC(module_parm_debug_mask, "debugging level (see orangefs-debug.h for values)"); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 2c9b3897593c038bd070c7b2b1fa3f8af3a8f4b7..c3bc1c7a950cfc092e46e4f4abe3becd98032cc7 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -137,8 +137,7 @@ kill_whiteout: goto out; } -static int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, - umode_t mode) +int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode) { int err; struct dentry *d, *dentry = *newdentry; @@ -1217,9 +1216,13 @@ static int ovl_rename(struct inode *olddir, struct dentry *old, goto out_dput; } } else { - if (!d_is_negative(newdentry) && - (!new_opaque || !ovl_is_whiteout(newdentry))) - goto out_dput; + if (!d_is_negative(newdentry)) { + if (!new_opaque || !ovl_is_whiteout(newdentry)) + goto out_dput; + } else { + if (flags & RENAME_EXCHANGE) + goto out_dput; + } } if (olddentry == trap) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index d84651181db5c63a98acffb62f88dfb9363a89a3..719eec722492713f5380df2e7b4b535483c9e281 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -19,6 +19,7 @@ struct ovl_aio_req { struct kiocb iocb; + refcount_t ref; struct kiocb *orig_iocb; struct fd fd; }; @@ -243,6 +244,14 @@ static void ovl_file_accessed(struct file *file) touch_atime(&file->f_path); } +static inline void ovl_aio_put(struct ovl_aio_req *aio_req) +{ + if (refcount_dec_and_test(&aio_req->ref)) { + fdput(aio_req->fd); + kmem_cache_free(ovl_aio_request_cachep, aio_req); + } +} + static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) { struct kiocb *iocb = &aio_req->iocb; @@ -259,8 +268,7 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req) } orig_iocb->ki_pos = iocb->ki_pos; - fdput(aio_req->fd); - kmem_cache_free(ovl_aio_request_cachep, aio_req); + ovl_aio_put(aio_req); } static void ovl_aio_rw_complete(struct kiocb *iocb, long res, long res2) @@ -287,6 +295,12 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (ret) return ret; + ret = -EINVAL; + if (iocb->ki_flags & IOCB_DIRECT && + (!real.file->f_mapping->a_ops || + !real.file->f_mapping->a_ops->direct_IO)) + goto out_fdput; + old_cred = ovl_override_creds(file_inode(file)->i_sb); if (is_sync_kiocb(iocb)) { ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, @@ -305,7 +319,9 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) aio_req->orig_iocb = iocb; kiocb_clone(&aio_req->iocb, iocb, real.file); aio_req->iocb.ki_complete = ovl_aio_rw_complete; + refcount_set(&aio_req->ref, 2); ret = vfs_iocb_iter_read(real.file, &aio_req->iocb, iter); + ovl_aio_put(aio_req); if (ret != -EIOCBQUEUED) ovl_aio_cleanup_handler(aio_req); } @@ -313,7 +329,7 @@ out: ovl_revert_creds(file_inode(file)->i_sb, old_cred); ovl_file_accessed(file); - +out_fdput: fdput(real); return ret; @@ -342,6 +358,12 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (ret) goto out_unlock; + ret = -EINVAL; + if (iocb->ki_flags & IOCB_DIRECT && + (!real.file->f_mapping->a_ops || + !real.file->f_mapping->a_ops->direct_IO)) + goto out_fdput; + if (!ovl_should_sync(OVL_FS(inode->i_sb))) ifl &= ~(IOCB_DSYNC | IOCB_SYNC); @@ -371,12 +393,15 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) kiocb_clone(&aio_req->iocb, iocb, real.file); aio_req->iocb.ki_flags = ifl; aio_req->iocb.ki_complete = ovl_aio_rw_complete; + refcount_set(&aio_req->ref, 2); ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter); + ovl_aio_put(aio_req); if (ret != -EIOCBQUEUED) ovl_aio_cleanup_handler(aio_req); } out: ovl_revert_creds(file_inode(file)->i_sb, old_cred); +out_fdput: fdput(real); out_unlock: @@ -385,45 +410,48 @@ out_unlock: return ret; } -static ssize_t ovl_splice_read(struct file *in, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, - unsigned int flags) +/* + * Calling iter_file_splice_write() directly from overlay's f_op may deadlock + * due to lock order inversion between pipe->mutex in iter_file_splice_write() + * and file_start_write(real.file) in ovl_write_iter(). + * + * So do everything ovl_write_iter() does and call iter_file_splice_write() on + * the real file. + */ +static ssize_t ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) { - ssize_t ret; struct fd real; const struct cred *old_cred; + struct inode *inode = file_inode(out); + struct inode *realinode = ovl_inode_real(inode); + ssize_t ret; - ret = ovl_real_fdget(in, &real); + inode_lock(inode); + /* Update mode */ + ovl_copyattr(realinode, inode); + ret = file_remove_privs(out); if (ret) - return ret; - - old_cred = ovl_override_creds(file_inode(in)->i_sb); - ret = generic_file_splice_read(real.file, ppos, pipe, len, flags); - ovl_revert_creds(file_inode(in)->i_sb, old_cred); - - ovl_file_accessed(in); - fdput(real); - return ret; -} - -static ssize_t -ovl_splice_write(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) -{ - struct fd real; - const struct cred *old_cred; - ssize_t ret; + goto out_unlock; ret = ovl_real_fdget(out, &real); if (ret) - return ret; + goto out_unlock; + + old_cred = ovl_override_creds(inode->i_sb); + file_start_write(real.file); - old_cred = ovl_override_creds(file_inode(out)->i_sb); ret = iter_file_splice_write(pipe, real.file, ppos, len, flags); - ovl_revert_creds(file_inode(out)->i_sb, old_cred); - ovl_file_accessed(out); + file_end_write(real.file); + /* Update size */ + ovl_copyattr(realinode, inode); + ovl_revert_creds(inode->i_sb, old_cred); fdput(real); + +out_unlock: + inode_unlock(inode); + return ret; } @@ -735,7 +763,7 @@ const struct file_operations ovl_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = ovl_compat_ioctl, #endif - .splice_read = ovl_splice_read, + .splice_read = generic_file_splice_read, .splice_write = ovl_splice_write, .copy_file_range = ovl_copy_file_range, diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 825377de085915d2904055d2865a860235fec1cb..6cd8eb7bba94225e5261730599ba87b03f56e559 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -522,6 +522,7 @@ struct ovl_cattr { #define OVL_CATTR(m) (&(struct ovl_cattr) { .mode = (m) }) +int ovl_mkdir_real(struct inode *dir, struct dentry **newdentry, umode_t mode); struct dentry *ovl_create_real(struct inode *dir, struct dentry *newdentry, struct ovl_cattr *attr); int ovl_cleanup(struct inode *dir, struct dentry *dentry); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b66dc11476c19c5d3d53ee59cf8a95d27d70cb85..4a2ce2eeee88785fc5689dd071bc121f8d87e0c5 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -20,6 +20,7 @@ MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Overlay filesystem"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); struct ovl_dir_cache; @@ -764,10 +765,14 @@ retry: goto retry; } - work = ovl_create_real(dir, work, OVL_CATTR(attr.ia_mode)); - err = PTR_ERR(work); - if (IS_ERR(work)) - goto out_err; + err = ovl_mkdir_real(dir, &work, attr.ia_mode); + if (err) + goto out_dput; + + /* Weird filesystem returning with hashed negative (kernfs)? */ + err = -EINVAL; + if (d_really_is_negative(work)) + goto out_dput; /* * Try to remove POSIX ACL xattrs from workdir. We are good if: diff --git a/fs/pipe.c b/fs/pipe.c index d6d4019ba32f558ba1012c297fa1880fc03aa90c..9f2ca1b1c17ac443d1e32110bdcab84329dc5594 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -252,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) */ was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); for (;;) { - unsigned int head = pipe->head; + /* Read ->head with a barrier vs post_one_notification() */ + unsigned int head = smp_load_acquire(&pipe->head); unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; @@ -830,10 +831,8 @@ void free_pipe_info(struct pipe_inode_info *pipe) int i; #ifdef CONFIG_WATCH_QUEUE - if (pipe->watch_queue) { + if (pipe->watch_queue) watch_queue_clear(pipe->watch_queue); - put_watch_queue(pipe->watch_queue); - } #endif (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0); @@ -843,6 +842,10 @@ void free_pipe_info(struct pipe_inode_info *pipe) if (buf->ops) pipe_buf_release(pipe, buf); } +#ifdef CONFIG_WATCH_QUEUE + if (pipe->watch_queue) + put_watch_queue(pipe->watch_queue); +#endif if (pipe->tmp_page) __free_page(pipe->tmp_page); kfree(pipe->bufs); diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 4695b6de315129aa62daba5131b82a3346bfd25a..3bed48d8228b4ebcc0fa4a8c42bb14ef0d2d1a4e 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -23,7 +23,7 @@ #ifdef arch_idle_time -static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) +u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) { u64 idle; @@ -45,7 +45,7 @@ static u64 get_iowait_time(struct kernel_cpustat *kcs, int cpu) #else -static u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) +u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) { u64 idle, idle_usecs = -1ULL; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 154b9f5299e2621949f70494e9ad5bb5c6f8f568..b1bcbe7aca0eb88ff963b1128ae5c1fb3b9fbcae 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -123,56 +124,6 @@ static void release_task_mempolicy(struct proc_maps_private *priv) } #endif -static void seq_print_vma_name(struct seq_file *m, struct vm_area_struct *vma) -{ - const char __user *name = vma_get_anon_name(vma); - struct mm_struct *mm = vma->vm_mm; - - unsigned long page_start_vaddr; - unsigned long page_offset; - unsigned long num_pages; - unsigned long max_len = NAME_MAX; - int i; - - page_start_vaddr = (unsigned long)name & PAGE_MASK; - page_offset = (unsigned long)name - page_start_vaddr; - num_pages = DIV_ROUND_UP(page_offset + max_len, PAGE_SIZE); - - seq_puts(m, "[anon:"); - - for (i = 0; i < num_pages; i++) { - int len; - int write_len; - const char *kaddr; - long pages_pinned; - struct page *page; - - pages_pinned = get_user_pages_remote(mm, page_start_vaddr, 1, 0, - &page, NULL, NULL); - if (pages_pinned < 1) { - seq_puts(m, "]"); - return; - } - - kaddr = (const char *)kmap(page); - len = min(max_len, PAGE_SIZE - page_offset); - write_len = strnlen(kaddr + page_offset, len); - seq_write(m, kaddr + page_offset, write_len); - kunmap(page); - put_user_page(page); - - /* if strnlen hit a null terminator then we're done */ - if (write_len != len) - break; - - max_len -= len; - page_offset = 0; - page_start_vaddr += PAGE_SIZE; - } - - seq_putc(m, ']'); -} - static void *m_start(struct seq_file *m, loff_t *ppos) { struct proc_maps_private *priv = m->private; @@ -358,6 +309,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) name = arch_vma_name(vma); if (!name) { + struct anon_vma_name *anon_name; + if (!mm) { name = "[vdso]"; goto done; @@ -374,9 +327,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma) goto done; } - if (vma_get_anon_name(vma)) { + anon_name = anon_vma_name(vma); + if (anon_name) { seq_pad(m, ' '); - seq_print_vma_name(m, vma); + seq_printf(m, "[anon:%s]", anon_name->name); } } @@ -487,7 +441,8 @@ static void smaps_page_accumulate(struct mem_size_stats *mss, } static void smaps_account(struct mem_size_stats *mss, struct page *page, - bool compound, bool young, bool dirty, bool locked) + bool compound, bool young, bool dirty, bool locked, + bool migration) { int i, nr = compound ? compound_nr(page) : 1; unsigned long size = nr * PAGE_SIZE; @@ -514,8 +469,15 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page, * page_count(page) == 1 guarantees the page is mapped exactly once. * If any subpage of the compound page mapped with PTE it would elevate * page_count(). + * + * The page_mapcount() is called to get a snapshot of the mapcount. + * Without holding the page lock this snapshot can be slightly wrong as + * we cannot always read the mapcount atomically. It is not safe to + * call page_mapcount() even with PTL held if the page is not mapped, + * especially for migration entries. Treat regular migration entries + * as mapcount == 1. */ - if (page_count(page) == 1) { + if ((page_count(page) == 1) || migration) { smaps_page_accumulate(mss, page, size, size << PSS_SHIFT, dirty, locked, true); return; @@ -552,6 +514,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool migration = false; if (pte_present(*pte)) { page = vm_normal_page(vma, addr, *pte); @@ -571,9 +534,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } else { mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } - } else if (is_migration_entry(swpent)) + } else if (is_migration_entry(swpent)) { + migration = true; page = migration_entry_to_page(swpent); - else if (is_device_private_entry(swpent)) + } else if (is_device_private_entry(swpent)) page = device_private_entry_to_page(swpent); } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap && pte_none(*pte))) { @@ -587,7 +551,8 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, if (!page) return; - smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), locked); + smaps_account(mss, page, false, pte_young(*pte), pte_dirty(*pte), + locked, migration); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -598,6 +563,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); struct page *page = NULL; + bool migration = false; if (pmd_present(*pmd)) { /* FOLL_DUMP will return -EFAULT on huge zero page */ @@ -605,8 +571,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { swp_entry_t entry = pmd_to_swp_entry(*pmd); - if (is_migration_entry(entry)) + if (is_migration_entry(entry)) { + migration = true; page = migration_entry_to_page(entry); + } } if (IS_ERR_OR_NULL(page)) return; @@ -618,7 +586,9 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, /* pass */; else mss->file_thp += HPAGE_PMD_SIZE; - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), locked); + + smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), + locked, migration); } #else static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, @@ -880,11 +850,6 @@ static int show_smap(struct seq_file *m, void *v) smap_gather_stats(vma, &mss, 0); show_map_vma(m, vma); - if (vma_get_anon_name(vma)) { - seq_puts(m, "Name: "); - seq_print_vma_name(m, vma); - seq_putc(m, '\n'); - } SEQ_PUT_DEC("Size: ", vma->vm_end - vma->vm_start); SEQ_PUT_DEC(" kB\nKernelPageSize: ", vma_kernel_pagesize(vma)); @@ -1434,6 +1399,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, { u64 frame = 0, flags = 0; struct page *page = NULL; + bool migration = false; if (pte_present(pte)) { if (pm->show_pfn) @@ -1451,8 +1417,10 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, frame = swp_type(entry) | (swp_offset(entry) << MAX_SWAPFILES_SHIFT); flags |= PM_SWAP; - if (is_migration_entry(entry)) + if (is_migration_entry(entry)) { + migration = true; page = migration_entry_to_page(entry); + } if (is_device_private_entry(entry)) page = device_private_entry_to_page(entry); @@ -1460,7 +1428,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (page && !PageAnon(page)) flags |= PM_FILE; - if (page && page_mapcount(page) == 1) + if (page && !migration && page_mapcount(page) == 1) flags |= PM_MMAP_EXCLUSIVE; if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -1476,8 +1444,9 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, spinlock_t *ptl; pte_t *pte, *orig_pte; int err = 0; - #ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool migration = false; + ptl = pmd_trans_huge_lock(pmdp, vma); if (ptl) { u64 flags = 0, frame = 0; @@ -1512,11 +1481,12 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (pmd_swp_soft_dirty(pmd)) flags |= PM_SOFT_DIRTY; VM_BUG_ON(!is_pmd_migration_entry(pmd)); + migration = is_migration_entry(entry); page = migration_entry_to_page(entry); } #endif - if (page && page_mapcount(page) == 1) + if (page && !migration && page_mapcount(page) == 1) flags |= PM_MMAP_EXCLUSIVE; for (; addr != end; addr += PAGE_SIZE) { diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 5a1b228964fb760020e1d9198f4139a15fe40931..deb99bc9b7e6b009b8f2210bdf6a0959bfcbe9ed 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -12,18 +12,22 @@ static int uptime_proc_show(struct seq_file *m, void *v) { struct timespec64 uptime; struct timespec64 idle; - u64 nsec; + u64 idle_nsec; u32 rem; int i; - nsec = 0; - for_each_possible_cpu(i) - nsec += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE]; + idle_nsec = 0; + for_each_possible_cpu(i) { + struct kernel_cpustat kcs; + + kcpustat_cpu_fetch(&kcs, i); + idle_nsec += get_idle_time(&kcs, i); + } ktime_get_boottime_ts64(&uptime); timens_add_boottime(&uptime); - idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem); + idle.tv_sec = div_u64_rem(idle_nsec, NSEC_PER_SEC, &rem); idle.tv_nsec = rem; seq_printf(m, "%lu.%02lu %lu.%02lu\n", (unsigned long) uptime.tv_sec, diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index c3a345c28a9330385ded488ef7a4ea5a4f0abf3b..0e4278d4a7691d07c9774caffa4f86ef941ea010 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -124,9 +124,13 @@ ssize_t read_from_oldmem(char *buf, size_t count, nr_bytes = count; /* If pfn is not ram, return zeros for sparse dump files */ - if (pfn_is_ram(pfn) == 0) - memset(buf, 0, nr_bytes); - else { + if (pfn_is_ram(pfn) == 0) { + tmp = 0; + if (!userbuf) + memset(buf, 0, nr_bytes); + else if (clear_user(buf, nr_bytes)) + tmp = -EFAULT; + } else { if (encrypted) tmp = copy_oldmem_page_encrypted(pfn, buf, nr_bytes, @@ -135,10 +139,10 @@ ssize_t read_from_oldmem(char *buf, size_t count, else tmp = copy_oldmem_page(pfn, buf, nr_bytes, offset, userbuf); - - if (tmp < 0) - return tmp; } + if (tmp < 0) + return tmp; + *ppos += nr_bytes; count -= nr_bytes; buf += nr_bytes; diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 3fb7fc819b4fc9ea54089e52ef594be08b661718..704fb7d09f0e2f321a2829d1dd06ffe75aa9c11d 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -420,4 +420,5 @@ static void __exit exit_qnx4_fs(void) module_init(init_qnx4_fs) module_exit(exit_qnx4_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 61191f7bdf62df8776edaf511574a228b77e4d2c..ac771f0e02388161704f1c10936ad613262076cb 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -679,3 +679,4 @@ static void __exit exit_qnx6_fs(void) module_init(init_qnx6_fs) module_exit(exit_qnx6_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 4f13734637660056e299d4ed04ceb38fb04cb580..09fb8459bb5cea2ef07ae1f58b935c2fa06250f6 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -692,9 +692,14 @@ int dquot_quota_sync(struct super_block *sb, int type) /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... */ - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + if (sb->s_op->sync_fs) { + ret = sb->s_op->sync_fs(sb, 1); + if (ret) + return ret; + } + ret = sync_blockdev(sb->s_bdev); + if (ret) + return ret; /* * Now when everything is written we can discard the pagecache so diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index c5562c871c8beff8e12bb92dfa75b1066e3ff3fc..1a188fbdf34e5cff916063ea6071d0f74ed0a088 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -423,6 +423,7 @@ static int free_dqentry(struct qtree_mem_dqinfo *info, struct dquot *dquot, quota_error(dquot->dq_sb, "Quota structure has offset to " "other block (%u) than it should (%u)", blk, (uint)(dquot->dq_off >> info->dqi_blocksize_bits)); + ret = -EIO; goto out_buf; } ret = read_blk(info, blk, buf); @@ -488,6 +489,13 @@ static int remove_tree(struct qtree_mem_dqinfo *info, struct dquot *dquot, goto out_buf; } newblk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); + if (newblk < QT_TREEOFF || newblk >= info->dqi_blocks) { + quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)", + newblk, info->dqi_blocks); + ret = -EUCLEAN; + goto out_buf; + } + if (depth == info->dqi_qtree_depth - 1) { ret = free_dqentry(info, dquot, newblk); newblk = 0; @@ -587,6 +595,13 @@ static loff_t find_tree_dqentry(struct qtree_mem_dqinfo *info, blk = le32_to_cpu(ref[get_index(info, dquot->dq_id, depth)]); if (!blk) /* No reference? */ goto out_buf; + if (blk < QT_TREEOFF || blk >= info->dqi_blocks) { + quota_error(dquot->dq_sb, "Getting block too big (%u >= %u)", + blk, info->dqi_blocks); + ret = -EUCLEAN; + goto out_buf; + } + if (depth < info->dqi_qtree_depth - 1) ret = find_tree_dqentry(info, dquot, blk, depth+1); else diff --git a/fs/read_write.c b/fs/read_write.c index 75f764b434184238a33f3c9bc688bfb395b83cb9..ae703cb90599771aaaba7c078db3d3761ee67db4 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -471,7 +471,7 @@ ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) return ret; return __kernel_read(file, buf, count, pos); } -EXPORT_SYMBOL(kernel_read); +EXPORT_SYMBOL_NS(kernel_read, ANDROID_GKI_VFS_EXPORT_ONLY); ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { @@ -580,7 +580,7 @@ ssize_t kernel_write(struct file *file, const void *buf, size_t count, file_end_write(file); return ret; } -EXPORT_SYMBOL(kernel_write); +EXPORT_SYMBOL_NS(kernel_write, ANDROID_GKI_VFS_EXPORT_ONLY); ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) { diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 913f5af9bf248988d16cfa53973da58a041a7b2c..d84c2f2df0b729e1088b697d0ba1f968ee42dc03 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2655,6 +2655,7 @@ MODULE_ALIAS_FS("reiserfs"); MODULE_DESCRIPTION("ReiserFS journaled filesystem"); MODULE_AUTHOR("Hans Reiser "); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(init_reiserfs_fs); module_exit(exit_reiserfs_fs); diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 259f684d9236e77d22c018c3844dc8801161d676..c335658c2e0f35ef64f0cdae0add0b7a96f2907d 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -666,3 +666,4 @@ module_exit(exit_romfs_fs); MODULE_DESCRIPTION("Direct-MTD Capable RomFS"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); /* Actually dual-licensed, but it doesn't matter for */ +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/select.c b/fs/select.c index 945896d0ac9e7624db36d48169ebc757d4f18adb..5edffee1162c2423821e2282153a5ff1f3c985eb 100644 --- a/fs/select.c +++ b/fs/select.c @@ -458,9 +458,11 @@ get_max: return max; } -#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR) -#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR) -#define POLLEX_SET (EPOLLPRI) +#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\ + EPOLLNVAL) +#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\ + EPOLLNVAL) +#define POLLEX_SET (EPOLLPRI | EPOLLNVAL) static inline void wait_key_set(poll_table *wait, unsigned long in, unsigned long out, unsigned long bit, @@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) break; if (!(bit & all_bits)) continue; + mask = EPOLLNVAL; f = fdget(i); if (f.file) { wait_key_set(wait, in, out, bit, @@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) mask = vfs_poll(f.file, wait); fdput(f); - if ((mask & POLLIN_SET) && (in & bit)) { - res_in |= bit; - retval++; - wait->_qproc = NULL; - } - if ((mask & POLLOUT_SET) && (out & bit)) { - res_out |= bit; - retval++; - wait->_qproc = NULL; - } - if ((mask & POLLEX_SET) && (ex & bit)) { - res_ex |= bit; - retval++; - wait->_qproc = NULL; - } - /* got something, stop busy polling */ - if (retval) { - can_busy_loop = false; - busy_flag = 0; - - /* - * only remember a returned - * POLL_BUSY_LOOP if we asked for it - */ - } else if (busy_flag & mask) - can_busy_loop = true; - } + if ((mask & POLLIN_SET) && (in & bit)) { + res_in |= bit; + retval++; + wait->_qproc = NULL; + } + if ((mask & POLLOUT_SET) && (out & bit)) { + res_out |= bit; + retval++; + wait->_qproc = NULL; + } + if ((mask & POLLEX_SET) && (ex & bit)) { + res_ex |= bit; + retval++; + wait->_qproc = NULL; + } + /* got something, stop busy polling */ + if (retval) { + can_busy_loop = false; + busy_flag = 0; + + /* + * only remember a returned + * POLL_BUSY_LOOP if we asked for it + */ + } else if (busy_flag & mask) + can_busy_loop = true; + } if (res_in) *rinp = res_in; diff --git a/fs/signalfd.c b/fs/signalfd.c index 456046e158737450055c77283321867f03b23ee0..b94fb5f81797a6ab74914da7e95597d3ee343e46 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -35,17 +35,7 @@ void signalfd_cleanup(struct sighand_struct *sighand) { - wait_queue_head_t *wqh = &sighand->signalfd_wqh; - /* - * The lockless check can race with remove_wait_queue() in progress, - * but in this case its caller should run under rcu_read_lock() and - * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. - */ - if (likely(!waitqueue_active(wqh))) - return; - - /* wait_queue_entry_t->func(POLLFREE) should do remove_wait_queue() */ - wake_up_poll(wqh, EPOLLHUP | POLLFREE); + wake_up_pollfree(&sighand->signalfd_wqh); } struct signalfd_ctx { diff --git a/fs/splice.c b/fs/splice.c index 866d5c2367b233091e98e372b2ab25c04c93fef3..036a47937c18ee55c8f385c0ac511ca37dd53388 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -326,7 +326,7 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, return ret; } -EXPORT_SYMBOL(generic_file_splice_read); +EXPORT_SYMBOL_NS(generic_file_splice_read, ANDROID_GKI_VFS_EXPORT_ONLY); const struct pipe_buf_operations default_pipe_buf_ops = { .release = generic_pipe_buf_release, @@ -722,7 +722,7 @@ done: return ret; } -EXPORT_SYMBOL(iter_file_splice_write); +EXPORT_SYMBOL_NS(iter_file_splice_write, ANDROID_GKI_VFS_EXPORT_ONLY); /** * generic_splice_sendpage - splice data from a pipe to a socket diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 88cc94be10765c7f76b223536a8195eeb30f0296..83c48769f4140c32a870131bb64136adc59764f6 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -498,3 +498,4 @@ module_exit(exit_squashfs_fs); MODULE_DESCRIPTION("squashfs 4.0, a compressed read-only filesystem"); MODULE_AUTHOR("Phillip Lougher "); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/stat.c b/fs/stat.c index 1196af4d1ea03dfec6d003ed3233be4c21080805..c8a078f3acf89353b6995fefcf5c6dbb1689987b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -49,7 +49,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; } -EXPORT_SYMBOL(generic_fillattr); +EXPORT_SYMBOL_NS(generic_fillattr, ANDROID_GKI_VFS_EXPORT_ONLY); /** * vfs_getattr_nosec - getattr without security checks diff --git a/fs/super.c b/fs/super.c index 98bb0629ee108e87ff8876457b535442addf695e..ae2b97dabe312c9804bd6bb143167a84643cb961 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1435,7 +1435,7 @@ error_bdev: error: return ERR_PTR(error); } -EXPORT_SYMBOL(mount_bdev); +EXPORT_SYMBOL_NS(mount_bdev, ANDROID_GKI_VFS_EXPORT_ONLY); void kill_block_super(struct super_block *sb) { @@ -1449,7 +1449,7 @@ void kill_block_super(struct super_block *sb) blkdev_put(bdev, mode | FMODE_EXCL); } -EXPORT_SYMBOL(kill_block_super); +EXPORT_SYMBOL_NS(kill_block_super, ANDROID_GKI_VFS_EXPORT_ONLY); #endif struct dentry *mount_nodev(struct file_system_type *fs_type, @@ -1472,8 +1472,8 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, } EXPORT_SYMBOL(mount_nodev); -static int reconfigure_single(struct super_block *s, - int flags, void *data) +int reconfigure_single(struct super_block *s, + int flags, void *data) { struct fs_context *fc; int ret; @@ -1667,11 +1667,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb) percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); } -static void sb_freeze_unlock(struct super_block *sb) +static void sb_freeze_unlock(struct super_block *sb, int level) { - int level; - - for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) + for (level--; level >= 0; level--) percpu_up_write(sb->s_writers.rw_sem + level); } @@ -1742,7 +1740,14 @@ int freeze_super(struct super_block *sb) sb_wait_write(sb, SB_FREEZE_PAGEFAULT); /* All writers are done so after syncing there won't be dirty data */ - sync_filesystem(sb); + ret = sync_filesystem(sb); + if (ret) { + sb->s_writers.frozen = SB_UNFROZEN; + sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); + wake_up(&sb->s_writers.wait_unfrozen); + deactivate_locked_super(sb); + return ret; + } /* Now wait for internal filesystem counter */ sb->s_writers.frozen = SB_FREEZE_FS; @@ -1754,7 +1759,7 @@ int freeze_super(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; @@ -1805,7 +1810,7 @@ static int thaw_super_locked(struct super_block *sb) } sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); out: wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); diff --git a/fs/sync.c b/fs/sync.c index 8e1c2272470fe234296d8ab5d6ec698a56569277..9bbaa61994fc384f8fbaaf2a69832dd7280be8d7 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -66,7 +66,7 @@ int sync_filesystem(struct super_block *sb) return ret; return __sync_filesystem(sb, 1); } -EXPORT_SYMBOL(sync_filesystem); +EXPORT_SYMBOL_NS(sync_filesystem, ANDROID_GKI_VFS_EXPORT_ONLY); static void sync_inodes_one_sb(struct super_block *sb, void *arg) { diff --git a/fs/sysv/super.c b/fs/sysv/super.c index cc8e2ed155c84e700ab1191efc563ada2266a65e..e5383bff5dadefc4ec3a8826187819439145a29e 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -592,3 +592,4 @@ static void __exit exit_sysv_fs(void) module_init(init_sysv_fs) module_exit(exit_sysv_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 0ee8c6dfb03640718072a294217b96f56d12f89d..7b126d3a469abe00498f5380743aaa9175a60381 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -159,6 +159,77 @@ struct tracefs_fs_info { struct tracefs_mount_opts mount_opts; }; +static void change_gid(struct dentry *dentry, kgid_t gid) +{ + if (!dentry->d_inode) + return; + dentry->d_inode->i_gid = gid; +} + +/* + * Taken from d_walk, but without he need for handling renames. + * Nothing can be renamed while walking the list, as tracefs + * does not support renames. This is only called when mounting + * or remounting the file system, to set all the files to + * the given gid. + */ +static void set_gid(struct dentry *parent, kgid_t gid) +{ + struct dentry *this_parent; + struct list_head *next; + + this_parent = parent; + spin_lock(&this_parent->d_lock); + + change_gid(this_parent, gid); +repeat: + next = this_parent->d_subdirs.next; +resume: + while (next != &this_parent->d_subdirs) { + struct list_head *tmp = next; + struct dentry *dentry = list_entry(tmp, struct dentry, d_child); + next = tmp->next; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + + change_gid(dentry, gid); + + if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, _RET_IP_); + this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); + goto repeat; + } + spin_unlock(&dentry->d_lock); + } + /* + * All done at this level ... ascend and resume the search. + */ + rcu_read_lock(); +ascend: + if (this_parent != parent) { + struct dentry *child = this_parent; + this_parent = child->d_parent; + + spin_unlock(&child->d_lock); + spin_lock(&this_parent->d_lock); + + /* go into the first sibling still alive */ + do { + next = child->d_child.next; + if (next == &this_parent->d_subdirs) + goto ascend; + child = list_entry(next, struct dentry, d_child); + } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); + rcu_read_unlock(); + goto resume; + } + rcu_read_unlock(); + spin_unlock(&this_parent->d_lock); + return; +} + static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts) { substring_t args[MAX_OPT_ARGS]; @@ -217,7 +288,9 @@ static int tracefs_apply_options(struct super_block *sb) inode->i_mode |= opts->mode; inode->i_uid = opts->uid; - inode->i_gid = opts->gid; + + /* Set all the group ids to the mount option */ + set_gid(sb->s_root, opts->gid); return 0; } @@ -412,6 +485,8 @@ struct dentry *tracefs_create_file(const char *name, umode_t mode, inode->i_mode = mode; inode->i_fop = fops ? fops : &tracefs_file_operations; inode->i_private = data; + inode->i_uid = d_inode(dentry->d_parent)->i_uid; + inode->i_gid = d_inode(dentry->d_parent)->i_gid; d_instantiate(dentry, inode); fsnotify_create(dentry->d_parent->d_inode, dentry); return end_creating(dentry); @@ -433,6 +508,8 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; inode->i_op = ops; inode->i_fop = &simple_dir_operations; + inode->i_uid = d_inode(dentry->d_parent)->i_uid; + inode->i_gid = d_inode(dentry->d_parent)->i_gid; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); diff --git a/fs/ubifs/crypto.c b/fs/ubifs/crypto.c index 22be7aeb96c4fe121b6963888de66408f972c86c..c57b46a352d8fd6637f2e51256e093f364161034 100644 --- a/fs/ubifs/crypto.c +++ b/fs/ubifs/crypto.c @@ -82,5 +82,4 @@ const struct fscrypt_operations ubifs_crypt_operations = { .get_context = ubifs_crypt_get_context, .set_context = ubifs_crypt_set_context, .empty_dir = ubifs_crypt_empty_dir, - .max_namelen = UBIFS_MAX_NLEN, }; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index cfd46753a685656b75eef1cf74b1e2e5da46277c..54093d0a8f0254f7fcb20f78d55bcd3e9bc44e30 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1853,7 +1853,6 @@ out: kthread_stop(c->bgt); c->bgt = NULL; } - free_wbufs(c); kfree(c->write_reserve_buf); c->write_reserve_buf = NULL; vfree(c->ileb_buf); @@ -2476,6 +2475,7 @@ static void __exit ubifs_exit(void) module_exit(ubifs_exit); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_VERSION(__stringify(UBIFS_VERSION)); MODULE_AUTHOR("Artem Bityutskiy, Adrian Hunter"); MODULE_DESCRIPTION("UBIFS - UBI File System"); diff --git a/fs/udf/dir.c b/fs/udf/dir.c index c19dba45aa2096d010e536b2dab31ba62eb29824..d0f92a52e3babdc689354df59ec0c6787886f922 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "udf_i.h" #include "udf_sb.h" @@ -44,7 +45,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) struct fileIdentDesc *fi = NULL; struct fileIdentDesc cfi; udf_pblk_t block, iblock; - loff_t nf_pos; + loff_t nf_pos, emit_pos = 0; int flen; unsigned char *fname = NULL, *copy_name = NULL; unsigned char *nameptr; @@ -58,6 +59,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) int i, num, ret = 0; struct extent_position epos = { NULL, 0, {0, 0} }; struct super_block *sb = dir->i_sb; + bool pos_valid = false; if (ctx->pos == 0) { if (!dir_emit_dot(file, ctx)) @@ -68,6 +70,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) if (nf_pos >= size) goto out; + /* + * Something changed since last readdir (either lseek was called or dir + * changed)? We need to verify the position correctly points at the + * beginning of some dir entry so that the directory parsing code does + * not get confused. Since UDF does not have any reliable way of + * identifying beginning of dir entry (names are under user control), + * we need to scan the directory from the beginning. + */ + if (!inode_eq_iversion(dir, file->f_version)) { + emit_pos = nf_pos; + nf_pos = 0; + } else { + pos_valid = true; + } + fname = kmalloc(UDF_NAME_LEN, GFP_NOFS); if (!fname) { ret = -ENOMEM; @@ -123,13 +140,21 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) while (nf_pos < size) { struct kernel_lb_addr tloc; + loff_t cur_pos = nf_pos; - ctx->pos = (nf_pos >> 2) + 1; + /* Update file position only if we got past the current one */ + if (nf_pos >= emit_pos) { + ctx->pos = (nf_pos >> 2) + 1; + pos_valid = true; + } fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, &elen, &offset); if (!fi) goto out; + /* Still not at offset where user asked us to read from? */ + if (cur_pos < emit_pos) + continue; liu = le16_to_cpu(cfi.lengthOfImpUse); lfi = cfi.lengthFileIdent; @@ -187,8 +212,11 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) } /* end while */ ctx->pos = (nf_pos >> 2) + 1; + pos_valid = true; out: + if (pos_valid) + file->f_version = inode_query_iversion(dir); if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 84ed23edebfd379fa5906860758e3fe44a09da18..87a77bf70ee19fcad4f4077b83d62c5741fb8f75 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -77,6 +77,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) GFP_KERNEL); } if (!iinfo->i_data) { + make_bad_inode(inode); iput(inode); return ERR_PTR(-ENOMEM); } @@ -86,6 +87,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) dinfo->i_location.partitionReferenceNum, start, &err); if (err) { + make_bad_inode(inode); iput(inode); return ERR_PTR(err); } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 0dd2f93ac0480e55cab212dce3506e8c89427765..d32b836f6ca74b0bf678fd0e66914a79aecbcb6a 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -257,10 +257,6 @@ int udf_expand_file_adinicb(struct inode *inode) char *kaddr; struct udf_inode_info *iinfo = UDF_I(inode); int err; - struct writeback_control udf_wbc = { - .sync_mode = WB_SYNC_NONE, - .nr_to_write = 1, - }; WARN_ON_ONCE(!inode_is_locked(inode)); if (!iinfo->i_lenAlloc) { @@ -304,8 +300,10 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; + set_page_dirty(page); + unlock_page(page); up_write(&iinfo->i_data_sem); - err = inode->i_data.a_ops->writepage(page, &udf_wbc); + err = filemap_fdatawrite(inode->i_mapping); if (err) { /* Restore everything back so that we don't lose data... */ lock_page(page); @@ -316,6 +314,7 @@ int udf_expand_file_adinicb(struct inode *inode) unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; + iinfo->i_lenAlloc = inode->i_size; up_write(&iinfo->i_data_sem); } put_page(page); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index f4a72ff8cf959b25a66599a042592d4bbab8bfcf..9f3aced46c68fdf0ec34c8b458756c5316fe4539 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -30,6 +30,7 @@ #include #include #include +#include static inline int udf_match(int len1, const unsigned char *name1, int len2, const unsigned char *name2) @@ -135,6 +136,8 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, mark_buffer_dirty_inode(fibh->ebh, inode); mark_buffer_dirty_inode(fibh->sbh, inode); } + inode_inc_iversion(inode); + return 0; } diff --git a/fs/udf/super.c b/fs/udf/super.c index 5d2b820ef303af5d6d6b27e84f80519363818ea1..c31c5c44b9fe5ce08fd56d21f082f9b9a93db1f5 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -57,6 +57,7 @@ #include #include #include +#include #include "udf_sb.h" #include "udf_i.h" @@ -149,6 +150,7 @@ static struct inode *udf_alloc_inode(struct super_block *sb) init_rwsem(&ei->i_data_sem); ei->cached_extent.lstart = -1; spin_lock_init(&ei->i_extent_cache_lock); + inode_set_iversion(&ei->vfs_inode, 1); return &ei->vfs_inode; } @@ -2543,5 +2545,6 @@ static unsigned int udf_count_free(struct super_block *sb) MODULE_AUTHOR("Ben Fennema"); MODULE_DESCRIPTION("Universal Disk Format Filesystem"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(init_udf_fs) module_exit(exit_udf_fs) diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 983558b572c70b3d87fe6657caa0db093812b15e..e0d7149f76ca3d75ea358f0e09f0dd2740a93c98 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1542,3 +1542,4 @@ static void __exit exit_ufs_fs(void) module_init(init_ufs_fs) module_exit(exit_ufs_fs) MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 108897cb9a3c95f1df9021c0964e89cbff83b7d7..da8c8409528dc82dfe5dcffcd14b432f71f5503e 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -879,8 +880,7 @@ static int userfaultfd_release(struct inode *inode, struct file *file) new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX, - vma_get_anon_name(vma)); + NULL_VM_UFFD_CTX, anon_vma_name(vma)); if (prev) vma = prev; else @@ -1438,7 +1438,7 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), ((struct vm_userfaultfd_ctx){ ctx }), - vma_get_anon_name(vma)); + anon_vma_name(vma)); if (prev) { vma = prev; goto next; @@ -1617,8 +1617,7 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx, prev = vma_merge(mm, prev, start, vma_end, new_flags, vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma), - NULL_VM_UFFD_CTX, - vma_get_anon_name(vma)); + NULL_VM_UFFD_CTX, anon_vma_name(vma)); if (prev) { vma = prev; goto next; @@ -1834,9 +1833,15 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx, if (mode_wp && mode_dontwake) return -EINVAL; - ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start, - uffdio_wp.range.len, mode_wp, - &ctx->mmap_changing); + if (mmget_not_zero(ctx->mm)) { + ret = mwriteprotect_range(ctx->mm, uffdio_wp.range.start, + uffdio_wp.range.len, mode_wp, + &ctx->mmap_changing); + mmput(ctx->mm); + } else { + return -ESRCH; + } + if (ret) return ret; diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index d7816c01a4f62885f505001a752a80b2700dd82a..0d4a0408cbd7ea74f264035be3f2b978e165517d 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -21,10 +21,7 @@ #define VBOXSF_SUPER_MAGIC 0x786f4256 /* 'VBox' little endian */ -#define VBSF_MOUNT_SIGNATURE_BYTE_0 ('\000') -#define VBSF_MOUNT_SIGNATURE_BYTE_1 ('\377') -#define VBSF_MOUNT_SIGNATURE_BYTE_2 ('\376') -#define VBSF_MOUNT_SIGNATURE_BYTE_3 ('\375') +static const unsigned char VBSF_MOUNT_SIGNATURE[4] = "\000\377\376\375"; static int follow_symlinks; module_param(follow_symlinks, int, 0444); @@ -386,12 +383,7 @@ fail_nomem: static int vboxsf_parse_monolithic(struct fs_context *fc, void *data) { - unsigned char *options = data; - - if (options && options[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 && - options[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 && - options[2] == VBSF_MOUNT_SIGNATURE_BYTE_2 && - options[3] == VBSF_MOUNT_SIGNATURE_BYTE_3) { + if (data && !memcmp(data, VBSF_MOUNT_SIGNATURE, 4)) { vbg_err("vboxsf: Old binary mount data not supported, remove obsolete mount.vboxsf and/or update your VBoxService.\n"); return -EINVAL; } @@ -490,4 +482,5 @@ module_exit(vboxsf_fini); MODULE_DESCRIPTION("Oracle VM VirtualBox Module for Host File System Access"); MODULE_AUTHOR("Oracle Corporation"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); MODULE_ALIAS_FS("vboxsf"); diff --git a/fs/verity/Makefile b/fs/verity/Makefile index 435559a4fa9eae1d947ef0d90d0945bc14b08ee8..4b8323450f90a4dab3431334ed0d34400c4db52b 100644 --- a/fs/verity/Makefile +++ b/fs/verity/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=ANDROID_GKI_VFS_EXPORT_ONLY + obj-$(CONFIG_FS_VERITY) += enable.o \ hash_algs.o \ init.o \ diff --git a/fs/verity/enable.c b/fs/verity/enable.c index f7e997a01ad07c4699d435223057af0b7fc49b82..bed2b10dfe3b7887253db5951b8d3e7f743a15b1 100644 --- a/fs/verity/enable.c +++ b/fs/verity/enable.c @@ -177,7 +177,7 @@ static int build_merkle_tree(struct file *filp, * (level 0) and ascending to the root node (level 'num_levels - 1'). * Then at the end (level 'num_levels'), calculate the root hash. */ - blocks = (inode->i_size + params->block_size - 1) >> + blocks = ((u64)inode->i_size + params->block_size - 1) >> params->log_blocksize; for (level = 0; level <= params->num_levels; level++) { err = build_merkle_tree_level(filp, level, blocks, params, diff --git a/fs/verity/open.c b/fs/verity/open.c index 60ff8af7219feafe90b06d5c8c5b0df301498186..92df87f5fa3881abec4f18d20686cc991558ced4 100644 --- a/fs/verity/open.c +++ b/fs/verity/open.c @@ -89,7 +89,7 @@ int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, */ /* Compute number of levels and the number of blocks in each level */ - blocks = (inode->i_size + params->block_size - 1) >> log_blocksize; + blocks = ((u64)inode->i_size + params->block_size - 1) >> log_blocksize; pr_debug("Data is %lld bytes (%llu blocks)\n", inode->i_size, blocks); while (blocks > 1) { if (params->num_levels >= FS_VERITY_MAX_LEVELS) { diff --git a/fs/xattr.c b/fs/xattr.c index d6bf5a7e2420e7080add682f4a4926f2a2cecff0..f157f0b1bdfc238850db44c36fe41600e8ae4058 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -291,7 +291,7 @@ retry_deleg: } return error; } -EXPORT_SYMBOL_GPL(vfs_setxattr); +EXPORT_SYMBOL_NS_GPL(vfs_setxattr, ANDROID_GKI_VFS_EXPORT_ONLY); static ssize_t xattr_getsecurity(struct inode *inode, const char *name, void *value, @@ -405,7 +405,7 @@ vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) { return __vfs_getxattr(dentry, dentry->d_inode, name, value, size, 0); } -EXPORT_SYMBOL_GPL(vfs_getxattr); +EXPORT_SYMBOL_NS_GPL(vfs_getxattr, ANDROID_GKI_VFS_EXPORT_ONLY); ssize_t vfs_listxattr(struct dentry *dentry, char *list, size_t size) @@ -425,7 +425,7 @@ vfs_listxattr(struct dentry *dentry, char *list, size_t size) } return error; } -EXPORT_SYMBOL_GPL(vfs_listxattr); +EXPORT_SYMBOL_NS_GPL(vfs_listxattr, ANDROID_GKI_VFS_EXPORT_ONLY); int __vfs_removexattr(struct dentry *dentry, const char *name) diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c index e2148f2d5d6bc048ebc666fc6ada98fb13fe5f06..17f36db2f792891604611d3b40d18cb281460edc 100644 --- a/fs/xfs/xfs_bio_io.c +++ b/fs/xfs/xfs_bio_io.c @@ -6,7 +6,7 @@ static inline unsigned int bio_max_vecs(unsigned int count) { - return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES); + return bio_max_segs(howmany(count, PAGE_SIZE)); } int diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 118819030dbb502dedbb37bf18a3bd7e3ee46a49..616c4effb03846919458544e63c16f6736945396 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1480,7 +1480,7 @@ xfs_buf_ioapply_map( int op) { int page_index; - int total_nr_pages = bp->b_page_count; + unsigned int total_nr_pages = bp->b_page_count; int nr_pages; struct bio *bio; sector_t sector = bp->b_maps[map].bm_bn; @@ -1505,7 +1505,7 @@ xfs_buf_ioapply_map( next_chunk: atomic_inc(&bp->b_io_remaining); - nr_pages = min(total_nr_pages, BIO_MAX_PAGES); + nr_pages = bio_max_segs(total_nr_pages); bio = bio_alloc(GFP_NOIO, nr_pages); bio_set_dev(bio, bp->b_target->bt_bdev); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 3fbd98f61ea5c02ce200f6c38cc5e1c8f781f4f3..646735aad45df823e67a1a04fcc8520d6f7669d5 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -686,7 +686,8 @@ xfs_ioc_space( if (bf->l_start > XFS_ISIZE(ip)) { error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), - bf->l_start - XFS_ISIZE(ip), 0); + bf->l_start - XFS_ISIZE(ip), + XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e3e229e52512adafac15b804f873eacd432b534d..6a7724a3560acebf8665ea1c911ff302ad53ab72 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -2224,3 +2224,4 @@ module_exit(exit_xfs_fs); MODULE_AUTHOR("Silicon Graphics, Inc."); MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 2243dc1fb48fe36146485bb5ce853ac170dc605b..0ed752c6ef25f9f59746202d6c5dd93074e73737 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -1799,5 +1799,7 @@ static void __exit zonefs_exit(void) MODULE_AUTHOR("Damien Le Moal"); MODULE_DESCRIPTION("Zone file system for zoned block devices"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_FS("zonefs"); +MODULE_IMPORT_NS(ANDROID_GKI_VFS_EXPORT_ONLY); module_init(zonefs_init); module_exit(zonefs_exit); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 6ad3b89a8a2e06c4d2403fd816f3ae7872c458f7..0f5366792d22e1a12c9564ee912dc3c3947c9006 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -605,9 +605,10 @@ int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); #ifdef CONFIG_X86 -bool acpi_device_always_present(struct acpi_device *adev); +bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *status); #else -static inline bool acpi_device_always_present(struct acpi_device *adev) +static inline bool acpi_device_override_status(struct acpi_device *adev, + unsigned long long *status) { return false; } diff --git a/include/acpi/actypes.h b/include/acpi/actypes.h index 647cb11d0a0a3026321dddcc9a261a6b94c645ef..7334037624c5c3e493c21d7ce96dd1c893a38e04 100644 --- a/include/acpi/actypes.h +++ b/include/acpi/actypes.h @@ -536,8 +536,14 @@ typedef u64 acpi_integer; * Can be used with access_width of struct acpi_generic_address and access_size of * struct acpi_resource_generic_register. */ -#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + 2)) -#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) - 1)) +#define ACPI_ACCESS_BIT_SHIFT 2 +#define ACPI_ACCESS_BYTE_SHIFT -1 +#define ACPI_ACCESS_BIT_MAX (31 - ACPI_ACCESS_BIT_SHIFT) +#define ACPI_ACCESS_BYTE_MAX (31 - ACPI_ACCESS_BYTE_SHIFT) +#define ACPI_ACCESS_BIT_DEFAULT (8 - ACPI_ACCESS_BIT_SHIFT) +#define ACPI_ACCESS_BYTE_DEFAULT (8 - ACPI_ACCESS_BYTE_SHIFT) +#define ACPI_ACCESS_BIT_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BIT_SHIFT)) +#define ACPI_ACCESS_BYTE_WIDTH(size) (1 << ((size) + ACPI_ACCESS_BYTE_SHIFT)) /******************************************************************************* * diff --git a/include/asm-generic/preempt.h b/include/asm-generic/preempt.h index d683f5e6d7913be7746e375908265b827d1b74c2..b4d43a4af5f794e9f7a0094cd002610fa1beb6d0 100644 --- a/include/asm-generic/preempt.h +++ b/include/asm-generic/preempt.h @@ -29,7 +29,7 @@ static __always_inline void preempt_count_set(int pc) } while (0) #define init_idle_preempt_count(p, cpu) do { \ - task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \ + task_thread_info(p)->preempt_count = PREEMPT_DISABLED; \ } while (0) static __always_inline void set_preempt_need_resched(void) diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 864b9997efb20d55c5df3fd2b544575134e070e9..90f21898aad838660e993c3e9fce553ea4b7af56 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -61,7 +61,6 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); -int kvm_pmu_probe_pmuver(void); #else struct kvm_pmu { }; @@ -118,8 +117,6 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) return 0; } -static inline int kvm_pmu_probe_pmuver(void) { return 0xf; } - #endif #endif diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h index 5b58bd2fe088b1fc052e4c13c611abfbbd06773b..4a1003323a0c9e84dee7b3fb862d3a2dc293cf70 100644 --- a/include/kvm/arm_psci.h +++ b/include/kvm/arm_psci.h @@ -13,8 +13,9 @@ #define KVM_ARM_PSCI_0_1 PSCI_VERSION(0, 1) #define KVM_ARM_PSCI_0_2 PSCI_VERSION(0, 2) #define KVM_ARM_PSCI_1_0 PSCI_VERSION(1, 0) +#define KVM_ARM_PSCI_1_1 PSCI_VERSION(1, 1) -#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0 +#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_1 /* * We need the KVM pointer independently from the vcpu as we can call diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index e602d848fc1ab7291fe04dc56745bc41e28b8b1c..65e09c3924be3a75ed1fc6fb6827ccc3eda0f06c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -5,9 +5,11 @@ #ifndef __KVM_ARM_VGIC_H #define __KVM_ARM_VGIC_H -#include +#include #include #include +#include +#include #include #include #include @@ -378,8 +380,7 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); void kvm_vgic_load(struct kvm_vcpu *vcpu); -void kvm_vgic_put(struct kvm_vcpu *vcpu); -void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu); +void kvm_vgic_put(struct kvm_vcpu *vcpu, bool blocking); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.initialized) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 2f7508c3c2d6a1f0c35e25e4d71d0c11e68b44c6..b24659fca7f46a57d60a2a4e6429b0b9b2a02d6b 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -953,6 +953,15 @@ static inline struct acpi_device *acpi_resource_consumer(struct resource *res) return NULL; } +static inline int acpi_register_wakeup_handler(int wake_irq, + bool (*wakeup)(void *context), void *context) +{ + return -ENXIO; +} + +static inline void acpi_unregister_wakeup_handler( + bool (*wakeup)(void *context), void *context) { } + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC diff --git a/include/linux/android_debug_symbols.h b/include/linux/android_debug_symbols.h index 6a54e9116e3ddfed4874c9e6ef7f34abd847321b..07484238c8e4a05d70f55d8a126abdec608675e8 100644 --- a/include/linux/android_debug_symbols.h +++ b/include/linux/android_debug_symbols.h @@ -37,6 +37,7 @@ enum android_debug_symbol { #ifdef CONFIG_SYSCTL ADS_SYSCTL_LEGACY_VA_LAYOUT, #endif + ADS_SHOW_MEM, ADS_END }; diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 5cef2b8b0479e463467b4560151c665414564a3e..32b0f91ed2bdebe4497646638bfb097b2d631170 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -104,6 +104,13 @@ /* KVM "vendor specific" services */ #define ARM_SMCCC_KVM_FUNC_FEATURES 0 #define ARM_SMCCC_KVM_FUNC_PTP 1 +#define ARM_SMCCC_KVM_FUNC_HYP_MEMINFO 2 +#define ARM_SMCCC_KVM_FUNC_MEM_SHARE 3 +#define ARM_SMCCC_KVM_FUNC_MEM_UNSHARE 4 +#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO 5 +#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL 6 +#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP 7 +#define ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP 8 #define ARM_SMCCC_KVM_FUNC_FEATURES_2 127 #define ARM_SMCCC_KVM_NUM_FUNCS 128 @@ -113,6 +120,11 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_FEATURES) +#define ARM_SMCCC_ARCH_WORKAROUND_3 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x3fff) + #define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED 1 /* @@ -126,10 +138,52 @@ ARM_SMCCC_OWNER_VENDOR_HYP, \ ARM_SMCCC_KVM_FUNC_PTP) +#define ARM_SMCCC_VENDOR_HYP_KVM_HYP_MEMINFO_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_HYP_MEMINFO) + +#define ARM_SMCCC_VENDOR_HYP_KVM_MEM_SHARE_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MEM_SHARE) + +#define ARM_SMCCC_VENDOR_HYP_KVM_MEM_UNSHARE_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MEM_UNSHARE) + /* ptp_kvm counter type ID */ #define KVM_PTP_VIRT_COUNTER 0 #define KVM_PTP_PHYS_COUNTER 1 +#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_INFO_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MMIO_GUARD_INFO) + +#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_ENROLL_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MMIO_GUARD_ENROLL) + +#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_MAP_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MMIO_GUARD_MAP) + +#define ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_UNMAP_FUNC_ID \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + ARM_SMCCC_KVM_FUNC_MMIO_GUARD_UNMAP) + /* Paravirtualised time calls (defined by ARM DEN0057A) */ #define ARM_SMCCC_HV_PV_TIME_FEATURES \ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ diff --git a/include/linux/arm_ffa.h b/include/linux/arm_ffa.h new file mode 100644 index 0000000000000000000000000000000000000000..f0cb5b72b87b5c0ff2251614ba4160f643b96937 --- /dev/null +++ b/include/linux/arm_ffa.h @@ -0,0 +1,360 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 ARM Ltd. + */ + +#ifndef _LINUX_ARM_FFA_H +#define _LINUX_ARM_FFA_H + +#include +#include +#include +#include + +#define FFA_SMC(calling_convention, func_num) \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, (calling_convention), \ + ARM_SMCCC_OWNER_STANDARD, (func_num)) + +#define FFA_SMC_32(func_num) FFA_SMC(ARM_SMCCC_SMC_32, (func_num)) +#define FFA_SMC_64(func_num) FFA_SMC(ARM_SMCCC_SMC_64, (func_num)) + +#define FFA_ERROR FFA_SMC_32(0x60) +#define FFA_SUCCESS FFA_SMC_32(0x61) +#define FFA_INTERRUPT FFA_SMC_32(0x62) +#define FFA_VERSION FFA_SMC_32(0x63) +#define FFA_FEATURES FFA_SMC_32(0x64) +#define FFA_RX_RELEASE FFA_SMC_32(0x65) +#define FFA_RXTX_MAP FFA_SMC_32(0x66) +#define FFA_FN64_RXTX_MAP FFA_SMC_64(0x66) +#define FFA_RXTX_UNMAP FFA_SMC_32(0x67) +#define FFA_PARTITION_INFO_GET FFA_SMC_32(0x68) +#define FFA_ID_GET FFA_SMC_32(0x69) +#define FFA_MSG_POLL FFA_SMC_32(0x6A) +#define FFA_MSG_WAIT FFA_SMC_32(0x6B) +#define FFA_YIELD FFA_SMC_32(0x6C) +#define FFA_RUN FFA_SMC_32(0x6D) +#define FFA_MSG_SEND FFA_SMC_32(0x6E) +#define FFA_MSG_SEND_DIRECT_REQ FFA_SMC_32(0x6F) +#define FFA_FN64_MSG_SEND_DIRECT_REQ FFA_SMC_64(0x6F) +#define FFA_MSG_SEND_DIRECT_RESP FFA_SMC_32(0x70) +#define FFA_FN64_MSG_SEND_DIRECT_RESP FFA_SMC_64(0x70) +#define FFA_MEM_DONATE FFA_SMC_32(0x71) +#define FFA_FN64_MEM_DONATE FFA_SMC_64(0x71) +#define FFA_MEM_LEND FFA_SMC_32(0x72) +#define FFA_FN64_MEM_LEND FFA_SMC_64(0x72) +#define FFA_MEM_SHARE FFA_SMC_32(0x73) +#define FFA_FN64_MEM_SHARE FFA_SMC_64(0x73) +#define FFA_MEM_RETRIEVE_REQ FFA_SMC_32(0x74) +#define FFA_FN64_MEM_RETRIEVE_REQ FFA_SMC_64(0x74) +#define FFA_MEM_RETRIEVE_RESP FFA_SMC_32(0x75) +#define FFA_MEM_RELINQUISH FFA_SMC_32(0x76) +#define FFA_MEM_RECLAIM FFA_SMC_32(0x77) +#define FFA_MEM_OP_PAUSE FFA_SMC_32(0x78) +#define FFA_MEM_OP_RESUME FFA_SMC_32(0x79) +#define FFA_MEM_FRAG_RX FFA_SMC_32(0x7A) +#define FFA_MEM_FRAG_TX FFA_SMC_32(0x7B) +#define FFA_NORMAL_WORLD_RESUME FFA_SMC_32(0x7C) + +/* + * For some calls it is necessary to use SMC64 to pass or return 64-bit values. + * For such calls FFA_FN_NATIVE(name) will choose the appropriate + * (native-width) function ID. + */ +#ifdef CONFIG_64BIT +#define FFA_FN_NATIVE(name) FFA_FN64_##name +#else +#define FFA_FN_NATIVE(name) FFA_##name +#endif + +/* FFA error codes. */ +#define FFA_RET_SUCCESS (0) +#define FFA_RET_NOT_SUPPORTED (-1) +#define FFA_RET_INVALID_PARAMETERS (-2) +#define FFA_RET_NO_MEMORY (-3) +#define FFA_RET_BUSY (-4) +#define FFA_RET_INTERRUPTED (-5) +#define FFA_RET_DENIED (-6) +#define FFA_RET_RETRY (-7) +#define FFA_RET_ABORTED (-8) + +/* FFA version encoding */ +#define FFA_MAJOR_VERSION_MASK GENMASK(30, 16) +#define FFA_MINOR_VERSION_MASK GENMASK(15, 0) +#define FFA_MAJOR_VERSION(x) ((u16)(FIELD_GET(FFA_MAJOR_VERSION_MASK, (x)))) +#define FFA_MINOR_VERSION(x) ((u16)(FIELD_GET(FFA_MINOR_VERSION_MASK, (x)))) +#define FFA_PACK_VERSION_INFO(major, minor) \ + (FIELD_PREP(FFA_MAJOR_VERSION_MASK, (major)) | \ + FIELD_PREP(FFA_MINOR_VERSION_MASK, (minor))) +#define FFA_VERSION_1_0 FFA_PACK_VERSION_INFO(1, 0) + +/** + * FF-A specification mentions explicitly about '4K pages'. This should + * not be confused with the kernel PAGE_SIZE, which is the translation + * granule kernel is configured and may be one among 4K, 16K and 64K. + */ +#define FFA_PAGE_SIZE SZ_4K + +/* + * Minimum buffer size/alignment encodings returned by an FFA_FEATURES + * query for FFA_RXTX_MAP. + */ +#define FFA_FEAT_RXTX_MIN_SZ_4K 0 +#define FFA_FEAT_RXTX_MIN_SZ_64K 1 +#define FFA_FEAT_RXTX_MIN_SZ_16K 2 + +/* FFA Bus/Device/Driver related */ +struct ffa_device { + int vm_id; + bool mode_32bit; + uuid_t uuid; + struct device dev; +}; + +#define to_ffa_dev(d) container_of(d, struct ffa_device, dev) + +struct ffa_device_id { + uuid_t uuid; +}; + +struct ffa_driver { + const char *name; + int (*probe)(struct ffa_device *sdev); + void (*remove)(struct ffa_device *sdev); + const struct ffa_device_id *id_table; + + struct device_driver driver; +}; + +#define to_ffa_driver(d) container_of(d, struct ffa_driver, driver) + +static inline void ffa_dev_set_drvdata(struct ffa_device *fdev, void *data) +{ + fdev->dev.driver_data = data; +} + +#if IS_REACHABLE(CONFIG_ARM_FFA_TRANSPORT) +struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id); +void ffa_device_unregister(struct ffa_device *ffa_dev); +int ffa_driver_register(struct ffa_driver *driver, struct module *owner, + const char *mod_name); +void ffa_driver_unregister(struct ffa_driver *driver); +bool ffa_device_is_valid(struct ffa_device *ffa_dev); +const struct ffa_dev_ops *ffa_dev_ops_get(struct ffa_device *dev); + +#else +static inline +struct ffa_device *ffa_device_register(const uuid_t *uuid, int vm_id) +{ + return NULL; +} + +static inline void ffa_device_unregister(struct ffa_device *dev) {} + +static inline int +ffa_driver_register(struct ffa_driver *driver, struct module *owner, + const char *mod_name) +{ + return -EINVAL; +} + +static inline void ffa_driver_unregister(struct ffa_driver *driver) {} + +static inline +bool ffa_device_is_valid(struct ffa_device *ffa_dev) { return false; } + +static inline +const struct ffa_dev_ops *ffa_dev_ops_get(struct ffa_device *dev) +{ + return NULL; +} +#endif /* CONFIG_ARM_FFA_TRANSPORT */ + +#define ffa_register(driver) \ + ffa_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) +#define ffa_unregister(driver) \ + ffa_driver_unregister(driver) + +/** + * module_ffa_driver() - Helper macro for registering a psa_ffa driver + * @__ffa_driver: ffa_driver structure + * + * Helper macro for psa_ffa drivers to set up proper module init / exit + * functions. Replaces module_init() and module_exit() and keeps people from + * printing pointless things to the kernel log when their driver is loaded. + */ +#define module_ffa_driver(__ffa_driver) \ + module_driver(__ffa_driver, ffa_register, ffa_unregister) + +/* FFA transport related */ +struct ffa_partition_info { + u16 id; + u16 exec_ctxt; +/* partition supports receipt of direct requests */ +#define FFA_PARTITION_DIRECT_RECV BIT(0) +/* partition can send direct requests. */ +#define FFA_PARTITION_DIRECT_SEND BIT(1) +/* partition can send and receive indirect messages. */ +#define FFA_PARTITION_INDIRECT_MSG BIT(2) + u32 properties; +}; + +/* For use with FFA_MSG_SEND_DIRECT_{REQ,RESP} which pass data via registers */ +struct ffa_send_direct_data { + unsigned long data0; /* w3/x3 */ + unsigned long data1; /* w4/x4 */ + unsigned long data2; /* w5/x5 */ + unsigned long data3; /* w6/x6 */ + unsigned long data4; /* w7/x7 */ +}; + +struct ffa_mem_region_addr_range { + /* The base IPA of the constituent memory region, aligned to 4 kiB */ + u64 address; + /* The number of 4 kiB pages in the constituent memory region. */ + u32 pg_cnt; + u32 reserved; +}; + +struct ffa_composite_mem_region { + /* + * The total number of 4 kiB pages included in this memory region. This + * must be equal to the sum of page counts specified in each + * `struct ffa_mem_region_addr_range`. + */ + u32 total_pg_cnt; + /* The number of constituents included in this memory region range */ + u32 addr_range_cnt; + u64 reserved; + /** An array of `addr_range_cnt` memory region constituents. */ + struct ffa_mem_region_addr_range constituents[]; +}; + +struct ffa_mem_region_attributes { + /* The ID of the VM to which the memory is being given or shared. */ + u16 receiver; + /* + * The permissions with which the memory region should be mapped in the + * receiver's page table. + */ +#define FFA_MEM_EXEC BIT(3) +#define FFA_MEM_NO_EXEC BIT(2) +#define FFA_MEM_RW BIT(1) +#define FFA_MEM_RO BIT(0) + u8 attrs; + /* + * Flags used during FFA_MEM_RETRIEVE_REQ and FFA_MEM_RETRIEVE_RESP + * for memory regions with multiple borrowers. + */ +#define FFA_MEM_RETRIEVE_SELF_BORROWER BIT(0) + u8 flag; + /* + * Offset in bytes from the start of the outer `ffa_memory_region` to + * an `struct ffa_mem_region_addr_range`. + */ + u32 composite_off; + u64 reserved; +}; + +struct ffa_mem_region { + /* The ID of the VM/owner which originally sent the memory region */ + u16 sender_id; +#define FFA_MEM_NORMAL BIT(5) +#define FFA_MEM_DEVICE BIT(4) + +#define FFA_MEM_WRITE_BACK (3 << 2) +#define FFA_MEM_NON_CACHEABLE (1 << 2) + +#define FFA_DEV_nGnRnE (0 << 2) +#define FFA_DEV_nGnRE (1 << 2) +#define FFA_DEV_nGRE (2 << 2) +#define FFA_DEV_GRE (3 << 2) + +#define FFA_MEM_NON_SHAREABLE (0) +#define FFA_MEM_OUTER_SHAREABLE (2) +#define FFA_MEM_INNER_SHAREABLE (3) + u8 attributes; + u8 reserved_0; +/* + * Clear memory region contents after unmapping it from the sender and + * before mapping it for any receiver. + */ +#define FFA_MEM_CLEAR BIT(0) +/* + * Whether the hypervisor may time slice the memory sharing or retrieval + * operation. + */ +#define FFA_TIME_SLICE_ENABLE BIT(1) + +#define FFA_MEM_RETRIEVE_TYPE_IN_RESP (0 << 3) +#define FFA_MEM_RETRIEVE_TYPE_SHARE (1 << 3) +#define FFA_MEM_RETRIEVE_TYPE_LEND (2 << 3) +#define FFA_MEM_RETRIEVE_TYPE_DONATE (3 << 3) + +#define FFA_MEM_RETRIEVE_ADDR_ALIGN_HINT BIT(9) +#define FFA_MEM_RETRIEVE_ADDR_ALIGN(x) ((x) << 5) + /* Flags to control behaviour of the transaction. */ + u32 flags; +#define HANDLE_LOW_MASK GENMASK_ULL(31, 0) +#define HANDLE_HIGH_MASK GENMASK_ULL(63, 32) +#define HANDLE_LOW(x) ((u32)(FIELD_GET(HANDLE_LOW_MASK, (x)))) +#define HANDLE_HIGH(x) ((u32)(FIELD_GET(HANDLE_HIGH_MASK, (x)))) + +#define PACK_HANDLE(l, h) \ + (FIELD_PREP(HANDLE_LOW_MASK, (l)) | FIELD_PREP(HANDLE_HIGH_MASK, (h))) + /* + * A globally-unique ID assigned by the hypervisor for a region + * of memory being sent between VMs. + */ + u64 handle; + /* + * An implementation defined value associated with the receiver and the + * memory region. + */ + u64 tag; + u32 reserved_1; + /* + * The number of `ffa_mem_region_attributes` entries included in this + * transaction. + */ + u32 ep_count; + /* + * An array of endpoint memory access descriptors. + * Each one specifies a memory region offset, an endpoint and the + * attributes with which this memory region should be mapped in that + * endpoint's page table. + */ + struct ffa_mem_region_attributes ep_mem_access[]; +}; + +#define COMPOSITE_OFFSET(x) \ + (offsetof(struct ffa_mem_region, ep_mem_access[x])) +#define CONSTITUENTS_OFFSET(x) \ + (offsetof(struct ffa_composite_mem_region, constituents[x])) +#define COMPOSITE_CONSTITUENTS_OFFSET(x, y) \ + (COMPOSITE_OFFSET(x) + CONSTITUENTS_OFFSET(y)) + +struct ffa_mem_ops_args { + bool use_txbuf; + u32 nattrs; + u32 flags; + u64 tag; + u64 g_handle; + struct scatterlist *sg; + struct ffa_mem_region_attributes *attrs; +}; + +struct ffa_dev_ops { + u32 (*api_version_get)(void); + int (*partition_info_get)(const char *uuid_str, + struct ffa_partition_info *buffer); + void (*mode_32bit_set)(struct ffa_device *dev); + int (*sync_send_receive)(struct ffa_device *dev, + struct ffa_send_direct_data *data); + int (*memory_reclaim)(u64 g_handle, u32 flags); + int (*memory_share)(struct ffa_device *dev, + struct ffa_mem_ops_args *args); + int (*memory_lend)(struct ffa_device *dev, + struct ffa_mem_ops_args *args); +}; + +#endif /* _LINUX_ARM_FFA_H */ diff --git a/include/linux/bio.h b/include/linux/bio.h index 0d4d61743b1adc78c04cb3aadb38cc612c695ef8..2cfea89540f35dc7a8f9f5881b82c6f35815bf1b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -20,7 +20,12 @@ #define BIO_BUG_ON #endif -#define BIO_MAX_PAGES 256 +#define BIO_MAX_PAGES 256U + +static inline unsigned int bio_max_segs(unsigned int nr_segs) +{ + return min(nr_segs, BIO_MAX_PAGES); +} #define bio_prio(bio) (bio)->bi_ioprio #define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1626eb27be2464d9fd35969313c123e4579ce74c..a2aa73d8437a09c088e6d135c92c664e88a1ffe0 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -394,7 +394,13 @@ enum { BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 5, + /* Do not allow an I/O scheduler to be configured. */ BLK_MQ_F_NO_SCHED = 1 << 6, + /* + * Select 'none' during queue registration in case of a single hwq + * or shared hwqs instead of 'mq-deadline'. + */ + BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 7, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, diff --git a/include/linux/blk-pm.h b/include/linux/blk-pm.h index b80c65aba2493aeba29c94e641bb7bd2e96ec6a4..2580e05a8ab67203efde59504ffb655480b531cb 100644 --- a/include/linux/blk-pm.h +++ b/include/linux/blk-pm.h @@ -14,7 +14,7 @@ extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev); extern int blk_pre_runtime_suspend(struct request_queue *q); extern void blk_post_runtime_suspend(struct request_queue *q, int err); extern void blk_pre_runtime_resume(struct request_queue *q); -extern void blk_post_runtime_resume(struct request_queue *q, int err); +extern void blk_post_runtime_resume(struct request_queue *q); extern void blk_set_runtime_active(struct request_queue *q); #else static inline void blk_pm_runtime_init(struct request_queue *q, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 76e1ca16cbe37092abb8a6002ab67ca5b032ea19..2e1dfba8d1fdd962128e1ee446f65c3b72a8eecd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -61,6 +61,14 @@ struct blk_keyslot_manager; */ #define BLKCG_MAX_POLS 6 +static inline int blk_validate_block_size(unsigned int bsize) +{ + if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) + return -EINVAL; + + return 0; +} + typedef void (rq_end_io_fn)(struct request *, blk_status_t); /* @@ -704,6 +712,18 @@ static inline bool queue_is_mq(struct request_queue *q) return q->mq_ops; } +#ifdef CONFIG_PM +static inline enum rpm_status queue_rpm_status(struct request_queue *q) +{ + return q->rpm_status; +} +#else +static inline enum rpm_status queue_rpm_status(struct request_queue *q) +{ + return RPM_ACTIVE; +} +#endif + static inline enum blk_zoned_model blk_queue_zoned_model(struct request_queue *q) { @@ -1248,8 +1268,6 @@ struct blk_plug { bool multiple_queues; bool nowait; }; -#define BLK_MAX_REQUEST_COUNT 16 -#define BLK_PLUG_FLUSH_SIZE (128 * 1024) struct blk_plug_cb; typedef void (*blk_plug_cb_fn)(struct blk_plug_cb *, bool); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6d0fdc560a0e517f6977571d6fed9fad52cfd0db..41d9608269c0bbb826e4e3d1acbea453c8d2a324 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -177,7 +177,7 @@ struct bpf_map { atomic64_t usercnt; struct work_struct work; struct mutex freeze_mutex; - u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */ + atomic64_t writecnt; }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -534,6 +534,8 @@ struct btf_func_model { * programs only. Should not be used with normal calls and indirect calls. */ #define BPF_TRAMP_F_SKIP_FRAME BIT(2) +/* Return the return value of fentry prog. Only used by bpf_struct_ops. */ +#define BPF_TRAMP_F_RET_FENTRY_RET BIT(4) /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2 @@ -672,6 +674,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); +int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ @@ -869,8 +872,11 @@ struct bpf_array_aux { * stored in the map to make sure that all callers and callees have * the same prog type and JITed flag. */ - enum bpf_prog_type type; - bool jited; + struct { + spinlock_t lock; + enum bpf_prog_type type; + bool jited; + } owner; /* Programs with direct jumps into programs part of this array. */ struct list_head poke_progs; struct bpf_map *map; @@ -1256,6 +1262,7 @@ void bpf_map_charge_move(struct bpf_map_memory *dst, void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); +bool bpf_map_write_active(const struct bpf_map *map); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, @@ -1487,6 +1494,12 @@ struct bpf_prog *bpf_prog_by_id(u32 id); struct bpf_link *bpf_link_by_id(u32 id); const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); + +static inline bool unprivileged_ebpf_enabled(void) +{ + return !sysctl_unprivileged_bpf_disabled; +} + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1681,6 +1694,12 @@ bpf_base_func_proto(enum bpf_func_id func_id) { return NULL; } + +static inline bool unprivileged_ebpf_enabled(void) +{ + return false; +} + #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index a8137bb6dd3c223e42426c3b6d6442af0d63c762..142c98a3497d933142c8974b14b94d1b9d24498b 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -77,6 +77,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm, void *, void *) #endif /* CONFIG_BPF_LSM */ #endif +#ifdef CONFIG_FUSE_BPF +BPF_PROG_TYPE(BPF_PROG_TYPE_FUSE, fuse, struct fuse_bpf_args, struct fuse_bpf_args) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f2286b8b0e6630179628989a3ff56a6ec58cfa1b..e20362ae573e0ba13aebe62344cd2689e71854d8 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -368,6 +368,13 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) log->level == BPF_LOG_KERNEL); } +static inline bool +bpf_verifier_log_attr_valid(const struct bpf_verifier_log *log) +{ + return log->len_total >= 128 && log->len_total <= UINT_MAX >> 2 && + log->level && log->ubuf && !(log->level & ~BPF_LOG_MASK); +} + #define BPF_MAX_SUBPROGS 256 struct bpf_subprog_info { diff --git a/include/linux/cc_platform.h b/include/linux/cc_platform.h new file mode 100644 index 0000000000000000000000000000000000000000..a075b70b9a70cbade89faaf2813e700920b00043 --- /dev/null +++ b/include/linux/cc_platform.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Confidential Computing Platform Capability checks + * + * Copyright (C) 2021 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky + */ + +#ifndef _LINUX_CC_PLATFORM_H +#define _LINUX_CC_PLATFORM_H + +#include +#include + +/** + * enum cc_attr - Confidential computing attributes + * + * These attributes represent confidential computing features that are + * currently active. + */ +enum cc_attr { + /** + * @CC_ATTR_MEM_ENCRYPT: Memory encryption is active + * + * The platform/OS is running with active memory encryption. This + * includes running either as a bare-metal system or a hypervisor + * and actively using memory encryption or as a guest/virtual machine + * and actively using memory encryption. + * + * Examples include SME, SEV and SEV-ES. + */ + CC_ATTR_MEM_ENCRYPT, + + /** + * @CC_ATTR_HOST_MEM_ENCRYPT: Host memory encryption is active + * + * The platform/OS is running as a bare-metal system or a hypervisor + * and actively using memory encryption. + * + * Examples include SME. + */ + CC_ATTR_HOST_MEM_ENCRYPT, + + /** + * @CC_ATTR_GUEST_MEM_ENCRYPT: Guest memory encryption is active + * + * The platform/OS is running as a guest/virtual machine and actively + * using memory encryption. + * + * Examples include SEV and SEV-ES. + */ + CC_ATTR_GUEST_MEM_ENCRYPT, + + /** + * @CC_ATTR_GUEST_STATE_ENCRYPT: Guest state encryption is active + * + * The platform/OS is running as a guest/virtual machine and actively + * using memory encryption and register state encryption. + * + * Examples include SEV-ES. + */ + CC_ATTR_GUEST_STATE_ENCRYPT, +}; + +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + +/** + * cc_platform_has() - Checks if the specified cc_attr attribute is active + * @attr: Confidential computing attribute to check + * + * The cc_platform_has() function will return an indicator as to whether the + * specified Confidential Computing attribute is currently active. + * + * Context: Any context + * Return: + * * TRUE - Specified Confidential Computing attribute is active + * * FALSE - Specified Confidential Computing attribute is not active + */ +bool cc_platform_has(enum cc_attr attr); + +#else /* !CONFIG_ARCH_HAS_CC_PLATFORM */ + +static inline bool cc_platform_has(enum cc_attr attr) { return false; } + +#endif /* CONFIG_ARCH_HAS_CC_PLATFORM */ + +#endif /* _LINUX_CC_PLATFORM_H */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 67b2190b1daef5e896158d2132cf19c53b1cd729..ceecd7829d35524f68b6f2b676342c54b2a3cfa3 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -435,6 +435,18 @@ static inline void cgroup_put(struct cgroup *cgrp) css_put(&cgrp->self); } +extern struct mutex cgroup_mutex; + +static inline void cgroup_lock(void) +{ + mutex_lock(&cgroup_mutex); +} + +static inline void cgroup_unlock(void) +{ + mutex_unlock(&cgroup_mutex); +} + /** * task_css_set_check - obtain a task's css_set with extra access conditions * @task: the task to obtain css_set for @@ -449,7 +461,6 @@ static inline void cgroup_put(struct cgroup *cgrp) * as locks used during the cgroup_subsys::attach() methods. */ #ifdef CONFIG_PROVE_RCU -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ @@ -709,6 +720,8 @@ struct cgroup; static inline u64 cgroup_id(struct cgroup *cgrp) { return 1; } static inline void css_get(struct cgroup_subsys_state *css) {} static inline void css_put(struct cgroup_subsys_state *css) {} +static inline void cgroup_lock(void) {} +static inline void cgroup_unlock(void) {} static inline int cgroup_attach_task_all(struct task_struct *from, struct task_struct *t) { return 0; } static inline int cgroupstats_build(struct cgroupstats *stats, diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index df8e709819bc08e9add51aeb72d20c1c115d343a..65783d0db2d59c4c44158ce20d0f1d7b395a4c94 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -43,6 +43,8 @@ struct module; * @shift: Cycle to nanosecond divisor (power of two) * @max_idle_ns: Maximum idle time permitted by the clocksource (nsecs) * @maxadj: Maximum adjustment value to mult (~11%) + * @uncertainty_margin: Maximum uncertainty in nanoseconds per half second. + * Zero says to use default WATCHDOG_THRESHOLD. * @archdata: Optional arch-specific data * @max_cycles: Maximum safe cycle value which won't overflow on * multiplication @@ -98,6 +100,7 @@ struct clocksource { u32 shift; u64 max_idle_ns; u32 maxadj; + u32 uncertainty_margin; #ifdef CONFIG_ARCH_CLOCKSOURCE_DATA struct arch_clocksource_data archdata; #endif diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 8a26cd96674120adf30c49b2a9e25f1c0d509073..49b0ac8b6fd323b422c047043126951bd0c68531 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -3,16 +3,6 @@ #error "Please don't include directly, include instead." #endif -#define CLANG_VERSION (__clang_major__ * 10000 \ - + __clang_minor__ * 100 \ - + __clang_patchlevel__) - -#if CLANG_VERSION < 100001 -#ifndef __BPF_TRACING__ -# error Sorry, your version of Clang is too old - please use 10.0.1 or newer. -#endif -#endif - /* Compiler specific definitions for Clang compiler */ /* same as gcc, this was present in clang-2.6 so we can assume it works diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 4cf524ccab43020d7adb70b3e523a2268d153953..c6ab0dcb9c0c76e55dbf25a9c703da4b45a48334 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -10,17 +10,6 @@ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) -/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ -#if GCC_VERSION < 40900 -# error Sorry, your version of GCC is too old - please use 4.9 or newer. -#elif defined(CONFIG_ARM64) && GCC_VERSION < 50100 -/* - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293 - * https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk - */ -# error Sorry, your version of GCC is too old - please use 5.1 or newer. -#endif - /* * This macro obfuscates arithmetic on a variable address so that gcc * shouldn't recognize the original var, and make assumptions about it. diff --git a/include/linux/console.h b/include/linux/console.h index 4b1e26c4cb4286603b0a381022fef0468bb8728c..bc2a749e6f0d4dabe54139d2a6bbaf3c8d3ac99a 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -150,6 +150,8 @@ struct console { short flags; short index; int cflag; + uint ispeed; + uint ospeed; void *data; struct console *next; }; diff --git a/include/linux/damon.h b/include/linux/damon.h new file mode 100644 index 0000000000000000000000000000000000000000..5e1e3a128b77a9bac6452a7fa40c69e2c2cc456b --- /dev/null +++ b/include/linux/damon.h @@ -0,0 +1,511 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DAMON api + * + * Author: SeongJae Park + */ + +#ifndef _DAMON_H_ +#define _DAMON_H_ + +#include +#include +#include +#include + +/* Minimal region size. Every damon_region is aligned by this. */ +#define DAMON_MIN_REGION PAGE_SIZE +/* Max priority score for DAMON-based operation schemes */ +#define DAMOS_MAX_SCORE (99) + +/* Get a random number in [l, r) */ +static inline unsigned long damon_rand(unsigned long l, unsigned long r) +{ + return l + prandom_u32_max(r - l); +} + +/** + * struct damon_addr_range - Represents an address region of [@start, @end). + * @start: Start address of the region (inclusive). + * @end: End address of the region (exclusive). + */ +struct damon_addr_range { + unsigned long start; + unsigned long end; +}; + +/** + * struct damon_region - Represents a monitoring target region. + * @ar: The address range of the region. + * @sampling_addr: Address of the sample for the next access check. + * @nr_accesses: Access frequency of this region. + * @list: List head for siblings. + * @age: Age of this region. + * + * @age is initially zero, increased for each aggregation interval, and reset + * to zero again if the access frequency is significantly changed. If two + * regions are merged into a new region, both @nr_accesses and @age of the new + * region are set as region size-weighted average of those of the two regions. + */ +struct damon_region { + struct damon_addr_range ar; + unsigned long sampling_addr; + unsigned int nr_accesses; + struct list_head list; + + unsigned int age; +/* private: Internal value for age calculation. */ + unsigned int last_nr_accesses; +}; + +/** + * struct damon_target - Represents a monitoring target. + * @id: Unique identifier for this target. + * @nr_regions: Number of monitoring target regions of this target. + * @regions_list: Head of the monitoring target regions of this target. + * @list: List head for siblings. + * + * Each monitoring context could have multiple targets. For example, a context + * for virtual memory address spaces could have multiple target processes. The + * @id of each target should be unique among the targets of the context. For + * example, in the virtual address monitoring context, it could be a pidfd or + * an address of an mm_struct. + */ +struct damon_target { + unsigned long id; + unsigned int nr_regions; + struct list_head regions_list; + struct list_head list; +}; + +/** + * enum damos_action - Represents an action of a Data Access Monitoring-based + * Operation Scheme. + * + * @DAMOS_WILLNEED: Call ``madvise()`` for the region with MADV_WILLNEED. + * @DAMOS_COLD: Call ``madvise()`` for the region with MADV_COLD. + * @DAMOS_PAGEOUT: Call ``madvise()`` for the region with MADV_PAGEOUT. + * @DAMOS_HUGEPAGE: Call ``madvise()`` for the region with MADV_HUGEPAGE. + * @DAMOS_NOHUGEPAGE: Call ``madvise()`` for the region with MADV_NOHUGEPAGE. + * @DAMOS_STAT: Do nothing but count the stat. + */ +enum damos_action { + DAMOS_WILLNEED, + DAMOS_COLD, + DAMOS_PAGEOUT, + DAMOS_HUGEPAGE, + DAMOS_NOHUGEPAGE, + DAMOS_STAT, /* Do nothing but only record the stat */ +}; + +/** + * struct damos_quota - Controls the aggressiveness of the given scheme. + * @ms: Maximum milliseconds that the scheme can use. + * @sz: Maximum bytes of memory that the action can be applied. + * @reset_interval: Charge reset interval in milliseconds. + * + * @weight_sz: Weight of the region's size for prioritization. + * @weight_nr_accesses: Weight of the region's nr_accesses for prioritization. + * @weight_age: Weight of the region's age for prioritization. + * + * To avoid consuming too much CPU time or IO resources for applying the + * &struct damos->action to large memory, DAMON allows users to set time and/or + * size quotas. The quotas can be set by writing non-zero values to &ms and + * &sz, respectively. If the time quota is set, DAMON tries to use only up to + * &ms milliseconds within &reset_interval for applying the action. If the + * size quota is set, DAMON tries to apply the action only up to &sz bytes + * within &reset_interval. + * + * Internally, the time quota is transformed to a size quota using estimated + * throughput of the scheme's action. DAMON then compares it against &sz and + * uses smaller one as the effective quota. + * + * For selecting regions within the quota, DAMON prioritizes current scheme's + * target memory regions using the &struct damon_primitive->get_scheme_score. + * You could customize the prioritization logic by setting &weight_sz, + * &weight_nr_accesses, and &weight_age, because monitoring primitives are + * encouraged to respect those. + */ +struct damos_quota { + unsigned long ms; + unsigned long sz; + unsigned long reset_interval; + + unsigned int weight_sz; + unsigned int weight_nr_accesses; + unsigned int weight_age; + +/* private: */ + /* For throughput estimation */ + unsigned long total_charged_sz; + unsigned long total_charged_ns; + + unsigned long esz; /* Effective size quota in bytes */ + + /* For charging the quota */ + unsigned long charged_sz; + unsigned long charged_from; + struct damon_target *charge_target_from; + unsigned long charge_addr_from; + + /* For prioritization */ + unsigned long histogram[DAMOS_MAX_SCORE + 1]; + unsigned int min_score; +}; + +/** + * enum damos_wmark_metric - Represents the watermark metric. + * + * @DAMOS_WMARK_NONE: Ignore the watermarks of the given scheme. + * @DAMOS_WMARK_FREE_MEM_RATE: Free memory rate of the system in [0,1000]. + */ +enum damos_wmark_metric { + DAMOS_WMARK_NONE, + DAMOS_WMARK_FREE_MEM_RATE, +}; + +/** + * struct damos_watermarks - Controls when a given scheme should be activated. + * @metric: Metric for the watermarks. + * @interval: Watermarks check time interval in microseconds. + * @high: High watermark. + * @mid: Middle watermark. + * @low: Low watermark. + * + * If &metric is &DAMOS_WMARK_NONE, the scheme is always active. Being active + * means DAMON does monitoring and applying the action of the scheme to + * appropriate memory regions. Else, DAMON checks &metric of the system for at + * least every &interval microseconds and works as below. + * + * If &metric is higher than &high, the scheme is inactivated. If &metric is + * between &mid and &low, the scheme is activated. If &metric is lower than + * &low, the scheme is inactivated. + */ +struct damos_watermarks { + enum damos_wmark_metric metric; + unsigned long interval; + unsigned long high; + unsigned long mid; + unsigned long low; + +/* private: */ + bool activated; +}; + +/** + * struct damos_stat - Statistics on a given scheme. + * @nr_tried: Total number of regions that the scheme is tried to be applied. + * @sz_tried: Total size of regions that the scheme is tried to be applied. + * @nr_applied: Total number of regions that the scheme is applied. + * @sz_applied: Total size of regions that the scheme is applied. + * @qt_exceeds: Total number of times the quota of the scheme has exceeded. + */ +struct damos_stat { + unsigned long nr_tried; + unsigned long sz_tried; + unsigned long nr_applied; + unsigned long sz_applied; + unsigned long qt_exceeds; +}; + +/** + * struct damos - Represents a Data Access Monitoring-based Operation Scheme. + * @min_sz_region: Minimum size of target regions. + * @max_sz_region: Maximum size of target regions. + * @min_nr_accesses: Minimum ``->nr_accesses`` of target regions. + * @max_nr_accesses: Maximum ``->nr_accesses`` of target regions. + * @min_age_region: Minimum age of target regions. + * @max_age_region: Maximum age of target regions. + * @action: &damo_action to be applied to the target regions. + * @quota: Control the aggressiveness of this scheme. + * @wmarks: Watermarks for automated (in)activation of this scheme. + * @stat: Statistics of this scheme. + * @list: List head for siblings. + * + * For each aggregation interval, DAMON finds regions which fit in the + * condition (&min_sz_region, &max_sz_region, &min_nr_accesses, + * &max_nr_accesses, &min_age_region, &max_age_region) and applies &action to + * those. To avoid consuming too much CPU time or IO resources for the + * &action, "a is used. + * + * To do the work only when needed, schemes can be activated for specific + * system situations using &wmarks. If all schemes that registered to the + * monitoring context are inactive, DAMON stops monitoring either, and just + * repeatedly checks the watermarks. + * + * If all schemes that registered to a &struct damon_ctx are inactive, DAMON + * stops monitoring and just repeatedly checks the watermarks. + * + * After applying the &action to each region, &stat_count and &stat_sz is + * updated to reflect the number of regions and total size of regions that the + * &action is applied. + */ +struct damos { + unsigned long min_sz_region; + unsigned long max_sz_region; + unsigned int min_nr_accesses; + unsigned int max_nr_accesses; + unsigned int min_age_region; + unsigned int max_age_region; + enum damos_action action; + struct damos_quota quota; + struct damos_watermarks wmarks; + struct damos_stat stat; + struct list_head list; +}; + +struct damon_ctx; + +/** + * struct damon_primitive - Monitoring primitives for given use cases. + * + * @init: Initialize primitive-internal data structures. + * @update: Update primitive-internal data structures. + * @prepare_access_checks: Prepare next access check of target regions. + * @check_accesses: Check the accesses to target regions. + * @reset_aggregated: Reset aggregated accesses monitoring results. + * @get_scheme_score: Get the score of a region for a scheme. + * @apply_scheme: Apply a DAMON-based operation scheme. + * @target_valid: Determine if the target is valid. + * @cleanup: Clean up the context. + * + * DAMON can be extended for various address spaces and usages. For this, + * users should register the low level primitives for their target address + * space and usecase via the &damon_ctx.primitive. Then, the monitoring thread + * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting + * the monitoring, @update after each &damon_ctx.primitive_update_interval, and + * @check_accesses, @target_valid and @prepare_access_checks after each + * &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each + * &damon_ctx.aggr_interval. + * + * @init should initialize primitive-internal data structures. For example, + * this could be used to construct proper monitoring target regions and link + * those to @damon_ctx.adaptive_targets. + * @update should update the primitive-internal data structures. For example, + * this could be used to update monitoring target regions for current status. + * @prepare_access_checks should manipulate the monitoring regions to be + * prepared for the next access check. + * @check_accesses should check the accesses to each region that made after the + * last preparation and update the number of observed accesses of each region. + * It should also return max number of observed accesses that made as a result + * of its update. The value will be used for regions adjustment threshold. + * @reset_aggregated should reset the access monitoring results that aggregated + * by @check_accesses. + * @get_scheme_score should return the priority score of a region for a scheme + * as an integer in [0, &DAMOS_MAX_SCORE]. + * @apply_scheme is called from @kdamond when a region for user provided + * DAMON-based operation scheme is found. It should apply the scheme's action + * to the region and return bytes of the region that the action is successfully + * applied. + * @target_valid should check whether the target is still valid for the + * monitoring. + * @cleanup is called from @kdamond just before its termination. + */ +struct damon_primitive { + void (*init)(struct damon_ctx *context); + void (*update)(struct damon_ctx *context); + void (*prepare_access_checks)(struct damon_ctx *context); + unsigned int (*check_accesses)(struct damon_ctx *context); + void (*reset_aggregated)(struct damon_ctx *context); + int (*get_scheme_score)(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme); + unsigned long (*apply_scheme)(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme); + bool (*target_valid)(void *target); + void (*cleanup)(struct damon_ctx *context); +}; + +/** + * struct damon_callback - Monitoring events notification callbacks. + * + * @before_start: Called before starting the monitoring. + * @after_sampling: Called after each sampling. + * @after_aggregation: Called after each aggregation. + * @before_terminate: Called before terminating the monitoring. + * @private: User private data. + * + * The monitoring thread (&damon_ctx.kdamond) calls @before_start and + * @before_terminate just before starting and finishing the monitoring, + * respectively. Therefore, those are good places for installing and cleaning + * @private. + * + * The monitoring thread calls @after_sampling and @after_aggregation for each + * of the sampling intervals and aggregation intervals, respectively. + * Therefore, users can safely access the monitoring results without additional + * protection. For the reason, users are recommended to use these callback for + * the accesses to the results. + * + * If any callback returns non-zero, monitoring stops. + */ +struct damon_callback { + void *private; + + int (*before_start)(struct damon_ctx *context); + int (*after_sampling)(struct damon_ctx *context); + int (*after_aggregation)(struct damon_ctx *context); + void (*before_terminate)(struct damon_ctx *context); +}; + +/** + * struct damon_ctx - Represents a context for each monitoring. This is the + * main interface that allows users to set the attributes and get the results + * of the monitoring. + * + * @sample_interval: The time between access samplings. + * @aggr_interval: The time between monitor results aggregations. + * @primitive_update_interval: The time between monitoring primitive updates. + * + * For each @sample_interval, DAMON checks whether each region is accessed or + * not. It aggregates and keeps the access information (number of accesses to + * each region) for @aggr_interval time. DAMON also checks whether the target + * memory regions need update (e.g., by ``mmap()`` calls from the application, + * in case of virtual memory monitoring) and applies the changes for each + * @primitive_update_interval. All time intervals are in micro-seconds. + * Please refer to &struct damon_primitive and &struct damon_callback for more + * detail. + * + * @kdamond: Kernel thread who does the monitoring. + * @kdamond_stop: Notifies whether kdamond should stop. + * @kdamond_lock: Mutex for the synchronizations with @kdamond. + * + * For each monitoring context, one kernel thread for the monitoring is + * created. The pointer to the thread is stored in @kdamond. + * + * Once started, the monitoring thread runs until explicitly required to be + * terminated or every monitoring target is invalid. The validity of the + * targets is checked via the &damon_primitive.target_valid of @primitive. The + * termination can also be explicitly requested by writing non-zero to + * @kdamond_stop. The thread sets @kdamond to NULL when it terminates. + * Therefore, users can know whether the monitoring is ongoing or terminated by + * reading @kdamond. Reads and writes to @kdamond and @kdamond_stop from + * outside of the monitoring thread must be protected by @kdamond_lock. + * + * Note that the monitoring thread protects only @kdamond and @kdamond_stop via + * @kdamond_lock. Accesses to other fields must be protected by themselves. + * + * @primitive: Set of monitoring primitives for given use cases. + * @callback: Set of callbacks for monitoring events notifications. + * + * @min_nr_regions: The minimum number of adaptive monitoring regions. + * @max_nr_regions: The maximum number of adaptive monitoring regions. + * @adaptive_targets: Head of monitoring targets (&damon_target) list. + * @schemes: Head of schemes (&damos) list. + */ +struct damon_ctx { + unsigned long sample_interval; + unsigned long aggr_interval; + unsigned long primitive_update_interval; + +/* private: internal use only */ + struct timespec64 last_aggregation; + struct timespec64 last_primitive_update; + +/* public: */ + struct task_struct *kdamond; + struct mutex kdamond_lock; + + struct damon_primitive primitive; + struct damon_callback callback; + + unsigned long min_nr_regions; + unsigned long max_nr_regions; + struct list_head adaptive_targets; + struct list_head schemes; +}; + +static inline struct damon_region *damon_next_region(struct damon_region *r) +{ + return container_of(r->list.next, struct damon_region, list); +} + +static inline struct damon_region *damon_prev_region(struct damon_region *r) +{ + return container_of(r->list.prev, struct damon_region, list); +} + +static inline struct damon_region *damon_last_region(struct damon_target *t) +{ + return list_last_entry(&t->regions_list, struct damon_region, list); +} + +#define damon_for_each_region(r, t) \ + list_for_each_entry(r, &t->regions_list, list) + +#define damon_for_each_region_safe(r, next, t) \ + list_for_each_entry_safe(r, next, &t->regions_list, list) + +#define damon_for_each_target(t, ctx) \ + list_for_each_entry(t, &(ctx)->adaptive_targets, list) + +#define damon_for_each_target_safe(t, next, ctx) \ + list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list) + +#define damon_for_each_scheme(s, ctx) \ + list_for_each_entry(s, &(ctx)->schemes, list) + +#define damon_for_each_scheme_safe(s, next, ctx) \ + list_for_each_entry_safe(s, next, &(ctx)->schemes, list) + +#ifdef CONFIG_DAMON + +struct damon_region *damon_new_region(unsigned long start, unsigned long end); + +/* + * Add a region between two other regions + */ +static inline void damon_insert_region(struct damon_region *r, + struct damon_region *prev, struct damon_region *next, + struct damon_target *t) +{ + __list_add(&r->list, &prev->list, &next->list); + t->nr_regions++; +} + +void damon_add_region(struct damon_region *r, struct damon_target *t); +void damon_destroy_region(struct damon_region *r, struct damon_target *t); + +struct damos *damon_new_scheme( + unsigned long min_sz_region, unsigned long max_sz_region, + unsigned int min_nr_accesses, unsigned int max_nr_accesses, + unsigned int min_age_region, unsigned int max_age_region, + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks); +void damon_add_scheme(struct damon_ctx *ctx, struct damos *s); +void damon_destroy_scheme(struct damos *s); + +struct damon_target *damon_new_target(unsigned long id); +void damon_add_target(struct damon_ctx *ctx, struct damon_target *t); +bool damon_targets_empty(struct damon_ctx *ctx); +void damon_free_target(struct damon_target *t); +void damon_destroy_target(struct damon_target *t); +unsigned int damon_nr_regions(struct damon_target *t); + +struct damon_ctx *damon_new_ctx(void); +void damon_destroy_ctx(struct damon_ctx *ctx); +int damon_set_targets(struct damon_ctx *ctx, + unsigned long *ids, ssize_t nr_ids); +int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, + unsigned long aggr_int, unsigned long primitive_upd_int, + unsigned long min_nr_reg, unsigned long max_nr_reg); +int damon_set_schemes(struct damon_ctx *ctx, + struct damos **schemes, ssize_t nr_schemes); +int damon_nr_running_ctxs(void); + +int damon_start(struct damon_ctx **ctxs, int nr_ctxs); +int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); + +#endif /* CONFIG_DAMON */ + +#ifdef CONFIG_DAMON_VADDR +bool damon_va_target_valid(void *t); +void damon_va_set_primitives(struct damon_ctx *ctx); +#endif /* CONFIG_DAMON_VADDR */ + +#ifdef CONFIG_DAMON_PADDR +bool damon_pa_target_valid(void *t); +void damon_pa_set_primitives(struct damon_ctx *ctx); +#endif /* CONFIG_DAMON_PADDR */ + +#endif /* _DAMON_H */ diff --git a/include/linux/delay.h b/include/linux/delay.h index 1d0e2ce6b6d9f1ff4fa9461cb2b00d40d92fd7c2..abecbccab6e444abd0281e5c21a1f744a2059cb7 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -20,6 +20,7 @@ */ #include +#include extern unsigned long loops_per_jiffy; @@ -58,8 +59,15 @@ void calibrate_delay(void); void __attribute__((weak)) calibration_delay_done(void); void msleep(unsigned int msecs); unsigned long msleep_interruptible(unsigned int msecs); +void usleep_range_state(unsigned long min, unsigned long max, + unsigned int state); void usleep_range(unsigned long min, unsigned long max); +static inline void usleep_idle_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_IDLE); +} + static inline void ssleep(unsigned int seconds) { msleep(seconds * 1000); diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 18aade195884d7b3e3899dc89175e300f58434e0..80f0dbac77c450041596d8181ebad2c743b5189e 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -24,6 +24,33 @@ struct bus_dma_region { u64 offset; }; +static inline bool zone_dma32_is_empty(int node) +{ +#ifdef CONFIG_ZONE_DMA32 + pg_data_t *pgdat = NODE_DATA(node); + + return zone_is_empty(&pgdat->node_zones[ZONE_DMA32]); +#else + return true; +#endif +} + +static inline bool zone_dma32_are_empty(void) +{ +#ifdef CONFIG_NUMA + int node; + + for_each_node(node) + if (!zone_dma32_is_empty(node)) + return false; +#else + if (!zone_dma32_is_empty(numa_node_id())) + return false; +#endif + + return true; +} + static inline dma_addr_t translate_phys_to_dma(struct device *dev, phys_addr_t paddr) { diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index de51c1bef27daa80df2c730e0d5ea0df27ebe048..40e93bfa559d13ff6e748e01f9f94e3525ccba68 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -104,7 +104,7 @@ static inline int elf_core_copy_task_fpregs(struct task_struct *t, struct pt_reg #endif } -#if defined(CONFIG_UM) || defined(CONFIG_IA64) +#if (defined(CONFIG_UML) && defined(CONFIG_X86_32)) || defined(CONFIG_IA64) /* * These functions parameterize elf_core_dump in fs/binfmt_elf.c to write out * extra segments containing the gate DSO contents. Dumping its diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h index 1e7bf78cb3829cc4059862a1256746d562492116..aba348d58ff61c5265b76c3e51f83417582e519e 100644 --- a/include/linux/ethtool_netlink.h +++ b/include/linux/ethtool_netlink.h @@ -10,6 +10,9 @@ #define __ETHTOOL_LINK_MODE_MASK_NWORDS \ DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32) +#define ETHTOOL_PAUSE_STAT_CNT (__ETHTOOL_A_PAUSE_STAT_CNT - \ + ETHTOOL_A_PAUSE_STAT_TX_FRAMES) + enum ethtool_multicast_groups { ETHNL_MCGRP_MONITOR, }; diff --git a/include/linux/export.h b/include/linux/export.h index 8889809924156b4952ac7e1217722d40d0a253d6..3b25f680180fa18277cba0b9111238c7b6dc6add 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -190,8 +190,10 @@ struct kernel_symbol { #define EXPORT_SYMBOL(sym) _EXPORT_SYMBOL(sym, "") #define EXPORT_SYMBOL_GPL(sym) _EXPORT_SYMBOL(sym, "_gpl") #define EXPORT_SYMBOL_GPL_FUTURE(sym) _EXPORT_SYMBOL(sym, "_gpl_future") -#define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", #ns) -#define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", #ns) +#define _EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", #ns) +#define _EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", #ns) +#define EXPORT_SYMBOL_NS(sym, ns) _EXPORT_SYMBOL_NS(sym, ns) +#define EXPORT_SYMBOL_NS_GPL(sym, ns) _EXPORT_SYMBOL_NS_GPL(sym, ns) #ifdef CONFIG_UNUSED_SYMBOLS #define EXPORT_UNUSED_SYMBOL(sym) _EXPORT_SYMBOL(sym, "_unused") diff --git a/include/linux/filter.h b/include/linux/filter.h index 822b701c803d1118554ec7e7509dc49089b56479..bc6ce4b202a809820f5446c6b92bd6a4042b2fbb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -998,6 +998,7 @@ extern int bpf_jit_enable; extern int bpf_jit_harden; extern int bpf_jit_kallsyms; extern long bpf_jit_limit; +extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); diff --git a/include/linux/fs.h b/include/linux/fs.h index b3b3d95eac2fba457b5c51991c2ac1f17bc9a15d..4019e6fa3b95bd18ca5147bdaa5af414227e4cfe 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2263,6 +2263,8 @@ enum file_time_flags { extern bool atime_needs_update(const struct path *, struct inode *); extern void touch_atime(const struct path *); +int inode_update_time(struct inode *inode, struct timespec64 *time, int flags); + static inline void file_accessed(struct file *file) { if (!(file->f_flags & O_NOATIME)) @@ -3285,7 +3287,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma) { struct inode *inode; - if (!vma->vm_file) + if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file) return false; if (!vma_is_dax(vma)) return false; diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h index 5b44b0195a28a07a0990748aedd591b2fb86e352..e869ce3ae6600fd95caac84c662da9472b9edc9f 100644 --- a/include/linux/fs_context.h +++ b/include/linux/fs_context.h @@ -140,6 +140,8 @@ extern int generic_parse_monolithic(struct fs_context *fc, void *data); extern int vfs_get_tree(struct fs_context *fc); extern void put_fs_context(struct fs_context *fc); extern void fc_drop_locked(struct fs_context *fc); +int reconfigure_single(struct super_block *s, + int flags, void *data); /* * sget() wrappers to be called from the ->get_tree() op. diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 219f28bf97ca2da1de31319835e8f73c04352e80..e05cb512897a2a27392138bb9806d50eb3a27a4e 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -47,27 +47,125 @@ struct fscrypt_name { #define FSCRYPT_SET_CONTEXT_MAX_SIZE 40 #ifdef CONFIG_FS_ENCRYPTION + /* - * fscrypt superblock flags + * If set, the fscrypt bounce page pool won't be allocated (unless another + * filesystem needs it). Set this if the filesystem always uses its own bounce + * pages for writes and therefore won't need the fscrypt bounce page pool. */ #define FS_CFLG_OWN_PAGES (1U << 1) -/* - * crypto operations for filesystems - */ +/* Crypto operations for filesystems */ struct fscrypt_operations { + + /* Set of optional flags; see above for allowed flags */ unsigned int flags; + + /* + * If set, this is a filesystem-specific key description prefix that + * will be accepted for "logon" keys for v1 fscrypt policies, in + * addition to the generic prefix "fscrypt:". This functionality is + * deprecated, so new filesystems shouldn't set this field. + */ const char *key_prefix; + + /* + * Get the fscrypt context of the given inode. + * + * @inode: the inode whose context to get + * @ctx: the buffer into which to get the context + * @len: length of the @ctx buffer in bytes + * + * Return: On success, returns the length of the context in bytes; this + * may be less than @len. On failure, returns -ENODATA if the + * inode doesn't have a context, -ERANGE if the context is + * longer than @len, or another -errno code. + */ int (*get_context)(struct inode *inode, void *ctx, size_t len); + + /* + * Set an fscrypt context on the given inode. + * + * @inode: the inode whose context to set. The inode won't already have + * an fscrypt context. + * @ctx: the context to set + * @len: length of @ctx in bytes (at most FSCRYPT_SET_CONTEXT_MAX_SIZE) + * @fs_data: If called from fscrypt_set_context(), this will be the + * value the filesystem passed to fscrypt_set_context(). + * Otherwise (i.e. when called from + * FS_IOC_SET_ENCRYPTION_POLICY) this will be NULL. + * + * i_rwsem will be held for write. + * + * Return: 0 on success, -errno on failure. + */ int (*set_context)(struct inode *inode, const void *ctx, size_t len, void *fs_data); + + /* + * Get the dummy fscrypt policy in use on the filesystem (if any). + * + * Filesystems only need to implement this function if they support the + * test_dummy_encryption mount option. + * + * Return: A pointer to the dummy fscrypt policy, if the filesystem is + * mounted with test_dummy_encryption; otherwise NULL. + */ const union fscrypt_policy *(*get_dummy_policy)(struct super_block *sb); + + /* + * Check whether a directory is empty. i_rwsem will be held for write. + */ bool (*empty_dir)(struct inode *inode); - unsigned int max_namelen; + + /* + * Check whether the filesystem's inode numbers and UUID are stable, + * meaning that they will never be changed even by offline operations + * such as filesystem shrinking and therefore can be used in the + * encryption without the possibility of files becoming unreadable. + * + * Filesystems only need to implement this function if they want to + * support the FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags. These + * flags are designed to work around the limitations of UFS and eMMC + * inline crypto hardware, and they shouldn't be used in scenarios where + * such hardware isn't being used. + * + * Leaving this NULL is equivalent to always returning false. + */ bool (*has_stable_inodes)(struct super_block *sb); + + /* + * Get the number of bits that the filesystem uses to represent inode + * numbers and file logical block numbers. + * + * By default, both of these are assumed to be 64-bit. This function + * can be implemented to declare that either or both of these numbers is + * shorter, which may allow the use of the + * FSCRYPT_POLICY_FLAG_IV_INO_LBLK_{32,64} flags and/or the use of + * inline crypto hardware whose maximum DUN length is less than 64 bits + * (e.g., eMMC v5.2 spec compliant hardware). This function only needs + * to be implemented if support for one of these features is needed. + */ void (*get_ino_and_lblk_bits)(struct super_block *sb, int *ino_bits_ret, int *lblk_bits_ret); + + /* + * Return the number of block devices to which the filesystem may write + * encrypted file contents. + * + * If the filesystem can use multiple block devices (other than block + * devices that aren't used for encrypted file contents, such as + * external journal devices), and wants to support inline encryption, + * then it must implement this function. Otherwise it's not needed. + */ int (*get_num_devices)(struct super_block *sb); + + /* + * If ->get_num_devices() returns a value greater than 1, then this + * function is called to get the array of request_queues that the + * filesystem is using -- one per block device. (There may be duplicate + * entries in this array, as block devices can share a request_queue.) + */ void (*get_devices)(struct super_block *sb, struct request_queue **devs); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index f8acddcf54fb4ce2e7b03dda41f52dd936b591e8..37eba54836d1bab45a12a2170ad238dd37206e54 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -87,6 +87,25 @@ static inline int fsnotify_file(struct file *file, __u32 mask) if (file->f_mode & FMODE_NONOTIFY) return 0; + /* + * Open calls notify early on, so lower file system must be notified + */ + if (mask & FS_OPEN) { + if (path->dentry->d_op && + path->dentry->d_op->d_canonical_path) { + struct path lower_path; + int ret; + + path->dentry->d_op->d_canonical_path(path, &lower_path); + ret = fsnotify_parent(lower_path.dentry, mask, + &lower_path, FSNOTIFY_EVENT_PATH); + path_put(&lower_path); + + if (ret) + return ret; + } + } + return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); } @@ -203,6 +222,42 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, fsnotify_name(dir, FS_CREATE, inode, &new_dentry->d_name, 0); } +/* + * fsnotify_delete - @dentry was unlinked and unhashed + * + * Caller must make sure that dentry->d_name is stable. + * + * Note: unlike fsnotify_unlink(), we have to pass also the unlinked inode + * as this may be called after d_delete() and old_dentry may be negative. + */ +static inline void fsnotify_delete(struct inode *dir, struct inode *inode, + struct dentry *dentry) +{ + __u32 mask = FS_DELETE; + + if (S_ISDIR(inode->i_mode)) + mask |= FS_ISDIR; + + fsnotify_name(dir, mask, inode, &dentry->d_name, 0); +} + +/** + * d_delete_notify - delete a dentry and call fsnotify_delete() + * @dentry: The dentry to delete + * + * This helper is used to guaranty that the unlinked inode cannot be found + * by lookup of this name after fsnotify_delete() event has been delivered. + */ +static inline void d_delete_notify(struct inode *dir, struct dentry *dentry) +{ + struct inode *inode = d_inode(dentry); + + ihold(inode); + d_delete(dentry); + fsnotify_delete(dir, inode, dentry); + iput(inode); +} + /* * fsnotify_unlink - 'name' was unlinked * @@ -210,10 +265,10 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, */ static inline void fsnotify_unlink(struct inode *dir, struct dentry *dentry) { - /* Expected to be called before d_delete() */ - WARN_ON_ONCE(d_is_negative(dentry)); + if (WARN_ON_ONCE(d_is_negative(dentry))) + return; - fsnotify_dirent(dir, dentry, FS_DELETE); + fsnotify_delete(dir, d_inode(dentry), dentry); } /* @@ -233,10 +288,10 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) */ static inline void fsnotify_rmdir(struct inode *dir, struct dentry *dentry) { - /* Expected to be called before d_delete() */ - WARN_ON_ONCE(d_is_negative(dentry)); + if (WARN_ON_ONCE(d_is_negative(dentry))) + return; - fsnotify_dirent(dir, dentry, FS_DELETE | FS_ISDIR); + fsnotify_delete(dir, d_inode(dentry), dentry); } /* diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 8392b8ce70dd1a82345f43bb4eafc29cad2013eb..57c1c7918702c101e4eb8b5b2f24369d3ce67b15 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/include/linux/gfp.h b/include/linux/gfp.h index fbed5dd273db5c1e116eb81722e0a8705723e49e..ae480a3d5307c8d6f9dec1bcffdbc68da515b687 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -40,17 +40,25 @@ struct vm_area_struct; #define ___GFP_THISNODE 0x200000u #define ___GFP_ACCOUNT 0x400000u #define ___GFP_ZEROTAGS 0x800000u -#define ___GFP_SKIP_KASAN_POISON 0x1000000u +#ifdef CONFIG_KASAN_HW_TAGS +#define ___GFP_SKIP_ZERO 0x1000000u +#define ___GFP_SKIP_KASAN_UNPOISON 0x2000000u +#define ___GFP_SKIP_KASAN_POISON 0x4000000u +#else +#define ___GFP_SKIP_ZERO 0 +#define ___GFP_SKIP_KASAN_UNPOISON 0 +#define ___GFP_SKIP_KASAN_POISON 0 +#endif #ifdef CONFIG_CMA -#define ___GFP_CMA 0x2000000u +#define ___GFP_CMA 0x8000000u #else #define ___GFP_CMA 0 #endif #ifdef CONFIG_LOCKDEP #ifdef CONFIG_CMA -#define ___GFP_NOLOCKDEP 0x4000000u +#define ___GFP_NOLOCKDEP 0x10000000u #else -#define ___GFP_NOLOCKDEP 0x2000000u +#define ___GFP_NOLOCKDEP 0x8000000u #endif #else #define ___GFP_NOLOCKDEP 0 @@ -229,27 +237,34 @@ struct vm_area_struct; * * %__GFP_ZERO returns a zeroed page on success. * - * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if - * __GFP_ZERO is set. + * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself + * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that + * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting + * memory tags at the same time as zeroing memory has minimal additional + * performace impact. + * + * %__GFP_SKIP_KASAN_UNPOISON makes KASAN skip unpoisoning on page allocation. + * Only effective in HW_TAGS mode. * - * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned - * on deallocation. Typically used for userspace pages. Currently only has an - * effect in HW tags mode. + * %__GFP_SKIP_KASAN_POISON makes KASAN skip poisoning on page deallocation. + * Typically, used for userspace pages. Only effective in HW_TAGS mode. */ #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS) -#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) +#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO) +#define __GFP_SKIP_KASAN_UNPOISON ((__force gfp_t)___GFP_SKIP_KASAN_UNPOISON) +#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON) /* Disable lockdep for GFP context tracking */ #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) /* Room for N __GFP_FOO bits */ #ifdef CONFIG_CMA -#define __GFP_BITS_SHIFT (26 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT (28 + IS_ENABLED(CONFIG_LOCKDEP)) #else -#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) +#define __GFP_BITS_SHIFT (27 + IS_ENABLED(CONFIG_LOCKDEP)) #endif #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) diff --git a/include/linux/hid.h b/include/linux/hid.h index f433bab58da8a9af779add310ebc478cfa7606da..a47003f79b34e392f83471a595b9a48e0dbe60c6 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -346,6 +346,8 @@ struct hid_item { /* BIT(9) reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ #define HID_QUIRK_ALWAYS_POLL BIT(10) #define HID_QUIRK_INPUT_PER_APP BIT(11) +#define HID_QUIRK_X_INVERT BIT(12) +#define HID_QUIRK_Y_INVERT BIT(13) #define HID_QUIRK_SKIP_OUTPUT_REPORTS BIT(16) #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID BIT(17) #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP BIT(18) @@ -837,6 +839,11 @@ static inline bool hid_is_using_ll_driver(struct hid_device *hdev, return hdev->ll_driver == driver; } +static inline bool hid_is_usb(struct hid_device *hdev) +{ + return hid_is_using_ll_driver(hdev, &usb_hid_driver); +} + #define PM_HINT_FULLON 1<<5 #define PM_HINT_NORMAL 1<<1 diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 386ddf43e2f2b33d831cf7e6e64fcb88730ec793..6ff7d1731037ff2fe081753a3093a881350014a3 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -61,6 +61,9 @@ * interrupt handler after suspending interrupts. For system * wakeup devices users need to implement wakeup detection in * their interrupt handlers. + * IRQF_NO_AUTOEN - Don't enable IRQ or NMI automatically when users request it. + * Users will enable it explicitly by enable_irq() or enable_nmi() + * later. */ #define IRQF_SHARED 0x00000080 #define IRQF_PROBE_SHARED 0x00000100 @@ -74,6 +77,7 @@ #define IRQF_NO_THREAD 0x00010000 #define IRQF_EARLY_RESUME 0x00020000 #define IRQF_COND_SUSPEND 0x00040000 +#define IRQF_NO_AUTOEN 0x00080000 #define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND | IRQF_NO_THREAD) diff --git a/include/linux/io.h b/include/linux/io.h index 8394c56babc26fa1a4d99cd75126cf6ca2a26b42..416b936b19bcbb5826bacaa37173732e1f60f4ee 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -21,6 +21,8 @@ void __ioread32_copy(void *to, const void __iomem *from, size_t count); void __iowrite64_copy(void __iomem *to, const void *from, size_t count); #ifdef CONFIG_MMU +void ioremap_phys_range_hook(phys_addr_t phys_addr, size_t size, pgprot_t prot); +void iounmap_phys_range_hook(phys_addr_t phys_addr, size_t size); int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot); #else diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 257ffd4d921fb0e5d882d80bc7737683d4d8819a..baf831d05db1a10c3c38ed83bb8419723a43541f 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -99,6 +99,24 @@ iomap_sector(struct iomap *iomap, loff_t pos) return (iomap->addr + pos - iomap->offset) >> SECTOR_SHIFT; } +/* + * Returns the inline data pointer for logical offset @pos. + */ +static inline void *iomap_inline_data(struct iomap *iomap, loff_t pos) +{ + return iomap->inline_data + pos - iomap->offset; +} + +/* + * Check if the mapping's length is within the valid range for inline data. + * This is used to guard against accessing data beyond the page inline_data + * points at. + */ +static inline bool iomap_inline_data_valid(struct iomap *iomap) +{ + return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data); +} + /* * When a filesystem sets page_ops in an iomap mapping it returns, page_prepare * and page_done will be called for each page written to. This only applies to diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index a06a78c67f19f8246b9769a87d2a3cc1ede2ff28..08325105131a227e3bbe805e05c145abf3956433 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -132,6 +132,16 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + if (ns) { + if (refcount_inc_not_zero(&ns->count)) + return ns; + } + + return NULL; +} + extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, @@ -148,6 +158,11 @@ static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) return ns; } +static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) +{ + return ns; +} + static inline void put_ipc_ns(struct ipc_namespace *ns) { } diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 43348c487c8e453d18d35d5faaf7e0d1d77b8d74..ef67c56081a4a88190af4b3b73343a63878d9bbc 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -575,67 +575,11 @@ #define ICC_SRE_EL1_DFB (1U << 1) #define ICC_SRE_EL1_SRE (1U << 0) -/* - * Hypervisor interface registers (SRE only) - */ -#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) - -#define ICH_LR_EOI (1ULL << 41) -#define ICH_LR_GROUP (1ULL << 60) -#define ICH_LR_HW (1ULL << 61) -#define ICH_LR_STATE (3ULL << 62) -#define ICH_LR_PENDING_BIT (1ULL << 62) -#define ICH_LR_ACTIVE_BIT (1ULL << 63) -#define ICH_LR_PHYS_ID_SHIFT 32 -#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) -#define ICH_LR_PRIORITY_SHIFT 48 -#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) - /* These are for GICv2 emulation only */ #define GICH_LR_VIRTUALID (0x3ffUL << 0) #define GICH_LR_PHYSID_CPUID_SHIFT (10) #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) -#define ICH_MISR_EOI (1 << 0) -#define ICH_MISR_U (1 << 1) - -#define ICH_HCR_EN (1 << 0) -#define ICH_HCR_UIE (1 << 1) -#define ICH_HCR_NPIE (1 << 3) -#define ICH_HCR_TC (1 << 10) -#define ICH_HCR_TALL0 (1 << 11) -#define ICH_HCR_TALL1 (1 << 12) -#define ICH_HCR_EOIcount_SHIFT 27 -#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) - -#define ICH_VMCR_ACK_CTL_SHIFT 2 -#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) -#define ICH_VMCR_FIQ_EN_SHIFT 3 -#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) -#define ICH_VMCR_CBPR_SHIFT 4 -#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) -#define ICH_VMCR_EOIM_SHIFT 9 -#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) -#define ICH_VMCR_BPR1_SHIFT 18 -#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) -#define ICH_VMCR_BPR0_SHIFT 21 -#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) -#define ICH_VMCR_PMR_SHIFT 24 -#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) -#define ICH_VMCR_ENG0_SHIFT 0 -#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) -#define ICH_VMCR_ENG1_SHIFT 1 -#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) - -#define ICH_VTR_PRI_BITS_SHIFT 29 -#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) -#define ICH_VTR_ID_BITS_SHIFT 23 -#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) -#define ICH_VTR_SEIS_SHIFT 22 -#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) -#define ICH_VTR_A3V_SHIFT 21 -#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) - #define ICC_IAR1_EL1_SPURIOUS 0x3ff #define ICC_SRE_EL2_SRE (1 << 0) diff --git a/include/linux/kasan-tags.h b/include/linux/kasan-tags.h new file mode 100644 index 0000000000000000000000000000000000000000..4f85f562512cb5f496a5949f3c1842fdb0b76a1c --- /dev/null +++ b/include/linux/kasan-tags.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_KASAN_TAGS_H +#define _LINUX_KASAN_TAGS_H + +#define KASAN_TAG_KERNEL 0xFF /* native kernel pointers tag */ +#define KASAN_TAG_INVALID 0xFE /* inaccessible memory tag */ +#define KASAN_TAG_MAX 0xFD /* maximum value for random tags */ + +#ifdef CONFIG_KASAN_HW_TAGS +#define KASAN_TAG_MIN 0xF0 /* minimum value for random tags */ +#else +#define KASAN_TAG_MIN 0x00 /* minimum value for random tags */ +#endif + +#endif /* LINUX_KASAN_TAGS_H */ diff --git a/include/linux/kasan.h b/include/linux/kasan.h index a1c7ce5f3e4f2364eeb79b231a75055a7e3a7aa1..90c54edfe5565677bf020de5cf65643127cdf733 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -3,6 +3,7 @@ #define _LINUX_KASAN_H #include +#include #include #include @@ -16,14 +17,15 @@ struct task_struct; #include #include -/* kasan_data struct is used in KUnit tests for KASAN expected failures */ -struct kunit_kasan_expectation { - bool report_expected; - bool report_found; -}; - #endif +typedef unsigned int __bitwise kasan_vmalloc_flags_t; + +#define KASAN_VMALLOC_NONE ((__force kasan_vmalloc_flags_t)0x00u) +#define KASAN_VMALLOC_INIT ((__force kasan_vmalloc_flags_t)0x01u) +#define KASAN_VMALLOC_VM_ALLOC ((__force kasan_vmalloc_flags_t)0x02u) +#define KASAN_VMALLOC_PROT_NORMAL ((__force kasan_vmalloc_flags_t)0x04u) + #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #include @@ -89,14 +91,11 @@ static __always_inline bool kasan_enabled(void) return static_branch_likely(&kasan_flag_enabled); } -static inline bool kasan_has_integrated_init(void) +static inline bool kasan_hw_tags_enabled(void) { return kasan_enabled(); } -void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags); -void kasan_free_pages(struct page *page, unsigned int order); - #else /* CONFIG_KASAN_HW_TAGS */ static inline bool kasan_enabled(void) @@ -104,27 +103,18 @@ static inline bool kasan_enabled(void) return IS_ENABLED(CONFIG_KASAN); } -static inline bool kasan_has_integrated_init(void) +static inline bool kasan_hw_tags_enabled(void) { return false; } -static __always_inline void kasan_alloc_pages(struct page *page, - unsigned int order, gfp_t flags) -{ - /* Only available for integrated init. */ - BUILD_BUG(); -} +#endif /* CONFIG_KASAN_HW_TAGS */ -static __always_inline void kasan_free_pages(struct page *page, - unsigned int order) +static inline bool kasan_has_integrated_init(void) { - /* Only available for integrated init. */ - BUILD_BUG(); + return kasan_hw_tags_enabled(); } -#endif /* CONFIG_KASAN_HW_TAGS */ - #ifdef CONFIG_KASAN struct kasan_cache { @@ -370,12 +360,14 @@ static inline void kasan_unpoison_task_stack(struct task_struct *task) {} void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); void kasan_record_aux_stack(void *ptr); +void kasan_record_aux_stack_noalloc(void *ptr); #else /* CONFIG_KASAN_GENERIC */ static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} static inline void kasan_record_aux_stack(void *ptr) {} +static inline void kasan_record_aux_stack_noalloc(void *ptr) {} #endif /* CONFIG_KASAN_GENERIC */ @@ -427,29 +419,66 @@ static inline void kasan_init_hw_tags(void) { } #ifdef CONFIG_KASAN_VMALLOC +#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) + int kasan_populate_vmalloc(unsigned long addr, unsigned long size); -void kasan_poison_vmalloc(const void *start, unsigned long size); -void kasan_unpoison_vmalloc(const void *start, unsigned long size); void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, unsigned long free_region_end); -#else /* CONFIG_KASAN_VMALLOC */ +#else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ static inline int kasan_populate_vmalloc(unsigned long start, unsigned long size) { return 0; } +static inline void kasan_release_vmalloc(unsigned long start, + unsigned long end, + unsigned long free_region_start, + unsigned long free_region_end) { } -static inline void kasan_poison_vmalloc(const void *start, unsigned long size) -{ } -static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size) -{ } +#endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ + +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, + kasan_vmalloc_flags_t flags); +static __always_inline void *kasan_unpoison_vmalloc(const void *start, + unsigned long size, + kasan_vmalloc_flags_t flags) +{ + if (kasan_enabled()) + return __kasan_unpoison_vmalloc(start, size, flags); + return (void *)start; +} + +void __kasan_poison_vmalloc(const void *start, unsigned long size); +static __always_inline void kasan_poison_vmalloc(const void *start, + unsigned long size) +{ + if (kasan_enabled()) + __kasan_poison_vmalloc(start, size); +} + +#else /* CONFIG_KASAN_VMALLOC */ + +static inline int kasan_populate_vmalloc(unsigned long start, + unsigned long size) +{ + return 0; +} static inline void kasan_release_vmalloc(unsigned long start, unsigned long end, unsigned long free_region_start, - unsigned long free_region_end) {} + unsigned long free_region_end) { } + +static inline void *kasan_unpoison_vmalloc(const void *start, + unsigned long size, + kasan_vmalloc_flags_t flags) +{ + return (void *)start; +} +static inline void kasan_poison_vmalloc(const void *start, unsigned long size) +{ } #endif /* CONFIG_KASAN_VMALLOC */ @@ -457,17 +486,17 @@ static inline void kasan_release_vmalloc(unsigned long start, !defined(CONFIG_KASAN_VMALLOC) /* - * These functions provide a special case to support backing module - * allocations with real shadow memory. With KASAN vmalloc, the special - * case is unnecessary, as the work is handled in the generic case. + * These functions allocate and free shadow memory for kernel modules. + * They are only required when KASAN_VMALLOC is not supported, as otherwise + * shadow memory is allocated by the generic vmalloc handlers. */ -int kasan_module_alloc(void *addr, size_t size); -void kasan_free_shadow(const struct vm_struct *vm); +int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask); +void kasan_free_module_shadow(const struct vm_struct *vm); #else /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ -static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } -static inline void kasan_free_shadow(const struct vm_struct *vm) {} +static inline int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask) { return 0; } +static inline void kasan_free_module_shadow(const struct vm_struct *vm) {} #endif /* (CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS) && !CONFIG_KASAN_VMALLOC */ diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index 89f0745c096d4b090cec7e21df46e9d435ad5a78..8fff3500d50eea76e543e44b8ffda3f4ae704004 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -103,6 +103,7 @@ extern void account_system_index_time(struct task_struct *, u64, enum cpu_usage_stat); extern void account_steal_time(u64); extern void account_idle_time(u64); +extern u64 get_idle_time(struct kernel_cpustat *kcs, int cpu); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline void account_process_tick(struct task_struct *tsk, int user) diff --git a/include/linux/kfence.h b/include/linux/kfence.h index a70d1ea0353252db33a75a800d6fd4c81e637602..3fe6dd8a18c19607ad1116f3977929d7c86d856e 100644 --- a/include/linux/kfence.h +++ b/include/linux/kfence.h @@ -51,10 +51,11 @@ extern atomic_t kfence_allocation_gate; static __always_inline bool is_kfence_address(const void *addr) { /* - * The non-NULL check is required in case the __kfence_pool pointer was - * never initialized; keep it in the slow-path after the range-check. + * The __kfence_pool != NULL check is required to deal with the case + * where __kfence_pool == NULL && addr < KFENCE_POOL_SIZE. Keep it in + * the slow-path after the range-check! */ - return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr); + return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && __kfence_pool); } /** diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 21f21f7f878ce9f3bfaf7b130edc546cba4e0467..4dbebd319b6f5d3117449ba5e3087b19b7cc22a1 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -155,6 +155,8 @@ struct kretprobe { raw_spinlock_t lock; }; +#define KRETPROBE_MAX_DATA_SIZE 4096 + struct kretprobe_instance { union { struct hlist_node hlist; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 594fa8c9988e5b242908a4387ca5ce3025c6a6e4..8350aad4577758a1b2f52a13027dccc7a2e49d4c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1024,7 +1024,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu); bool kvm_is_reserved_pfn(kvm_pfn_t pfn); bool kvm_is_zone_device_pfn(kvm_pfn_t pfn); -bool kvm_is_transparent_hugepage(kvm_pfn_t pfn); struct kvm_irq_ack_notifier { struct hlist_node link; diff --git a/include/linux/libata.h b/include/linux/libata.h index 5f550eb27f811ad7b73c72d9a84daae3c730c900..d3600fc7f7c4de89e5b7265142a4957d51785bbd 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -390,7 +390,7 @@ enum { /* This should match the actual table size of * ata_eh_cmd_timeout_table in libata-eh.c. */ - ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 6, + ATA_EH_CMD_TIMEOUT_TABLE_SIZE = 7, /* Horkage types. May be set by libata or controller on drives (some horkage may be drive/controller pair dependent */ @@ -422,6 +422,7 @@ enum { ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ + ATA_HORKAGE_NO_NCQ_ON_ATI = (1 << 27), /* Disable NCQ on ATI chipset */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 435a2e22ff95a28e826bb6109729c92c008561f9..37dc5594081d1f93ebd26084a31918acd39eb0db 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -26,13 +26,13 @@ * #undef LSM_HOOK * }; */ -LSM_HOOK(int, 0, binder_set_context_mgr, struct task_struct *mgr) -LSM_HOOK(int, 0, binder_transaction, struct task_struct *from, - struct task_struct *to) -LSM_HOOK(int, 0, binder_transfer_binder, struct task_struct *from, - struct task_struct *to) -LSM_HOOK(int, 0, binder_transfer_file, struct task_struct *from, - struct task_struct *to, struct file *file) +LSM_HOOK(int, 0, binder_set_context_mgr, const struct cred *mgr) +LSM_HOOK(int, 0, binder_transaction, const struct cred *from, + const struct cred *to) +LSM_HOOK(int, 0, binder_transfer_binder, const struct cred *from, + const struct cred *to) +LSM_HOOK(int, 0, binder_transfer_file, const struct cred *from, + const struct cred *to, struct file *file) LSM_HOOK(int, 0, ptrace_access_check, struct task_struct *child, unsigned int mode) LSM_HOOK(int, 0, ptrace_traceme, struct task_struct *parent) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 3af055b7ee1f07b387c976c52e383d358c70cf75..0ad8aa6579145dfc44504e71dd9d93b2734242c3 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1297,22 +1297,22 @@ * * @binder_set_context_mgr: * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. + * @mgr contains the struct cred for the current binder process. * Return 0 if permission is granted. * @binder_transaction: * Check whether @from is allowed to invoke a binder transaction call * to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_binder: * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_file: * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. + * @from contains the struct cred for the sending process. * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. + * @to contains the struct cred for the receiving process. * * @ptrace_access_check: * Check permission before allowing the current process to trace the diff --git a/include/linux/mdio.h b/include/linux/mdio.h index dbd69b3d170b4c29944f69efd6a22edc6ea5daad..de5fb4b333ce3df46872ad0023fd71e3ca45ad03 100644 --- a/include/linux/mdio.h +++ b/include/linux/mdio.h @@ -72,6 +72,9 @@ struct mdio_driver { /* Clears up any memory if needed */ void (*remove)(struct mdio_device *mdiodev); + + /* Quiesces the device on system shutdown, turns off interrupts etc */ + void (*shutdown)(struct mdio_device *mdiodev); }; #define to_mdio_driver(d) \ container_of(to_mdio_common_driver(d), struct mdio_driver, mdiodrv) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 2f288bcc5b790b45f730865a08d86d58be103732..3306d99dd5274a4a69fcc4488494e9ba094ecc96 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -371,7 +371,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r) /* Flags for memblock allocation APIs */ #define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0) #define MEMBLOCK_ALLOC_ACCESSIBLE 0 -#define MEMBLOCK_ALLOC_KASAN 1 +#define MEMBLOCK_ALLOC_NOLEAKTRACE 1 /* We are using top down, so it is safe to use 0 here */ #define MEMBLOCK_LOW_LIMIT 0 @@ -387,8 +387,8 @@ phys_addr_t memblock_alloc_range_nid(phys_addr_t size, phys_addr_t end, int nid, bool exact_nid); phys_addr_t memblock_phys_alloc_try_nid(phys_addr_t size, phys_addr_t align, int nid); -static inline phys_addr_t memblock_phys_alloc(phys_addr_t size, - phys_addr_t align) +static __always_inline phys_addr_t memblock_phys_alloc(phys_addr_t size, + phys_addr_t align) { return memblock_phys_alloc_range(size, align, 0, MEMBLOCK_ALLOC_ACCESSIBLE); diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 860c902fcb3011d3543b4154780c5df7d05d39a9..531cb39a00f75531e3284ad42806453dfe085672 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -197,7 +197,7 @@ struct obj_cgroup { struct mem_cgroup *memcg; atomic_t nr_charged_bytes; union { - struct list_head list; + struct list_head list; /* protected by objcg_lock */ struct rcu_head rcu; }; }; @@ -300,7 +300,8 @@ struct mem_cgroup { int kmemcg_id; enum memcg_kmem_state kmem_state; struct obj_cgroup __rcu *objcg; - struct list_head objcg_list; /* list of inherited objcgs */ + /* list of inherited objcgs, protected by objcg_lock */ + struct list_head objcg_list; #endif MEMCG_PADDING(_pad2_); @@ -331,6 +332,11 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif +#ifdef CONFIG_LRU_GEN + /* per-memcg mm_struct list */ + struct lru_gen_mm_list mm_list; +#endif + ANDROID_OEM_DATA(1); struct mem_cgroup_per_node *nodeinfo[0]; /* WARNING: nodeinfo must be the last member here */ @@ -734,6 +740,23 @@ static inline unsigned long memcg_page_state_local(struct mem_cgroup *memcg, void __mod_memcg_state(struct mem_cgroup *memcg, int idx, int val); +/* try to stablize page_memcg() for all the pages in a memcg */ +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + rcu_read_lock(); + + if (mem_cgroup_disabled() || !atomic_read(&memcg->moving_account)) + return true; + + rcu_read_unlock(); + return false; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + /* idx can be of type enum memcg_stat_item or node_stat_item */ static inline void mod_memcg_state(struct mem_cgroup *memcg, int idx, int val) @@ -1152,6 +1175,18 @@ static inline void unlock_page_memcg(struct page *page) { } +static inline bool mem_cgroup_trylock_pages(struct mem_cgroup *memcg) +{ + /* to match page_memcg_rcu() */ + rcu_read_lock(); + return true; +} + +static inline void mem_cgroup_unlock_pages(void) +{ + rcu_read_unlock(); +} + static inline void mem_cgroup_handle_over_high(void) { } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 70a3664785f80329dadd2ec38615d96212aeec74..eba1f1cbc9fbd1b4093f2285d5fa15f4276807f7 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -9274,16 +9274,22 @@ struct mlx5_ifc_pcmr_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; u8 reserved_at_10[0x10]; + u8 entropy_force_cap[0x1]; u8 entropy_calc_cap[0x1]; u8 entropy_gre_calc_cap[0x1]; - u8 reserved_at_23[0x1b]; + u8 reserved_at_23[0xf]; + u8 rx_ts_over_crc_cap[0x1]; + u8 reserved_at_33[0xb]; u8 fcs_cap[0x1]; u8 reserved_at_3f[0x1]; + u8 entropy_force[0x1]; u8 entropy_calc[0x1]; u8 entropy_gre_calc[0x1]; - u8 reserved_at_43[0x1b]; + u8 reserved_at_43[0xf]; + u8 rx_ts_over_crc[0x1]; + u8 reserved_at_53[0xb]; u8 fcs_chk[0x1]; u8 reserved_at_5f[0x1]; }; @@ -9301,8 +9307,8 @@ struct mlx5_ifc_bufferx_reg_bits { u8 reserved_at_0[0x6]; u8 lossy[0x1]; u8 epsb[0x1]; - u8 reserved_at_8[0xc]; - u8 size[0xc]; + u8 reserved_at_8[0x8]; + u8 size[0x10]; u8 xoff_threshold[0x10]; u8 xon_threshold[0x10]; diff --git a/include/linux/mm.h b/include/linux/mm.h index ff79e257a861e62239053a5c96bc1ebb9afc4620..c93e896ab4d839a3fe8e3934cd3164a4ca3e5dd2 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1107,6 +1107,8 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf); #define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH) #define LAST_CPUPID_PGOFF (ZONES_PGOFF - LAST_CPUPID_WIDTH) #define KASAN_TAG_PGOFF (LAST_CPUPID_PGOFF - KASAN_TAG_WIDTH) +#define LRU_GEN_PGOFF (KASAN_TAG_PGOFF - LRU_GEN_WIDTH) +#define LRU_REFS_PGOFF (LRU_GEN_PGOFF - LRU_REFS_WIDTH) /* * Define the bit shifts to access each section. For non-existent @@ -1286,7 +1288,6 @@ static inline void put_page(struct page *page) */ #define GUP_PIN_COUNTING_BIAS (1U << 10) -void put_user_page(struct page *page); void unpin_user_page(struct page *page); void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty); @@ -2037,28 +2038,27 @@ static inline unsigned long get_mm_counter(struct mm_struct *mm, int member) return (unsigned long)val; } -void mm_trace_rss_stat(struct mm_struct *mm, int member, long count, - long value); +void mm_trace_rss_stat(struct mm_struct *mm, int member, long count); static inline void add_mm_counter(struct mm_struct *mm, int member, long value) { long count = atomic_long_add_return(value, &mm->rss_stat.count[member]); - mm_trace_rss_stat(mm, member, count, value); + mm_trace_rss_stat(mm, member, count); } static inline void inc_mm_counter(struct mm_struct *mm, int member) { long count = atomic_long_inc_return(&mm->rss_stat.count[member]); - mm_trace_rss_stat(mm, member, count, 1); + mm_trace_rss_stat(mm, member, count); } static inline void dec_mm_counter(struct mm_struct *mm, int member) { long count = atomic_long_dec_return(&mm->rss_stat.count[member]); - mm_trace_rss_stat(mm, member, count, -1); + mm_trace_rss_stat(mm, member, count); } /* Optimized variant when page is already known not to be PageAnon */ @@ -2673,16 +2673,16 @@ extern struct vm_area_struct *__vma_merge(struct mm_struct *mm, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *anon, struct file *file, pgoff_t pgoff, struct mempolicy *mpol, struct vm_userfaultfd_ctx uff, - const char __user *user, bool keep_locked); + struct anon_vma_name *, bool keep_locked); static inline struct vm_area_struct *vma_merge(struct mm_struct *mm, struct vm_area_struct *prev, unsigned long addr, unsigned long end, unsigned long vm_flags, struct anon_vma *anon, struct file *file, pgoff_t off, struct mempolicy *pol, struct vm_userfaultfd_ctx uff, - const char __user *user) + struct anon_vma_name *anon_name) { return __vma_merge(mm, prev, addr, end, vm_flags, anon, file, off, - pol, uff, user, false); + pol, uff, anon_name, false); } extern struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *); @@ -3396,5 +3396,17 @@ static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) return 0; } +#ifdef CONFIG_ANON_VMA_NAME +int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, + unsigned long len_in, + struct anon_vma_name *anon_name); +#else +static inline int +madvise_set_anon_name(struct mm_struct *mm, unsigned long start, + unsigned long len_in, struct anon_vma_name *anon_name) { + return 0; +} +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 8fc71e9d7bb079dab4b6057a062d5d34822a6cac..af0790da3b9fe39ffee6cc6f1053af1c844df441 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -4,6 +4,7 @@ #include #include +#include /** * page_is_file_lru - should the page be on a file LRU or anon LRU? @@ -30,6 +31,8 @@ static __always_inline void __update_lru_size(struct lruvec *lruvec, { struct pglist_data *pgdat = lruvec_pgdat(lruvec); + lockdep_assert_held(&pgdat->lru_lock); + __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); __mod_zone_page_state(&pgdat->node_zones[zid], NR_ZONE_LRU_BASE + lru, nr_pages); @@ -45,84 +48,363 @@ static __always_inline void update_lru_size(struct lruvec *lruvec, #endif } +/** + * __clear_page_lru_flags - clear page lru flags before releasing a page + * @page: the page that was on lru and now has a zero reference + */ +static __always_inline void __clear_page_lru_flags(struct page *page) +{ + VM_BUG_ON_PAGE(!PageLRU(page), page); + + __ClearPageLRU(page); + + /* this shouldn't happen, so leave the flags to bad_page() */ + if (PageActive(page) && PageUnevictable(page)) + return; + + __ClearPageActive(page); + __ClearPageUnevictable(page); +} + +/** + * page_lru - which LRU list should a page be on? + * @page: the page to test + * + * Returns the LRU list a page should be on, as an index + * into the array of LRU lists. + */ +static __always_inline enum lru_list page_lru(struct page *page) +{ + enum lru_list lru; + + VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); + + if (PageUnevictable(page)) + return LRU_UNEVICTABLE; + + lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; + if (PageActive(page)) + lru += LRU_ACTIVE; + + return lru; +} + +#ifdef CONFIG_LRU_GEN + +static inline bool lru_gen_enabled(void) +{ +#ifdef CONFIG_LRU_GEN_ENABLED + DECLARE_STATIC_KEY_TRUE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_likely(&lru_gen_caps[LRU_GEN_CORE]); +#else + DECLARE_STATIC_KEY_FALSE(lru_gen_caps[NR_LRU_GEN_CAPS]); + + return static_branch_unlikely(&lru_gen_caps[LRU_GEN_CORE]); +#endif +} + +static inline bool lru_gen_in_fault(void) +{ + return current->in_lru_fault; +} + +static inline int lru_gen_from_seq(unsigned long seq) +{ + return seq % MAX_NR_GENS; +} + +static inline int lru_hist_from_seq(unsigned long seq) +{ + return seq % NR_HIST_GENS; +} + +static inline int lru_tier_from_refs(int refs) +{ + VM_BUG_ON(refs > BIT(LRU_REFS_WIDTH)); + + /* see the comment on MAX_NR_TIERS */ + return order_base_2(refs + 1); +} + +static inline bool lru_gen_is_active(struct lruvec *lruvec, int gen) +{ + unsigned long max_seq = lruvec->lrugen.max_seq; + + VM_BUG_ON(gen >= MAX_NR_GENS); + + /* see the comment on MIN_NR_GENS */ + return gen == lru_gen_from_seq(max_seq) || gen == lru_gen_from_seq(max_seq - 1); +} + +static inline void lru_gen_update_size(struct lruvec *lruvec, struct page *page, + int old_gen, int new_gen) +{ + int type = page_is_file_lru(page); + int zone = page_zonenum(page); + int delta = thp_nr_pages(page); + enum lru_list lru = type * LRU_INACTIVE_FILE; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON(old_gen != -1 && old_gen >= MAX_NR_GENS); + VM_BUG_ON(new_gen != -1 && new_gen >= MAX_NR_GENS); + VM_BUG_ON(old_gen == -1 && new_gen == -1); + + if (old_gen >= 0) + WRITE_ONCE(lrugen->nr_pages[old_gen][type][zone], + lrugen->nr_pages[old_gen][type][zone] - delta); + if (new_gen >= 0) + WRITE_ONCE(lrugen->nr_pages[new_gen][type][zone], + lrugen->nr_pages[new_gen][type][zone] + delta); + + /* addition */ + if (old_gen < 0) { + if (lru_gen_is_active(lruvec, new_gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, delta); + return; + } + + /* deletion */ + if (new_gen < 0) { + if (lru_gen_is_active(lruvec, old_gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, -delta); + return; + } + + /* promotion */ + if (!lru_gen_is_active(lruvec, old_gen) && lru_gen_is_active(lruvec, new_gen)) { + __update_lru_size(lruvec, lru, zone, -delta); + __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, delta); + } + + /* demotion requires isolation, e.g., lru_deactivate_fn() */ + VM_BUG_ON(lru_gen_is_active(lruvec, old_gen) && !lru_gen_is_active(lruvec, new_gen)); +} + +static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming) +{ + int gen; + unsigned long old_flags, new_flags; + int type = page_is_file_lru(page); + int zone = page_zonenum(page); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + if (PageUnevictable(page) || !lrugen->enabled) + return false; + /* + * There are three common cases for this page: + * 1. If it's hot, e.g., freshly faulted in or previously hot and + * migrated, add it to the youngest generation. + * 2. If it's cold but can't be evicted immediately, i.e., an anon page + * not in swapcache or a dirty page pending writeback, add it to the + * second oldest generation. + * 3. Everything else (clean, cold) is added to the oldest generation. + */ + if (PageActive(page)) + gen = lru_gen_from_seq(lrugen->max_seq); + else if ((type == LRU_GEN_ANON && !PageSwapCache(page)) || + (PageReclaim(page) && (PageDirty(page) || PageWriteback(page)))) + gen = lru_gen_from_seq(lrugen->min_seq[type] + 1); + else + gen = lru_gen_from_seq(lrugen->min_seq[type]); + + do { + new_flags = old_flags = READ_ONCE(page->flags); + VM_BUG_ON_PAGE(new_flags & LRU_GEN_MASK, page); + + /* see the comment on MIN_NR_GENS */ + new_flags &= ~(LRU_GEN_MASK | BIT(PG_active)); + new_flags |= (gen + 1UL) << LRU_GEN_PGOFF; + } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags); + + lru_gen_update_size(lruvec, page, -1, gen); + /* for rotate_reclaimable_page() */ + if (reclaiming) + list_add_tail(&page->lru, &lrugen->lists[gen][type][zone]); + else + list_add(&page->lru, &lrugen->lists[gen][type][zone]); + + return true; +} + +static inline bool lru_gen_del_page(struct lruvec *lruvec, struct page *page, bool reclaiming) +{ + int gen; + unsigned long old_flags, new_flags; + + do { + new_flags = old_flags = READ_ONCE(page->flags); + if (!(new_flags & LRU_GEN_MASK)) + return false; + + VM_BUG_ON_PAGE(PageActive(page), page); + VM_BUG_ON_PAGE(PageUnevictable(page), page); + + gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; + + new_flags &= ~LRU_GEN_MASK; + if (!(new_flags & BIT(PG_referenced))) + new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); + /* for shrink_page_list() */ + if (reclaiming) + new_flags &= ~(BIT(PG_referenced) | BIT(PG_reclaim)); + else if (lru_gen_is_active(lruvec, gen)) + new_flags |= BIT(PG_active); + } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags); + + lru_gen_update_size(lruvec, page, gen, -1); + list_del(&page->lru); + + return true; +} + +#else + +static inline bool lru_gen_enabled(void) +{ + return false; +} + +static inline bool lru_gen_in_fault(void) +{ + return false; +} + +static inline bool lru_gen_add_page(struct lruvec *lruvec, struct page *page, bool reclaiming) +{ + return false; +} + +static inline bool lru_gen_del_page(struct lruvec *lruvec, struct page *page, bool reclaiming) +{ + return false; +} + +#endif /* CONFIG_LRU_GEN */ + static __always_inline void add_page_to_lru_list(struct page *page, - struct lruvec *lruvec, enum lru_list lru) + struct lruvec *lruvec) { + enum lru_list lru = page_lru(page); + + if (lru_gen_add_page(lruvec, page, false)) + return; + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); list_add(&page->lru, &lruvec->lists[lru]); } static __always_inline void add_page_to_lru_list_tail(struct page *page, - struct lruvec *lruvec, enum lru_list lru) + struct lruvec *lruvec) { + enum lru_list lru = page_lru(page); + + if (lru_gen_add_page(lruvec, page, true)) + return; + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); list_add_tail(&page->lru, &lruvec->lists[lru]); } static __always_inline void del_page_from_lru_list(struct page *page, - struct lruvec *lruvec, enum lru_list lru) + struct lruvec *lruvec) { + if (lru_gen_del_page(lruvec, page, false)) + return; + list_del(&page->lru); - update_lru_size(lruvec, lru, page_zonenum(page), -thp_nr_pages(page)); + update_lru_size(lruvec, page_lru(page), page_zonenum(page), + -thp_nr_pages(page)); } -/** - * page_lru_base_type - which LRU list type should a page be on? - * @page: the page to test - * - * Used for LRU list index arithmetic. - * - * Returns the base LRU type - file or anon - @page should be on. +#ifdef CONFIG_ANON_VMA_NAME +/* + * mmap_lock should be read-locked when calling anon_vma_name(). Caller should + * either keep holding the lock while using the returned pointer or it should + * raise anon_vma_name refcount before releasing the lock. */ -static inline enum lru_list page_lru_base_type(struct page *page) +extern struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma); +extern struct anon_vma_name *anon_vma_name_alloc(const char *name); +extern void anon_vma_name_free(struct kref *kref); + +/* mmap_lock should be read-locked */ +static inline void anon_vma_name_get(struct anon_vma_name *anon_name) { - if (page_is_file_lru(page)) - return LRU_INACTIVE_FILE; - return LRU_INACTIVE_ANON; + if (anon_name) + kref_get(&anon_name->kref); } -/** - * page_off_lru - which LRU list was page on? clearing its lru flags. - * @page: the page to test - * - * Returns the LRU list a page was on, as an index into the array of LRU - * lists; and clears its Unevictable or Active flags, ready for freeing. - */ -static __always_inline enum lru_list page_off_lru(struct page *page) +static inline void anon_vma_name_put(struct anon_vma_name *anon_name) { - enum lru_list lru; + if (anon_name) + kref_put(&anon_name->kref, anon_vma_name_free); +} + +static inline +struct anon_vma_name *anon_vma_name_reuse(struct anon_vma_name *anon_name) +{ + /* Prevent anon_name refcount saturation early on */ + if (kref_read(&anon_name->kref) < REFCOUNT_MAX) { + anon_vma_name_get(anon_name); + return anon_name; - if (PageUnevictable(page)) { - __ClearPageUnevictable(page); - lru = LRU_UNEVICTABLE; - } else { - lru = page_lru_base_type(page); - if (PageActive(page)) { - __ClearPageActive(page); - lru += LRU_ACTIVE; - } } - return lru; + return anon_vma_name_alloc(anon_name->name); } -/** - * page_lru - which LRU list should a page be on? - * @page: the page to test - * - * Returns the LRU list a page should be on, as an index - * into the array of LRU lists. - */ -static __always_inline enum lru_list page_lru(struct page *page) +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) { - enum lru_list lru; + struct anon_vma_name *anon_name = anon_vma_name(orig_vma); - if (PageUnevictable(page)) - lru = LRU_UNEVICTABLE; - else { - lru = page_lru_base_type(page); - if (PageActive(page)) - lru += LRU_ACTIVE; - } - return lru; + if (anon_name) + new_vma->anon_name = anon_vma_name_reuse(anon_name); } + +static inline void free_anon_vma_name(struct vm_area_struct *vma) +{ + /* + * Not using anon_vma_name because it generates a warning if mmap_lock + * is not held, which might be the case here. + */ + if (!vma->vm_file) + anon_vma_name_put(vma->anon_name); +} + +static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, + struct anon_vma_name *anon_name2) +{ + if (anon_name1 == anon_name2) + return true; + + return anon_name1 && anon_name2 && + !strcmp(anon_name1->name, anon_name2->name); +} +#else /* CONFIG_ANON_VMA_NAME */ +static inline struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) +{ + return NULL; +} + +static inline struct anon_vma_name *anon_vma_name_alloc(const char *name) +{ + return NULL; +} + +static inline void anon_vma_name_get(struct anon_vma_name *anon_name) {} +static inline void anon_vma_name_put(struct anon_vma_name *anon_name) {} +static inline void dup_anon_vma_name(struct vm_area_struct *orig_vma, + struct vm_area_struct *new_vma) {} +static inline void free_anon_vma_name(struct vm_area_struct *vma) {} + +static inline bool anon_vma_name_eq(struct anon_vma_name *anon_name1, + struct anon_vma_name *anon_name2) +{ + return true; +} + +#endif /* CONFIG_ANON_VMA_NAME */ + #endif diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c853f612a81533332f8ae05e3a06025a0992623c..48d72db9d86dfeb00d1ac8223a1f0e3ba0222937 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -3,8 +3,10 @@ #define _LINUX_MM_TYPES_H #include +#include #include +#include #include #include #include @@ -15,6 +17,8 @@ #include #include #include +#include +#include #include #include @@ -299,6 +303,12 @@ struct vm_userfaultfd_ctx { struct vm_userfaultfd_ctx {}; #endif /* CONFIG_USERFAULTFD */ +struct anon_vma_name { + struct kref kref; + /* The name needs to be at the end because it is dynamically sized. */ + char name[]; +}; + /* * This struct describes a virtual memory area. There is one of these * per VM-area/task. A VM area is any part of the process virtual memory @@ -341,15 +351,19 @@ struct vm_area_struct { * linkage into the address_space->i_mmap interval tree. * * For private anonymous mappings, a pointer to a null terminated string - * in the user process containing the name given to the vma, or NULL - * if unnamed. + * containing the name given to the vma, or NULL if unnamed. */ + union { struct { struct rb_node rb; unsigned long rb_subtree_last; } shared; - const char __user *anon_name; + /* + * Serialized by mmap_sem. Never use directly because it is + * valid only when vm_file is NULL. Use anon_vma_name instead. + */ + struct anon_vma_name *anon_name; }; /* @@ -404,6 +418,7 @@ struct core_state { }; struct kioctx_table; +struct percpu_rw_semaphore; struct mm_struct { struct { struct vm_area_struct *mmap; /* list of VMAs */ @@ -561,6 +576,9 @@ struct mm_struct { struct file __rcu *exe_file; #ifdef CONFIG_MMU_NOTIFIER struct mmu_notifier_subscriptions *notifier_subscriptions; +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT + struct percpu_rw_semaphore *mmu_notifier_lock; +#endif #endif #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS pgtable_t pmd_huge_pte; /* protected by page_table_lock */ @@ -599,6 +617,23 @@ struct mm_struct { u32 pasid; #endif +#ifdef CONFIG_LRU_GEN + struct { + /* this mm_struct is on lru_gen_mm_list */ + struct list_head list; +#ifdef CONFIG_MEMCG + /* points to the memcg of "owner" above */ + struct mem_cgroup *memcg; +#endif + /* + * Set when switching to this mm_struct, as a hint of + * whether it has been used since the last time per-node + * page table walkers cleared the corresponding bits. + */ + nodemask_t nodes; + } lru_gen; +#endif /* CONFIG_LRU_GEN */ + ANDROID_KABI_RESERVE(1); } __randomize_layout; @@ -626,6 +661,65 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) return (struct cpumask *)&mm->cpu_bitmap; } +#ifdef CONFIG_LRU_GEN + +struct lru_gen_mm_list { + /* mm_struct list for page table walkers */ + struct list_head fifo; + /* protects the list above */ + spinlock_t lock; +}; + +void lru_gen_add_mm(struct mm_struct *mm); +void lru_gen_del_mm(struct mm_struct *mm); +#ifdef CONFIG_MEMCG +void lru_gen_migrate_mm(struct mm_struct *mm); +#endif + +static inline void lru_gen_init_mm(struct mm_struct *mm) +{ + INIT_LIST_HEAD(&mm->lru_gen.list); +#ifdef CONFIG_MEMCG + mm->lru_gen.memcg = NULL; +#endif + nodes_clear(mm->lru_gen.nodes); +} + +static inline void lru_gen_use_mm(struct mm_struct *mm) +{ + /* unlikely but not a bug when racing with lru_gen_migrate_mm() */ + VM_WARN_ON(list_empty(&mm->lru_gen.list)); + + if (!(current->flags & PF_KTHREAD) && !nodes_full(mm->lru_gen.nodes)) + nodes_setall(mm->lru_gen.nodes); +} + +#else /* !CONFIG_LRU_GEN */ + +static inline void lru_gen_add_mm(struct mm_struct *mm) +{ +} + +static inline void lru_gen_del_mm(struct mm_struct *mm) +{ +} + +#ifdef CONFIG_MEMCG +static inline void lru_gen_migrate_mm(struct mm_struct *mm) +{ +} +#endif + +static inline void lru_gen_init_mm(struct mm_struct *mm) +{ +} + +static inline void lru_gen_use_mm(struct mm_struct *mm) +{ +} + +#endif /* CONFIG_LRU_GEN */ + struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long start, unsigned long end); @@ -831,13 +925,4 @@ typedef struct { unsigned long val; } swp_entry_t; -/* Return the name for an anonymous mapping or NULL for a file-backed mapping */ -static inline const char __user *vma_get_anon_name(struct vm_area_struct *vma) -{ - if (vma->vm_file) - return NULL; - - return vma->anon_name; -} - #endif /* _LINUX_MM_TYPES_H */ diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index a7255f582d7c9d6e702fcdea998bff91ce3bebdf..6f303131b97616efa7ba43de75729cc10a1c46d9 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -502,17 +504,68 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, __mmu_notifier_invalidate_range(mm, start, end); } -static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT + +static inline bool mmu_notifier_subscriptions_init(struct mm_struct *mm) { + mm->mmu_notifier_lock = kzalloc(sizeof(struct percpu_rw_semaphore), GFP_KERNEL); + if (!mm->mmu_notifier_lock) + return false; + + percpu_init_rwsem(mm->mmu_notifier_lock); mm->notifier_subscriptions = NULL; + + return true; } static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) { if (mm_has_notifiers(mm)) __mmu_notifier_subscriptions_destroy(mm); + + if (in_atomic()) { + percpu_rwsem_async_destroy(mm->mmu_notifier_lock); + } else { + percpu_free_rwsem(mm->mmu_notifier_lock); + kfree(mm->mmu_notifier_lock); + } + mm->mmu_notifier_lock = NULL; +} + +static inline bool mmu_notifier_trylock(struct mm_struct *mm) +{ + return percpu_down_read_trylock(mm->mmu_notifier_lock); +} + +static inline void mmu_notifier_unlock(struct mm_struct *mm) +{ + percpu_up_read(mm->mmu_notifier_lock); } +#else /* CONFIG_SPECULATIVE_PAGE_FAULT */ + +static inline bool mmu_notifier_subscriptions_init(struct mm_struct *mm) +{ + mm->notifier_subscriptions = NULL; + return true; +} + +static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_subscriptions_destroy(mm); +} + +static inline bool mmu_notifier_trylock(struct mm_struct *mm) +{ + return true; +} + +static inline void mmu_notifier_unlock(struct mm_struct *mm) +{ +} + +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ static inline void mmu_notifier_range_init(struct mmu_notifier_range *range, enum mmu_notifier_event event, @@ -727,14 +780,24 @@ static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, { } -static inline void mmu_notifier_subscriptions_init(struct mm_struct *mm) +static inline bool mmu_notifier_subscriptions_init(struct mm_struct *mm) { + return true; } static inline void mmu_notifier_subscriptions_destroy(struct mm_struct *mm) { } +static inline bool mmu_notifier_trylock(struct mm_struct *mm) +{ + return true; +} + +static inline void mmu_notifier_unlock(struct mm_struct *mm) +{ +} + #define mmu_notifier_range_update_to_read_only(r) false #define ptep_clear_flush_young_notify ptep_clear_flush_young diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 625507ea48124cfbddfeaf6fe884a5496d46632d..60ddb2e4794a33e0ce4acf0bb5636ac89bf7a76f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -277,6 +277,206 @@ enum lruvec_flags { */ }; +#endif /* !__GENERATING_BOUNDS_H */ + +/* + * Evictable pages are divided into multiple generations. The youngest and the + * oldest generation numbers, max_seq and min_seq, are monotonically increasing. + * They form a sliding window of a variable size [MIN_NR_GENS, MAX_NR_GENS]. An + * offset within MAX_NR_GENS, gen, indexes the LRU list of the corresponding + * generation. The gen counter in page->flags stores gen+1 while a page is on + * one of lrugen->lists[]. Otherwise it stores 0. + * + * A page is added to the youngest generation on faulting. The aging needs to + * check the accessed bit at least twice before handing this page over to the + * eviction. The first check takes care of the accessed bit set on the initial + * fault; the second check makes sure this page hasn't been used since then. + * This process, AKA second chance, requires a minimum of two generations, + * hence MIN_NR_GENS. And to maintain ABI compatibility with the active/inactive + * LRU, these two generations are considered active; the rest of generations, if + * they exist, are considered inactive. See lru_gen_is_active(). PG_active is + * always cleared while a page is on one of lrugen->lists[] so that the aging + * needs not to worry about it. And it's set again when a page considered active + * is isolated for non-reclaiming purposes, e.g., migration. See + * lru_gen_add_page() and lru_gen_del_page(). + * + * MAX_NR_GENS is set to 4 so that the multi-gen LRU can support twice of the + * categories of the active/inactive LRU when keeping track of accesses through + * page tables. It requires order_base_2(MAX_NR_GENS+1) bits in page->flags. + */ +#define MIN_NR_GENS 2U +#define MAX_NR_GENS 4U + +/* + * Each generation is divided into multiple tiers. Tiers represent different + * ranges of numbers of accesses through file descriptors. A page accessed N + * times through file descriptors is in tier order_base_2(N). A page in the + * first tier (N=0,1) is marked by PG_referenced unless it was faulted in + * though page tables or read ahead. A page in any other tier (N>1) is marked + * by PG_referenced and PG_workingset. + * + * In contrast to moving across generations which requires the LRU lock, moving + * across tiers only requires operations on page->flags and therefore has a + * negligible cost in the buffered access path. In the eviction path, + * comparisons of refaulted/(evicted+protected) from the first tier and the + * rest infer whether pages accessed multiple times through file descriptors + * are statistically hot and thus worth protecting. + * + * MAX_NR_TIERS is set to 4 so that the multi-gen LRU can support twice of the + * categories of the active/inactive LRU when keeping track of accesses through + * file descriptors. It requires MAX_NR_TIERS-2 additional bits in page->flags. + */ +#define MAX_NR_TIERS 4U + +#ifndef __GENERATING_BOUNDS_H + +struct lruvec; +struct page_vma_mapped_walk; + +#define LRU_GEN_MASK ((BIT(LRU_GEN_WIDTH) - 1) << LRU_GEN_PGOFF) +#define LRU_REFS_MASK ((BIT(LRU_REFS_WIDTH) - 1) << LRU_REFS_PGOFF) +#define LRU_REFS_FLAGS (BIT(PG_referenced) | BIT(PG_workingset)) + +#ifdef CONFIG_LRU_GEN + +enum { + LRU_GEN_ANON, + LRU_GEN_FILE, +}; + +enum { + LRU_GEN_CORE, + LRU_GEN_MM_WALK, + LRU_GEN_NONLEAF_YOUNG, + NR_LRU_GEN_CAPS +}; + +#define MIN_LRU_BATCH BITS_PER_LONG +#define MAX_LRU_BATCH (MIN_LRU_BATCH * 128) + +/* whether to keep historical stats from evicted generations */ +#ifdef CONFIG_LRU_GEN_STATS +#define NR_HIST_GENS MAX_NR_GENS +#else +#define NR_HIST_GENS 1U +#endif + +/* + * The youngest generation number is stored in max_seq for both anon and file + * types as they are aged on an equal footing. The oldest generation numbers are + * stored in min_seq[] separately for anon and file types as clean file pages + * can be evicted regardless of swap constraints. + * + * Normally anon and file min_seq are in sync. But if swapping is constrained, + * e.g., out of swap space, file min_seq is allowed to advance and leave anon + * min_seq behind. + */ +struct lru_gen_struct { + /* the aging increments the youngest generation number */ + unsigned long max_seq; + /* the eviction increments the oldest generation numbers */ + unsigned long min_seq[ANON_AND_FILE]; + /* the birth time of each generation in jiffies */ + unsigned long timestamps[MAX_NR_GENS]; + /* the multi-gen LRU lists */ + struct list_head lists[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the sizes of the above lists */ + unsigned long nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* the exponential moving average of refaulted */ + unsigned long avg_refaulted[ANON_AND_FILE][MAX_NR_TIERS]; + /* the exponential moving average of evicted+protected */ + unsigned long avg_total[ANON_AND_FILE][MAX_NR_TIERS]; + /* the first tier doesn't need protection, hence the minus one */ + unsigned long protected[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS - 1]; + /* can be modified without holding the LRU lock */ + atomic_long_t evicted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + atomic_long_t refaulted[NR_HIST_GENS][ANON_AND_FILE][MAX_NR_TIERS]; + /* whether the multi-gen LRU is enabled */ + bool enabled; +}; + +enum { + MM_PTE_TOTAL, /* total leaf entries */ + MM_PTE_OLD, /* old leaf entries */ + MM_PTE_YOUNG, /* young leaf entries */ + MM_PMD_TOTAL, /* total non-leaf entries */ + MM_PMD_FOUND, /* non-leaf entries found in Bloom filters */ + MM_PMD_ADDED, /* non-leaf entries added to Bloom filters */ + NR_MM_STATS +}; + +/* mnemonic codes for the mm stats above */ +#define MM_STAT_CODES "toydfa" + +/* double-buffering Bloom filters */ +#define NR_BLOOM_FILTERS 2 + +struct lru_gen_mm_state { + /* set to max_seq after each iteration */ + unsigned long seq; + /* where the current iteration starts (inclusive) */ + struct list_head *head; + /* where the last iteration ends (exclusive) */ + struct list_head *tail; + /* to wait for the last page table walker to finish */ + struct wait_queue_head wait; + /* Bloom filters flip after each iteration */ + unsigned long *filters[NR_BLOOM_FILTERS]; + /* the mm stats for debugging */ + unsigned long stats[NR_HIST_GENS][NR_MM_STATS]; + /* the number of concurrent page table walkers */ + int nr_walkers; +}; + +struct lru_gen_mm_walk { + /* the lruvec under reclaim */ + struct lruvec *lruvec; + /* unstable max_seq from lru_gen_struct */ + unsigned long max_seq; + /* the next address within an mm to scan */ + unsigned long next_addr; + /* to batch page table entries */ + unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)]; + /* to batch promoted pages */ + int nr_pages[MAX_NR_GENS][ANON_AND_FILE][MAX_NR_ZONES]; + /* to batch the mm stats */ + int mm_stats[NR_MM_STATS]; + /* total batched items */ + int batched; + bool can_swap; + bool full_scan; +}; + +void lru_gen_init_lruvec(struct lruvec *lruvec); +void lru_gen_look_around(struct page_vma_mapped_walk *pvmw); + +#ifdef CONFIG_MEMCG +void lru_gen_init_memcg(struct mem_cgroup *memcg); +void lru_gen_exit_memcg(struct mem_cgroup *memcg); +#endif + +#else /* !CONFIG_LRU_GEN */ + +static inline void lru_gen_init_lruvec(struct lruvec *lruvec) +{ +} + +static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +{ +} + +#ifdef CONFIG_MEMCG +static inline void lru_gen_init_memcg(struct mem_cgroup *memcg) +{ +} + +static inline void lru_gen_exit_memcg(struct mem_cgroup *memcg) +{ +} +#endif + +#endif /* CONFIG_LRU_GEN */ + struct lruvec { struct list_head lists[NR_LRU_LISTS]; /* @@ -292,9 +492,16 @@ struct lruvec { unsigned long refaults[ANON_AND_FILE]; /* Various lruvec state flags (enum lruvec_flags) */ unsigned long flags; +#ifdef CONFIG_LRU_GEN + /* evictable pages divided into generations */ + struct lru_gen_struct lrugen; + /* to concurrently iterate lru_gen_mm_list */ + struct lru_gen_mm_state mm_state; +#endif #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif + ANDROID_VENDOR_DATA(1); }; /* Isolate unmapped pages */ @@ -827,6 +1034,11 @@ typedef struct pglist_data { unsigned long flags; +#ifdef CONFIG_LRU_GEN + /* kswap mm walk data */ + struct lru_gen_mm_walk mm_walk; +#endif + ZONE_PADDING(_pad2_) /* Per-node vmstats */ @@ -964,6 +1176,15 @@ static inline int is_highmem_idx(enum zone_type idx) #endif } +#ifdef CONFIG_ZONE_DMA +bool has_managed_dma(void); +#else +static inline bool has_managed_dma(void) +{ + return false; +} +#endif + /** * is_highmem - helper function to quickly check if a struct zone is a * highmem zone or not. This is an attempt to keep references diff --git a/include/linux/module.h b/include/linux/module.h index 4fa2f7de56e61440eaf2999eac0b1f17da234b6e..c5c3ce08f64664615b811a319d16fa5ba1ef7c06 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -293,7 +293,8 @@ extern typeof(name) __mod_##type##__##name##_device_table \ * files require multiple MODULE_FIRMWARE() specifiers */ #define MODULE_FIRMWARE(_firmware) MODULE_INFO(firmware, _firmware) -#define MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) +#define _MODULE_IMPORT_NS(ns) MODULE_INFO(import_ns, #ns) +#define MODULE_IMPORT_NS(ns) _MODULE_IMPORT_NS(ns) struct notifier_block; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a347061c01ef3cabc88621c3d4753eed83aa64a6..9dadd780a2a67082ab49188d40745761edd150f5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2094,7 +2094,7 @@ struct net_device { struct netdev_queue *_tx ____cacheline_aligned_in_smp; unsigned int num_tx_queues; unsigned int real_num_tx_queues; - struct Qdisc *qdisc; + struct Qdisc __rcu *qdisc; unsigned int tx_queue_len; spinlock_t tx_global_lock; @@ -2585,6 +2585,7 @@ struct packet_type { struct net_device *); bool (*id_match)(struct packet_type *ptype, struct sock *sk); + struct net *af_packet_net; void *af_packet_priv; struct list_head list; @@ -4259,7 +4260,8 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits) static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) { spin_lock(&txq->_xmit_lock); - txq->xmit_lock_owner = cpu; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, cpu); } static inline bool __netif_tx_acquire(struct netdev_queue *txq) @@ -4276,26 +4278,32 @@ static inline void __netif_tx_release(struct netdev_queue *txq) static inline void __netif_tx_lock_bh(struct netdev_queue *txq) { spin_lock_bh(&txq->_xmit_lock); - txq->xmit_lock_owner = smp_processor_id(); + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); } static inline bool __netif_tx_trylock(struct netdev_queue *txq) { bool ok = spin_trylock(&txq->_xmit_lock); - if (likely(ok)) - txq->xmit_lock_owner = smp_processor_id(); + + if (likely(ok)) { + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, smp_processor_id()); + } return ok; } static inline void __netif_tx_unlock(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock(&txq->_xmit_lock); } static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) { - txq->xmit_lock_owner = -1; + /* Pairs with READ_ONCE() in __dev_queue_xmit() */ + WRITE_ONCE(txq->xmit_lock_owner, -1); spin_unlock_bh(&txq->_xmit_lock); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 91a6525a98cb7e0848b2be237538ef70b3d44a6c..1e0a3497bdb4644db96a7b0a2cdb935732800447 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -501,8 +501,8 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr, struct nfs4_label *label); extern int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags); extern void nfs_access_zap_cache(struct inode *inode); -extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, struct nfs_access_entry *res, - bool may_block); +extern int nfs_access_get_cached(struct inode *inode, const struct cred *cred, + u32 *mask, bool may_block); /* * linux/fs/nfs/symlink.c @@ -553,6 +553,7 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); +bool nfs_commit_end(struct nfs_mds_commit_info *cinfo); static inline int nfs_have_writebacks(struct inode *inode) diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index ac398e143c9a1cee72a8b59eacb0a7a148b142b1..89fe4e3592f9309d5853657903282e92c07479d1 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -486,6 +486,7 @@ static inline int num_node_state(enum node_states state) #define first_online_node 0 #define first_memory_node 0 #define next_online_node(nid) (MAX_NUMNODES) +#define next_memory_node(nid) (MAX_NUMNODES) #define nr_node_ids 1U #define nr_online_nodes 1U diff --git a/include/linux/page-flags-layout.h b/include/linux/page-flags-layout.h index 7d4ec26d8a3ed284e30b0508a9f6082e2fb596dd..c1946cdb845fec775097fb10e2747e543bf70b88 100644 --- a/include/linux/page-flags-layout.h +++ b/include/linux/page-flags-layout.h @@ -21,16 +21,17 @@ #elif MAX_NR_ZONES <= 8 #define ZONES_SHIFT 3 #else -#error ZONES_SHIFT -- too many zones configured adjust calculation +#error ZONES_SHIFT "Too many zones configured" #endif +#define ZONES_WIDTH ZONES_SHIFT + #ifdef CONFIG_SPARSEMEM #include - -/* SECTION_SHIFT #bits space required to store a section # */ #define SECTIONS_SHIFT (MAX_PHYSMEM_BITS - SECTION_SIZE_BITS) - -#endif /* CONFIG_SPARSEMEM */ +#else +#define SECTIONS_SHIFT 0 +#endif #ifndef BUILD_VDSO32_64 /* @@ -54,17 +55,29 @@ #define SECTIONS_WIDTH 0 #endif -#define ZONES_WIDTH ZONES_SHIFT - -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_SHIFT \ + <= BITS_PER_LONG - NR_PAGEFLAGS #define NODES_WIDTH NODES_SHIFT -#else -#ifdef CONFIG_SPARSEMEM_VMEMMAP +#elif defined(CONFIG_SPARSEMEM_VMEMMAP) #error "Vmemmap: No space for nodes field in page flags" -#endif +#else #define NODES_WIDTH 0 #endif +/* + * Note that this #define MUST have a value so that it can be tested with + * the IS_ENABLED() macro. + */ +#if NODES_SHIFT != 0 && NODES_WIDTH == 0 +#define NODE_NOT_IN_PAGE_FLAGS 1 +#endif + +#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) +#define KASAN_TAG_WIDTH 8 +#else +#define KASAN_TAG_WIDTH 0 +#endif + #ifdef CONFIG_NUMA_BALANCING #define LAST__PID_SHIFT 8 #define LAST__PID_MASK ((1 << LAST__PID_SHIFT)-1) @@ -77,36 +90,20 @@ #define LAST_CPUPID_SHIFT 0 #endif -#if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) -#define KASAN_TAG_WIDTH 8 -#else -#define KASAN_TAG_WIDTH 0 -#endif - -#if SECTIONS_WIDTH+ZONES_WIDTH+NODES_SHIFT+LAST_CPUPID_SHIFT+KASAN_TAG_WIDTH \ - <= BITS_PER_LONG - NR_PAGEFLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ + KASAN_TAG_WIDTH + LAST_CPUPID_SHIFT <= BITS_PER_LONG - NR_PAGEFLAGS #define LAST_CPUPID_WIDTH LAST_CPUPID_SHIFT #else #define LAST_CPUPID_WIDTH 0 #endif -#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH+LAST_CPUPID_WIDTH+KASAN_TAG_WIDTH \ - > BITS_PER_LONG - NR_PAGEFLAGS -#error "Not enough bits in page flags" -#endif - -/* - * We are going to use the flags for the page to node mapping if its in - * there. This includes the case where there is no node, so it is implicit. - * Note that this #define MUST have a value so that it can be tested with - * the IS_ENABLED() macro. - */ -#if !(NODES_WIDTH > 0 || NODES_SHIFT == 0) -#define NODE_NOT_IN_PAGE_FLAGS 1 +#if LAST_CPUPID_SHIFT != 0 && LAST_CPUPID_WIDTH == 0 +#define LAST_CPUPID_NOT_IN_PAGE_FLAGS #endif -#if defined(CONFIG_NUMA_BALANCING) && LAST_CPUPID_WIDTH == 0 -#define LAST_CPUPID_NOT_IN_PAGE_FLAGS +#if ZONES_WIDTH + LRU_GEN_WIDTH + LRU_REFS_WIDTH + SECTIONS_WIDTH + NODES_WIDTH + \ + KASAN_TAG_WIDTH + LAST_CPUPID_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS +#error "Not enough bits in page flags" #endif #endif diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c88dc07743d4e45f5a4956fada58cec8d810be9d..1d2a9a3103da7f3d8ddac285db7619fcc3ea4b44 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -132,7 +132,7 @@ enum pageflags { #ifdef CONFIG_MEMORY_FAILURE PG_hwpoison, /* hardware poisoned page. Don't touch */ #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) PG_young, PG_idle, #endif @@ -440,7 +440,7 @@ PAGEFLAG_FALSE(HWPoison) #define __PG_HWPOISON 0 #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) TESTPAGEFLAG(Young, young, PF_ANY) SETPAGEFLAG(Young, young, PF_ANY) TESTCLEARFLAG(Young, young, PF_ANY) @@ -758,7 +758,7 @@ PAGE_TYPE_OPS(Guard, guard) extern bool is_free_buddy_page(struct page *page); -__PAGEFLAG(Isolated, isolated, PF_ANY); +PAGEFLAG(Isolated, isolated, PF_ANY); /* * If network-based swap is enabled, sl*b must keep track of whether pages @@ -803,7 +803,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ 1UL << PG_slab | 1UL << PG_active | \ - 1UL << PG_unevictable | __PG_MLOCKED) + 1UL << PG_unevictable | __PG_MLOCKED | LRU_GEN_MASK) /* * Flags checked when a page is prepped for return by the page allocator. @@ -814,7 +814,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) * alloc-free cycle to prevent from reusing the page. */ #define PAGE_FLAGS_CHECK_AT_PREP \ - (((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) + ((((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON) | LRU_GEN_MASK | LRU_REFS_MASK) #define PAGE_FLAGS_PRIVATE \ (1UL << PG_private | 1UL << PG_private_2) diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h index cd45c1927d903df99b1da91fc0b11c1d3b3bfa95..2e64843c1468988a3819f9a094886ec3a81b8a1c 100644 --- a/include/linux/page_ext.h +++ b/include/linux/page_ext.h @@ -20,12 +20,10 @@ enum page_ext_flags { PAGE_EXT_OWNER, PAGE_EXT_OWNER_ALLOCATED, #if defined(CONFIG_PAGE_PINNER) - /* page refcount was increased by GUP or follow_page(FOLL_GET) */ - PAGE_EXT_GET, /* page migration failed */ PAGE_EXT_PINNER_MIGRATION_FAILED, #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) PAGE_EXT_YOUNG, PAGE_EXT_IDLE, #endif diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h index 1e894d34bdceb2a91318bd1ea7796a17deeddf8b..d8a6aecf99cb9f0fc63bc49126b06fe5936d82e4 100644 --- a/include/linux/page_idle.h +++ b/include/linux/page_idle.h @@ -6,7 +6,7 @@ #include #include -#ifdef CONFIG_IDLE_PAGE_TRACKING +#ifdef CONFIG_PAGE_IDLE_FLAG #ifdef CONFIG_64BIT static inline bool page_is_young(struct page *page) @@ -106,7 +106,7 @@ static inline void clear_page_idle(struct page *page) } #endif /* CONFIG_64BIT */ -#else /* !CONFIG_IDLE_PAGE_TRACKING */ +#else /* !CONFIG_PAGE_IDLE_FLAG */ static inline bool page_is_young(struct page *page) { @@ -135,6 +135,6 @@ static inline void clear_page_idle(struct page *page) { } -#endif /* CONFIG_IDLE_PAGE_TRACKING */ +#endif /* CONFIG_PAGE_IDLE_FLAG */ #endif /* _LINUX_MM_PAGE_IDLE_H */ diff --git a/include/linux/page_pinner.h b/include/linux/page_pinner.h index ba14d7636dc001666d5d97b2c9c228297270279f..4d574d1ced59c11ac377d15ee489b7d46eed59d0 100644 --- a/include/linux/page_pinner.h +++ b/include/linux/page_pinner.h @@ -9,34 +9,14 @@ extern struct static_key_false page_pinner_inited; extern struct static_key_true failure_tracking; extern struct page_ext_operations page_pinner_ops; -extern void __reset_page_pinner(struct page *page, unsigned int order, bool free); -extern void __set_page_pinner(struct page *page, unsigned int order); -extern void __dump_page_pinner(struct page *page); -void __page_pinner_migration_failed(struct page *page); -void __page_pinner_mark_migration_failed_pages(struct list_head *page_list); - -static inline void reset_page_pinner(struct page *page, unsigned int order) -{ - if (static_branch_unlikely(&page_pinner_inited)) - __reset_page_pinner(page, order, false); -} +extern void __free_page_pinner(struct page *page, unsigned int order); +void __page_pinner_failure_detect(struct page *page); +void __page_pinner_put_page(struct page *page); static inline void free_page_pinner(struct page *page, unsigned int order) { if (static_branch_unlikely(&page_pinner_inited)) - __reset_page_pinner(page, order, true); -} - -static inline void set_page_pinner(struct page *page, unsigned int order) -{ - if (static_branch_unlikely(&page_pinner_inited)) - __set_page_pinner(page, order); -} - -static inline void dump_page_pinner(struct page *page) -{ - if (static_branch_unlikely(&page_pinner_inited)) - __dump_page_pinner(page); + __free_page_pinner(page, order); } static inline void page_pinner_put_page(struct page *page) @@ -44,7 +24,7 @@ static inline void page_pinner_put_page(struct page *page) if (!static_branch_unlikely(&failure_tracking)) return; - __page_pinner_migration_failed(page); + __page_pinner_put_page(page); } static inline void page_pinner_failure_detect(struct page *page) @@ -52,37 +32,17 @@ static inline void page_pinner_failure_detect(struct page *page) if (!static_branch_unlikely(&failure_tracking)) return; - __page_pinner_migration_failed(page); -} - -static inline void page_pinner_mark_migration_failed_pages(struct list_head *page_list) -{ - if (!static_branch_unlikely(&failure_tracking)) - return; - - __page_pinner_mark_migration_failed_pages(page_list); + __page_pinner_failure_detect(page); } #else -static inline void reset_page_pinner(struct page *page, unsigned int order) -{ -} static inline void free_page_pinner(struct page *page, unsigned int order) { } -static inline void set_page_pinner(struct page *page, unsigned int order) -{ -} -static inline void dump_page_pinner(struct page *page) -{ -} static inline void page_pinner_put_page(struct page *page) { } static inline void page_pinner_failure_detect(struct page *page) { } -static inline void page_pinner_mark_migration_failed_pages(struct list_head *page_list) -{ -} #endif /* CONFIG_PAGE_PINNER */ #endif /* __LINUX_PAGE_PINNER_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 31c8ce4c7088b3f5d8880bf9c36fc8f15f44c80a..605ac0bd908a643288a8e6b409d09fd508d2ef2f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -228,6 +228,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), /* Don't use Relaxed Ordering for TLPs directed at this device */ PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), + /* Device does honor MSI masking despite saying otherwise */ + PCI_DEV_FLAGS_HAS_MSI_MASKING = (__force pci_dev_flags_t) (1 << 12), }; enum pci_irq_reroute_variant { diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 5fda40f97fe916043b8217e8fd8b6a3ccdb0f26f..bf1668fc9c5eec54fe6588dc4fbdfbd97f31da8d 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -13,7 +13,14 @@ struct percpu_rw_semaphore { struct rcu_sync rss; unsigned int __percpu *read_count; struct rcuwait writer; - wait_queue_head_t waiters; + /* + * destroy_list_entry is used during object destruction when waiters + * can't be used, therefore reusing the same space. + */ + union { + wait_queue_head_t waiters; + struct list_head destroy_list_entry; + }; atomic_t block; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -127,8 +134,12 @@ extern void percpu_up_write(struct percpu_rw_semaphore *); extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, const char *, struct lock_class_key *); +/* Can't be called in atomic context. */ extern void percpu_free_rwsem(struct percpu_rw_semaphore *); +/* Invokes percpu_free_rwsem and frees the semaphore from a worker thread. */ +extern void percpu_rwsem_async_destroy(struct percpu_rw_semaphore *sem); + #define percpu_init_rwsem(sem) \ ({ \ static struct lock_class_key rwsem_key; \ diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 505480217cf1a9b4f779b454d42bc78c464e189b..2512e2f9cd4e7964fe08c42052d4313f5cfc6679 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -163,6 +163,12 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn); static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } #endif +#ifdef CONFIG_KVM +void kvm_host_pmu_init(struct arm_pmu *pmu); +#else +#define kvm_host_pmu_init(x) do { } while(0) +#endif + /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); struct arm_pmu *armpmu_alloc_atomic(void); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 072ac6c1ef2b6e67346de3f0f543300bfae09d5d..67a50c78232fe2e351d8700761f36e4d3b4ea927 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -607,7 +607,6 @@ struct swevent_hlist { #define PERF_ATTACH_TASK_DATA 0x08 #define PERF_ATTACH_ITRACE 0x10 #define PERF_ATTACH_SCHED_CB 0x20 -#define PERF_ATTACH_CHILD 0x40 struct perf_cgroup; struct perf_buffer; @@ -679,16 +678,6 @@ struct perf_event { u64 total_time_running; u64 tstamp; - /* - * timestamp shadows the actual context timing but it can - * be safely used in NMI interrupt context. It reflects the - * context time as it was when the event was last scheduled in. - * - * ctx_time already accounts for ctx->timestamp. Therefore to - * compute ctx_time for a sample, simply add perf_clock(). - */ - u64 shadow_ctx_time; - struct perf_event_attr attr; u16 header_size; u16 id_header_size; @@ -833,6 +822,7 @@ struct perf_event_context { */ u64 time; u64 timestamp; + u64 timeoffset; /* * These fields let us detect when two contexts have both @@ -915,6 +905,8 @@ struct bpf_perf_event_data_kern { struct perf_cgroup_info { u64 time; u64 timestamp; + u64 timeoffset; + int active; }; struct perf_cgroup { @@ -1234,7 +1226,18 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); -extern struct perf_guest_info_callbacks *perf_guest_cbs; +extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; +static inline struct perf_guest_info_callbacks *perf_get_guest_cbs(void) +{ + /* + * Callbacks are RCU-protected and must be READ_ONCE to avoid reloading + * the callbacks between a !NULL check and dereferences, to ensure + * pending stores/changes to the callback pointers are visible before a + * non-NULL perf_guest_cbs is visible to readers, and to prevent a + * module from unloading callbacks while readers are active. + */ + return rcu_dereference(perf_guest_cbs); +} extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 039ce2de83336c0079b021ca78f3be2504d90ad1..ab7287e59c070629347e29fff71bc5f0a4534124 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -44,6 +44,7 @@ static inline unsigned long pte_index(unsigned long address) { return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); } +#define pte_index pte_index #ifndef pmd_index static inline unsigned long pmd_index(unsigned long address) @@ -193,7 +194,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) @@ -214,7 +215,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, BUILD_BUG(); return 0; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG */ #endif #ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH @@ -240,6 +241,19 @@ static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef arch_has_hw_pte_young +/* + * Return whether the accessed bit is supported on the local CPU. + * + * This stub assumes accessing through an old PTE triggers a page fault. + * Architectures that automatically set the access bit should overwrite it. + */ +static inline bool arch_has_hw_pte_young(void) +{ + return false; +} +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h index 9837fb011f2fb0ce9ffc43cbfcac8392f7f22f7f..989aa30c598dc80a304e944d45a8e730e01c297e 100644 --- a/include/linux/platform_data/ti-sysc.h +++ b/include/linux/platform_data/ti-sysc.h @@ -50,6 +50,7 @@ struct sysc_regbits { s8 emufree_shift; }; +#define SYSC_QUIRK_REINIT_ON_CTX_LOST BIT(28) #define SYSC_QUIRK_REINIT_ON_RESUME BIT(27) #define SYSC_QUIRK_GPMC_DEBUG BIT(26) #define SYSC_MODULE_QUIRK_ENA_RESETDONE BIT(25) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 6c08a085367bf790fe0eecf500c6b3b5f7ca8c9e..30091ab5de287c25b6b3d6653fa2e68c39a91896 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -58,6 +58,7 @@ extern void pm_runtime_get_suppliers(struct device *dev); extern void pm_runtime_put_suppliers(struct device *dev); extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); +extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle); /** * pm_runtime_get_if_in_use - Conditionally bump up runtime PM usage counter. @@ -127,7 +128,7 @@ static inline bool pm_runtime_suspended(struct device *dev) * pm_runtime_active - Check whether or not a device is runtime-active. * @dev: Target device. * - * Return %true if runtime PM is enabled for @dev and its runtime PM status is + * Return %true if runtime PM is disabled for @dev or its runtime PM status is * %RPM_ACTIVE, or %false otherwise. * * Note that the return value of this function can only be trusted if it is @@ -279,6 +280,8 @@ static inline void pm_runtime_get_suppliers(struct device *dev) {} static inline void pm_runtime_put_suppliers(struct device *dev) {} static inline void pm_runtime_new_link(struct device *dev) {} static inline void pm_runtime_drop_link(struct device_link *link) {} +static inline void pm_runtime_release_supplier(struct device_link *link, + bool check_idle) {} #endif /* !CONFIG_PM */ diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 896c16d2c5fb2aa74aaca5a2738c7899be1905ea..913aa60228b16c68e56edf25a89b7adb6fe49974 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -177,8 +177,10 @@ static inline void posix_cputimers_group_init(struct posix_cputimers *pct, #endif #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK +void clear_posix_cputimers_work(struct task_struct *p); void posix_cputimers_init_work(void); #else +static inline void clear_posix_cputimers_work(struct task_struct *p) { } static inline void posix_cputimers_init_work(void) { } #endif diff --git a/include/linux/psi.h b/include/linux/psi.h index 7361023f3fdd50b52083797aab2e5b33461154bc..db4ecfaab879265168229a2f732e08adfcf7a3fd 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -33,7 +33,7 @@ void cgroup_move_task(struct task_struct *p, struct css_set *to); struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, size_t nbytes, enum psi_res res); -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *t); +void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 4b73f0f19ef5a2085cf5babc81168d537b514bff..541b45881a5a6be3326945b00c6815e7f26155e5 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -128,9 +128,6 @@ struct psi_trigger { * events to one per window */ u64 last_event_time; - - /* Refcounting to prevent premature destruction */ - struct kref refcount; }; struct psi_group { diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 9cc3e79ba115ea5cedf963ebd336da2779344d4c..7cbb0fbf7d8f376fda3f1e61f1dc6cbc66ecb497 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -187,7 +187,7 @@ static inline struct rpmsg_endpoint *rpmsg_create_ept(struct rpmsg_device *rpdev /* This shouldn't be possible */ WARN_ON(1); - return ERR_PTR(-ENXIO); + return NULL; } static inline int rpmsg_send(struct rpmsg_endpoint *ept, void *data, int len) diff --git a/include/linux/sched.h b/include/linux/sched.h index d7c16fa9c858ecfb80918097eb8812877ef1ee3b..df2259962ba1e2a8784dd6ddcd06c41f9a53b2ce 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -835,6 +835,10 @@ struct task_struct { #ifdef CONFIG_MEMCG unsigned in_user_fault:1; #endif +#ifdef CONFIG_LRU_GEN + /* whether the LRU algorithm may apply to this access */ + unsigned in_lru_fault:1; +#endif #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif @@ -1576,7 +1580,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ @@ -1621,7 +1624,7 @@ extern struct pid *cad_pid; #define tsk_used_math(p) ((p)->flags & PF_USED_MATH) #define used_math() tsk_used_math(current) -static inline bool is_percpu_thread(void) +static __always_inline bool is_percpu_thread(void) { #ifdef CONFIG_SMP return (current->flags & PF_NO_SETAFFINITY) && diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 85fb2f34c59b771e6cbc321c82932b3667ff46a1..fa75f325dad5328784acfc902b3b320754c15a87 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -55,7 +55,8 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_post_fork(struct task_struct *p); +extern void sched_post_fork(struct task_struct *p, + struct kernel_clone_args *kargs); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); @@ -157,7 +158,7 @@ static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t) * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also * pins the final release of task.io_context. Also protects ->cpuset and - * ->cgroup.subsys[]. And ->vfork_done. + * ->cgroup.subsys[]. And ->vfork_done. And ->sysvshm.shm_clist. * * Nests both inside and outside of read_lock(&tasklist_lock). * It must not be nested with write_lock_irq(&tasklist_lock), diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h index 2413427e439c7a4cde1eb8d786837c94f6cefe4b..d10150587d8192f28250dacaa6c08195c8a0333f 100644 --- a/include/linux/sched/task_stack.h +++ b/include/linux/sched/task_stack.h @@ -25,7 +25,11 @@ static inline void *task_stack_page(const struct task_struct *task) static inline unsigned long *end_of_stack(const struct task_struct *task) { +#ifdef CONFIG_STACK_GROWSUP + return (unsigned long *)((unsigned long)task->stack + THREAD_SIZE) - 1; +#else return task->stack; +#endif } #elif !defined(__HAVE_THREAD_FUNCTIONS) diff --git a/include/linux/security.h b/include/linux/security.h index 48c6ed9ba086795395f8f7e17f2ed8bb87ca0f61..a87cbacab0786466b83cff42721178f7677b9c7f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -252,13 +252,13 @@ extern int security_init(void); extern int early_security_init(void); /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr); -int security_binder_transaction(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file); +int security_binder_set_context_mgr(const struct cred *mgr); +int security_binder_transaction(const struct cred *from, + const struct cred *to); +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to); +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, @@ -494,25 +494,25 @@ static inline int early_security_init(void) return 0; } -static inline int security_binder_set_context_mgr(struct task_struct *mgr) +static inline int security_binder_set_context_mgr(const struct cred *mgr) { return 0; } -static inline int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transaction(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static inline int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return 0; @@ -1011,6 +1011,11 @@ static inline void security_transfer_creds(struct cred *new, { } +static inline void security_cred_getsecid(const struct cred *c, u32 *secid) +{ + *secid = 0; +} + static inline int security_kernel_act_as(struct cred *cred, u32 secid) { return 0; diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index b83b3ae3c877f3b4d6e7989681db89ab645e586c..662a8cfa1bcd3e58b777e1428c89618ff0c49ede 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -182,7 +182,7 @@ static const struct file_operations __name ## _fops = { \ #define DEFINE_PROC_SHOW_ATTRIBUTE(__name) \ static int __name ## _open(struct inode *inode, struct file *file) \ { \ - return single_open(file, __name ## _show, inode->i_private); \ + return single_open(file, __name ## _show, PDE_DATA(inode)); \ } \ \ static const struct proc_ops __name ## _proc_ops = { \ diff --git a/include/linux/siphash.h b/include/linux/siphash.h index bf21591a9e5e653585c26cb3f3f0857256c0eb89..0cda61855d90719e6175d0325598bbbe1b79254b 100644 --- a/include/linux/siphash.h +++ b/include/linux/siphash.h @@ -27,9 +27,7 @@ static inline bool siphash_key_is_zero(const siphash_key_t *key) } u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key); -#endif u64 siphash_1u64(const u64 a, const siphash_key_t *key); u64 siphash_2u64(const u64 a, const u64 b, const siphash_key_t *key); @@ -82,10 +80,9 @@ static inline u64 ___siphash_aligned(const __le64 *data, size_t len, static inline u64 siphash(const void *data, size_t len, const siphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, SIPHASH_ALIGNMENT)) return __siphash_unaligned(data, len, key); -#endif return ___siphash_aligned(data, len, key); } @@ -96,10 +93,8 @@ typedef struct { u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key); -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key); -#endif u32 hsiphash_1u32(const u32 a, const hsiphash_key_t *key); u32 hsiphash_2u32(const u32 a, const u32 b, const hsiphash_key_t *key); @@ -135,10 +130,9 @@ static inline u32 ___hsiphash_aligned(const __le32 *data, size_t len, static inline u32 hsiphash(const void *data, size_t len, const hsiphash_key_t *key) { -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (!IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) + if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + !IS_ALIGNED((unsigned long)data, HSIPHASH_ALIGNMENT)) return __hsiphash_unaligned(data, len, key); -#endif return ___hsiphash_aligned(data, len, key); } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f84c1505bdb8a0a2fac2af3194ee4a24e9e6a1a6..3a02503b3637a862576c6f8c3626e30ca13a5089 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1646,6 +1646,22 @@ static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) return 0; } +/* This variant of skb_unclone() makes sure skb->truesize is not changed */ +static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) +{ + might_sleep_if(gfpflags_allow_blocking(pri)); + + if (skb_cloned(skb)) { + unsigned int save = skb->truesize; + int res; + + res = pskb_expand_head(skb, 0, 0, pri); + skb->truesize = save; + return res; + } + return 0; +} + /** * skb_header_cloned - is the header a clone * @skb: buffer to check diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 6bb4bc1a5f5459fc06b90548558a4745d0973b05..b2f7e7c6ba54d57c4953da0f03a2dc7d8c2c852f 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -11,8 +11,14 @@ #ifndef _LINUX_STACKDEPOT_H #define _LINUX_STACKDEPOT_H +#include + typedef u32 depot_stack_handle_t; +depot_stack_handle_t __stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + gfp_t gfp_flags, bool can_alloc); + depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int nr_entries, gfp_t gfp_flags); diff --git a/include/linux/suspend.h b/include/linux/suspend.h index fa2ea36a7bd7e5683b1775e271550a2d92b4a06b..27321d5d01dfa560af8a4a0e1bde8628f84b1846 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -437,15 +437,7 @@ struct platform_hibernation_ops { #ifdef CONFIG_HIBERNATION /* kernel/power/snapshot.c */ -extern void __register_nosave_region(unsigned long b, unsigned long e, int km); -static inline void __init register_nosave_region(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 0); -} -static inline void __init register_nosave_region_late(unsigned long b, unsigned long e) -{ - __register_nosave_region(b, e, 1); -} +extern void register_nosave_region(unsigned long b, unsigned long e); extern int swsusp_page_is_forbidden(struct page *); extern void swsusp_set_page_free(struct page *); extern void swsusp_unset_page_free(struct page *); @@ -464,7 +456,6 @@ int pfn_is_nosave(unsigned long pfn); int hibernate_quiet_exec(int (*func)(void *data), void *data); #else /* CONFIG_HIBERNATION */ static inline void register_nosave_region(unsigned long b, unsigned long e) {} -static inline void register_nosave_region_late(unsigned long b, unsigned long e) {} static inline int swsusp_page_is_forbidden(struct page *p) { return 0; } static inline void swsusp_set_page_free(struct page *p) {} static inline void swsusp_unset_page_free(struct page *p) {} @@ -512,14 +503,14 @@ extern void ksys_sync_helper(void); /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; -extern unsigned int pm_wakeup_irq; extern suspend_state_t pm_suspend_target_state; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); extern void pm_system_cancel_wakeup(void); -extern void pm_wakeup_clear(bool reset); +extern void pm_wakeup_clear(unsigned int irq_number); extern void pm_system_irq_wakeup(unsigned int irq_number); +extern unsigned int pm_wakeup_irq(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); diff --git a/include/linux/swap.h b/include/linux/swap.h index beda0a50d0b91961acf510d12cab230dfd0cbce6..dfdf026dc54d11e2fea39b32efcdc95f90c168e7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -130,6 +130,10 @@ union swap_header { */ struct reclaim_state { unsigned long reclaimed_slab; +#ifdef CONFIG_LRU_GEN + /* per-thread mm walk data */ + struct lru_gen_mm_walk *mm_walk; +#endif }; #ifdef __KERNEL__ @@ -281,6 +285,7 @@ struct swap_info_struct { */ struct work_struct discard_work; /* discard worker */ struct swap_cluster_list discard_clusters; /* discard clusters list */ + ANDROID_VENDOR_DATA(1); struct plist_node avail_lists[]; /* * entries in swap_avail_heads, one * entry per node. @@ -350,6 +355,7 @@ extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_cpu_zone(struct zone *zone); extern void lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); +extern void activate_page(struct page *page); extern void deactivate_file_page(struct page *page); extern void deactivate_page(struct page *page); extern void mark_page_lazyfree(struct page *page); diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h index 347f1a30419059a577157e3a2fd68979db294dfd..c4dc91ebbaa8771a997b15b93cc42f5f68bb680b 100644 --- a/include/linux/swap_slots.h +++ b/include/linux/swap_slots.h @@ -19,6 +19,7 @@ struct swap_slots_cache { spinlock_t free_lock; /* protects slots_ret, n_ret */ swp_entry_t *slots_ret; int n_ret; + ANDROID_VENDOR_DATA(1); }; void disable_swap_slots_cache_lock(void); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index aea0ce9f3b745ad8f8bfeed1b74ad9c1307f1c28..112ff24ea927b3b8b488d5a58bb70f634496d183 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -905,6 +905,7 @@ asmlinkage long sys_mincore(unsigned long start, size_t len, asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior); asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec, size_t vlen, int behavior, unsigned int flags); +asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h index 459e9a76d7e6ff39b37e7bf50353140ad8ef3742..18a9949bba187e7979a4d409e0485ae3d149f7ca 100644 --- a/include/linux/tee_drv.h +++ b/include/linux/tee_drv.h @@ -195,7 +195,7 @@ int tee_session_calc_client_uuid(uuid_t *uuid, u32 connection_method, * @offset: offset of buffer in user space * @pages: locked pages from userspace * @num_pages: number of locked pages - * @dmabuf: dmabuf used to for exporting to user space + * @refcount: reference counter * @flags: defined by TEE_SHM_* in tee_drv.h * @id: unique id of a shared memory object on this device * @@ -210,7 +210,7 @@ struct tee_shm { unsigned int offset; struct page **pages; size_t num_pages; - struct dma_buf *dmabuf; + refcount_t refcount; u32 flags; int id; }; @@ -582,4 +582,18 @@ struct tee_client_driver { #define to_tee_client_driver(d) \ container_of(d, struct tee_client_driver, driver) +/** + * teedev_open() - Open a struct tee_device + * @teedev: Device to open + * + * @return a pointer to struct tee_context on success or an ERR_PTR on failure. + */ +struct tee_context *teedev_open(struct tee_device *teedev); + +/** + * teedev_close_context() - closes a struct tee_context + * @ctx: The struct tee_context to close + */ +void teedev_close_context(struct tee_context *ctx); + #endif /*__TEE_DRV_H*/ diff --git a/include/linux/topology.h b/include/linux/topology.h index ad03df1cc2667d090361ca21aa12d827314884f1..7634cd737061c85c567e7254ca437f0787750b60 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -48,6 +48,7 @@ int arch_update_cpu_topology(void); /* Conform to ACPI 2.0 SLIT distance definitions */ #define LOCAL_DISTANCE 10 #define REMOTE_DISTANCE 20 +#define DISTANCE_BITS 8 #ifndef node_distance #define node_distance(from,to) ((from) == (to) ? LOCAL_DISTANCE : REMOTE_DISTANCE) #endif diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 804a3f69bbd9322d5fd5a0e7b68371f2bf6bd59c..95c3069823f9be2327586a792cfac729bc63415a 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -262,6 +262,7 @@ enum tpm2_cc_attrs { #define TPM_VID_INTEL 0x8086 #define TPM_VID_WINBOND 0x1050 #define TPM_VID_STM 0x104A +#define TPM_VID_ATML 0x1114 enum tpm_chip_flags { TPM_CHIP_FLAG_TPM2 = BIT(1), diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index d321fe5ad1a1437543b40c4a46a83b4bf07205d9..c57b79301a75e338ea612129bcb021578638c192 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -571,7 +571,7 @@ struct trace_event_file { #define PERF_MAX_TRACE_SIZE 2048 -#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ +#define MAX_FILTER_STR_VAL 256U /* Should handle KSYM_SYMBOL_LEN */ enum event_trigger_type { ETT_NONE = (0), diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index d14b39d1418e454186fbb07b45d382af3a37f9aa..2ffafbdcda070ea493549c01f549be40da495f08 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -125,7 +125,6 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ -#define HCD_FLAG_DEFER_RH_REGISTER 8 /* Defer roothub registration */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -136,7 +135,6 @@ struct usb_hcd { #define HCD_WAKEUP_PENDING(hcd) ((hcd)->flags & (1U << HCD_FLAG_WAKEUP_PENDING)) #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) -#define HCD_DEFER_RH_REGISTER(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEFER_RH_REGISTER)) /* * Specifies if interfaces are authorized by default diff --git a/include/linux/usb/role.h b/include/linux/usb/role.h index 0164fed31b06cc8f5f24c8dab3651961c89528f9..b9ccaeb8a4aef14e08e092a4da405c4f22764c67 100644 --- a/include/linux/usb/role.h +++ b/include/linux/usb/role.h @@ -90,6 +90,12 @@ fwnode_usb_role_switch_get(struct fwnode_handle *node) static inline void usb_role_switch_put(struct usb_role_switch *sw) { } +static inline struct usb_role_switch * +usb_role_switch_find_by_fwnode(const struct fwnode_handle *fwnode) +{ + return NULL; +} + static inline struct usb_role_switch * usb_role_switch_register(struct device *parent, const struct usb_role_switch_desc *desc) diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 8ecc2e208d6137715fb1455f4b5af1e79f9d9f7e..90c5ad556809721a37fe6512bc7265fb9e251e18 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -135,7 +135,6 @@ void virtio_break_device(struct virtio_device *dev); void virtio_config_changed(struct virtio_device *dev); void virtio_config_disable(struct virtio_device *dev); void virtio_config_enable(struct virtio_device *dev); -int virtio_finalize_features(struct virtio_device *dev); #ifdef CONFIG_PM_SLEEP int virtio_device_freeze(struct virtio_device *dev); int virtio_device_restore(struct virtio_device *dev); diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8519b3ae5d52ef4d78763109ab7b419721289880..dafdc7f48c01b08a84574543af5ff9613b6ab57c 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -23,6 +23,8 @@ struct virtio_shm_region { * any of @get/@set, @get_status/@set_status, or @get_features/ * @finalize_features are NOT safe to be called from an atomic * context. + * @enable_cbs: enable the callbacks + * vdev: the virtio_device * @get: read the value of a configuration field * vdev: the virtio_device * offset: the offset of the configuration field @@ -62,8 +64,9 @@ struct virtio_shm_region { * Returns the first 64 feature bits (all we currently need). * @finalize_features: confirm what device features we'll be using. * vdev: the virtio_device - * This gives the final feature bits for the device: it can change + * This sends the driver feature bits to the device: it can change * the dev->feature bits if it wants. + * Note: despite the name this can be called any number of times. * Returns 0 on success or error status * @bus_name: return the bus name associated with the device (optional) * vdev: the virtio_device @@ -75,6 +78,7 @@ struct virtio_shm_region { */ typedef void vq_callback_t(struct virtqueue *); struct virtio_config_ops { + void (*enable_cbs)(struct virtio_device *vdev); void (*get)(struct virtio_device *vdev, unsigned offset, void *buf, unsigned len); void (*set)(struct virtio_device *vdev, unsigned offset, @@ -229,6 +233,9 @@ void virtio_device_ready(struct virtio_device *dev) { unsigned status = dev->config->get_status(dev); + if (dev->config->enable_cbs) + dev->config->enable_cbs(dev); + BUG_ON(status & VIRTIO_CONFIG_S_DRIVER_OK); dev->config->set_status(dev, status | VIRTIO_CONFIG_S_DRIVER_OK); } diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index b465f8f3e554f27ced45c35f54f113cf6dce1f07..a960de68ac69ee12939a95729e36513312e41a68 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -7,9 +7,27 @@ #include #include +static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type) +{ + switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + return protocol == cpu_to_be16(ETH_P_IP); + case VIRTIO_NET_HDR_GSO_TCPV6: + return protocol == cpu_to_be16(ETH_P_IPV6); + case VIRTIO_NET_HDR_GSO_UDP: + return protocol == cpu_to_be16(ETH_P_IP) || + protocol == cpu_to_be16(ETH_P_IPV6); + default: + return false; + } +} + static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { + if (skb->protocol) + return 0; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: @@ -88,9 +106,12 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (!skb->protocol) { __be16 protocol = dev_parse_header_protocol(skb); - virtio_net_hdr_set_proto(skb, hdr); - if (protocol && protocol != skb->protocol) + if (!protocol) + virtio_net_hdr_set_proto(skb, hdr); + else if (!virtio_net_hdr_match_proto(protocol, hdr->gso_type)) return -EINVAL; + else + skb->protocol = protocol; } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, @@ -120,10 +141,15 @@ retry: if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { u16 gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); + unsigned int nh_off = p_off; struct skb_shared_info *shinfo = skb_shinfo(skb); + /* UFO may not include transport header in gso_size. */ + if (gso_type & SKB_GSO_UDP) + nh_off -= thlen; + /* Too small packets are not really GSO ones. */ - if (skb->len - p_off > gso_size) { + if (skb->len - nh_off > gso_size) { shinfo->gso_size = gso_size; shinfo->gso_type = gso_type; diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0549ca17ba6f7eef40367b94c32b7c8d35945237..d074a14e753c38694edc766ab79b21c70d3c7222 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -28,16 +28,12 @@ struct notifier_block; /* in notifier.h */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ -/* - * VM_KASAN is used slighly differently depending on CONFIG_KASAN_VMALLOC. - * - * If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after - * shadow memory has been mapped. It's used to handle allocation errors so that - * we don't try to poision shadow on free if it was never allocated. - * - * Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to - * determine which allocations need the module shadow freed. - */ +#if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ + !defined(CONFIG_KASAN_VMALLOC) +#define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */ +#else +#define VM_DEFER_KMEMLEAK 0 +#endif /* bits [20..32] reserved for arch specific ioremap internals */ @@ -82,6 +78,13 @@ struct vmap_area { }; }; +#ifndef arch_vmap_pgprot_tagged +static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) +{ + return prot; +} +#endif + /* * Highlevel APIs for driver use */ diff --git a/include/linux/wait.h b/include/linux/wait.h index f8b0704968a1eda2f90618efa5d53f4a00b60c01..5dfae54f88fde27e1b45d88bf317af3843f34c5b 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -207,6 +207,7 @@ void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr); void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); +void __wake_up_pollfree(struct wait_queue_head *wq_head); #define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) #define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) @@ -218,6 +219,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); #define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) #define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) #define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE) +#define wake_up_sync(x) __wake_up_sync((x), TASK_NORMAL) /* * Wakeup macros to be used to report events to the targets. @@ -235,6 +237,31 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode); #define wake_up_interruptible_sync_poll_locked(x, m) \ __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m)) +/** + * wake_up_pollfree - signal that a polled waitqueue is going away + * @wq_head: the wait queue head + * + * In the very rare cases where a ->poll() implementation uses a waitqueue whose + * lifetime is tied to a task rather than to the 'struct file' being polled, + * this function must be called before the waitqueue is freed so that + * non-blocking polls (e.g. epoll) are notified that the queue is going away. + * + * The caller must also RCU-delay the freeing of the wait_queue_head, e.g. via + * an explicit synchronize_rcu() or call_rcu(), or via SLAB_TYPESAFE_BY_RCU. + */ +static inline void wake_up_pollfree(struct wait_queue_head *wq_head) +{ + /* + * For performance reasons, we don't always take the queue lock here. + * Therefore, we might race with someone removing the last entry from + * the queue, and proceed while they still hold the queue lock. + * However, rcu_read_lock() is required to be held in such cases, so we + * can safely proceed with an RCU-delayed free. + */ + if (waitqueue_active(wq_head)) + __wake_up_pollfree(wq_head); +} + #define ___wait_cond_timeout(condition) \ ({ \ bool __cond = (condition); \ diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h index c994d1b2cdbaa2abb313170749d0ee396a807a90..3b9a40ae8bdba76dec83554989fa67f98cb59aba 100644 --- a/include/linux/watch_queue.h +++ b/include/linux/watch_queue.h @@ -28,7 +28,8 @@ struct watch_type_filter { struct watch_filter { union { struct rcu_head rcu; - unsigned long type_filter[2]; /* Bitmask of accepted types */ + /* Bitmask of accepted types */ + DECLARE_BITMAP(type_filter, WATCH_TYPE__NR); }; u32 nr_filters; /* Number of filters */ struct watch_type_filter filters[]; diff --git a/include/linux/xz.h b/include/linux/xz.h index 9884c8440188e5fa83c741a4fcd01024c0217f1a..7285ca5d56e9de8d48bdfad1aa29cb1f1d8e0f61 100644 --- a/include/linux/xz.h +++ b/include/linux/xz.h @@ -233,6 +233,112 @@ XZ_EXTERN void xz_dec_reset(struct xz_dec *s); */ XZ_EXTERN void xz_dec_end(struct xz_dec *s); +/* + * Decompressor for MicroLZMA, an LZMA variant with a very minimal header. + * See xz_dec_microlzma_alloc() below for details. + * + * These functions aren't used or available in preboot code and thus aren't + * marked with XZ_EXTERN. This avoids warnings about static functions that + * are never defined. + */ +/** + * struct xz_dec_microlzma - Opaque type to hold the MicroLZMA decoder state + */ +struct xz_dec_microlzma; + +/** + * xz_dec_microlzma_alloc() - Allocate memory for the MicroLZMA decoder + * @mode XZ_SINGLE or XZ_PREALLOC + * @dict_size LZMA dictionary size. This must be at least 4 KiB and + * at most 3 GiB. + * + * In contrast to xz_dec_init(), this function only allocates the memory + * and remembers the dictionary size. xz_dec_microlzma_reset() must be used + * before calling xz_dec_microlzma_run(). + * + * The amount of allocated memory is a little less than 30 KiB with XZ_SINGLE. + * With XZ_PREALLOC also a dictionary buffer of dict_size bytes is allocated. + * + * On success, xz_dec_microlzma_alloc() returns a pointer to + * struct xz_dec_microlzma. If memory allocation fails or + * dict_size is invalid, NULL is returned. + * + * The compressed format supported by this decoder is a raw LZMA stream + * whose first byte (always 0x00) has been replaced with bitwise-negation + * of the LZMA properties (lc/lp/pb) byte. For example, if lc/lp/pb is + * 3/0/2, the first byte is 0xA2. This way the first byte can never be 0x00. + * Just like with LZMA2, lc + lp <= 4 must be true. The LZMA end-of-stream + * marker must not be used. The unused values are reserved for future use. + * This MicroLZMA header format was created for use in EROFS but may be used + * by others too. + */ +extern struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, + uint32_t dict_size); + +/** + * xz_dec_microlzma_reset() - Reset the MicroLZMA decoder state + * @s Decoder state allocated using xz_dec_microlzma_alloc() + * @comp_size Compressed size of the input stream + * @uncomp_size Uncompressed size of the input stream. A value smaller + * than the real uncompressed size of the input stream can + * be specified if uncomp_size_is_exact is set to false. + * uncomp_size can never be set to a value larger than the + * expected real uncompressed size because it would eventually + * result in XZ_DATA_ERROR. + * @uncomp_size_is_exact This is an int instead of bool to avoid + * requiring stdbool.h. This should normally be set to true. + * When this is set to false, error detection is weaker. + */ +extern void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, + uint32_t comp_size, uint32_t uncomp_size, + int uncomp_size_is_exact); + +/** + * xz_dec_microlzma_run() - Run the MicroLZMA decoder + * @s Decoder state initialized using xz_dec_microlzma_reset() + * @b: Input and output buffers + * + * This works similarly to xz_dec_run() with a few important differences. + * Only the differences are documented here. + * + * The only possible return values are XZ_OK, XZ_STREAM_END, and + * XZ_DATA_ERROR. This function cannot return XZ_BUF_ERROR: if no progress + * is possible due to lack of input data or output space, this function will + * keep returning XZ_OK. Thus, the calling code must be written so that it + * will eventually provide input and output space matching (or exceeding) + * comp_size and uncomp_size arguments given to xz_dec_microlzma_reset(). + * If the caller cannot do this (for example, if the input file is truncated + * or otherwise corrupt), the caller must detect this error by itself to + * avoid an infinite loop. + * + * If the compressed data seems to be corrupt, XZ_DATA_ERROR is returned. + * This can happen also when incorrect dictionary, uncompressed, or + * compressed sizes have been specified. + * + * With XZ_PREALLOC only: As an extra feature, b->out may be NULL to skip over + * uncompressed data. This way the caller doesn't need to provide a temporary + * output buffer for the bytes that will be ignored. + * + * With XZ_SINGLE only: In contrast to xz_dec_run(), the return value XZ_OK + * is also possible and thus XZ_SINGLE is actually a limited multi-call mode. + * After XZ_OK the bytes decoded so far may be read from the output buffer. + * It is possible to continue decoding but the variables b->out and b->out_pos + * MUST NOT be changed by the caller. Increasing the value of b->out_size is + * allowed to make more output space available; one doesn't need to provide + * space for the whole uncompressed data on the first call. The input buffer + * may be changed normally like with XZ_PREALLOC. This way input data can be + * provided from non-contiguous memory. + */ +extern enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s, + struct xz_buf *b); + +/** + * xz_dec_microlzma_end() - Free the memory allocated for the decoder state + * @s: Decoder state allocated using xz_dec_microlzma_alloc(). + * If s is NULL, this function does nothing. + */ +extern void xz_dec_microlzma_end(struct xz_dec_microlzma *s); + /* * Standalone build (userspace build or in-kernel build for boot time use) * needs a CRC32 implementation. For normal in-kernel use, kernel's own diff --git a/include/memory/renesas-rpc-if.h b/include/memory/renesas-rpc-if.h index 9ad136682c476b68246ba394a66b4875962d1bc6..aceb2c360d3fa1d1a317cd280291fe2505bb9205 100644 --- a/include/memory/renesas-rpc-if.h +++ b/include/memory/renesas-rpc-if.h @@ -58,6 +58,7 @@ struct rpcif_op { struct rpcif { struct device *dev; + void __iomem *base; void __iomem *dirmap; struct regmap *regmap; struct reset_control *rstc; diff --git a/include/net/addrconf.h b/include/net/addrconf.h index d8873a9d49fbd178f382ee1c5b21494fc0fb8053..dd2e03d39f386173e588795ef3562d765657e4e8 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -6,6 +6,8 @@ #define RTR_SOLICITATION_INTERVAL (4*HZ) #define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ +#define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ + #define TEMP_VALID_LIFETIME (7*86400) #define TEMP_PREFERRED_LIFETIME (86400) #define REGEN_MAX_RETRY (3) diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index c8696a230b7d922479b6e2d8368f65050d4e2cc8..1a28f299a4c6138ac3e932aa64a90d12b35a37ff 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -262,7 +262,7 @@ struct ad_system { struct ad_bond_info { struct ad_system system; /* 802.3ad system structure */ struct bond_3ad_stats stats; - u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ + atomic_t agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ u16 aggregator_identifier; }; diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index f6af76c87a6c3856e92ac438c6c93c4c6ab9eec2..191c36afa1f4aa543f9b3b57d35630f0e71c2584 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -126,7 +126,7 @@ struct tlb_slave_info { struct alb_bond_info { struct tlb_client_info *tx_hashtbl; /* Dynamically allocated */ u32 unbalanced_load; - int tx_rebalance_counter; + atomic_t tx_rebalance_counter; int lp_counter; /* -------- rlb parameters -------- */ int rlb_enabled; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 81d5b2486c047fd5c43cdb6963a6dc71a0dda237..bb86217d853e88ed33df49acde1b4a333f8bdcab 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5259,7 +5259,6 @@ struct cfg80211_cqm_config; * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface * @mgmt_registrations: list of registrations for management frames - * @mgmt_registrations_lock: lock for the list * @mgmt_registrations_need_update: mgmt registrations were updated, * need to propagate the update to the driver * @mtx: mutex used to lock data in this struct, may be used by drivers @@ -5306,7 +5305,6 @@ struct wireless_dev { u32 identifier; struct list_head mgmt_registrations; - spinlock_t mgmt_registrations_lock; u8 mgmt_registrations_need_update:1; struct mutex mtx; diff --git a/include/net/checksum.h b/include/net/checksum.h index 0d05b9e8690b8bdf7c526f9625484d2bb54c8106..8b7d0c31598f5158fdae9a513c3cf8f01ffa9d99 100644 --- a/include/net/checksum.h +++ b/include/net/checksum.h @@ -22,7 +22,7 @@ #include #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER -static inline +static __always_inline __wsum csum_and_copy_from_user (const void __user *src, void *dst, int len) { @@ -33,7 +33,7 @@ __wsum csum_and_copy_from_user (const void __user *src, void *dst, #endif #ifndef HAVE_CSUM_COPY_USER -static __inline__ __wsum csum_and_copy_to_user +static __always_inline __wsum csum_and_copy_to_user (const void *src, void __user *dst, int len) { __wsum sum = csum_partial(src, len, ~0U); @@ -45,7 +45,7 @@ static __inline__ __wsum csum_and_copy_to_user #endif #ifndef _HAVE_ARCH_CSUM_AND_COPY -static inline __wsum +static __always_inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len) { memcpy(dst, src, len); @@ -54,7 +54,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len) #endif #ifndef HAVE_ARCH_CSUM_ADD -static inline __wsum csum_add(__wsum csum, __wsum addend) +static __always_inline __wsum csum_add(__wsum csum, __wsum addend) { u32 res = (__force u32)csum; res += (__force u32)addend; @@ -62,12 +62,12 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) } #endif -static inline __wsum csum_sub(__wsum csum, __wsum addend) +static __always_inline __wsum csum_sub(__wsum csum, __wsum addend) { return csum_add(csum, ~addend); } -static inline __sum16 csum16_add(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_add(__sum16 csum, __be16 addend) { u16 res = (__force u16)csum; @@ -75,12 +75,12 @@ static inline __sum16 csum16_add(__sum16 csum, __be16 addend) return (__force __sum16)(res + (res < (__force u16)addend)); } -static inline __sum16 csum16_sub(__sum16 csum, __be16 addend) +static __always_inline __sum16 csum16_sub(__sum16 csum, __be16 addend) { return csum16_add(csum, ~addend); } -static inline __wsum +static __always_inline __wsum csum_block_add(__wsum csum, __wsum csum2, int offset) { u32 sum = (__force u32)csum2; @@ -92,36 +92,37 @@ csum_block_add(__wsum csum, __wsum csum2, int offset) return csum_add(csum, (__force __wsum)sum); } -static inline __wsum +static __always_inline __wsum csum_block_add_ext(__wsum csum, __wsum csum2, int offset, int len) { return csum_block_add(csum, csum2, offset); } -static inline __wsum +static __always_inline __wsum csum_block_sub(__wsum csum, __wsum csum2, int offset) { return csum_block_add(csum, ~csum2, offset); } -static inline __wsum csum_unfold(__sum16 n) +static __always_inline __wsum csum_unfold(__sum16 n) { return (__force __wsum)n; } -static inline __wsum csum_partial_ext(const void *buff, int len, __wsum sum) +static __always_inline +__wsum csum_partial_ext(const void *buff, int len, __wsum sum) { return csum_partial(buff, len, sum); } #define CSUM_MANGLED_0 ((__force __sum16)0xffff) -static inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) +static __always_inline void csum_replace_by_diff(__sum16 *sum, __wsum diff) { *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); } -static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) +static __always_inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) { __wsum tmp = csum_sub(~csum_unfold(*sum), (__force __wsum)from); @@ -134,11 +135,16 @@ static inline void csum_replace4(__sum16 *sum, __be32 from, __be32 to) * m : old value of a 16bit field * m' : new value of a 16bit field */ -static inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) +static __always_inline void csum_replace2(__sum16 *sum, __be16 old, __be16 new) { *sum = ~csum16_add(csum16_sub(~(*sum), old), new); } +static inline void csum_replace(__wsum *csum, __wsum old, __wsum new) +{ + *csum = csum_add(csum_sub(*csum, old), new); +} + struct sk_buff; void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb, __be32 from, __be32 to, bool pseudohdr); @@ -148,16 +154,16 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb, void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, __wsum diff, bool pseudohdr); -static inline void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, - __be16 from, __be16 to, - bool pseudohdr) +static __always_inline +void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, + __be16 from, __be16 to, bool pseudohdr) { inet_proto_csum_replace4(sum, skb, (__force __be32)from, (__force __be32)to, pseudohdr); } -static inline __wsum remcsum_adjust(void *ptr, __wsum csum, - int start, int offset) +static __always_inline __wsum remcsum_adjust(void *ptr, __wsum csum, + int start, int offset) { __sum16 *psum = (__sum16 *)(ptr + offset); __wsum delta; @@ -173,7 +179,7 @@ static inline __wsum remcsum_adjust(void *ptr, __wsum csum, return delta; } -static inline void remcsum_unadjust(__sum16 *psum, __wsum delta) +static __always_inline void remcsum_unadjust(__sum16 *psum, __wsum delta) { *psum = csum_fold(csum_sub(delta, (__force __wsum)*psum)); } diff --git a/include/net/dst_cache.h b/include/net/dst_cache.h index 67634675e9197cdbd8225e0e4aa1547d8f09f036..df6622a5fe98f0a9732617bb2a757ef9c9611797 100644 --- a/include/net/dst_cache.h +++ b/include/net/dst_cache.h @@ -79,6 +79,17 @@ static inline void dst_cache_reset(struct dst_cache *dst_cache) dst_cache->reset_ts = jiffies; } +/** + * dst_cache_reset_now - invalidate the cache contents immediately + * @dst_cache: the cache + * + * The caller must be sure there are no concurrent users, as this frees + * all dst_cache users immediately, rather than waiting for the next + * per-cpu usage like dst_cache_reset does. Most callers should use the + * higher speed lazily-freed dst_cache_reset function instead. + */ +void dst_cache_reset_now(struct dst_cache *dst_cache); + /** * dst_cache_init - initialize the cache, allocating the required storage * @dst_cache: the cache diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 14efa0ded75dd93d6f63735a95feaf72de6fdef7..adab27ba1ecbf054f5284e4342b1551b104d9a2c 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -123,8 +123,20 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); +#ifdef CONFIG_DST_CACHE + /* Unclone the dst cache if there is one */ + if (new_md->u.tun_info.dst_cache.cache) { + int ret; + + ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); + if (ret) { + metadata_dst_free(new_md); + return ERR_PTR(ret); + } + } +#endif + skb_dst_drop(skb); - dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); return new_md; } diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 4b10676c69d1917e4c30e086bf8f00b1e0f37ed4..bd07484ab9dd5f9de0321f63393941b521a0b5fa 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -69,7 +69,7 @@ struct fib_rules_ops { int (*action)(struct fib_rule *, struct flowi *, int, struct fib_lookup_arg *); - bool (*suppress)(struct fib_rule *, + bool (*suppress)(struct fib_rule *, int, struct fib_lookup_arg *); int (*match)(struct fib_rule *, struct flowi *, int); @@ -218,7 +218,9 @@ INDIRECT_CALLABLE_DECLARE(int fib4_rule_action(struct fib_rule *rule, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib6_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg)); INDIRECT_CALLABLE_DECLARE(bool fib4_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg)); #endif diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 8177cb94958c4a0f90300436366e587709250aaa..5894b5b44365a61fba538e1ed5326c798b89575b 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -296,7 +296,7 @@ static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) { /* The below has to be done to allow calling inet_csk_destroy_sock */ sock_set_flag(sk, SOCK_DEAD); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(*sk->sk_prot->orphan_count); } void inet_csk_destroy_sock(struct sock *sk); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index bac79e817776c6e65ed37f5b5c6af61a5df59ec0..4cbd413e71a3faac2e694b4713f62b42cf9a75ed 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -116,8 +116,15 @@ int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net); static inline void fqdir_pre_exit(struct fqdir *fqdir) { - fqdir->high_thresh = 0; /* prevent creation of new frags */ - fqdir->dead = true; + /* Prevent creation of new frags. + * Pairs with READ_ONCE() in inet_frag_find(). + */ + WRITE_ONCE(fqdir->high_thresh, 0); + + /* Pairs with READ_ONCE() in inet_frag_kill(), ip_expire() + * and ip6frag_expire_frag_queue(). + */ + WRITE_ONCE(fqdir->dead, true); } void fqdir_exit(struct fqdir *fqdir); diff --git a/include/net/ip.h b/include/net/ip.h index a887442a1a6f9470b43ae9b3b68a0ae6954d25d4..fc163f3fc9b21a5ac787ee3f39200076678f217f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -518,19 +518,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); + /* We had many attacks based on IPID, use the private + * generator as much as we can. + */ + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); + inet_sk(sk)->inet_id += segs; + return; + } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { - /* This is only to work around buggy Windows95/2000 - * VJ compression implementations. If the ID field - * does not change, they drop every other packet in - * a TCP stream using header compression. - */ - if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; - } else { - iph->id = 0; - } + iph->id = 0; } else { + /* Unfortunately we need the big hammer to get a suitable IPID */ __ip_select_ident(net, iph, segs); } } diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index bd4bfdd30e28e523370ab1345776175a93acecbd..ebdc542416cdb18fed94ae3c416c6df7b68ae508 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -290,7 +290,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, fn = rcu_dereference(f6i->fib6_node); if (fn) { - *cookie = fn->fn_sernum; + *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */ smp_rmb(); status = true; @@ -501,6 +501,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void fib6_nh_release(struct fib6_nh *fib6_nh); +void fib6_nh_release_dsts(struct fib6_nh *fib6_nh); int call_fib6_entry_notifiers(struct net *net, enum fib_event_type event_type, diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 2ec062aaa9782b71cfd75279c8347d90cff14e81..088f257cd6fb3e5d754c5b5e58f28c9fd7e9c834 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -437,7 +437,7 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, #ifdef CONFIG_IP_ROUTE_CLASSID static inline int fib_num_tclassid_users(struct net *net) { - return net->ipv4.fib_num_tclassid_users; + return atomic_read(&net->ipv4.fib_num_tclassid_users); } #else static inline int fib_num_tclassid_users(struct net *net) @@ -553,5 +553,5 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, u8 rt_family, unsigned char *flags, bool skip_oif); int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, - int nh_weight, u8 rt_family); + int nh_weight, u8 rt_family, u32 nh_tclassid); #endif /* _NET_FIB_H */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index bd1f396cc9c729d734fbf9132335266dfddec5f9..60601896d4747ff5800786b7421873135d47a857 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -390,17 +390,20 @@ static inline void txopt_put(struct ipv6_txoptions *opt) kfree_rcu(opt, rcu); } +#if IS_ENABLED(CONFIG_IPV6) struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label); extern struct static_key_false_deferred ipv6_flowlabel_exclusive; static inline struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label) { - if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key)) + if (static_branch_unlikely(&ipv6_flowlabel_exclusive.key) && + READ_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl)) return __fl6_sock_lookup(sk, label) ? : ERR_PTR(-ENOENT); return NULL; } +#endif struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 851029ecff13cb617d41b4043f039ae59e832b82..0a4779175a5238d8989a777ef8417223b6157679 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -67,7 +67,8 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) struct sk_buff *head; rcu_read_lock(); - if (fq->q.fqdir->dead) + /* Paired with the WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(fq->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&fq->q.lock); diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h index 8fce558b5fea3cdcab8b80bc73a0e24d7080efd8..14a43111ffc6a031e78563f5542a15f7139d34c6 100644 --- a/include/net/ipv6_stubs.h +++ b/include/net/ipv6_stubs.h @@ -47,6 +47,7 @@ struct ipv6_stub { struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack); void (*fib6_nh_release)(struct fib6_nh *fib6_nh); + void (*fib6_nh_release_dsts)(struct fib6_nh *fib6_nh); void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt); int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify); void (*fib6_rt_update)(struct net *net, struct fib6_info *rt, diff --git a/include/net/llc.h b/include/net/llc.h index df282d9b401704626b1824c70600c993a4ee1bb7..9c10b121b49b0ff8d88ca71ac7c76aae16930bd5 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -72,7 +72,9 @@ struct llc_sap { static inline struct hlist_head *llc_sk_dev_hash(struct llc_sap *sap, int ifindex) { - return &sap->sk_dev_hash[ifindex % LLC_SK_DEV_HASH_ENTRIES]; + u32 bucket = hash_32(ifindex, LLC_SK_DEV_HASH_BITS); + + return &sap->sk_dev_hash[bucket]; } static inline diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 8a26efdadbbc476b3c5801182f141fbf0d48dacd..7750ef0fe0dd540e40a20188ee78cb60ffd4b9ff 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -261,6 +261,7 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 +#define NEIGH_UPDATE_F_USE 0x10000000 #define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 @@ -512,10 +513,15 @@ static inline int neigh_output(struct neighbour *n, struct sk_buff *skb, { const struct hh_cache *hh = &n->hh; - if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache) + /* n->nud_state and hh->hh_len could be changed under us. + * neigh_hh_output() is taking care of the race later. + */ + if (!skip_cache && + (READ_ONCE(n->nud_state) & NUD_CONNECTED) && + READ_ONCE(hh->hh_len)) return neigh_hh_output(hh, skb); - else - return n->output(n, skb); + + return n->output(n, skb); } static inline struct neighbour * diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 878d0ce8132149500537f28e6ecd85c7f4ebba09..d13076d7367f2793288f8e68f7df436f476145f9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -269,14 +269,14 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - s32 timeout = ct->timeout - nfct_time_stamp; + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; return timeout > 0 ? timeout : 0; } static inline bool nf_ct_is_expired(const struct nf_conn *ct) { - return (__s32)(ct->timeout - nfct_time_stamp) <= 0; + return (__s32)(READ_ONCE(ct->timeout) - nfct_time_stamp) <= 0; } /* use after obtaining a reference count */ @@ -295,7 +295,7 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct) static inline void nf_ct_offload_timeout(struct nf_conn *ct) { if (nf_ct_expires(ct) < NF_CT_DAY / 2) - ct->timeout = nfct_time_stamp + NF_CT_DAY; + WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY); } struct kernel_param; diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index e770bba00066453885c147cfebaf2f3ed6f00124..b1d43894296a6ac35b9e69e90efa13f32ba9143e 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -37,7 +37,7 @@ void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *q void nf_unregister_queue_handler(struct net *net); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); -void nf_queue_entry_get_refs(struct nf_queue_entry *entry); +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_free(struct nf_queue_entry *entry); static inline void init_hashrandom(u32 *jhash_initval) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ed4a9d098164fa4da6af874bf472ce0e0c70063c..76bfb6cd5815de40304750af985a50d753656e48 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -825,7 +825,7 @@ struct nft_expr_ops { int (*offload)(struct nft_offload_ctx *ctx, struct nft_flow_rule *flow, const struct nft_expr *expr); - u32 offload_flags; + bool (*offload_action)(const struct nft_expr *expr); const struct nft_expr_type *type; void *data; }; diff --git a/include/net/netfilter/nf_tables_offload.h b/include/net/netfilter/nf_tables_offload.h index 434a6158852f366c5f5e9bb6899bfe9823e9a8ec..7a453a35a41dda9147b6f10fe9ff22bdc86f1588 100644 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@ -67,8 +67,6 @@ struct nft_flow_rule { struct flow_rule *rule; }; -#define NFT_OFFLOAD_F_ACTION (1 << 0) - void nft_flow_rule_set_addr_type(struct nft_flow_rule *flow, enum flow_dissector_key_id addr_type); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 13b3defd5cb0dda78a4fa224bf41bc49f90500fa..fb94551d03e6e677f8d0d39d27460973ecda50e9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -62,7 +62,7 @@ struct netns_ipv4 { #endif bool fib_has_custom_local_routes; #ifdef CONFIG_IP_ROUTE_CLASSID - int fib_num_tclassid_users; + atomic_t fib_num_tclassid_users; #endif struct hlist_head *fib_table_hash; bool fib_offload_disabled; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 213e96241e768ec754fb957f15907283662ec4f1..3d779d2ec7d793172bb43f256c2d0dde69f26d8e 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -81,9 +81,10 @@ struct netns_ipv6 { spinlock_t fib6_gc_lock; unsigned int ip6_rt_gc_expire; unsigned long ip6_rt_last_gc; + unsigned char flowlabel_has_excl; #ifdef CONFIG_IPV6_MULTIPLE_TABLES - unsigned int fib6_rules_require_fldissect; bool fib6_has_custom_rules; + unsigned int fib6_rules_require_fldissect; #ifdef CONFIG_IPV6_SUBTREES unsigned int fib6_routes_require_src; #endif diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 4c8c9fe9a3f0e5c0babbb55585b5b78df1fb2da4..fd87d727aa217c9fecc3cade07abd0f5d6d0f761 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -211,7 +211,7 @@ int nexthop_mpath_fill_node(struct sk_buff *skb, struct nexthop *nh, struct fib_nh_common *nhc = &nhi->fib_nhc; int weight = nhg->nh_entries[i].weight; - if (fib_add_nexthop(skb, nhc, weight, rt_family) < 0) + if (fib_add_nexthop(skb, nhc, weight, rt_family, 0) < 0) return -EMSGSIZE; } diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 33979017b782492d2fc98a7d70a542dd02930127..004e49f748419472e6a2f8a824ff604759b9d04e 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -30,6 +30,7 @@ enum nci_flag { NCI_UP, NCI_DATA_EXCHANGE, NCI_DATA_EXCHANGE_TO, + NCI_UNREG, }; /* NCI device states */ diff --git a/include/net/nl802154.h b/include/net/nl802154.h index ddcee128f5d9ac59c35b8364dc4c5943a04c712f..145acb8f250957f5fd7ae682a475034eea6055fc 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -19,6 +19,8 @@ * */ +#include + #define NL802154_GENL_NAME "nl802154" enum nl802154_commands { @@ -150,10 +152,9 @@ enum nl802154_attrs { }; enum nl802154_iftype { - /* for backwards compatibility TODO */ - NL802154_IFTYPE_UNSPEC = -1, + NL802154_IFTYPE_UNSPEC = (~(__u32)0), - NL802154_IFTYPE_NODE, + NL802154_IFTYPE_NODE = 0, NL802154_IFTYPE_MONITOR, NL802154_IFTYPE_COORD, diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2be90a54a4044d302a278d5961b234348a05e683..7e58b4470570526ea95ce17f256a436114bef7b1 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -11,6 +11,7 @@ #include #define DEFAULT_TX_QUEUE_LEN 1000 +#define STAB_SIZE_LOG_MAX 30 struct qdisc_walker { int stop; diff --git a/include/net/route.h b/include/net/route.h index ff021cab657e503e7db2044bc7c3309053d6c173..a07c277cd33e82b96d9075a8447438316c5ef181 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -369,7 +369,7 @@ static inline struct neighbour *ip_neigh_gw4(struct net_device *dev, { struct neighbour *neigh; - neigh = __ipv4_neigh_lookup_noref(dev, daddr); + neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)daddr); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &daddr, dev, false); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 2faae277f042ca15f55c809d0539977f4ddf6266..9d8b830c70a39bc9d647ec9ae82178c5e072fb95 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -307,6 +307,8 @@ struct Qdisc_ops { struct netlink_ext_ack *extack); void (*attach)(struct Qdisc *sch); int (*change_tx_queue_len)(struct Qdisc *, unsigned int); + void (*change_real_num_tx)(struct Qdisc *sch, + unsigned int new_real_tx); int (*dump)(struct Qdisc *, struct sk_buff *); int (*dump_stats)(struct Qdisc *, struct gnet_dump *); @@ -690,6 +692,8 @@ void qdisc_class_hash_grow(struct Qdisc *, struct Qdisc_class_hash *); void qdisc_class_hash_destroy(struct Qdisc_class_hash *); int dev_qdisc_change_tx_queue_len(struct net_device *dev); +void dev_qdisc_change_real_num_tx(struct net_device *dev, + unsigned int new_real_tx); void dev_init_scheduler(struct net_device *dev); void dev_shutdown(struct net_device *dev); void dev_activate(struct net_device *dev); @@ -1264,6 +1268,7 @@ struct psched_ratecfg { u64 rate_bytes_ps; /* bytes per second */ u32 mult; u16 overhead; + u16 mpu; u8 linklayer; u8 shift; }; @@ -1273,6 +1278,9 @@ static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, { len += r->overhead; + if (len < r->mpu) + len = r->mpu; + if (unlikely(r->linklayer == TC_LINKLAYER_ATM)) return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift; @@ -1295,6 +1303,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res, res->rate = min_t(u64, r->rate_bytes_ps, ~0U); res->overhead = r->overhead; + res->mpu = r->mpu; res->linklayer = (r->linklayer & TC_LINKLAYER_MASK); } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 4fc747b778eb636b1ee477548d316da65143a525..33475d061823e8c572036dfc26ddb5810afeffca 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,6 +103,7 @@ extern struct percpu_counter sctp_sockets_allocated; int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *); +typedef int (*sctp_callback_t)(struct sctp_endpoint *, struct sctp_transport *, void *); void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, @@ -113,9 +114,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), struct net *net, const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p); +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 51d698f2656fc17ac6d376fc8ca1eee99b3d0b46..be9ff0422c16265a6c30eeefecae8de887eaad22 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1339,6 +1339,7 @@ struct sctp_endpoint { u32 secid; u32 peer_secid; + struct rcu_head rcu; }; /* Recover the outter endpoint structure. */ @@ -1354,7 +1355,7 @@ static inline struct sctp_endpoint *sctp_ep(struct sctp_ep_common *base) struct sctp_endpoint *sctp_endpoint_new(struct sock *, gfp_t); void sctp_endpoint_free(struct sctp_endpoint *); void sctp_endpoint_put(struct sctp_endpoint *); -void sctp_endpoint_hold(struct sctp_endpoint *); +int sctp_endpoint_hold(struct sctp_endpoint *ep); void sctp_endpoint_add_asoc(struct sctp_endpoint *, struct sctp_association *); struct sctp_association *sctp_endpoint_lookup_assoc( const struct sctp_endpoint *ep, diff --git a/include/net/sock.h b/include/net/sock.h index 076eaef817ff4b95aac35d8ffd99f1f9d29d7376..fd78d61b8fec2649e3cee9057db88975e62a3368 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -481,8 +481,10 @@ struct sock { u32 sk_ack_backlog; u32 sk_max_ack_backlog; kuid_t sk_uid; + spinlock_t sk_peer_lock; struct pid *sk_peer_pid; const struct cred *sk_peer_cred; + long sk_rcvtimeo; ktime_t sk_stamp; #if BITS_PER_LONG==32 @@ -1225,7 +1227,7 @@ struct proto { unsigned int useroffset; /* Usercopy region offset */ unsigned int usersize; /* Usercopy region size */ - struct percpu_counter *orphan_count; + unsigned int __percpu *orphan_count; struct request_sock_ops *rsk_prot; struct timewait_sock_ops *twsk_prot; @@ -2333,19 +2335,22 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, * @sk: socket * * Use the per task page_frag instead of the per socket one for - * optimization when we know that we're in the normal context and owns + * optimization when we know that we're in process context and own * everything that's associated with %current. * - * gfpflags_allow_blocking() isn't enough here as direct reclaim may nest - * inside other socket operations and end up recursing into sk_page_frag() - * while it's already in use. + * Both direct reclaim and page faults can nest inside other + * socket operations and end up recursing into sk_page_frag() + * while it's already in use: explicitly avoid task page_frag + * usage if the caller is potentially doing any of them. + * This assumes that page fault handlers use the GFP_NOFS flags. * * Return: a per task page_frag if context allows that, * otherwise a per socket one. */ static inline struct page_frag *sk_page_frag(struct sock *sk) { - if (gfpflags_normal_context(sk->sk_allocation)) + if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) == + (__GFP_DIRECT_RECLAIM | __GFP_FS)) return ¤t->task_frag; return &sk->sk_frag; diff --git a/include/net/strparser.h b/include/net/strparser.h index 1d20b98493a103030d203add46a540c6d1f8010f..bec1439bd3be6fe4e10705e416376da8ec7a27a3 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -54,10 +54,24 @@ struct strp_msg { int offset; }; +struct _strp_msg { + /* Internal cb structure. struct strp_msg must be first for passing + * to upper layer. + */ + struct strp_msg strp; + int accum_len; +}; + +struct sk_skb_cb { +#define SK_SKB_CB_PRIV_LEN 20 + unsigned char data[SK_SKB_CB_PRIV_LEN]; + struct _strp_msg strp; +}; + static inline struct strp_msg *strp_msg(struct sk_buff *skb) { return (struct strp_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); + offsetof(struct sk_skb_cb, strp)); } /* Structure for an attached lower socket */ diff --git a/include/net/tcp.h b/include/net/tcp.h index eff611da5780b3a172347b51095d32b02e64e6b2..334b8d1b54429225da51747143c7247328234708 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -48,7 +48,9 @@ extern struct inet_hashinfo tcp_hashinfo; -extern struct percpu_counter tcp_orphan_count; +DECLARE_PER_CPU(unsigned int, tcp_orphan_count); +int tcp_orphan_count_sum(void); + void tcp_time_wait(struct sock *sk, int state, int timeo); #define MAX_TCP_HEADER L1_CACHE_ALIGN(128 + MAX_HEADER) @@ -290,19 +292,6 @@ static inline bool tcp_out_of_memory(struct sock *sk) void sk_forced_mem_schedule(struct sock *sk, int size); -static inline bool tcp_too_many_orphans(struct sock *sk, int shift) -{ - struct percpu_counter *ocp = sk->sk_prot->orphan_count; - int orphans = percpu_counter_read_positive(ocp); - - if (orphans << shift > sysctl_tcp_max_orphans) { - orphans = percpu_counter_sum_positive(ocp); - if (orphans << shift > sysctl_tcp_max_orphans) - return true; - } - return false; -} - bool tcp_check_oom(struct sock *sk, int shift); diff --git a/include/net/tls.h b/include/net/tls.h index b0d953be977eea1da6c164d124e2f485a730d5ee..c875c23ad1735e04a877bcd75038c62dd759881a 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -373,6 +373,7 @@ int tls_sk_query(struct sock *sk, int optname, char __user *optval, int __user *optlen); int tls_sk_attach(struct sock *sk, int optname, char __user *optval, unsigned int optlen); +void tls_err_abort(struct sock *sk, int err); int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx); void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx); @@ -481,12 +482,6 @@ static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) #endif } -static inline void tls_err_abort(struct sock *sk, int err) -{ - sk->sk_err = err; - sk->sk_error_report(sk); -} - static inline bool tls_bigint_increment(unsigned char *seq, int len) { int i; @@ -527,7 +522,7 @@ static inline void tls_advance_record_sn(struct sock *sk, struct cipher_context *ctx) { if (tls_bigint_increment(ctx->rec_seq, prot->rec_seq_size)) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (prot->version != TLS_1_3_VERSION) tls_bigint_increment(ctx->iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, diff --git a/include/net/udp.h b/include/net/udp.h index 949ae14a54250ea692d970a88d14b0a73504453f..435cc009e6eaaa79d376d3d71156ed5d3186534b 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -488,8 +488,9 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk, * CHECKSUM_NONE in __udp_gso_segment. UDP GRO indeed builds partial * packets in udp_gro_complete_segment. As does UDP GSO, verified by * udp_send_skb. But when those packets are looped in dev_loopback_xmit - * their ip_summed is set to CHECKSUM_UNNECESSARY. Reset in this - * specific case, where PARTIAL is both correct and required. + * their ip_summed CHECKSUM_NONE is changed to CHECKSUM_UNNECESSARY. + * Reset in this specific case, where PARTIAL is both correct and + * required. */ if (skb->pkt_type == PACKET_LOOPBACK) skb->ip_summed = CHECKSUM_PARTIAL; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 737ac722410fe57f378556ede7689c2e8cc66a29..b07e6748f539683c929bf4c6b136b04255676845 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -193,6 +193,11 @@ struct xfrm_state { struct xfrm_algo_aead *aead; const char *geniv; + /* mapping change rate limiting */ + __be16 new_mapping_sport; + u32 new_mapping; /* seconds */ + u32 mapping_maxage; /* seconds for input SA */ + /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; struct sock __rcu *encap_sk; @@ -1546,7 +1551,6 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); int xfrm_init_replay(struct xfrm_state *x); -u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu); u32 xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); @@ -1662,14 +1666,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, const struct xfrm_migrate *m, int num_bundles, const struct xfrm_kmaddress *k, const struct xfrm_encap_tmpl *encap); -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net); +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, + u32 if_id); struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, struct xfrm_migrate *m, struct xfrm_encap_tmpl *encap); int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap); + struct xfrm_encap_tmpl *encap, u32 if_id); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); diff --git a/include/rdma/rdma_netlink.h b/include/rdma/rdma_netlink.h index 2758d9df71ee9187ea6019855b2cfd9c929c91f1..c2a79aeee113c1de91f82d2590e37e14a6031ee8 100644 --- a/include/rdma/rdma_netlink.h +++ b/include/rdma/rdma_netlink.h @@ -30,7 +30,7 @@ enum rdma_nl_flags { * constant as well and the compiler checks they are the same. */ #define MODULE_ALIAS_RDMA_NETLINK(_index, _val) \ - static inline void __chk_##_index(void) \ + static inline void __maybe_unused __chk_##_index(void) \ { \ BUILD_BUG_ON(_index != _val); \ } \ diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 5339baadc082e05e9574253b323a1b4a36d36b9d..ef3345119b29a6f19e989ada33e5afc36f2634ec 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -88,6 +88,21 @@ static inline int scsi_is_wlun(u64 lun) return (lun & 0xff00) == SCSI_W_LUN_BASE; } +/** + * scsi_status_is_check_condition - check the status return. + * + * @status: the status passed up from the driver (including host and + * driver components) + * + * This returns true if the status code is SAM_STAT_CHECK_CONDITION. + */ +static inline int scsi_status_is_check_condition(int status) +{ + if (status < 0) + return false; + status &= 0xfe; + return status == SAM_STAT_CHECK_CONDITION; +} /* * MESSAGE CODES diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index b6398c9d2c5c64ec2537c802812a1035e9ccb8fe..4c08a7bd969f2f73e436b7cdd654386469fffa92 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -150,6 +150,12 @@ struct scsi_cmnd { ANDROID_KABI_RESERVE(4); }; +/* Variant of blk_mq_rq_from_pdu() that verifies the type of its argument. */ +static inline struct request *scsi_cmd_to_rq(struct scsi_cmnd *scmd) +{ + return blk_mq_rq_from_pdu(scmd); +} + /* * Return the driver private allocation behind the command. * Only works if cmd_size is set in the host template. @@ -162,7 +168,15 @@ static inline void *scsi_cmd_priv(struct scsi_cmnd *cmd) /* make sure not to use it with passthrough commands */ static inline struct scsi_driver *scsi_cmd_to_driver(struct scsi_cmnd *cmd) { - return *(struct scsi_driver **)cmd->request->rq_disk->private_data; + struct request *rq = scsi_cmd_to_rq(cmd); + + return *(struct scsi_driver **)rq->rq_disk->private_data; +} + +/* A helper function to make it easier to backport upstream SCSI patches. */ +static inline void scsi_done(struct scsi_cmnd *cmd) +{ + cmd->scsi_done(cmd); } extern void scsi_finish_command(struct scsi_cmnd *cmd); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 158423bcdde93307a88918fdfce06576280100fb..d54a6cb496dac301dac853df556fe5f76b239d55 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -205,6 +205,8 @@ struct scsi_device { unsigned unmap_limit_for_ws:1; /* Use the UNMAP limit for WRITE SAME */ unsigned rpm_autosuspend:1; /* Enable runtime autosuspend at device * creation time */ + unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */ + unsigned silence_suspend:1; /* Do not print runtime PM related messages */ bool offline_already; /* Device offline message logged */ @@ -271,13 +273,15 @@ sdev_prefix_printk(const char *, const struct scsi_device *, const char *, __printf(3, 4) void scmd_printk(const char *, const struct scsi_cmnd *, const char *, ...); -#define scmd_dbg(scmd, fmt, a...) \ - do { \ - if ((scmd)->request->rq_disk) \ - sdev_dbg((scmd)->device, "[%s] " fmt, \ - (scmd)->request->rq_disk->disk_name, ##a);\ - else \ - sdev_dbg((scmd)->device, fmt, ##a); \ +#define scmd_dbg(scmd, fmt, a...) \ + do { \ + struct request *__rq = scsi_cmd_to_rq((scmd)); \ + \ + if (__rq->rq_disk) \ + sdev_dbg((scmd)->device, "[%s] " fmt, \ + __rq->rq_disk->disk_name, ##a); \ + else \ + sdev_dbg((scmd)->device, fmt, ##a); \ } while (0) enum scsi_target_state { diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h index 3fdb322d4c4bbeaba31fc0ac0a5a912d924637f6..5d14adae21c78903188052187076eeec9644188a 100644 --- a/include/scsi/scsi_devinfo.h +++ b/include/scsi/scsi_devinfo.h @@ -28,7 +28,8 @@ #define BLIST_LARGELUN ((__force blist_flags_t)(1ULL << 9)) /* override additional length field */ #define BLIST_INQUIRY_36 ((__force blist_flags_t)(1ULL << 10)) -#define __BLIST_UNUSED_11 ((__force blist_flags_t)(1ULL << 11)) +/* ignore MEDIA CHANGE unit attention after resuming from runtime suspend */ +#define BLIST_IGN_MEDIA_CHANGE ((__force blist_flags_t)(1ULL << 11)) /* do not do automatic start on add */ #define BLIST_NOSTARTONADD ((__force blist_flags_t)(1ULL << 12)) #define __BLIST_UNUSED_13 ((__force blist_flags_t)(1ULL << 13)) @@ -73,8 +74,7 @@ #define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \ (__force blist_flags_t) \ ((__force __u64)__BLIST_LAST_USED - 1ULL))) -#define __BLIST_UNUSED_MASK (__BLIST_UNUSED_11 | \ - __BLIST_UNUSED_13 | \ +#define __BLIST_UNUSED_MASK (__BLIST_UNUSED_13 | \ __BLIST_UNUSED_14 | \ __BLIST_UNUSED_15 | \ __BLIST_UNUSED_16 | \ diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 8c0d37f389df719292d488d1b04e64f20bf44c4b..585f26495ab489024b3ea154cf0d63608386d268 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -271,6 +271,16 @@ struct scsi_host_template { */ int (* map_queues)(struct Scsi_Host *shost); + /* + * SCSI interface of blk_poll - poll for IO completions. + * Only applicable if SCSI LLD exposes multiple h/w queues. + * + * Return value: Number of completed entries found. + * + * Status: OPTIONAL + */ + int (* mq_poll)(struct Scsi_Host *shost, unsigned int queue_num); + /* * Check if scatterlists need to be padded for DMA draining. * @@ -343,10 +353,17 @@ struct scsi_host_template { /* * This determines if we will use a non-interrupt driven * or an interrupt driven scheme. It is set to the maximum number - * of simultaneous commands a single hw queue in HBA will accept. + * of simultaneous commands a single hw queue in HBA will accept. Does + * not include @reserved_tags. */ int can_queue; + /* + * Number of tags to reserve. A reserved tag can be allocated by passing + * the BLK_MQ_REQ_RESERVED flag to blk_mq_alloc_request(). + */ + unsigned reserved_tags; + /* * In many instances, especially where disconnect / reconnect are * supported, our host also has an ID on the SCSI bus. If this is @@ -616,6 +633,7 @@ struct Scsi_Host { * the total queue depth is can_queue. */ unsigned nr_hw_queues; + unsigned nr_maps; unsigned active_mode:2; unsigned unchecked_isa_dma:1; diff --git a/include/sound/hda_codec.h b/include/sound/hda_codec.h index 73827b7d17e008d70cfaf0edc0888756bf0d3edd..cf530e9fb5f5ced8375cead8f6e9ea5a1834361b 100644 --- a/include/sound/hda_codec.h +++ b/include/sound/hda_codec.h @@ -225,6 +225,7 @@ struct hda_codec { #endif /* misc flags */ + unsigned int configured:1; /* codec was configured */ unsigned int in_freeing:1; /* being released */ unsigned int registered:1; /* codec was registered */ unsigned int display_power_control:1; /* needs display power */ diff --git a/include/sound/hdaudio_ext.h b/include/sound/hdaudio_ext.h index 7abf74c1c47406c1dcc46c2f3e38e497b782a8c2..75048ea178f627af3684ecdde94b8b895dfb8385 100644 --- a/include/sound/hdaudio_ext.h +++ b/include/sound/hdaudio_ext.h @@ -88,6 +88,8 @@ struct hdac_ext_stream *snd_hdac_ext_stream_assign(struct hdac_bus *bus, struct snd_pcm_substream *substream, int type); void snd_hdac_ext_stream_release(struct hdac_ext_stream *azx_dev, int type); +void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, + struct hdac_ext_stream *azx_dev, bool decouple); void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, struct hdac_ext_stream *azx_dev, bool decouple); void snd_hdac_ext_stop_streams(struct hdac_bus *bus); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 549947d407cfd2f55455748ebde55d3d4860b531..18a5dcd275f88daae48e2b021f7728133b1951bc 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -788,8 +788,9 @@ struct se_device { atomic_long_t read_bytes; atomic_long_t write_bytes; /* Active commands on this virtual SE device */ - atomic_t simple_cmds; - atomic_t dev_ordered_sync; + atomic_t non_ordered; + bool ordered_sync_in_progress; + atomic_t delayed_cmd_count; atomic_t dev_qf_count; u32 export_count; spinlock_t delayed_cmd_lock; @@ -811,6 +812,7 @@ struct se_device { struct list_head dev_sep_list; struct list_head dev_tmr_list; struct work_struct qf_work_queue; + struct work_struct delayed_cmd_work; struct list_head delayed_cmd_list; struct list_head state_list; struct list_head qf_cmd_list; diff --git a/include/trace/events/cgroup.h b/include/trace/events/cgroup.h index 7f42a3de59e6b46ff313738371439bf532a88a79..dd7d7c9efecdf33d7c28c89d769e7d00e70cae2d 100644 --- a/include/trace/events/cgroup.h +++ b/include/trace/events/cgroup.h @@ -59,8 +59,8 @@ DECLARE_EVENT_CLASS(cgroup, TP_STRUCT__entry( __field( int, root ) - __field( int, id ) __field( int, level ) + __field( u64, id ) __string( path, path ) ), @@ -71,7 +71,7 @@ DECLARE_EVENT_CLASS(cgroup, __assign_str(path, path); ), - TP_printk("root=%d id=%d level=%d path=%s", + TP_printk("root=%d id=%llu level=%d path=%s", __entry->root, __entry->id, __entry->level, __get_str(path)) ); @@ -126,8 +126,8 @@ DECLARE_EVENT_CLASS(cgroup_migrate, TP_STRUCT__entry( __field( int, dst_root ) - __field( int, dst_id ) __field( int, dst_level ) + __field( u64, dst_id ) __field( int, pid ) __string( dst_path, path ) __string( comm, task->comm ) @@ -142,7 +142,7 @@ DECLARE_EVENT_CLASS(cgroup_migrate, __assign_str(comm, task->comm); ), - TP_printk("dst_root=%d dst_id=%d dst_level=%d dst_path=%s pid=%d comm=%s", + TP_printk("dst_root=%d dst_id=%llu dst_level=%d dst_path=%s pid=%d comm=%s", __entry->dst_root, __entry->dst_id, __entry->dst_level, __get_str(dst_path), __entry->pid, __get_str(comm)) ); @@ -171,8 +171,8 @@ DECLARE_EVENT_CLASS(cgroup_event, TP_STRUCT__entry( __field( int, root ) - __field( int, id ) __field( int, level ) + __field( u64, id ) __string( path, path ) __field( int, val ) ), @@ -185,7 +185,7 @@ DECLARE_EVENT_CLASS(cgroup_event, __entry->val = val; ), - TP_printk("root=%d id=%d level=%d path=%s val=%d", + TP_printk("root=%d id=%llu level=%d path=%s val=%d", __entry->root, __entry->id, __entry->level, __get_str(path), __entry->val) ); diff --git a/include/trace/events/damon.h b/include/trace/events/damon.h new file mode 100644 index 0000000000000000000000000000000000000000..c79f1d4c39afe754a18d3c646a0bd262e531eb83 --- /dev/null +++ b/include/trace/events/damon.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM damon + +#if !defined(_TRACE_DAMON_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DAMON_H + +#include +#include +#include + +TRACE_EVENT(damon_aggregated, + + TP_PROTO(struct damon_target *t, unsigned int target_id, + struct damon_region *r, unsigned int nr_regions), + + TP_ARGS(t, target_id, r, nr_regions), + + TP_STRUCT__entry( + __field(unsigned long, target_id) + __field(unsigned int, nr_regions) + __field(unsigned long, start) + __field(unsigned long, end) + __field(unsigned int, nr_accesses) + __field(unsigned int, age) + ), + + TP_fast_assign( + __entry->target_id = target_id; + __entry->nr_regions = nr_regions; + __entry->start = r->ar.start; + __entry->end = r->ar.end; + __entry->nr_accesses = r->nr_accesses; + __entry->age = r->age; + ), + + TP_printk("target_id=%lu nr_regions=%u %lu-%lu: %u %u", + __entry->target_id, __entry->nr_regions, + __entry->start, __entry->end, + __entry->nr_accesses, __entry->age) +); + +#endif /* _TRACE_DAMON_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h index db4f2cec8360629e5d7d19b6208b13a86e4c271c..16ae7b6668105b71b668730d5eab05099b3699de 100644 --- a/include/trace/events/erofs.h +++ b/include/trace/events/erofs.h @@ -24,7 +24,7 @@ struct erofs_map_blocks; #define show_mflags(flags) __print_flags(flags, "", \ { EROFS_MAP_MAPPED, "M" }, \ { EROFS_MAP_META, "I" }, \ - { EROFS_MAP_ZIPPED, "Z" }) + { EROFS_MAP_ENCODED, "E" }) TRACE_EVENT(erofs_lookup, diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 4e881d91c8744d4562c9078f25c20ed0de4c1d0b..f701bb23f83c439e5d8c6171b55422769d2e172c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -540,17 +540,17 @@ TRACE_EVENT(f2fs_truncate_partial_nodes, TRACE_EVENT(f2fs_file_write_iter, - TP_PROTO(struct inode *inode, unsigned long offset, - unsigned long length, int ret), + TP_PROTO(struct inode *inode, loff_t offset, size_t length, + ssize_t ret), TP_ARGS(inode, offset, length, ret), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(unsigned long, offset) - __field(unsigned long, length) - __field(int, ret) + __field(loff_t, offset) + __field(size_t, length) + __field(ssize_t, ret) ), TP_fast_assign( @@ -562,7 +562,7 @@ TRACE_EVENT(f2fs_file_write_iter, ), TP_printk("dev = (%d,%d), ino = %lu, " - "offset = %lu, length = %lu, written(err) = %d", + "offset = %lld, length = %zu, written(err) = %zd", show_dev_ino(__entry), __entry->offset, __entry->length, @@ -570,9 +570,10 @@ TRACE_EVENT(f2fs_file_write_iter, ); TRACE_EVENT(f2fs_map_blocks, - TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret), + TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, + int create, int flag, int ret), - TP_ARGS(inode, map, ret), + TP_ARGS(inode, map, create, flag, ret), TP_STRUCT__entry( __field(dev_t, dev) @@ -583,11 +584,14 @@ TRACE_EVENT(f2fs_map_blocks, __field(unsigned int, m_flags) __field(int, m_seg_type) __field(bool, m_may_create) + __field(bool, m_multidev_dio) + __field(int, create) + __field(int, flag) __field(int, ret) ), TP_fast_assign( - __entry->dev = inode->i_sb->s_dev; + __entry->dev = map->m_bdev->bd_dev; __entry->ino = inode->i_ino; __entry->m_lblk = map->m_lblk; __entry->m_pblk = map->m_pblk; @@ -595,12 +599,16 @@ TRACE_EVENT(f2fs_map_blocks, __entry->m_flags = map->m_flags; __entry->m_seg_type = map->m_seg_type; __entry->m_may_create = map->m_may_create; + __entry->m_multidev_dio = map->m_multidev_dio; + __entry->create = create; + __entry->flag = flag; __entry->ret = ret; ), TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, " - "start blkaddr = 0x%llx, len = 0x%llx, flags = %u," - "seg_type = %d, may_create = %d, err = %d", + "start blkaddr = 0x%llx, len = 0x%llx, flags = %u, " + "seg_type = %d, may_create = %d, multidevice = %d, " + "create = %d, flag = %d, err = %d", show_dev_ino(__entry), (unsigned long long)__entry->m_lblk, (unsigned long long)__entry->m_pblk, @@ -608,6 +616,9 @@ TRACE_EVENT(f2fs_map_blocks, __entry->m_flags, __entry->m_seg_type, __entry->m_may_create, + __entry->m_multidev_dio, + __entry->create, + __entry->flag, __entry->ret) ); @@ -807,20 +818,20 @@ TRACE_EVENT(f2fs_lookup_start, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(const char *, name) + __string(name, dentry->d_name.name) __field(unsigned int, flags) ), TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->ino = dir->i_ino; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); __entry->flags = flags; ), TP_printk("dev = (%d,%d), pino = %lu, name:%s, flags:%u", show_dev_ino(__entry), - __entry->name, + __get_str(name), __entry->flags) ); @@ -834,7 +845,7 @@ TRACE_EVENT(f2fs_lookup_end, TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(const char *, name) + __string(name, dentry->d_name.name) __field(nid_t, cino) __field(int, err) ), @@ -842,14 +853,14 @@ TRACE_EVENT(f2fs_lookup_end, TP_fast_assign( __entry->dev = dir->i_sb->s_dev; __entry->ino = dir->i_ino; - __entry->name = dentry->d_name.name; + __assign_str(name, dentry->d_name.name); __entry->cino = ino; __entry->err = err; ), TP_printk("dev = (%d,%d), pino = %lu, name:%s, ino:%u, err:%d", show_dev_ino(__entry), - __entry->name, + __get_str(name), __entry->cino, __entry->err) ); @@ -925,14 +936,14 @@ TRACE_EVENT(f2fs_fallocate, TRACE_EVENT(f2fs_direct_IO_enter, - TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw), + TP_PROTO(struct inode *inode, struct kiocb *iocb, long len, int rw), - TP_ARGS(inode, offset, len, rw), + TP_ARGS(inode, iocb, len, rw), TP_STRUCT__entry( __field(dev_t, dev) __field(ino_t, ino) - __field(loff_t, pos) + __field(struct kiocb *, iocb) __field(unsigned long, len) __field(int, rw) ), @@ -940,15 +951,18 @@ TRACE_EVENT(f2fs_direct_IO_enter, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->pos = offset; + __entry->iocb = iocb; __entry->len = len; __entry->rw = rw; ), - TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu rw = %d", + TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_hint = %x ki_ioprio = %x rw = %d", show_dev_ino(__entry), - __entry->pos, + __entry->iocb->ki_pos, __entry->len, + __entry->iocb->ki_flags, + __entry->iocb->ki_hint, + __entry->iocb->ki_ioprio, __entry->rw) ); diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 390270e00a1d14e0083f06e04cf61a3fa49968dc..12d76932e390ed8fd00b260a2765861bd30c1cdb 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -48,11 +48,21 @@ {(unsigned long)__GFP_WRITE, "__GFP_WRITE"}, \ {(unsigned long)__GFP_RECLAIM, "__GFP_RECLAIM"}, \ {(unsigned long)__GFP_DIRECT_RECLAIM, "__GFP_DIRECT_RECLAIM"},\ - {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"}\ + {(unsigned long)__GFP_KSWAPD_RECLAIM, "__GFP_KSWAPD_RECLAIM"},\ + {(unsigned long)__GFP_ZEROTAGS, "__GFP_ZEROTAGS"} \ + +#ifdef CONFIG_KASAN_HW_TAGS +#define __def_gfpflag_names_kasan , \ + {(unsigned long)__GFP_SKIP_ZERO, "__GFP_SKIP_ZERO"}, \ + {(unsigned long)__GFP_SKIP_KASAN_POISON, "__GFP_SKIP_KASAN_POISON"}, \ + {(unsigned long)__GFP_SKIP_KASAN_UNPOISON, "__GFP_SKIP_KASAN_UNPOISON"} +#else +#define __def_gfpflag_names_kasan +#endif #define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", \ - __def_gfpflag_names \ + __def_gfpflag_names __def_gfpflag_names_kasan \ ) : "none" #ifdef CONFIG_MMU @@ -73,7 +83,7 @@ #define IF_HAVE_PG_HWPOISON(flag,string) #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT) #define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string} #else #define IF_HAVE_PG_IDLE(flag,string) diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 8fd1babae761b901bc6627bb45e85f3631f4fa48..e1735fe7c76afd0900294a42c3f1e0e8b22998ce 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -27,24 +27,21 @@ TRACE_EVENT(mm_lru_insertion, - TP_PROTO( - struct page *page, - int lru - ), + TP_PROTO(struct page *page), - TP_ARGS(page, lru), + TP_ARGS(page), TP_STRUCT__entry( __field(struct page *, page ) __field(unsigned long, pfn ) - __field(int, lru ) + __field(enum lru_list, lru ) __field(unsigned long, flags ) ), TP_fast_assign( __entry->page = page; __entry->pfn = page_to_pfn(page); - __entry->lru = lru; + __entry->lru = page_lru(page); __entry->flags = trace_pagemap_flags(page); ), diff --git a/include/trace/events/ufs.h b/include/trace/events/ufs.h index 50654f352639225d72b8b6f266a22583311acb33..599739ee7b2089d19cb84f6d73c536a400f0f494 100644 --- a/include/trace/events/ufs.h +++ b/include/trace/events/ufs.h @@ -20,31 +20,51 @@ { SYNCHRONIZE_CACHE, "SYNC" }, \ { UNMAP, "UNMAP" }) -#define UFS_LINK_STATES \ - EM(UIC_LINK_OFF_STATE) \ - EM(UIC_LINK_ACTIVE_STATE) \ - EMe(UIC_LINK_HIBERN8_STATE) - -#define UFS_PWR_MODES \ - EM(UFS_ACTIVE_PWR_MODE) \ - EM(UFS_SLEEP_PWR_MODE) \ - EMe(UFS_POWERDOWN_PWR_MODE) - -#define UFSCHD_CLK_GATING_STATES \ - EM(CLKS_OFF) \ - EM(CLKS_ON) \ - EM(REQ_CLKS_OFF) \ - EMe(REQ_CLKS_ON) +#define UFS_LINK_STATES \ + EM(UIC_LINK_OFF_STATE, "UIC_LINK_OFF_STATE") \ + EM(UIC_LINK_ACTIVE_STATE, "UIC_LINK_ACTIVE_STATE") \ + EMe(UIC_LINK_HIBERN8_STATE, "UIC_LINK_HIBERN8_STATE") + +#define UFS_PWR_MODES \ + EM(UFS_ACTIVE_PWR_MODE, "UFS_ACTIVE_PWR_MODE") \ + EM(UFS_SLEEP_PWR_MODE, "UFS_SLEEP_PWR_MODE") \ + EM(UFS_POWERDOWN_PWR_MODE, "UFS_POWERDOWN_PWR_MODE") \ + EMe(UFS_DEEPSLEEP_PWR_MODE, "UFS_DEEPSLEEP_PWR_MODE") + +#define UFSCHD_CLK_GATING_STATES \ + EM(CLKS_OFF, "CLKS_OFF") \ + EM(CLKS_ON, "CLKS_ON") \ + EM(REQ_CLKS_OFF, "REQ_CLKS_OFF") \ + EMe(REQ_CLKS_ON, "REQ_CLKS_ON") + +#define UFS_CMD_TRACE_STRINGS \ + EM(UFS_CMD_SEND, "send_req") \ + EM(UFS_CMD_COMP, "complete_rsp") \ + EM(UFS_DEV_COMP, "dev_complete") \ + EM(UFS_QUERY_SEND, "query_send") \ + EM(UFS_QUERY_COMP, "query_complete") \ + EM(UFS_QUERY_ERR, "query_complete_err") \ + EM(UFS_TM_SEND, "tm_send") \ + EM(UFS_TM_COMP, "tm_complete") \ + EMe(UFS_TM_ERR, "tm_complete_err") + +#define UFS_CMD_TRACE_TSF_TYPES \ + EM(UFS_TSF_CDB, "CDB") \ + EM(UFS_TSF_OSF, "OSF") \ + EM(UFS_TSF_TM_INPUT, "TM_INPUT") \ + EMe(UFS_TSF_TM_OUTPUT, "TM_OUTPUT") /* Enums require being exported to userspace, for user tool parsing */ #undef EM #undef EMe -#define EM(a) TRACE_DEFINE_ENUM(a); -#define EMe(a) TRACE_DEFINE_ENUM(a); +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); UFS_LINK_STATES; UFS_PWR_MODES; UFSCHD_CLK_GATING_STATES; +UFS_CMD_TRACE_STRINGS +UFS_CMD_TRACE_TSF_TYPES /* * Now redefine the EM() and EMe() macros to map the enums to the strings @@ -52,8 +72,13 @@ UFSCHD_CLK_GATING_STATES; */ #undef EM #undef EMe -#define EM(a) { a, #a }, -#define EMe(a) { a, #a } +#define EM(a, b) {a, b}, +#define EMe(a, b) {a, b} + +#define show_ufs_cmd_trace_str(str_t) \ + __print_symbolic(str_t, UFS_CMD_TRACE_STRINGS) +#define show_ufs_cmd_trace_tsf(tsf) \ + __print_symbolic(tsf, UFS_CMD_TRACE_TSF_TYPES) TRACE_EVENT(ufshcd_clk_gating, @@ -221,17 +246,37 @@ DEFINE_EVENT(ufshcd_template, ufshcd_init, int dev_state, int link_state), TP_ARGS(dev_name, err, usecs, dev_state, link_state)); +DEFINE_EVENT(ufshcd_template, ufshcd_wl_suspend, + TP_PROTO(const char *dev_name, int err, s64 usecs, + int dev_state, int link_state), + TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + +DEFINE_EVENT(ufshcd_template, ufshcd_wl_resume, + TP_PROTO(const char *dev_name, int err, s64 usecs, + int dev_state, int link_state), + TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + +DEFINE_EVENT(ufshcd_template, ufshcd_wl_runtime_suspend, + TP_PROTO(const char *dev_name, int err, s64 usecs, + int dev_state, int link_state), + TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + +DEFINE_EVENT(ufshcd_template, ufshcd_wl_runtime_resume, + TP_PROTO(const char *dev_name, int err, s64 usecs, + int dev_state, int link_state), + TP_ARGS(dev_name, err, usecs, dev_state, link_state)); + TRACE_EVENT(ufshcd_command, - TP_PROTO(const char *dev_name, const char *str, unsigned int tag, - u32 doorbell, int transfer_len, u32 intr, u64 lba, - u8 opcode, u8 group_id), + TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, + unsigned int tag, u32 doorbell, int transfer_len, u32 intr, + u64 lba, u8 opcode, u8 group_id), - TP_ARGS(dev_name, str, tag, doorbell, transfer_len, + TP_ARGS(dev_name, str_t, tag, doorbell, transfer_len, intr, lba, opcode, group_id), TP_STRUCT__entry( __string(dev_name, dev_name) - __string(str, str) + __field(enum ufs_trace_str_t, str_t) __field(unsigned int, tag) __field(u32, doorbell) __field(int, transfer_len) @@ -243,7 +288,7 @@ TRACE_EVENT(ufshcd_command, TP_fast_assign( __assign_str(dev_name, dev_name); - __assign_str(str, str); + __entry->str_t = str_t; __entry->tag = tag; __entry->doorbell = doorbell; __entry->transfer_len = transfer_len; @@ -255,22 +300,22 @@ TRACE_EVENT(ufshcd_command, TP_printk( "%s: %s: tag: %u, DB: 0x%x, size: %d, IS: %u, LBA: %llu, opcode: 0x%x (%s), group_id: 0x%x", - __get_str(str), __get_str(dev_name), __entry->tag, - __entry->doorbell, __entry->transfer_len, + show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), + __entry->tag, __entry->doorbell, __entry->transfer_len, __entry->intr, __entry->lba, (u32)__entry->opcode, str_opcode(__entry->opcode), (u32)__entry->group_id ) ); TRACE_EVENT(ufshcd_uic_command, - TP_PROTO(const char *dev_name, const char *str, u32 cmd, + TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, u32 cmd, u32 arg1, u32 arg2, u32 arg3), - TP_ARGS(dev_name, str, cmd, arg1, arg2, arg3), + TP_ARGS(dev_name, str_t, cmd, arg1, arg2, arg3), TP_STRUCT__entry( __string(dev_name, dev_name) - __string(str, str) + __field(enum ufs_trace_str_t, str_t) __field(u32, cmd) __field(u32, arg1) __field(u32, arg2) @@ -279,7 +324,7 @@ TRACE_EVENT(ufshcd_uic_command, TP_fast_assign( __assign_str(dev_name, dev_name); - __assign_str(str, str); + __entry->str_t = str_t; __entry->cmd = cmd; __entry->arg1 = arg1; __entry->arg2 = arg2; @@ -288,38 +333,63 @@ TRACE_EVENT(ufshcd_uic_command, TP_printk( "%s: %s: cmd: 0x%x, arg1: 0x%x, arg2: 0x%x, arg3: 0x%x", - __get_str(str), __get_str(dev_name), __entry->cmd, - __entry->arg1, __entry->arg2, __entry->arg3 + show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), + __entry->cmd, __entry->arg1, __entry->arg2, __entry->arg3 ) ); TRACE_EVENT(ufshcd_upiu, - TP_PROTO(const char *dev_name, const char *str, void *hdr, void *tsf), + TP_PROTO(const char *dev_name, enum ufs_trace_str_t str_t, void *hdr, + void *tsf, enum ufs_trace_tsf_t tsf_t), - TP_ARGS(dev_name, str, hdr, tsf), + TP_ARGS(dev_name, str_t, hdr, tsf, tsf_t), TP_STRUCT__entry( __string(dev_name, dev_name) - __string(str, str) + __field(enum ufs_trace_str_t, str_t) __array(unsigned char, hdr, 12) __array(unsigned char, tsf, 16) + __field(enum ufs_trace_tsf_t, tsf_t) ), TP_fast_assign( __assign_str(dev_name, dev_name); - __assign_str(str, str); + __entry->str_t = str_t; memcpy(__entry->hdr, hdr, sizeof(__entry->hdr)); memcpy(__entry->tsf, tsf, sizeof(__entry->tsf)); + __entry->tsf_t = tsf_t; ), TP_printk( - "%s: %s: HDR:%s, CDB:%s", - __get_str(str), __get_str(dev_name), + "%s: %s: HDR:%s, %s:%s", + show_ufs_cmd_trace_str(__entry->str_t), __get_str(dev_name), __print_hex(__entry->hdr, sizeof(__entry->hdr)), + show_ufs_cmd_trace_tsf(__entry->tsf_t), __print_hex(__entry->tsf, sizeof(__entry->tsf)) ) ); +TRACE_EVENT(ufshcd_exception_event, + + TP_PROTO(const char *dev_name, u16 status), + + TP_ARGS(dev_name, status), + + TP_STRUCT__entry( + __string(dev_name, dev_name) + __field(u16, status) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __entry->status = status; + ), + + TP_printk("%s: status 0x%x", + __get_str(dev_name), __entry->status + ) +); + #endif /* if !defined(_TRACE_UFS_H) || defined(TRACE_HEADER_MULTI_READ) */ /* This part must be outside protection */ diff --git a/include/trace/hooks/avc.h b/include/trace/hooks/avc.h index 2c76e02fc3c52947c46f7275ae7767c27eb7e734..1d205382e6df4e82b1fdd3fa98d4ce1b88225622 100644 --- a/include/trace/hooks/avc.h +++ b/include/trace/hooks/avc.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_AVC_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_AVC_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/binder.h b/include/trace/hooks/binder.h index ffba8746e58ba8be12ea08ff219f79cd1721ec13..d7419ef64de21c75a96c731ac78416df45aa6d83 100644 --- a/include/trace/hooks/binder.h +++ b/include/trace/hooks/binder.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_BINDER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_BINDER_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/block.h b/include/trace/hooks/block.h index 0d9d7db39968041cd10c757558dbfdf1fe2bde82..ca9762a9be5b04b15c053e1faabb9f1209c4fad9 100644 --- a/include/trace/hooks/block.h +++ b/include/trace/hooks/block.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_BLOCK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_BLOCK_H -#include #include struct blk_mq_tag_set; diff --git a/include/trace/hooks/bug.h b/include/trace/hooks/bug.h index 685859e0eaf795a27c6460425f3d570f9a9ffd6c..3bd683e54f71370b39f89f0b59f7d9b16a0f9951 100644 --- a/include/trace/hooks/bug.h +++ b/include/trace/hooks/bug.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_BUG_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_BUG_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/cgroup.h b/include/trace/hooks/cgroup.h index 6b6d7918c51867591140eedc15108ffeedfa25e9..e33d3519d6765f3328301c32bdf95990387ab44e 100644 --- a/include/trace/hooks/cgroup.h +++ b/include/trace/hooks/cgroup.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_CGROUP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_CGROUP_H -#include #include struct task_struct; diff --git a/include/trace/hooks/cpu.h b/include/trace/hooks/cpu.h index 645702996b00043ee9d5d039efd3a9771b3b08fe..b368736e7285771d3ee0c8ae0700bf85f4317cf0 100644 --- a/include/trace/hooks/cpu.h +++ b/include/trace/hooks/cpu.h @@ -4,7 +4,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_CPU_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_CPU_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/cpufreq.h b/include/trace/hooks/cpufreq.h index 7208da2e7e3e2a57c7cd39070e8837a615b05066..70f90a3ef25c23905a99801c1d0d699e3ebbfd90 100644 --- a/include/trace/hooks/cpufreq.h +++ b/include/trace/hooks/cpufreq.h @@ -8,7 +8,6 @@ #define _TRACE_HOOK_CPUFREQ_H #include -#include #include DECLARE_HOOK(android_vh_show_max_freq, @@ -27,6 +26,21 @@ DECLARE_HOOK(android_vh_cpufreq_acct_update_power, DECLARE_RESTRICTED_HOOK(android_rvh_cpufreq_transition, TP_PROTO(struct cpufreq_policy *policy), TP_ARGS(policy), 1); + +DECLARE_HOOK(android_vh_cpufreq_resolve_freq, + TP_PROTO(struct cpufreq_policy *policy, unsigned int *target_freq, + unsigned int old_target_freq), + TP_ARGS(policy, target_freq, old_target_freq)); + +DECLARE_HOOK(android_vh_cpufreq_fast_switch, + TP_PROTO(struct cpufreq_policy *policy, unsigned int *target_freq, + unsigned int old_target_freq), + TP_ARGS(policy, target_freq, old_target_freq)); + +DECLARE_HOOK(android_vh_cpufreq_target, + TP_PROTO(struct cpufreq_policy *policy, unsigned int *target_freq, + unsigned int old_target_freq), + TP_ARGS(policy, target_freq, old_target_freq)); /* macro versions of hooks are no longer required */ #endif /* _TRACE_HOOK_CPUFREQ_H */ diff --git a/include/trace/hooks/cpuidle.h b/include/trace/hooks/cpuidle.h index 6d1f4bf7f1edb899ba2cb469361df95ded45eb29..c2ddabe37b00beaf2cdeba720340e441fa6150ac 100644 --- a/include/trace/hooks/cpuidle.h +++ b/include/trace/hooks/cpuidle.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_CPUIDLE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_CPUIDLE_H -#include #include struct cpuidle_device; diff --git a/include/trace/hooks/cpuidle_psci.h b/include/trace/hooks/cpuidle_psci.h index 94b01eba0b4ec13d2b16d4ee2bfdffe90a782d47..eef0032c0879b6e798f8047949c2bae190fa9d9b 100644 --- a/include/trace/hooks/cpuidle_psci.h +++ b/include/trace/hooks/cpuidle_psci.h @@ -4,7 +4,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_CPUIDLE_PSCI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_CPUIDLE_PSCI_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/creds.h b/include/trace/hooks/creds.h index dd877e3a1f3e46868335c52ae37883803804eb43..92b0d8e981f3a4a7659708cc0518e603383b677a 100644 --- a/include/trace/hooks/creds.h +++ b/include/trace/hooks/creds.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_CREDS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_CREDS_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/debug.h b/include/trace/hooks/debug.h index ac006d7fdd0a22fb41c2ea8bbb1af5f82483c013..90d81041a48b5461aa24bd6b5602882b505b0337 100644 --- a/include/trace/hooks/debug.h +++ b/include/trace/hooks/debug.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_DEBUG_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_DEBUG_H -#include #include #if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_ANDROID_VENDOR_HOOKS) diff --git a/include/trace/hooks/dtask.h b/include/trace/hooks/dtask.h index 3c49af0e65608d188c848ee3d6e5ea53d268feac..b483037e10131e35a65a651f483cdee9c9248ad5 100644 --- a/include/trace/hooks/dtask.h +++ b/include/trace/hooks/dtask.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_DTASK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_DTASK_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/epoch.h b/include/trace/hooks/epoch.h index f1c5342c822e3e782aed060f78dc9d799428d5ab..c3f018edf130616d46ab3530d4594e44318fa233 100644 --- a/include/trace/hooks/epoch.h +++ b/include/trace/hooks/epoch.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_EPOCH_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_EPOCH_H -#include #include DECLARE_HOOK(android_vh_show_suspend_epoch_val, diff --git a/include/trace/hooks/fault.h b/include/trace/hooks/fault.h index 1daf2d250f310bdb3f4e789ace0365ecc2bf3841..8803a1b3d2f08950e8a7a48e0957ba68bc6f7115 100644 --- a/include/trace/hooks/fault.h +++ b/include/trace/hooks/fault.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_FAULT_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_FAULT_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/fips140.h b/include/trace/hooks/fips140.h index 3aebf61687915c4fd22bded0e0fddf5833c27d91..fd4a42c013c744e7aff914880af968c63e648334 100644 --- a/include/trace/hooks/fips140.h +++ b/include/trace/hooks/fips140.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_FIPS140_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_FIPS140_H -#include #include struct crypto_aes_ctx; diff --git a/include/trace/hooks/fpsimd.h b/include/trace/hooks/fpsimd.h index a4a3ce228fff9ba3bb78dfe885182f507faaba77..3365c298e6bc179941fa1debbabe57faea36008c 100644 --- a/include/trace/hooks/fpsimd.h +++ b/include/trace/hooks/fpsimd.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_FPSIMD_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_FPSIMD_H -#include #include struct task_struct; diff --git a/include/trace/hooks/fs.h b/include/trace/hooks/fs.h index 90acad536bdbafedc39623f97439daf9033703eb..1c8d692be3afa24e1df12d08fb100adf569b38c9 100644 --- a/include/trace/hooks/fs.h +++ b/include/trace/hooks/fs.h @@ -8,7 +8,6 @@ #if !defined(_TRACE_HOOK_FS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_FS_H -#include #include DECLARE_HOOK(android_vh_ep_create_wakeup_source, TP_PROTO(char *name, int len), diff --git a/include/trace/hooks/ftrace_dump.h b/include/trace/hooks/ftrace_dump.h index ed3be8a0d74b15c2a497b545e5730da63a98b706..b63d9c80942607a107b1823a26c905a8412ec57a 100644 --- a/include/trace/hooks/ftrace_dump.h +++ b/include/trace/hooks/ftrace_dump.h @@ -10,7 +10,6 @@ #include #include -#include #include DECLARE_HOOK(android_vh_ftrace_oops_enter, diff --git a/include/trace/hooks/futex.h b/include/trace/hooks/futex.h index 0692dd68d93c42885f3aa9212ca7b5fd5a2c8707..3acf8128dc3e63eb5257b4efec3dfe1284c577bc 100644 --- a/include/trace/hooks/futex.h +++ b/include/trace/hooks/futex.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_FUTEX_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_FUTEX_H -#include #include #include /* diff --git a/include/trace/hooks/gic.h b/include/trace/hooks/gic.h index 2f7d3cfa6d9b3e497f28b6a3ddd132ed01ab9e11..d33786f2de95bb94064f12df84bfdf05a7258963 100644 --- a/include/trace/hooks/gic.h +++ b/include/trace/hooks/gic.h @@ -8,7 +8,6 @@ #define _TRACE_HOOK_GIC_H -#include #include struct gic_chip_data; diff --git a/include/trace/hooks/gic_v3.h b/include/trace/hooks/gic_v3.h index 3afb7379145d1a5b6b1c4d824e306ea1659bae21..337f7e1348c1864495045fbd8f2244727dede91a 100644 --- a/include/trace/hooks/gic_v3.h +++ b/include/trace/hooks/gic_v3.h @@ -4,7 +4,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_GIC_V3_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_GIC_V3_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/gpiolib.h b/include/trace/hooks/gpiolib.h index 96aca935f953d40e182a49e6457ecd96ded85013..24cb3f54fb82eb497e6b5f577579e1448585887e 100644 --- a/include/trace/hooks/gpiolib.h +++ b/include/trace/hooks/gpiolib.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_GPIOLIB_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_GPIOLIB_H -#include #include #include "../drivers/gpio/gpiolib.h" diff --git a/include/trace/hooks/gup.h b/include/trace/hooks/gup.h new file mode 100644 index 0000000000000000000000000000000000000000..47ea47eeede409bfb624272354a007f049c2b829 --- /dev/null +++ b/include/trace/hooks/gup.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM gup +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH trace/hooks +#if !defined(_TRACE_HOOK_GUP_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HOOK_GUP_H +#include + +struct page; + +DECLARE_HOOK(android_vh_try_grab_compound_head, + TP_PROTO(struct page *page, int refs, unsigned int flags, bool *ret), + TP_ARGS(page, refs, flags, ret)); + +DECLARE_HOOK(android_vh___get_user_pages_remote, + TP_PROTO(int *locked, unsigned int *gup_flags, struct page **pages), + TP_ARGS(locked, gup_flags, pages)); + +DECLARE_HOOK(android_vh_get_user_pages, + TP_PROTO(unsigned int *gup_flags, struct page **pages), + TP_ARGS(gup_flags, pages)); + +DECLARE_HOOK(android_vh_internal_get_user_pages_fast, + TP_PROTO(unsigned int *gup_flags, struct page **pages), + TP_ARGS(gup_flags, pages)); + +DECLARE_HOOK(android_vh_pin_user_pages, + TP_PROTO(unsigned int *gup_flags, struct page **pages), + TP_ARGS(gup_flags, pages)); +#endif /* _TRACE_HOOK_GUP_H */ +/* This part must be outside protection */ +#include diff --git a/include/trace/hooks/hung_task.h b/include/trace/hooks/hung_task.h index b355828df349e8695502664c1638f8ede6665ce9..1559d3ed774f00597b2d93a3edfd9ace3cbca80c 100644 --- a/include/trace/hooks/hung_task.h +++ b/include/trace/hooks/hung_task.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_HUNG_TASK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_HUNG_TASK_H -#include #include DECLARE_HOOK(android_vh_check_uninterruptible_tasks, diff --git a/include/trace/hooks/i2c.h b/include/trace/hooks/i2c.h index 71536ad67f6701cc01991c742837e4cc782c3f04..306c0f8cab51f9fcfe292ffd981c0ae4e5ca1ce3 100644 --- a/include/trace/hooks/i2c.h +++ b/include/trace/hooks/i2c.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_I2C_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_I2C_H -#include #include DECLARE_HOOK(android_vh_of_i2c_get_board_info, diff --git a/include/trace/hooks/iommu.h b/include/trace/hooks/iommu.h index d4fbeec3844bf2f45c3a393ca6efbdb5497dbd90..6c30cb7932fa6db6abc8293281e5c90df52f71e4 100644 --- a/include/trace/hooks/iommu.h +++ b/include/trace/hooks/iommu.h @@ -9,14 +9,12 @@ #include -#include #include #if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_ANDROID_VENDOR_HOOKS) - -DECLARE_HOOK(android_vh_iommu_setup_dma_ops, +DECLARE_RESTRICTED_HOOK(android_rvh_iommu_setup_dma_ops, TP_PROTO(struct device *dev, u64 dma_base, u64 size), - TP_ARGS(dev, dma_base, size)); + TP_ARGS(dev, dma_base, size), 1); DECLARE_HOOK(android_vh_iommu_alloc_iova, TP_PROTO(struct device *dev, dma_addr_t iova, size_t size), @@ -27,7 +25,7 @@ DECLARE_HOOK(android_vh_iommu_free_iova, TP_ARGS(iova, size)); #else -#define trace_android_vh_iommu_setup_dma_ops(dev, dma_base, size) +#define trace_android_rvh_iommu_setup_dma_ops(dev, dma_base, size) #define trace_android_vh_iommu_alloc_iova(dev, iova, size) #define trace_android_vh_iommu_free_iova(iova, size) diff --git a/include/trace/hooks/ipv6.h b/include/trace/hooks/ipv6.h index c1998af0c5a0ae430e0c3510ed547094d35fe5e7..64a54816123321aaceca887a42dc03fd195899a3 100644 --- a/include/trace/hooks/ipv6.h +++ b/include/trace/hooks/ipv6.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_IPV6_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_IPV6_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/logbuf.h b/include/trace/hooks/logbuf.h index f73ad597fc648b8efe8fc046270f8409fd546559..72c34f7fbad0c8d439d7ecbdae14a78d35761a24 100644 --- a/include/trace/hooks/logbuf.h +++ b/include/trace/hooks/logbuf.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_LOGBUF_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_LOGBUF_H -#include #include struct printk_ringbuffer; diff --git a/include/trace/hooks/memory.h b/include/trace/hooks/memory.h index 4b3f77a72d82c528f2e38b7a5bb1c785987aca80..940af43d736c599b097a5fd96f0dc2b2ac8701e4 100644 --- a/include/trace/hooks/memory.h +++ b/include/trace/hooks/memory.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_MEMORY_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_MEMORY_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/mm.h b/include/trace/hooks/mm.h index ac6142a6e02ad5c2de190cdcb2053ad8fb5fd431..9b583a3371011734c114238a52c101f77ea22605 100644 --- a/include/trace/hooks/mm.h +++ b/include/trace/hooks/mm.h @@ -11,7 +11,6 @@ #include #include -#include #include struct cma; diff --git a/include/trace/hooks/mmc_core.h b/include/trace/hooks/mmc_core.h index ad367782d59f66e7d6ddc2f6a5f7a631f5df1c10..2e36dd8b2b67a63d23506786ecd1514c1f2dd597 100644 --- a/include/trace/hooks/mmc_core.h +++ b/include/trace/hooks/mmc_core.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_MMC_CORE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_MMC_CORE_H -#include #include struct mmc_host; diff --git a/include/trace/hooks/module.h b/include/trace/hooks/module.h index 281cb0d37c12cf0d19d93763647b35a58e3fbeef..78b49869ca65ca7f6fdfd4bf95d3d80437c44a32 100644 --- a/include/trace/hooks/module.h +++ b/include/trace/hooks/module.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_MODULE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_MODULE_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/mpam.h b/include/trace/hooks/mpam.h index 8479fe7c2356f498a89b3c2065aa83f794321b9b..b62f965032da75760f15dd9df51c4adf5d93662a 100644 --- a/include/trace/hooks/mpam.h +++ b/include/trace/hooks/mpam.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_MPAM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_MPAM_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/net.h b/include/trace/hooks/net.h index 6715aa4eb668d768a7079997e15c77cd48e1d779..0b6de6f659cf79f7a0df3b6babd7166e21b7dd58 100644 --- a/include/trace/hooks/net.h +++ b/include/trace/hooks/net.h @@ -6,7 +6,6 @@ #if !defined(_TRACE_HOOK_NET_VH_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_NET_VH_H -#include #include struct packet_type; diff --git a/include/trace/hooks/pci.h b/include/trace/hooks/pci.h new file mode 100644 index 0000000000000000000000000000000000000000..00d38980ec4992454b7ff2683b62c725cc63fa8d --- /dev/null +++ b/include/trace/hooks/pci.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM pci +#define TRACE_INCLUDE_PATH trace/hooks + +#if !defined(_TRACE_HOOK_PCI_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HOOK_PCI_H +#include +/* + * Following tracepoints are not exported in tracefs and provide a + * mechanism for vendor modules to hook and extend functionality + */ + +DECLARE_RESTRICTED_HOOK(android_rvh_pci_d3_sleep, + TP_PROTO(struct pci_dev *dev, unsigned int *delay), + TP_ARGS(dev, delay), 1); + +#endif /* _TRACE_HOOK_PCI_H */ + +/* This part must be outside protection */ +#include diff --git a/include/trace/hooks/pm_domain.h b/include/trace/hooks/pm_domain.h index 548e350e8090805cdb52a91b05eb496f8c53d0da..f6b1c3808ce98a696d612fcdb4e9e844f1a8f98e 100644 --- a/include/trace/hooks/pm_domain.h +++ b/include/trace/hooks/pm_domain.h @@ -8,7 +8,6 @@ #if !defined(_TRACE_HOOK_PM_DOMAIN_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_PM_DOMAIN_H -#include #include struct generic_pm_domain; diff --git a/include/trace/hooks/power.h b/include/trace/hooks/power.h index 149ea08580b808750b5830fb6a29a29e1a7536e0..195538679f322e0769a44005bd7378b375d29f61 100644 --- a/include/trace/hooks/power.h +++ b/include/trace/hooks/power.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_POWER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_POWER_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/preemptirq.h b/include/trace/hooks/preemptirq.h index b4d32c473f3aca459bcf1caa7f96f91523169377..4fbab929f4cf5ee002acd5a60d1a1962a553637f 100644 --- a/include/trace/hooks/preemptirq.h +++ b/include/trace/hooks/preemptirq.h @@ -8,7 +8,6 @@ #if !defined(_TRACE_HOOK_PREEMPTIRQ_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_PREEMPTIRQ_H -#include #include DECLARE_RESTRICTED_HOOK(android_rvh_preempt_disable, diff --git a/include/trace/hooks/printk.h b/include/trace/hooks/printk.h index e4df6e4bf7a8a8a5fff08ba5ea27332291443479..5ec4d743d67f5fb756c8cc997ddc1e52d4aecb8a 100644 --- a/include/trace/hooks/printk.h +++ b/include/trace/hooks/printk.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_PRINTK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_PRINTK_H -#include #include DECLARE_HOOK(android_vh_printk_hotplug, diff --git a/include/trace/hooks/psi.h b/include/trace/hooks/psi.h index 62d1a0ee7b6352976844460def3207575fbea1e3..bd704c16185b4b5f661534864280db99424439b2 100644 --- a/include/trace/hooks/psi.h +++ b/include/trace/hooks/psi.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_PSI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_PSI_H -#include #include #if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_ANDROID_VENDOR_HOOKS) diff --git a/include/trace/hooks/remoteproc.h b/include/trace/hooks/remoteproc.h index 7cc5e93d2ebe50789edb30281033e3426aa35d07..ee0a2f0ea147f9b37e65dc7d1a700b0ebe783c94 100644 --- a/include/trace/hooks/remoteproc.h +++ b/include/trace/hooks/remoteproc.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_RPROC_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_RPROC_H -#include #include struct rproc; diff --git a/include/trace/hooks/rwsem.h b/include/trace/hooks/rwsem.h index d644a6e2125989d0cfd24f11f08f0813501981c9..9737879e3bc8f31417d127a3e523e4be636140b1 100644 --- a/include/trace/hooks/rwsem.h +++ b/include/trace/hooks/rwsem.h @@ -4,7 +4,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_RWSEM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_RWSEM_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/sched.h b/include/trace/hooks/sched.h index 3b7d022c4328a6e6c5cb641392afd1c400c52770..79e8b401b3a071c17af5ad013a8303d34744944c 100644 --- a/include/trace/hooks/sched.h +++ b/include/trace/hooks/sched.h @@ -4,7 +4,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_SCHED_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SCHED_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a @@ -19,6 +18,10 @@ DECLARE_RESTRICTED_HOOK(android_rvh_select_task_rq_rt, TP_PROTO(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags, int *new_cpu), TP_ARGS(p, prev_cpu, sd_flag, wake_flags, new_cpu), 1); +DECLARE_RESTRICTED_HOOK(android_rvh_select_task_rq_dl, + TP_PROTO(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags, int *new_cpu), + TP_ARGS(p, prev_cpu, sd_flag, wake_flags, new_cpu), 1); + DECLARE_RESTRICTED_HOOK(android_rvh_select_fallback_rq, TP_PROTO(int cpu, struct task_struct *p, int *new_cpu), TP_ARGS(cpu, p, new_cpu), 1); @@ -232,6 +235,15 @@ DECLARE_HOOK(android_vh_map_util_freq, bool *need_freq_update), TP_ARGS(util, freq, cap, next_freq, policy, need_freq_update)); +DECLARE_HOOK(android_vh_update_next_freq, + TP_PROTO(struct cpufreq_policy *policy, unsigned int old_next_freq, + unsigned int *new_next_freq, s64 delta_ns), + TP_ARGS(policy, old_next_freq, new_next_freq, delta_ns)); + +DECLARE_HOOK(android_vh_sugov_get_util, + TP_PROTO(unsigned int cpu, unsigned long *ret), + TP_ARGS(cpu, ret)); + struct em_perf_domain; DECLARE_HOOK(android_vh_em_cpu_energy, TP_PROTO(struct em_perf_domain *pd, @@ -275,6 +287,16 @@ DECLARE_RESTRICTED_HOOK(android_rvh_uclamp_eff_get, struct uclamp_se *uclamp_max, struct uclamp_se *uclamp_eff, int *ret), TP_ARGS(p, clamp_id, uclamp_max, uclamp_eff, ret), 1); +DECLARE_RESTRICTED_HOOK(android_rvh_uclamp_task_util, + TP_PROTO(unsigned long task_util, unsigned long min_util, + unsigned long max_util, unsigned long *ret), + TP_ARGS(task_util, min_util, max_util, ret), 1); + +DECLARE_RESTRICTED_HOOK(android_rvh_uclamp_rq_util_with, + TP_PROTO(unsigned long util, unsigned long min_util, + unsigned long max_util, unsigned long *ret), + TP_ARGS(util, min_util, max_util, ret), 1); + DECLARE_HOOK(android_vh_build_sched_domains, TP_PROTO(bool has_asym), TP_ARGS(has_asym)); @@ -382,6 +404,30 @@ DECLARE_HOOK(android_vh_setscheduler_uclamp, TP_PROTO(struct task_struct *tsk, int clamp_id, unsigned int value), TP_ARGS(tsk, clamp_id, value)); +DECLARE_RESTRICTED_HOOK(android_rvh_attach_entity_load_avg, + TP_PROTO(struct cfs_rq *cfs_rq, struct sched_entity *se), + TP_ARGS(cfs_rq, se), 1); + +DECLARE_RESTRICTED_HOOK(android_rvh_detach_entity_load_avg, + TP_PROTO(struct cfs_rq *cfs_rq, struct sched_entity *se), + TP_ARGS(cfs_rq, se), 1); + +DECLARE_RESTRICTED_HOOK(android_rvh_update_load_avg, + TP_PROTO(u64 now, struct cfs_rq *cfs_rq, struct sched_entity *se), + TP_ARGS(now, cfs_rq, se), 1); + +DECLARE_RESTRICTED_HOOK(android_rvh_remove_entity_load_avg, + TP_PROTO(struct cfs_rq *cfs_rq, struct sched_entity *se), + TP_ARGS(cfs_rq, se), 1); + +DECLARE_RESTRICTED_HOOK(android_rvh_update_blocked_fair, + TP_PROTO(struct rq *rq), + TP_ARGS(rq), 1); + +DECLARE_RESTRICTED_HOOK(android_rvh_update_rt_rq_load_avg, + TP_PROTO(u64 now, struct rq *rq, struct task_struct *tsk, int running), + TP_ARGS(now, rq, tsk, running), 1); + /* macro versions of hooks are no longer required */ #endif /* _TRACE_HOOK_SCHED_H */ diff --git a/include/trace/hooks/scmi.h b/include/trace/hooks/scmi.h index ea2db9f07a9f80bb2f59809e8349493d0aee8a4f..d889d3334b9cbab35de08925629b4e813ab07a8c 100644 --- a/include/trace/hooks/scmi.h +++ b/include/trace/hooks/scmi.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_SCMI_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SCMI_H -#include #include DECLARE_HOOK(android_vh_scmi_timeout_sync, diff --git a/include/trace/hooks/selinux.h b/include/trace/hooks/selinux.h index 2ad6484d2c5535103087862dd40ba8fd490b9879..ebd6187c41082486ad0c09539a6f41b2d6a158ef 100644 --- a/include/trace/hooks/selinux.h +++ b/include/trace/hooks/selinux.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_SELINUX_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SELINUX_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/shmem_fs.h b/include/trace/hooks/shmem_fs.h index 08e63033070e994ff49a66b1cafb56504a3deade..0077707072c5c9c13107a8ec4e98ca68f829e21b 100644 --- a/include/trace/hooks/shmem_fs.h +++ b/include/trace/hooks/shmem_fs.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_SHMEM_FS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SHMEM_FS_H -#include #include struct page; diff --git a/include/trace/hooks/signal.h b/include/trace/hooks/signal.h index a0db2e8cf77d3024a2d5a59e34a7d3de5403bf99..937e6c49b7ca8536caf3a01247ea42c746ac495f 100644 --- a/include/trace/hooks/signal.h +++ b/include/trace/hooks/signal.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_SIGNAL_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SIGNAL_H -#include #include struct task_struct; diff --git a/include/trace/hooks/snd_compr.h b/include/trace/hooks/snd_compr.h index c42ed8d6bc30183e9f1979c929aa97a065e50d14..2e63aff9e4f943b84e3306fdeb6c4fa4b553b8ff 100644 --- a/include/trace/hooks/snd_compr.h +++ b/include/trace/hooks/snd_compr.h @@ -8,7 +8,6 @@ #if !defined(_TRACE_HOOK_SND_COMPR_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SND_COMPR_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/softlockup.h b/include/trace/hooks/softlockup.h index 9294913f91df3ced4f6a1d955ed174e04d2cb555..072f30007d7128c8f6356c13b7a100fa0260fe92 100644 --- a/include/trace/hooks/softlockup.h +++ b/include/trace/hooks/softlockup.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_SOFTLOCKUP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SOFTLOCKUP_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/sound.h b/include/trace/hooks/sound.h index 2bf018edf39267238a9d072f4815b6f9d0a722e9..3dfd589d264d11097045e8a47735c01e600db493 100644 --- a/include/trace/hooks/sound.h +++ b/include/trace/hooks/sound.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_SOUND_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SOUND_H -#include #include #include diff --git a/include/trace/hooks/sys.h b/include/trace/hooks/sys.h index 9e5d7a5cb2cf13a0fa23e227f9305833fa054d51..e2d5d6d4fc142f3624f4e568cde16be51740f87d 100644 --- a/include/trace/hooks/sys.h +++ b/include/trace/hooks/sys.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_SYS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SYS_H -#include #include struct task_struct; diff --git a/include/trace/hooks/syscall_check.h b/include/trace/hooks/syscall_check.h index d39802aa4a1e6a5b331b2405731c12a010b346be..56d8267297ca2bd85e3ce6d2539e9e51a9facbbb 100644 --- a/include/trace/hooks/syscall_check.h +++ b/include/trace/hooks/syscall_check.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_SYSCALL_CHECK_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SYSCALL_CHECK_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/sysrqcrash.h b/include/trace/hooks/sysrqcrash.h index d163f898a9f6e6b2bee68a3afd30c4d6efb09d36..0056322c38179792896bf03d96119cb7426a3c1c 100644 --- a/include/trace/hooks/sysrqcrash.h +++ b/include/trace/hooks/sysrqcrash.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_SYSRQCRASH_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_SYSRQCRASH_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/thermal.h b/include/trace/hooks/thermal.h index 944454ca0e481a4092661bed360d60e4a3e87579..78ce2a5ea49bf1e1bd70e7184282892609ee92af 100644 --- a/include/trace/hooks/thermal.h +++ b/include/trace/hooks/thermal.h @@ -7,8 +7,8 @@ #if !defined(_TRACE_HOOK_THERMAL_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_THERMAL_H -#include #include +#include DECLARE_HOOK(android_vh_enable_thermal_genl_check, TP_PROTO(int event, int tz_id, int *enable_thermal_genl), @@ -19,6 +19,26 @@ DECLARE_HOOK(android_vh_thermal_pm_notify_suspend, TP_PROTO(struct thermal_zone_device *tz, int *irq_wakeable), TP_ARGS(tz, irq_wakeable)); +DECLARE_HOOK(android_vh_modify_thermal_request_freq, + TP_PROTO(struct cpufreq_policy *policy, unsigned long *request_freq), + TP_ARGS(policy, request_freq)); + +DECLARE_HOOK(android_vh_modify_thermal_target_freq, + TP_PROTO(struct cpufreq_policy *policy, unsigned int *target_freq), + TP_ARGS(policy, target_freq)); + +DECLARE_HOOK(android_vh_enable_thermal_power_throttle, + TP_PROTO(int *enable, int *override), + TP_ARGS(enable, override)); + +DECLARE_HOOK(android_vh_thermal_power_cap, + TP_PROTO(u32 *power_range), + TP_ARGS(power_range)); + +DECLARE_HOOK(android_vh_get_thermal_zone_device, + TP_PROTO(struct thermal_zone_device *tz), + TP_ARGS(tz)); + #endif /* _TRACE_HOOK_THERMAL_H */ /* This part must be outside protection */ #include diff --git a/include/trace/hooks/timer.h b/include/trace/hooks/timer.h index 174d958e4c3145fa6bf9f20f54204f4d810363c5..67ef865dad4adb64c896091055df2a8653771bca 100644 --- a/include/trace/hooks/timer.h +++ b/include/trace/hooks/timer.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_TIMER_H -#include #include DECLARE_HOOK(android_vh_timer_calc_index, diff --git a/include/trace/hooks/topology.h b/include/trace/hooks/topology.h index 74014027160c8d6055ee5d810b0355e5fc4d721a..5d29893ba477d80094c5d1a372002d8a95735ebc 100644 --- a/include/trace/hooks/topology.h +++ b/include/trace/hooks/topology.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_TOPOLOGY_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_TOPOLOGY_H -#include #include #include diff --git a/include/trace/hooks/traps.h b/include/trace/hooks/traps.h index 350a14369acc963a69d7bc2a6d80b92fddd8072b..38749535aa4d0a9839d02ddf6a06be3e59e43b67 100644 --- a/include/trace/hooks/traps.h +++ b/include/trace/hooks/traps.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_TRAPS_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_TRAPS_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/typec.h b/include/trace/hooks/typec.h index f87151124075836acbcaeef9883364c91e79fd0f..6634dbb82917fa74e7ef588755e9d145ab2850be 100644 --- a/include/trace/hooks/typec.h +++ b/include/trace/hooks/typec.h @@ -4,7 +4,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_TYPEC_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_TYPEC_H -#include #include #include /* @@ -62,6 +61,10 @@ DECLARE_HOOK(android_vh_typec_tcpm_log, TP_PROTO(const char *log, bool *bypass), TP_ARGS(log, bypass)); -#endif /* _TRACE_HOOK_UFSHCD_H */ +DECLARE_HOOK(android_vh_typec_tcpm_modify_src_caps, + TP_PROTO(unsigned int *nr_src_pdo, u32 (*src_pdo)[PDO_MAX_OBJECTS], bool *modified), + TP_ARGS(nr_src_pdo, src_pdo, modified)); + +#endif /* _TRACE_HOOK_TYPEC_H */ /* This part must be outside protection */ #include diff --git a/include/trace/hooks/ufshcd.h b/include/trace/hooks/ufshcd.h index ec8845a55076390a70c0969c27d4e2237d2ec555..eebde088cd240ff4154f4c6055dce6fcbd124329 100644 --- a/include/trace/hooks/ufshcd.h +++ b/include/trace/hooks/ufshcd.h @@ -4,7 +4,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_UFSHCD_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_UFSHCD_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a @@ -19,6 +18,10 @@ DECLARE_HOOK(android_vh_ufs_fill_prdt, unsigned int segments, int *err), TP_ARGS(hba, lrbp, segments, err)); +DECLARE_RESTRICTED_HOOK(android_rvh_ufs_complete_init, + TP_PROTO(struct ufs_hba *hba), + TP_ARGS(hba), 1); + DECLARE_RESTRICTED_HOOK(android_rvh_ufs_reprogram_all_keys, TP_PROTO(struct ufs_hba *hba, int *err), TP_ARGS(hba, err), 1); diff --git a/include/trace/hooks/usb.h b/include/trace/hooks/usb.h index f8459473042e4b994dac34aafc31567e7af41af2..f4d5ff04b6b880b4cb46530c9995a092cffe9b46 100644 --- a/include/trace/hooks/usb.h +++ b/include/trace/hooks/usb.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_USB_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_USB_H -#include #include #include /* diff --git a/include/trace/hooks/user.h b/include/trace/hooks/user.h index b5b507ccd9c265380059383097be37b3a66c5247..773eed2ea75a4837fd1fe3168c3fb4918c2b36ec 100644 --- a/include/trace/hooks/user.h +++ b/include/trace/hooks/user.h @@ -5,7 +5,6 @@ #define TRACE_INCLUDE_PATH trace/hooks #if !defined(_TRACE_HOOK_USER_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_USER_H -#include #include struct user_struct; diff --git a/include/trace/hooks/v4l2core.h b/include/trace/hooks/v4l2core.h index 32b6e96ba627b2a8117377989e9c50553b96ebc2..615e1ce8ff9e9785a2f251f0613cc1d85b6929f6 100644 --- a/include/trace/hooks/v4l2core.h +++ b/include/trace/hooks/v4l2core.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_V4L2CORE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_V4L2_CORE_H -#include #include struct v4l2_format; diff --git a/include/trace/hooks/v4l2mc.h b/include/trace/hooks/v4l2mc.h index 8a4653bd4a4de5acf1e0438d48d0017a6909729a..952c3ffe26e5cd859e525b606e0eb44af3b440a0 100644 --- a/include/trace/hooks/v4l2mc.h +++ b/include/trace/hooks/v4l2mc.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_V4L2MC_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_V4L2MC_H -#include #include struct media_link; diff --git a/include/trace/hooks/vendor_hooks.h b/include/trace/hooks/vendor_hooks.h index f51a9ac310457ca06747a4e75e6368cc70e26b8e..a283509abbf61e68d9e23f1353fc46a919f63369 100644 --- a/include/trace/hooks/vendor_hooks.h +++ b/include/trace/hooks/vendor_hooks.h @@ -7,7 +7,9 @@ * will override the DECLARE_RESTRICTED_HOOK and break the second include. */ +#ifndef __GENKSYMS__ #include +#endif #if defined(CONFIG_TRACEPOINTS) && defined(CONFIG_ANDROID_VENDOR_HOOKS) diff --git a/include/trace/hooks/vmscan.h b/include/trace/hooks/vmscan.h index b6b77d9895cdd7a1bdfd0b494b16abf4890b95d9..4a3af8e75839f2aff048c4f18d9a021aa25526b1 100644 --- a/include/trace/hooks/vmscan.h +++ b/include/trace/hooks/vmscan.h @@ -7,7 +7,6 @@ #if !defined(_TRACE_HOOK_VMSCAN_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_VMSCAN_H -#include #include DECLARE_HOOK(android_vh_tune_scan_type, diff --git a/include/trace/hooks/workqueue.h b/include/trace/hooks/workqueue.h index 0ffe56433a4c8a66d9dc568ac56685f0ca8bd785..dee037d0b7c04dfb5a9b4c6900a74ec54de83232 100644 --- a/include/trace/hooks/workqueue.h +++ b/include/trace/hooks/workqueue.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_WORKQUEUE_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_WORKQUEUE_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/trace/hooks/wqlockup.h b/include/trace/hooks/wqlockup.h index 21e23ad6b9d90b6371a63361e14577e61c9a270f..2572ebf5eff46d59cc9ee08bc63d2e61d72df1be 100644 --- a/include/trace/hooks/wqlockup.h +++ b/include/trace/hooks/wqlockup.h @@ -5,7 +5,6 @@ #if !defined(_TRACE_HOOK_WQLOCKUP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_HOOK_WQLOCKUP_H -#include #include /* * Following tracepoints are not exported in tracefs and provide a diff --git a/include/uapi/asm-generic/poll.h b/include/uapi/asm-generic/poll.h index 41b509f410bf9b7f50f2c2e31542f170ee28f38a..f9c520ce4bf4e4cd3128e298d0d06a415514046e 100644 --- a/include/uapi/asm-generic/poll.h +++ b/include/uapi/asm-generic/poll.h @@ -29,7 +29,7 @@ #define POLLRDHUP 0x2000 #endif -#define POLLFREE (__force __poll_t)0x4000 /* currently only for epoll */ +#define POLLFREE (__force __poll_t)0x4000 #define POLL_BUSY_LOOP (__force __poll_t)0x8000 diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 2056318988f774931c4e0a3104144bf4a75ff52f..f7b735dabf3578b4a7b728908d27df64a52b985f 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -859,9 +859,11 @@ __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) __SYSCALL(__NR_faccessat2, sys_faccessat2) #define __NR_process_madvise 440 __SYSCALL(__NR_process_madvise, sys_process_madvise) +#define __NR_process_mrelease 448 +__SYSCALL(__NR_process_mrelease, sys_process_mrelease) #undef __NR_syscalls -#define __NR_syscalls 441 +#define __NR_syscalls 449 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/android/binder.h b/include/uapi/linux/android/binder.h index 74dcd8e92e50d1811a56162d99b906120a1eca70..2d3f015e2ae3865f93887918dd180b5e0d7b83d0 100644 --- a/include/uapi/linux/android/binder.h +++ b/include/uapi/linux/android/binder.h @@ -273,7 +273,14 @@ struct binder_freeze_info { struct binder_frozen_status_info { __u32 pid; + + /* process received sync transactions since last frozen + * bit 0: received sync transaction after being frozen + * bit 1: new pending sync transaction during freezing + */ __u32 sync_recv; + + /* process received async transactions since last frozen */ __u32 async_recv; }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 762bf87c26a3efa2d4bdf2bc3f9d218b951cbd1c..26b6bee72039a995a5faa06e698201002520d548 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -199,6 +199,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, + BPF_PROG_TYPE_FUSE, }; enum bpf_attach_type { diff --git a/include/uapi/linux/can/isotp.h b/include/uapi/linux/can/isotp.h index 7793b26aa154d68f5fb2b074007afdfa0ccd3a44..c55935b64ccc8ef21f5c528e7536ac44474e826d 100644 --- a/include/uapi/linux/can/isotp.h +++ b/include/uapi/linux/can/isotp.h @@ -135,7 +135,7 @@ struct can_isotp_ll_options { #define CAN_ISOTP_FORCE_RXSTMIN 0x100 /* ignore CFs depending on rx stmin */ #define CAN_ISOTP_RX_EXT_ADDR 0x200 /* different rx extended addressing */ #define CAN_ISOTP_WAIT_TX_DONE 0x400 /* wait for tx completion */ - +#define CAN_ISOTP_SF_BROADCAST 0x800 /* 1-to-N functional addressing */ /* default values */ diff --git a/include/uapi/linux/close_range.h b/include/uapi/linux/close_range.h index 6928a9fdee3c45e9b0c02047ad21da5d9669d191..2d804281554c5cd8379ce07f45e21e6f355d1568 100644 --- a/include/uapi/linux/close_range.h +++ b/include/uapi/linux/close_range.h @@ -5,5 +5,8 @@ /* Unshare the file descriptor table before closing file descriptors. */ #define CLOSE_RANGE_UNSHARE (1U << 1) +/* Set the FD_CLOEXEC bit instead of closing the file descriptor. */ +#define CLOSE_RANGE_CLOEXEC (1U << 2) + #endif /* _UAPI_LINUX_CLOSE_RANGE_H */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index e2bf36e6964b64ce1bf1b5dd50e5f5daea152d1b..c94fa2941502123856eb5564049628ca668eb88a 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -394,7 +394,9 @@ enum { ETHTOOL_A_PAUSE_STAT_TX_FRAMES, ETHTOOL_A_PAUSE_STAT_RX_FRAMES, - /* add new constants above here */ + /* add new constants above here + * adjust ETHTOOL_PAUSE_STAT_CNT if adding non-stats! + */ __ETHTOOL_A_PAUSE_STAT_CNT, ETHTOOL_A_PAUSE_STAT_MAX = (__ETHTOOL_A_PAUSE_STAT_CNT - 1) }; diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h index bf3c62dd71123cac396ecd7b48eaef925511e406..f0521a39a5863a0346eb26db501fbb0ade15b4f8 100644 --- a/include/uapi/linux/fuse.h +++ b/include/uapi/linux/fuse.h @@ -516,6 +516,23 @@ struct fuse_entry_out { struct fuse_attr attr; }; +#define FUSE_ACTION_KEEP 0 +#define FUSE_ACTION_REMOVE 1 +#define FUSE_ACTION_REPLACE 2 + +struct fuse_entry_bpf_out { + uint64_t backing_action; + uint64_t backing_fd; + uint64_t bpf_action; + uint64_t bpf_fd; +}; + +struct fuse_entry_bpf { + struct fuse_entry_bpf_out out; + struct file *backing_file; + struct file *bpf_file; +}; + struct fuse_forget_in { uint64_t nlookup; }; @@ -634,6 +651,12 @@ struct fuse_read_in { uint32_t padding; }; +struct fuse_read_out { + uint64_t offset; + uint32_t again; + uint32_t padding; +}; + #define FUSE_COMPAT_WRITE_IN_SIZE 24 struct fuse_write_in { @@ -807,7 +830,7 @@ struct fuse_in_header { uint32_t uid; uint32_t gid; uint32_t pid; - uint32_t padding; + uint32_t error_in; }; struct fuse_out_header { @@ -942,4 +965,52 @@ struct fuse_removemapping_one { #define FUSE_REMOVEMAPPING_MAX_ENTRY \ (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) +/* + * Fuse BPF Args + * + * Used to communicate with bpf programs to allow checking or altering certain values. + * The end_offset allows the bpf verifier to check boundaries statically. This reflects + * the ends of the buffer. size shows the length that was actually used. + * + */ + +/** One input argument of a request */ +struct fuse_bpf_in_arg { + uint32_t size; + const void *value; + const void *end_offset; +}; + +/** One output argument of a request */ +struct fuse_bpf_arg { + uint32_t size; + void *value; + void *end_offset; +}; + +#define FUSE_MAX_IN_ARGS 5 +#define FUSE_MAX_OUT_ARGS 3 + +#define FUSE_BPF_FORCE (1 << 0) +#define FUSE_BPF_OUT_ARGVAR (1 << 6) + +struct fuse_bpf_args { + uint64_t nodeid; + uint32_t opcode; + uint32_t error_in; + uint32_t in_numargs; + uint32_t out_numargs; + uint32_t flags; + struct fuse_bpf_in_arg in_args[FUSE_MAX_IN_ARGS]; + struct fuse_bpf_arg out_args[FUSE_MAX_OUT_ARGS]; +}; + +#define FUSE_BPF_USER_FILTER 1 +#define FUSE_BPF_BACKING 2 +#define FUSE_BPF_POST_FILTER 4 + +#define FUSE_OPCODE_FILTER 0x0ffff +#define FUSE_PREFILTER 0x10000 +#define FUSE_POSTFILTER 0x20000 + #endif /* _LINUX_FUSE_H */ diff --git a/include/uapi/linux/input-event-codes.h b/include/uapi/linux/input-event-codes.h index 225ec87d4f2283c7e2aacba55fbf3d405753b570..7989d9483ea75e2bbaaf78c1fd3d3bca741678ff 100644 --- a/include/uapi/linux/input-event-codes.h +++ b/include/uapi/linux/input-event-codes.h @@ -278,7 +278,8 @@ #define KEY_PAUSECD 201 #define KEY_PROG3 202 #define KEY_PROG4 203 -#define KEY_DASHBOARD 204 /* AL Dashboard */ +#define KEY_ALL_APPLICATIONS 204 /* AC Desktop Show All Applications */ +#define KEY_DASHBOARD KEY_ALL_APPLICATIONS #define KEY_SUSPEND 205 #define KEY_CLOSE 206 /* AC Close */ #define KEY_PLAY 207 @@ -612,6 +613,7 @@ #define KEY_ASSISTANT 0x247 /* AL Context-aware desktop assistant */ #define KEY_KBD_LAYOUT_NEXT 0x248 /* AC Next Keyboard Layout Select */ #define KEY_EMOJI_PICKER 0x249 /* Show/hide emoji picker (HUTRR101) */ +#define KEY_DICTATE 0x24a /* Start or Stop Voice Dictation Session (HUTRR99) */ #define KEY_BRIGHTNESS_MIN 0x250 /* Set Brightness to Minimum */ #define KEY_BRIGHTNESS_MAX 0x251 /* Set Brightness to Maximum */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 3c77972c8a50047fc01e02e636f0fc41e05f7b91..facb29ebef2dd50803d6923dae8b940af799b25a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -819,6 +819,12 @@ struct kvm_ppc_resize_hpt { #define KVM_VM_TYPE_ARM_IPA_SIZE_MASK 0xffULL #define KVM_VM_TYPE_ARM_IPA_SIZE(x) \ ((x) & KVM_VM_TYPE_ARM_IPA_SIZE_MASK) + +#define KVM_VM_TYPE_ARM_PROTECTED (1UL << 31) + +#define KVM_VM_TYPE_MASK (KVM_VM_TYPE_ARM_IPA_SIZE_MASK | \ + KVM_VM_TYPE_ARM_PROTECTED) + /* * ioctls for /dev/kvm fds: */ @@ -1072,6 +1078,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 */ #define KVM_CAP_ARM_MTE 205 +#define KVM_CAP_ARM_PROTECTED_VM 0xffbadab1 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 4b3395082d15c95acc2afaeb7f15b32baf65ef53..26071021e986f6e3f6f71cf12fba8bd11a2390bd 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -106,7 +106,7 @@ enum ip_conntrack_status { IPS_NAT_CLASH = IPS_UNTRACKED, #endif - /* Conntrack got a helper explicitly attached via CT target. */ + /* Conntrack got a helper explicitly attached (ruleset, ctnetlink). */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index f6e3c8c9c7449ad6b98623d034128e03a3c9e651..4fa4e979e948a57a41f3897d8d05bcc26e475da7 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -263,7 +263,7 @@ enum nfc_sdp_attr { #define NFC_SE_ENABLED 0x1 struct sockaddr_nfc { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; @@ -271,14 +271,14 @@ struct sockaddr_nfc { #define NFC_LLCP_MAX_SERVICE_NAME 63 struct sockaddr_nfc_llcp { - sa_family_t sa_family; + __kernel_sa_family_t sa_family; __u32 dev_idx; __u32 target_idx; __u32 nfc_protocol; __u8 dsap; /* Destination SAP, if known */ __u8 ssap; /* Source SAP to be bound to */ char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */; - size_t service_name_len; + __kernel_size_t service_name_len; }; /* NFC socket protocols */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index a95d55f9f25761ab78c8ef529dc41f450f95339e..142b184eca8b4cbba4321d9a4e0a5b5b164fa08e 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -504,6 +504,12 @@ #define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ #define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ #define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */ +#define PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */ #define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ #define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 58b37b12e7452c4c4042d7b963624a4965d687e5..2f70791ff8e57f171cde67dc9a715f9efd07cce5 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -248,11 +248,11 @@ struct prctl_mm_map { #define PR_SET_IO_FLUSHER 57 #define PR_GET_IO_FLUSHER 58 -#define PR_SET_VMA 0x53564d41 -# define PR_SET_VMA_ANON_NAME 0 - /* Set/get enabled arm64 pointer authentication keys */ #define PR_PAC_SET_ENABLED_KEYS 60 #define PR_PAC_GET_ENABLED_KEYS 61 +#define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 + #endif /* _LINUX_PRCTL_H */ diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 2fcad1dd0b0e00a68c37c345697fa133a996c054..b1484a5a3c3600f709de4fb272b9b91b2bd9a8d5 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -55,6 +55,8 @@ #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) +#define PSCI_1_1_FN64_MEM_PROTECT PSCI_0_2_FN64(19) + /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff #define PSCI_0_2_POWER_STATE_ID_SHIFT 0 @@ -82,6 +84,10 @@ #define PSCI_0_2_TOS_UP_NO_MIGRATE 1 #define PSCI_0_2_TOS_MP 2 +/* PSCI v1.1 reset type encoding for SYSTEM_RESET2 */ +#define PSCI_1_1_RESET_TYPE_SYSTEM_WARM_RESET 0 +#define PSCI_1_1_RESET_TYPE_VENDOR_START 0x80000000U + /* PSCI version decoding (independent of PSCI version) */ #define PSCI_VERSION_MAJOR_SHIFT 16 #define PSCI_VERSION_MINOR_MASK \ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index cfcb10b7548382fc295e4d0fd4d03c81522b6475..62db78b9c1a0a21963c8cdd61212c0c09ce8a58e 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -349,5 +349,7 @@ struct tcp_zerocopy_receive { __u32 recv_skip_hint; /* out: amount of bytes to skip */ __u32 inq; /* out: amount of bytes in read queue */ __s32 err; /* out: socket error */ + __u64 copybuf_address; /* in: copybuf address (small reads) */ + __s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */ }; #endif /* _UAPI_LINUX_TCP_H */ diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index b052355ac7a324e173f4ea44c48d8b757fa687cf..bc740d6d22593f91d0a0e6e78e3d264a05450b27 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -45,6 +45,7 @@ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ #define VIRTIO_ID_MEM 24 /* virtio mem */ +#define VIRTIO_ID_SOUND 25 /* virtio sound */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ #define VIRTIO_ID_PMEM 27 /* virtio pmem */ #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index ffc6a5391bb7bb8f7543cb36fb16487596fd3255..90ddb49fce84e6f3e8cd0b500fbcb68021eac379 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -308,6 +308,7 @@ enum xfrm_attr_type_t { XFRMA_SET_MARK, /* __u32 */ XFRMA_SET_MARK_MASK, /* __u32 */ XFRMA_IF_ID, /* __u32 */ + XFRMA_MTIMER_THRESH, /* __u32 in seconds for input SA */ __XFRMA_MAX #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK /* Compatibility */ @@ -505,6 +506,12 @@ struct xfrm_user_offload { int ifindex; __u8 flags; }; +/* This flag was exposed without any kernel code that supporting it. + * Unfortunately, strongswan has the code that uses sets this flag, + * which makes impossible to reuse this bit. + * + * So leave it here to make sure that it won't be reused by mistake. + */ #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 0b1182a3cf4128b4502b7e3db59adeb8522daea5..57b4ae6a4a186e4823877b2d78d7f97e021ac7d0 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -97,17 +97,32 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * access has been ended, free the given page too. Access will be ended * immediately iff the grant entry is not in use, otherwise it will happen * some time later. page may be 0, in which case no freeing will occur. + * Note that the granted page might still be accessed (read or write) by the + * other side after gnttab_end_foreign_access() returns, so even if page was + * specified as 0 it is not allowed to just reuse the page for other + * purposes immediately. gnttab_end_foreign_access() will take an additional + * reference to the granted page in this case, which is dropped only after + * the grant is no longer in use. + * This requires that multi page allocations for areas subject to + * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing + * via free_pages_exact()) in order to avoid high order pages. */ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page); +/* + * End access through the given grant reference, iff the grant entry is + * no longer in use. In case of success ending foreign access, the + * grant reference is deallocated. + * Return 1 if the grant entry was freed, 0 if it is still in use. + */ +int gnttab_try_end_foreign_access(grant_ref_t ref); + int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); -int gnttab_query_foreign_access(grant_ref_t ref); - /* * operations on reserved batches of grant references */ diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h index 2af7a1cd665893d9ff509730c73e0e0852a6881a..b39cdbc522ec7eeb0aa01cf1a1142dacd1772a52 100644 --- a/include/xen/interface/io/ring.h +++ b/include/xen/interface/io/ring.h @@ -1,21 +1,53 @@ -/* SPDX-License-Identifier: GPL-2.0 */ /****************************************************************************** * ring.h * * Shared producer-consumer ring macros. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Tim Deegan and Andrew Warfield November 2004. */ #ifndef __XEN_PUBLIC_IO_RING_H__ #define __XEN_PUBLIC_IO_RING_H__ +/* + * When #include'ing this header, you need to provide the following + * declaration upfront: + * - standard integers types (uint8_t, uint16_t, etc) + * They are provided by stdint.h of the standard headers. + * + * In addition, if you intend to use the FLEX macros, you also need to + * provide the following, before invoking the FLEX macros: + * - size_t + * - memcpy + * - grant_ref_t + * These declarations are provided by string.h of the standard headers, + * and grant_table.h from the Xen public headers. + */ + #include typedef unsigned int RING_IDX; /* Round a 32-bit unsigned constant down to the nearest power of two. */ -#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) #define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) #define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) #define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) @@ -27,82 +59,79 @@ typedef unsigned int RING_IDX; * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ -#define __CONST_RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ - sizeof(((struct _s##_sring *)0)->ring[0]))) - +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) /* * The same for passing in an actual pointer instead of a name tag. */ -#define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) /* * Macros to make the correct C datatypes for a new kind of ring. * * To make a new ring datatype, you need to have two message structures, - * let's say struct request, and struct response already defined. + * let's say request_t, and response_t already defined. * * In a header where you want the ring datatype declared, you then do: * - * DEFINE_RING_TYPES(mytag, struct request, struct response); + * DEFINE_RING_TYPES(mytag, request_t, response_t); * * These expand out to give you a set of types, as you can see below. * The most important of these are: * - * struct mytag_sring - The shared ring. - * struct mytag_front_ring - The 'front' half of the ring. - * struct mytag_back_ring - The 'back' half of the ring. + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. * * To initialize a ring in your code you need to know the location and size * of the shared memory area (PAGE_SIZE, for instance). To initialise * the front half: * - * struct mytag_front_ring front_ring; - * SHARED_RING_INIT((struct mytag_sring *)shared_page); - * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); * * Initializing the back follows similarly (note that only the front * initializes the shared ring): * - * struct mytag_back_ring back_ring; - * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); */ -#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ - \ -/* Shared ring entry */ \ -union __name##_sring_entry { \ - __req_t req; \ - __rsp_t rsp; \ -}; \ - \ -/* Shared ring page */ \ -struct __name##_sring { \ - RING_IDX req_prod, req_event; \ - RING_IDX rsp_prod, rsp_event; \ - uint8_t pad[48]; \ - union __name##_sring_entry ring[1]; /* variable-length */ \ -}; \ - \ -/* "Front" end's private variables */ \ -struct __name##_front_ring { \ - RING_IDX req_prod_pvt; \ - RING_IDX rsp_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; \ - \ -/* "Back" end's private variables */ \ -struct __name##_back_ring { \ - RING_IDX rsp_prod_pvt; \ - RING_IDX req_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; - +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + uint8_t __pad[48]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ /* * Macros for manipulating rings. * @@ -119,94 +148,99 @@ struct __name##_back_ring { \ */ /* Initialising empty rings */ -#define SHARED_RING_INIT(_s) do { \ - (_s)->req_prod = (_s)->rsp_prod = 0; \ - (_s)->req_event = (_s)->rsp_event = 1; \ - memset((_s)->pad, 0, sizeof((_s)->pad)); \ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ } while(0) -#define FRONT_RING_ATTACH(_r, _s, _i, __size) do { \ - (_r)->req_prod_pvt = (_i); \ - (_r)->rsp_cons = (_i); \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ +#define FRONT_RING_ATTACH(_r, _s, _i, __size) do { \ + (_r)->req_prod_pvt = (_i); \ + (_r)->rsp_cons = (_i); \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) #define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size) -#define BACK_RING_ATTACH(_r, _s, _i, __size) do { \ - (_r)->rsp_prod_pvt = (_i); \ - (_r)->req_cons = (_i); \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ +#define BACK_RING_ATTACH(_r, _s, _i, __size) do { \ + (_r)->rsp_prod_pvt = (_i); \ + (_r)->req_cons = (_i); \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) #define BACK_RING_INIT(_r, _s, __size) BACK_RING_ATTACH(_r, _s, 0, __size) /* How big is this ring? */ -#define RING_SIZE(_r) \ +#define RING_SIZE(_r) \ ((_r)->nr_ents) /* Number of free requests (for use on front side only). */ -#define RING_FREE_REQUESTS(_r) \ +#define RING_FREE_REQUESTS(_r) \ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) /* Test if there is an empty slot available on the front ring. * (This is only meaningful from the front. ) */ -#define RING_FULL(_r) \ +#define RING_FULL(_r) \ (RING_FREE_REQUESTS(_r) == 0) /* Test if there are outstanding messages to be processed on a ring. */ -#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ ((_r)->sring->rsp_prod - (_r)->rsp_cons) -#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ - ({ \ - unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ - unsigned int rsp = RING_SIZE(_r) - \ - ((_r)->req_cons - (_r)->rsp_prod_pvt); \ - req < rsp ? req : rsp; \ - }) +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) /* Direct access to individual ring elements, by index. */ -#define RING_GET_REQUEST(_r, _idx) \ +#define RING_GET_REQUEST(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + /* - * Get a local copy of a request. + * Get a local copy of a request/response. * - * Use this in preference to RING_GET_REQUEST() so all processing is + * Use this in preference to RING_GET_{REQUEST,RESPONSE}() so all processing is * done on a local copy that cannot be modified by the other end. * * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this - * to be ineffective where _req is a struct which consists of only bitfields. + * to be ineffective where dest is a struct which consists of only bitfields. */ -#define RING_COPY_REQUEST(_r, _idx, _req) do { \ - /* Use volatile to force the copy into _req. */ \ - *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \ +#define RING_COPY_(type, r, idx, dest) do { \ + /* Use volatile to force the copy into dest. */ \ + *(dest) = *(volatile typeof(dest))RING_GET_##type(r, idx); \ } while (0) -#define RING_GET_RESPONSE(_r, _idx) \ - (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) +#define RING_COPY_REQUEST(r, idx, req) RING_COPY_(REQUEST, r, idx, req) +#define RING_COPY_RESPONSE(r, idx, rsp) RING_COPY_(RESPONSE, r, idx, rsp) /* Loop termination condition: Would the specified index overflow the ring? */ -#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) /* Ill-behaved frontend determination: Can there be this many requests? */ -#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \ (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) +/* Ill-behaved backend determination: Can there be this many responses? */ +#define RING_RESPONSE_PROD_OVERFLOW(_r, _prod) \ + (((_prod) - (_r)->rsp_cons) > RING_SIZE(_r)) -#define RING_PUSH_REQUESTS(_r) do { \ - virt_wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +#define RING_PUSH_REQUESTS(_r) do { \ + virt_wmb(); /* back sees requests /before/ updated producer index */\ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ } while (0) -#define RING_PUSH_RESPONSES(_r) do { \ - virt_wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +#define RING_PUSH_RESPONSES(_r) do { \ + virt_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ } while (0) /* @@ -239,40 +273,40 @@ struct __name##_back_ring { \ * field appropriately. */ -#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->req_prod; \ - RING_IDX __new = (_r)->req_prod_pvt; \ - virt_wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = __new; \ - virt_mb(); /* back sees new requests /before/ we check req_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + virt_wmb(); /* back sees requests /before/ updated producer index */\ + (_r)->sring->req_prod = __new; \ + virt_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->rsp_prod; \ - RING_IDX __new = (_r)->rsp_prod_pvt; \ - virt_wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = __new; \ - virt_mb(); /* front sees new responses /before/ we check rsp_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + virt_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + virt_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ - if (_work_to_do) break; \ - (_r)->sring->req_event = (_r)->req_cons + 1; \ - virt_mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + virt_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ } while (0) -#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ - if (_work_to_do) break; \ - (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ - virt_mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + virt_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ } while (0) diff --git a/init/Kconfig b/init/Kconfig index 4413af4302ea1091292d40cf01abf11a2ba5bed1..f7b3d9de6624c4964997265d19840521af0d874b 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -26,40 +26,58 @@ config CC_VERSION_TEXT and then every file will be rebuilt. config CC_IS_GCC - def_bool $(success,echo "$(CC_VERSION_TEXT)" | grep -q gcc) + def_bool $(success,test "$(cc-name)" = GCC) config GCC_VERSION int - default $(shell,$(srctree)/scripts/gcc-version.sh $(CC)) if CC_IS_GCC + default $(cc-version) if CC_IS_GCC default 0 -config LD_VERSION +config CC_IS_CLANG + def_bool $(success,test "$(cc-name)" = Clang) + +config CLANG_VERSION int - default $(shell,$(LD) --version | $(srctree)/scripts/ld-version.sh) + default $(cc-version) if CC_IS_CLANG + default 0 -config CC_IS_CLANG - def_bool $(success,echo "$(CC_VERSION_TEXT)" | grep -q clang) +config AS_IS_GNU + def_bool $(success,test "$(as-name)" = GNU) -config LD_IS_LLD - def_bool $(success,$(LD) -v | head -n 1 | grep -q LLD) +config AS_IS_LLVM + def_bool $(success,test "$(as-name)" = LLVM) -config CLANG_VERSION +config AS_VERSION + int + # Use clang version if this is the integrated assembler + default CLANG_VERSION if AS_IS_LLVM + default $(as-version) + +config LD_IS_BFD + def_bool $(success,test "$(ld-name)" = BFD) + +config LD_VERSION int - default $(shell,$(srctree)/scripts/clang-version.sh $(CC)) + default $(ld-version) if LD_IS_BFD + default 0 + +config LD_IS_LLD + def_bool $(success,test "$(ld-name)" = LLD) config LLD_VERSION int - default $(shell,$(srctree)/scripts/lld-version.sh $(LD)) + default $(ld-version) if LD_IS_LLD + default 0 config CC_CAN_LINK bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag)) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m32-flag)) + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag)) if 64BIT + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag)) config CC_CAN_LINK_STATIC bool - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m64-flag) -static) if 64BIT - default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(m32-flag) -static) + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT + default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static) config CC_HAS_ASM_GOTO def_bool $(success,$(srctree)/scripts/gcc-goto.sh $(CC)) @@ -1738,6 +1756,16 @@ config BPF_JIT_DEFAULT_ON def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON depends on HAVE_EBPF_JIT && BPF_JIT +config BPF_UNPRIV_DEFAULT_OFF + bool "Disable unprivileged BPF by default" + depends on BPF_SYSCALL + help + Disables unprivileged BPF by default by setting the corresponding + /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can + still reenable it by setting it to 0 later on, or permanently + disable it by setting it to 1 (from which no other transition to + 0 is possible anymore). + source "kernel/bpf/preload/Kconfig" config USERFAULTFD @@ -2200,6 +2228,19 @@ config MODULE_SIG_FORCE Reject unsigned modules or signed modules for which we don't have a key. Without this, such modules will simply taint the kernel. +config MODULE_SIG_PROTECT + bool "Android GKI module protection" + depends on MODULE_SIG && !MODULE_SIG_FORCE + help + Enables Android GKI symbol and export protection support. + + This modifies the behavior of the MODULE_SIG_FORCE as follows: + - Allows Android GKI Modules signed using MODULE_SIG_ALL during build. + - Allows other modules to load if they don't violate the access to + Android GKI protected symbols and do not export the symbols already + exported by the Android GKI modules. Loading will fail and return + -EACCES (Permission denied) if symbol access contidions are not met. + config MODULE_SIG_ALL bool "Automatically sign all modules" default y diff --git a/init/Kconfig.gki b/init/Kconfig.gki index 1a4fde6a8e6279a6d23a616f3d4fa3496c6bcefe..1a9aac1250152de81b073de421c82f25f3466052 100644 --- a/init/Kconfig.gki +++ b/init/Kconfig.gki @@ -96,6 +96,7 @@ config GKI_HIDDEN_MEDIA_CONFIGS select CEC_NOTIFIER select CEC_PIN select VIDEOBUF2_DMA_CONTIG + select VIDEOBUF2_DMA_SG help Dummy config option used to enable hidden media configs. These are normally selected implicitly when a module diff --git a/init/main.c b/init/main.c index 9934624dbe083b1d99e27b0bf10421c112a70e04..e0defb470d46296e499ceddfd1563f5d1f1587fc 100644 --- a/init/main.c +++ b/init/main.c @@ -382,6 +382,7 @@ static char * __init xbc_make_cmdline(const char *key) ret = xbc_snprint_cmdline(new_cmdline, len + 1, root); if (ret < 0 || ret > len) { pr_err("Failed to print extra kernel cmdline.\n"); + memblock_free(__pa(new_cmdline), len + 1); return NULL; } @@ -908,11 +909,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void) * time - but meanwhile we still have a functioning scheduler. */ sched_init(); - /* - * Disable preemption - early bootup scheduling is extremely - * fragile until we cpu_idle() for the first time. - */ - preempt_disable(); + if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n")) local_irq_disable(); diff --git a/ipc/shm.c b/ipc/shm.c index e25c7c6106bcf79ee9dc18ecc133d3531ef2aecb..471ac3e7498d5359f151dd43f76d471924b76949 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -62,9 +62,18 @@ struct shmid_kernel /* private to the kernel */ struct pid *shm_lprid; struct user_struct *mlock_user; - /* The task created the shm object. NULL if the task is dead. */ + /* + * The task created the shm object, for + * task_lock(shp->shm_creator) + */ struct task_struct *shm_creator; - struct list_head shm_clist; /* list by creator */ + + /* + * List by creator. task_lock(->shm_creator) required for read/write. + * If list_empty(), then the creator is dead already. + */ + struct list_head shm_clist; + struct ipc_namespace *ns; } __randomize_layout; /* shm_mode upper byte flags */ @@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) struct shmid_kernel *shp; shp = container_of(ipcp, struct shmid_kernel, shm_perm); + WARN_ON(ns != shp->ns); if (shp->shm_nattch) { shp->shm_perm.mode |= SHM_DEST; @@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head *head) kvfree(shp); } -static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) +/* + * It has to be called with shp locked. + * It must be called before ipc_rmid() + */ +static inline void shm_clist_rm(struct shmid_kernel *shp) { - list_del(&s->shm_clist); - ipc_rmid(&shm_ids(ns), &s->shm_perm); + struct task_struct *creator; + + /* ensure that shm_creator does not disappear */ + rcu_read_lock(); + + /* + * A concurrent exit_shm may do a list_del_init() as well. + * Just do nothing if exit_shm already did the work + */ + if (!list_empty(&shp->shm_clist)) { + /* + * shp->shm_creator is guaranteed to be valid *only* + * if shp->shm_clist is not empty. + */ + creator = shp->shm_creator; + + task_lock(creator); + /* + * list_del_init() is a nop if the entry was already removed + * from the list. + */ + list_del_init(&shp->shm_clist); + task_unlock(creator); + } + rcu_read_unlock(); +} + +static inline void shm_rmid(struct shmid_kernel *s) +{ + shm_clist_rm(s); + ipc_rmid(&shm_ids(s->ns), &s->shm_perm); } @@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) shm_file = shp->shm_file; shp->shm_file = NULL; ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; - shm_rmid(ns, shp); + shm_rmid(shp); shm_unlock(shp); if (!is_file_hugepages(shm_file)) shmem_lock(shm_file, 0, shp->mlock_user); @@ -306,10 +349,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) * * 2) sysctl kernel.shm_rmid_forced is set to 1. */ -static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) +static bool shm_may_destroy(struct shmid_kernel *shp) { return (shp->shm_nattch == 0) && - (ns->shm_rmid_forced || + (shp->ns->shm_rmid_forced || (shp->shm_perm.mode & SHM_DEST)); } @@ -340,7 +383,7 @@ static void shm_close(struct vm_area_struct *vma) ipc_update_pid(&shp->shm_lprid, task_tgid(current)); shp->shm_dtim = ktime_get_real_seconds(); shp->shm_nattch--; - if (shm_may_destroy(ns, shp)) + if (shm_may_destroy(shp)) shm_destroy(ns, shp); else shm_unlock(shp); @@ -361,10 +404,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data) * * As shp->* are changed under rwsem, it's safe to skip shp locking. */ - if (shp->shm_creator != NULL) + if (!list_empty(&shp->shm_clist)) return 0; - if (shm_may_destroy(ns, shp)) { + if (shm_may_destroy(shp)) { shm_lock_by_ptr(shp); shm_destroy(ns, shp); } @@ -382,48 +425,97 @@ void shm_destroy_orphaned(struct ipc_namespace *ns) /* Locking assumes this will only be called with task == current */ void exit_shm(struct task_struct *task) { - struct ipc_namespace *ns = task->nsproxy->ipc_ns; - struct shmid_kernel *shp, *n; + for (;;) { + struct shmid_kernel *shp; + struct ipc_namespace *ns; - if (list_empty(&task->sysvshm.shm_clist)) - return; + task_lock(task); + + if (list_empty(&task->sysvshm.shm_clist)) { + task_unlock(task); + break; + } + + shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel, + shm_clist); - /* - * If kernel.shm_rmid_forced is not set then only keep track of - * which shmids are orphaned, so that a later set of the sysctl - * can clean them up. - */ - if (!ns->shm_rmid_forced) { - down_read(&shm_ids(ns).rwsem); - list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist) - shp->shm_creator = NULL; /* - * Only under read lock but we are only called on current - * so no entry on the list will be shared. + * 1) Get pointer to the ipc namespace. It is worth to say + * that this pointer is guaranteed to be valid because + * shp lifetime is always shorter than namespace lifetime + * in which shp lives. + * We taken task_lock it means that shp won't be freed. */ - list_del(&task->sysvshm.shm_clist); - up_read(&shm_ids(ns).rwsem); - return; - } + ns = shp->ns; - /* - * Destroy all already created segments, that were not yet mapped, - * and mark any mapped as orphan to cover the sysctl toggling. - * Destroy is skipped if shm_may_destroy() returns false. - */ - down_write(&shm_ids(ns).rwsem); - list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) { - shp->shm_creator = NULL; + /* + * 2) If kernel.shm_rmid_forced is not set then only keep track of + * which shmids are orphaned, so that a later set of the sysctl + * can clean them up. + */ + if (!ns->shm_rmid_forced) + goto unlink_continue; - if (shm_may_destroy(ns, shp)) { - shm_lock_by_ptr(shp); - shm_destroy(ns, shp); + /* + * 3) get a reference to the namespace. + * The refcount could be already 0. If it is 0, then + * the shm objects will be free by free_ipc_work(). + */ + ns = get_ipc_ns_not_zero(ns); + if (!ns) { +unlink_continue: + list_del_init(&shp->shm_clist); + task_unlock(task); + continue; } - } - /* Remove the list head from any segments still attached. */ - list_del(&task->sysvshm.shm_clist); - up_write(&shm_ids(ns).rwsem); + /* + * 4) get a reference to shp. + * This cannot fail: shm_clist_rm() is called before + * ipc_rmid(), thus the refcount cannot be 0. + */ + WARN_ON(!ipc_rcu_getref(&shp->shm_perm)); + + /* + * 5) unlink the shm segment from the list of segments + * created by current. + * This must be done last. After unlinking, + * only the refcounts obtained above prevent IPC_RMID + * from destroying the segment or the namespace. + */ + list_del_init(&shp->shm_clist); + + task_unlock(task); + + /* + * 6) we have all references + * Thus lock & if needed destroy shp. + */ + down_write(&shm_ids(ns).rwsem); + shm_lock_by_ptr(shp); + /* + * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's + * safe to call ipc_rcu_putref here + */ + ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); + + if (ipc_valid_object(&shp->shm_perm)) { + if (shm_may_destroy(shp)) + shm_destroy(ns, shp); + else + shm_unlock(shp); + } else { + /* + * Someone else deleted the shp from namespace + * idr/kht while we have waited. + * Just unlock and continue. + */ + shm_unlock(shp); + } + + up_write(&shm_ids(ns).rwsem); + put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */ + } } static vm_fault_t shm_fault(struct vm_fault *vmf) @@ -680,7 +772,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (error < 0) goto no_id; + shp->ns = ns; + + task_lock(current); list_add(&shp->shm_clist, ¤t->sysvshm.shm_clist); + task_unlock(current); /* * shmid gets reported as "inode#" in /proc/pid/maps. @@ -1573,7 +1669,8 @@ out_nattch: down_write(&shm_ids(ns).rwsem); shp = shm_lock(ns, shmid); shp->shm_nattch--; - if (shm_may_destroy(ns, shp)) + + if (shm_may_destroy(shp)) shm_destroy(ns, shp); else shm_unlock(shp); diff --git a/ipc/util.c b/ipc/util.c index cfa0045e748d55a5e0b1fbd608fe477b8cf4c96f..bbb5190af6d9f29042c4ab1403e07d63d9ead714 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -446,8 +446,8 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids, static void ipc_kht_remove(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) { if (ipcp->key != IPC_PRIVATE) - rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, - ipc_kht_params); + WARN_ON_ONCE(rhashtable_remove_fast(&ids->key_ht, &ipcp->khtnode, + ipc_kht_params)); } /** @@ -462,7 +462,7 @@ void ipc_rmid(struct ipc_ids *ids, struct kern_ipc_perm *ipcp) { int idx = ipcid_to_idx(ipcp->id); - idr_remove(&ids->ipcs_idr, idx); + WARN_ON_ONCE(idr_remove(&ids->ipcs_idr, idx) != ipcp); ipc_kht_remove(ids, ipcp); ids->in_use--; ipcp->deleted = true; diff --git a/kernel/Makefile b/kernel/Makefile index 6ee614db171787f840be147c9ce28028e4cee605..ed1aa304b4a95f5ac77477d99f18b60c08c7911b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -66,6 +66,7 @@ obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULE_SIG) += module_signing.o obj-$(CONFIG_MODULE_SIG_FORMAT) += module_signature.o +obj-$(CONFIG_MODULE_SIG_PROTECT) += gki_module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_CRASH_CORE) += crash_core.o @@ -155,3 +156,19 @@ $(obj)/kheaders_data.tar.xz: FORCE $(call cmd,genikh) clean-files := kheaders_data.tar.xz kheaders.md5 + +# +# ANDROID: GKI: Generate headerfiles required for gki_module.o +# +# Dependencies on generated files need to be listed explicitly +$(obj)/gki_module.o: $(obj)/gki_module_protected.h $(obj)/gki_module_exported.h + +$(obj)/gki_module_protected.h: $(srctree)/android/abi_gki_modules_protected \ + $(srctree)/scripts/gen_gki_modules_headers.sh + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/gen_gki_modules_headers.sh $@ \ + "$(srctree)" + +$(obj)/gki_module_exported.h: $(srctree)/android/abi_gki_modules_exports \ + $(srctree)/scripts/gen_gki_modules_headers.sh + $(Q)$(CONFIG_SHELL) $(srctree)/scripts/gen_gki_modules_headers.sh $@ \ + "$(srctree)" diff --git a/kernel/async.c b/kernel/async.c index 33258e6e20f83c319649c266142180926cba689c..1746cd65e271bcd7e41f3eb571d314c8ea4d8fa7 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -205,9 +205,6 @@ async_cookie_t async_schedule_node_domain(async_func_t func, void *data, atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); - /* mark that this task has queued an async job, used by module init */ - current->flags |= PF_USED_ASYNC; - /* schedule for execution */ queue_work_node(node, system_unbound_wq, &entry->work); diff --git a/kernel/audit.c b/kernel/audit.c index 68cee3bc8cfe65251acef759edf7badf45088f40..aeec86ed470883f28acde2f3b73011252f4d4d66 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -541,20 +541,22 @@ static void kauditd_printk_skb(struct sk_buff *skb) /** * kauditd_rehold_skb - Handle a audit record send failure in the hold queue * @skb: audit record + * @error: error code (unused) * * Description: * This should only be used by the kauditd_thread when it fails to flush the * hold queue. */ -static void kauditd_rehold_skb(struct sk_buff *skb) +static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error) { - /* put the record back in the queue at the same place */ - skb_queue_head(&audit_hold_queue, skb); + /* put the record back in the queue */ + skb_queue_tail(&audit_hold_queue, skb); } /** * kauditd_hold_skb - Queue an audit record, waiting for auditd * @skb: audit record + * @error: error code * * Description: * Queue the audit record, waiting for an instance of auditd. When this @@ -564,19 +566,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb) * and queue it, if we have room. If we want to hold on to the record, but we * don't have room, record a record lost message. */ -static void kauditd_hold_skb(struct sk_buff *skb) +static void kauditd_hold_skb(struct sk_buff *skb, int error) { /* at this point it is uncertain if we will ever send this to auditd so * try to send the message via printk before we go any further */ kauditd_printk_skb(skb); /* can we just silently drop the message? */ - if (!audit_default) { - kfree_skb(skb); - return; + if (!audit_default) + goto drop; + + /* the hold queue is only for when the daemon goes away completely, + * not -EAGAIN failures; if we are in a -EAGAIN state requeue the + * record on the retry queue unless it's full, in which case drop it + */ + if (error == -EAGAIN) { + if (!audit_backlog_limit || + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_retry_queue, skb); + return; + } + audit_log_lost("kauditd retry queue overflow"); + goto drop; } - /* if we have room, queue the message */ + /* if we have room in the hold queue, queue the message */ if (!audit_backlog_limit || skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { skb_queue_tail(&audit_hold_queue, skb); @@ -585,24 +599,32 @@ static void kauditd_hold_skb(struct sk_buff *skb) /* we have no other options - drop the message */ audit_log_lost("kauditd hold queue overflow"); +drop: kfree_skb(skb); } /** * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd * @skb: audit record + * @error: error code (unused) * * Description: * Not as serious as kauditd_hold_skb() as we still have a connected auditd, * but for some reason we are having problems sending it audit records so * queue the given record and attempt to resend. */ -static void kauditd_retry_skb(struct sk_buff *skb) +static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error) { - /* NOTE: because records should only live in the retry queue for a - * short period of time, before either being sent or moved to the hold - * queue, we don't currently enforce a limit on this queue */ - skb_queue_tail(&audit_retry_queue, skb); + if (!audit_backlog_limit || + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_retry_queue, skb); + return; + } + + /* we have to drop the record, send it via printk as a last effort */ + kauditd_printk_skb(skb); + audit_log_lost("kauditd retry queue overflow"); + kfree_skb(skb); } /** @@ -640,7 +662,7 @@ static void auditd_reset(const struct auditd_connection *ac) /* flush the retry queue to the hold queue, but don't touch the main * queue since we need to process that normally for multicast */ while ((skb = skb_dequeue(&audit_retry_queue))) - kauditd_hold_skb(skb); + kauditd_hold_skb(skb, -ECONNREFUSED); } /** @@ -714,16 +736,18 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, struct sk_buff_head *queue, unsigned int retry_limit, void (*skb_hook)(struct sk_buff *skb), - void (*err_hook)(struct sk_buff *skb)) + void (*err_hook)(struct sk_buff *skb, int error)) { int rc = 0; - struct sk_buff *skb; - static unsigned int failed = 0; + struct sk_buff *skb = NULL; + struct sk_buff *skb_tail; + unsigned int failed = 0; /* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. sk and portid) */ - while ((skb = skb_dequeue(queue))) { + skb_tail = skb_peek_tail(queue); + while ((skb != skb_tail) && (skb = skb_dequeue(queue))) { /* call the skb_hook for each skb we touch */ if (skb_hook) (*skb_hook)(skb); @@ -731,36 +755,34 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, /* can we send to anyone via unicast? */ if (!sk) { if (err_hook) - (*err_hook)(skb); + (*err_hook)(skb, -ECONNREFUSED); continue; } +retry: /* grab an extra skb reference in case of error */ skb_get(skb); rc = netlink_unicast(sk, skb, portid, 0); if (rc < 0) { - /* fatal failure for our queue flush attempt? */ + /* send failed - try a few times unless fatal error */ if (++failed >= retry_limit || rc == -ECONNREFUSED || rc == -EPERM) { - /* yes - error processing for the queue */ sk = NULL; if (err_hook) - (*err_hook)(skb); - if (!skb_hook) - goto out; - /* keep processing with the skb_hook */ + (*err_hook)(skb, rc); + if (rc == -EAGAIN) + rc = 0; + /* continue to drain the queue */ continue; } else - /* no - requeue to preserve ordering */ - skb_queue_head(queue, skb); + goto retry; } else { - /* it worked - drop the extra reference and continue */ + /* skb sent - drop the extra reference and continue */ consume_skb(skb); failed = 0; } } -out: return (rc >= 0 ? 0 : rc); } @@ -1542,6 +1564,20 @@ static void audit_receive(struct sk_buff *skb) nlh = nlmsg_next(nlh, &len); } audit_ctl_unlock(); + + /* can't block with the ctrl lock, so penalize the sender now */ + if (audit_backlog_limit && + (skb_queue_len(&audit_queue) > audit_backlog_limit)) { + DECLARE_WAITQUEUE(wait, current); + + /* wake kauditd to try and flush the queue */ + wake_up_interruptible(&kauditd_wait); + + add_wait_queue_exclusive(&audit_backlog_wait, &wait); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(audit_backlog_wait_time); + remove_wait_queue(&audit_backlog_wait, &wait); + } } /* Log information about who is connecting to the audit multicast socket */ @@ -1609,7 +1645,8 @@ static int __net_init audit_net_init(struct net *net) audit_panic("cannot initialize netlink socket in namespace"); return -ENOMEM; } - aunet->sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; + /* limit the timeout in case auditd is blocked/stopped */ + aunet->sk->sk_sndtimeo = HZ / 10; return 0; } @@ -1825,7 +1862,9 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, * task_tgid_vnr() since auditd_pid is set in audit_receive_msg() * using a PID anchored in the caller's namespace * 2. generator holding the audit_cmd_mutex - we don't want to block - * while holding the mutex */ + * while holding the mutex, although we do penalize the sender + * later in audit_receive() when it is safe to block + */ if (!(auditd_test_task(current) || audit_ctl_owner_current())) { long stime = audit_backlog_wait_time; diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 8dba8f0983b5c0485c54d9a12ba2b206cdc9ffe4..638f424859edc384aa22b8908b1dc5a784d6bd97 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -653,7 +653,7 @@ static int audit_filter_rules(struct task_struct *tsk, result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val); break; case AUDIT_SADDR_FAM: - if (ctx->sockaddr) + if (ctx && ctx->sockaddr) result = audit_comparator(ctx->sockaddr->ss_family, f->op, f->val); break; diff --git a/kernel/bounds.c b/kernel/bounds.c index 9795d75b09b2323306ad6a058a6350a87a251443..10dd9e6b03e551a1936587e02543b1c1883e201a 100644 --- a/kernel/bounds.c +++ b/kernel/bounds.c @@ -22,6 +22,13 @@ int main(void) DEFINE(NR_CPUS_BITS, ilog2(CONFIG_NR_CPUS)); #endif DEFINE(SPINLOCK_SIZE, sizeof(spinlock_t)); +#ifdef CONFIG_LRU_GEN + DEFINE(LRU_GEN_WIDTH, order_base_2(MAX_NR_GENS + 1)); + DEFINE(LRU_REFS_WIDTH, MAX_NR_TIERS - 2); +#else + DEFINE(LRU_GEN_WIDTH, 0); + DEFINE(LRU_REFS_WIDTH, 0); +#endif /* End of constants */ return 0; diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile index c1b9f71ee6aac480d935f2301d339765e301361d..bfdc63dc74dbfdfd4d928682f9ea7041c0c662d9 100644 --- a/kernel/bpf/Makefile +++ b/kernel/bpf/Makefile @@ -36,3 +36,6 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o obj-${CONFIG_BPF_LSM} += bpf_lsm.o endif obj-$(CONFIG_BPF_PRELOAD) += preload/ +ifeq ($(CONFIG_FUSE_BPF),y) +obj-$(CONFIG_BPF_SYSCALL) += bpf_fuse.o +endif diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index c6c81eceb68fb5d5cc754ec4296e30fb713db74c..36c68dcea2369a98f3dbbeaa5308b94a31a6f008 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1025,6 +1025,7 @@ static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr) INIT_WORK(&aux->work, prog_array_map_clear_deferred); INIT_LIST_HEAD(&aux->poke_progs); mutex_init(&aux->poke_mutex); + spin_lock_init(&aux->owner.lock); map = array_map_alloc(attr); if (IS_ERR(map)) { diff --git a/kernel/bpf/bpf_fuse.c b/kernel/bpf/bpf_fuse.c new file mode 100644 index 0000000000000000000000000000000000000000..00738c84b76e6f58100de3f1aa626af6f48bb008 --- /dev/null +++ b/kernel/bpf/bpf_fuse.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2021 Google LLC + +#include +#include + +static const struct bpf_func_proto * +fuse_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_trace_printk: + return bpf_get_trace_printk_proto(); + + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + + case BPF_FUNC_get_current_pid_tgid: + return &bpf_get_current_pid_tgid_proto; + + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + + default: + pr_debug("Invalid fuse bpf func %d\n", func_id); + return NULL; + } +} + +static bool fuse_prog_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + int i; + + if (off < 0 || off > offsetofend(struct fuse_bpf_args, out_args)) + return false; + + /* TODO This is garbage. Do it properly */ + for (i = 0; i < 5; i++) { + if (off == offsetof(struct fuse_bpf_args, in_args[i].value)) { + info->reg_type = PTR_TO_RDONLY_BUF; + info->ctx_field_size = 256; + if (type != BPF_READ) + return false; + return true; + } + } + for (i = 0; i < 3; i++) { + if (off == offsetof(struct fuse_bpf_args, out_args[i].value)) { + info->reg_type = PTR_TO_RDWR_BUF; + info->ctx_field_size = 256; + return true; + } + } + if (type != BPF_READ) + return false; + + return true; +} + +const struct bpf_verifier_ops fuse_verifier_ops = { + .get_func_proto = fuse_prog_func_proto, + .is_valid_access = fuse_prog_is_valid_access, +}; + +const struct bpf_prog_ops fuse_prog_ops = { +}; + diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 9d3278e103554d767eb2e546339ff87bc2c23c60..d97de35511841849bdff101710b765f7415e5add 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -368,6 +368,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, const struct btf_type *mtype, *ptype; struct bpf_prog *prog; u32 moff; + u32 flags; moff = btf_member_bit_offset(t, member) / 8; ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL); @@ -431,10 +432,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; + flags = st_ops->func_models[i].ret_size > 0 ? + BPF_TRAMP_F_RET_FENTRY_RET : 0; err = arch_prepare_bpf_trampoline(NULL, image, st_map->image + PAGE_SIZE, - &st_ops->func_models[i], 0, - tprogs, NULL); + &st_ops->func_models[i], + flags, tprogs, NULL); if (err < 0) goto reset_unlock; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index aaf2fbaa0cc76e4a8615ac961a16da6042c76ec3..6161973fffdd5ce05488007bb752fa792fd3eb30 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -4135,8 +4136,7 @@ static struct btf *btf_parse(void __user *btf_data, u32 btf_data_size, log->len_total = log_size; /* log attributes have to be sane */ - if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || - !log->level || !log->ubuf) { + if (!bpf_verifier_log_attr_valid(log)) { err = -EINVAL; goto errout; } diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 92cd35b6127145b8d1c2c1226bb9f50fd2225964..b448788b55af16dde00be0a939c24d4601d13b23 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -530,6 +530,7 @@ int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_harden __read_mostly; long bpf_jit_limit __read_mostly; +long bpf_jit_limit_max __read_mostly; static void bpf_prog_ksym_set_addr(struct bpf_prog *prog) @@ -823,7 +824,8 @@ u64 __weak bpf_jit_alloc_exec_limit(void) static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ - bpf_jit_limit = min_t(u64, round_up(bpf_jit_alloc_exec_limit() >> 2, + bpf_jit_limit_max = bpf_jit_alloc_exec_limit(); + bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2, PAGE_SIZE), LONG_MAX); return 0; } @@ -833,7 +835,7 @@ int bpf_jit_charge_modmem(u32 pages) { if (atomic_long_add_return(pages, &bpf_jit_current) > (bpf_jit_limit >> PAGE_SHIFT)) { - if (!capable(CAP_SYS_ADMIN)) { + if (!bpf_capable()) { atomic_long_sub(pages, &bpf_jit_current); return -EPERM; } @@ -1779,20 +1781,26 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx, bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { + bool ret; + if (fp->kprobe_override) return false; - if (!array->aux->type) { + spin_lock(&array->aux->owner.lock); + + if (!array->aux->owner.type) { /* There's no owner yet where we could check for * compatibility. */ - array->aux->type = fp->type; - array->aux->jited = fp->jited; - return true; + array->aux->owner.type = fp->type; + array->aux->owner.jited = fp->jited; + ret = true; + } else { + ret = array->aux->owner.type == fp->type && + array->aux->owner.jited == fp->jited; } - - return array->aux->type == fp->type && - array->aux->jited == fp->jited; + spin_unlock(&array->aux->owner.lock); + return ret; } static int bpf_check_tail_call(const struct bpf_prog *fp) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index b5be9659ab590303a3f5945f85207b37adef8147..01149821ded91a2995b58a6656edb6f05916102e 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -92,7 +92,7 @@ static struct hlist_head *dev_map_create_hash(unsigned int entries, int i; struct hlist_head *hash; - hash = bpf_map_area_alloc(entries * sizeof(*hash), numa_node); + hash = bpf_map_area_alloc((u64) entries * sizeof(*hash), numa_node); if (hash != NULL) for (i = 0; i < entries; i++) INIT_HLIST_HEAD(&hash[i]); @@ -153,7 +153,7 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) spin_lock_init(&dtab->index_lock); } else { - dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries * + dtab->netdev_map = bpf_map_area_alloc((u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *), dtab->map.numa_node); if (!dtab->netdev_map) diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c index f9913bc65ef8d7a34feea4261a1cab351ce66a26..1e4bf23528a3de1d955e7f73284508e90d31332e 100644 --- a/kernel/bpf/ringbuf.c +++ b/kernel/bpf/ringbuf.c @@ -108,7 +108,7 @@ static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node) } rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages, - VM_ALLOC | VM_USERMAP, PAGE_KERNEL); + VM_MAP | VM_USERMAP, PAGE_KERNEL); if (rb) { kmemleak_not_leak(pages); rb->pages = pages; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index ebf60848d5eb78b18c3b7efbb8d5e4f47e1e8c4a..56cd7e6589ff344b7d8a8883be626cca0d2f6fcd 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -64,7 +64,8 @@ static inline int stack_map_data_size(struct bpf_map *map) static int prealloc_elems_and_freelist(struct bpf_stack_map *smap) { - u32 elem_size = sizeof(struct stack_map_bucket) + smap->map.value_size; + u64 elem_size = sizeof(struct stack_map_bucket) + + (u64)smap->map.value_size; int err; smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries, @@ -663,13 +664,14 @@ BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf, u32, size, u64, flags) { struct pt_regs *regs; - long res; + long res = -EINVAL; if (!try_get_task_stack(task)) return -EFAULT; regs = task_pt_regs(task); - res = __bpf_get_stack(regs, task, NULL, buf, size, flags); + if (regs) + res = __bpf_get_stack(regs, task, NULL, buf, size, flags); put_task_stack(task); return res; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2fee91544ecb73a447c664b874b8928375311007..0c45e247f5e1873921f6d06c4fdaf37d89fe87e3 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -52,7 +52,8 @@ static DEFINE_SPINLOCK(map_idr_lock); static DEFINE_IDR(link_idr); static DEFINE_SPINLOCK(link_idr_lock); -int sysctl_unprivileged_bpf_disabled __read_mostly; +int sysctl_unprivileged_bpf_disabled __read_mostly = + IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; static const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) @@ -129,6 +130,21 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) return map; } +static void bpf_map_write_active_inc(struct bpf_map *map) +{ + atomic64_inc(&map->writecnt); +} + +static void bpf_map_write_active_dec(struct bpf_map *map) +{ + atomic64_dec(&map->writecnt); +} + +bool bpf_map_write_active(const struct bpf_map *map) +{ + return atomic64_read(&map->writecnt) != 0; +} + static u32 bpf_map_value_size(struct bpf_map *map) { if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || @@ -537,8 +553,10 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp) if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) { array = container_of(map, struct bpf_array, map); - type = array->aux->type; - jited = array->aux->jited; + spin_lock(&array->aux->owner.lock); + type = array->aux->owner.type; + jited = array->aux->owner.jited; + spin_unlock(&array->aux->owner.lock); } seq_printf(m, @@ -588,11 +606,8 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; - if (vma->vm_flags & VM_MAYWRITE) { - mutex_lock(&map->freeze_mutex); - map->writecnt++; - mutex_unlock(&map->freeze_mutex); - } + if (vma->vm_flags & VM_MAYWRITE) + bpf_map_write_active_inc(map); } /* called for all unmapped memory region (including initial) */ @@ -600,11 +615,8 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; - if (vma->vm_flags & VM_MAYWRITE) { - mutex_lock(&map->freeze_mutex); - map->writecnt--; - mutex_unlock(&map->freeze_mutex); - } + if (vma->vm_flags & VM_MAYWRITE) + bpf_map_write_active_dec(map); } static const struct vm_operations_struct bpf_map_default_vmops = { @@ -654,7 +666,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) goto out; if (vma->vm_flags & VM_MAYWRITE) - map->writecnt++; + bpf_map_write_active_inc(map); out: mutex_unlock(&map->freeze_mutex); return err; @@ -1086,6 +1098,7 @@ static int map_update_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); + bpf_map_write_active_inc(map); if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; @@ -1127,6 +1140,7 @@ free_value: free_key: kfree(key); err_put: + bpf_map_write_active_dec(map); fdput(f); return err; } @@ -1149,6 +1163,7 @@ static int map_delete_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); + bpf_map_write_active_inc(map); if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; @@ -1179,6 +1194,7 @@ static int map_delete_elem(union bpf_attr *attr) out: kfree(key); err_put: + bpf_map_write_active_dec(map); fdput(f); return err; } @@ -1294,6 +1310,7 @@ int generic_map_delete_batch(struct bpf_map *map, maybe_wait_bpf_programs(map); if (err) break; + cond_resched(); } if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) err = -EFAULT; @@ -1309,12 +1326,11 @@ int generic_map_update_batch(struct bpf_map *map, void __user *values = u64_to_user_ptr(attr->batch.values); void __user *keys = u64_to_user_ptr(attr->batch.keys); u32 value_size, cp, max_count; - int ufd = attr->map_fd; + int ufd = attr->batch.map_fd; void *key, *value; struct fd f; int err = 0; - f = fdget(ufd); if (attr->batch.elem_flags & ~BPF_F_LOCK) return -EINVAL; @@ -1339,6 +1355,7 @@ int generic_map_update_batch(struct bpf_map *map, return -ENOMEM; } + f = fdget(ufd); /* bpf_map_do_batch() guarantees ufd is valid */ for (cp = 0; cp < max_count; cp++) { err = -EFAULT; if (copy_from_user(key, keys + cp * map->key_size, @@ -1351,6 +1368,7 @@ int generic_map_update_batch(struct bpf_map *map, if (err) break; + cond_resched(); } if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) @@ -1358,6 +1376,7 @@ int generic_map_update_batch(struct bpf_map *map, kfree(value); kfree(key); + fdput(f); return err; } @@ -1447,6 +1466,7 @@ int generic_map_lookup_batch(struct bpf_map *map, swap(prev_key, key); retry = MAP_LOOKUP_RETRIES; cp++; + cond_resched(); } if (err == -EFAULT) @@ -1482,6 +1502,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); + bpf_map_write_active_inc(map); if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; @@ -1523,6 +1544,7 @@ free_value: free_key: kfree(key); err_put: + bpf_map_write_active_dec(map); fdput(f); return err; } @@ -1549,8 +1571,7 @@ static int map_freeze(const union bpf_attr *attr) } mutex_lock(&map->freeze_mutex); - - if (map->writecnt) { + if (bpf_map_write_active(map)) { err = -EBUSY; goto err_put; } @@ -3975,6 +3996,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr, union bpf_attr __user *uattr, int cmd) { + bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || + cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; + bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; struct bpf_map *map; int err, ufd; struct fd f; @@ -3987,16 +4011,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr, map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if ((cmd == BPF_MAP_LOOKUP_BATCH || - cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) && - !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { + if (has_write) + bpf_map_write_active_inc(map); + if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { err = -EPERM; goto err_put; } - - if (cmd != BPF_MAP_LOOKUP_BATCH && - !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { + if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; } @@ -4009,8 +4030,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr, BPF_DO_BATCH(map->ops->map_update_batch); else BPF_DO_BATCH(map->ops->map_delete_batch); - err_put: + if (has_write) + bpf_map_write_active_dec(map); fdput(f); return err; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0c26757ea7fbb8c8fe8a6a382e61b35ca81acca8..015bf2ba4a0b6eb511170b2c4d9fdb98d5e15d9e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1249,22 +1249,28 @@ static void __reg_bound_offset(struct bpf_reg_state *reg) reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off); } +static bool __reg32_bound_s64(s32 a) +{ + return a >= 0 && a <= S32_MAX; +} + static void __reg_assign_32_into_64(struct bpf_reg_state *reg) { reg->umin_value = reg->u32_min_value; reg->umax_value = reg->u32_max_value; - /* Attempt to pull 32-bit signed bounds into 64-bit bounds - * but must be positive otherwise set to worse case bounds - * and refine later from tnum. + + /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must + * be positive otherwise set to worse case bounds and refine later + * from tnum. */ - if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0) - reg->smax_value = reg->s32_max_value; - else - reg->smax_value = U32_MAX; - if (reg->s32_min_value >= 0) + if (__reg32_bound_s64(reg->s32_min_value) && + __reg32_bound_s64(reg->s32_max_value)) { reg->smin_value = reg->s32_min_value; - else + reg->smax_value = reg->s32_max_value; + } else { reg->smin_value = 0; + reg->smax_value = U32_MAX; + } } static void __reg_combine_32_into_64(struct bpf_reg_state *reg) @@ -1298,12 +1304,12 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) static bool __reg64_bound_s32(s64 a) { - return a > S32_MIN && a < S32_MAX; + return a >= S32_MIN && a <= S32_MAX; } static bool __reg64_bound_u32(u64 a) { - return a > U32_MIN && a < U32_MAX; + return a >= U32_MIN && a <= U32_MAX; } static void __reg_combine_64_into_32(struct bpf_reg_state *reg) @@ -3486,7 +3492,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) static bool bpf_map_is_rdonly(const struct bpf_map *map) { - return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; + /* A map is considered read-only if the following condition are true: + * + * 1) BPF program side cannot change any of the map content. The + * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map + * and was set at map creation time. + * 2) The map value(s) have been initialized from user space by a + * loader and then "frozen", such that no new map update/delete + * operations from syscall side are possible for the rest of + * the map's lifetime from that point onwards. + * 3) Any parallel/pending map update/delete operations from syscall + * side have been completed. Only after that point, it's safe to + * assume that map value(s) are immutable. + */ + return (map->map_flags & BPF_F_RDONLY_PROG) && + READ_ONCE(map->frozen) && + !bpf_map_write_active(map); } static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) @@ -6016,16 +6037,16 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, fallthrough; case PTR_TO_PACKET_END: case PTR_TO_SOCKET: - case PTR_TO_SOCKET_OR_NULL: case PTR_TO_SOCK_COMMON: - case PTR_TO_SOCK_COMMON_OR_NULL: case PTR_TO_TCP_SOCK: - case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: +reject: verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; default: + if (reg_type_may_be_null(ptr_reg->type)) + goto reject; break; } @@ -7110,6 +7131,10 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) insn->dst_reg); } zext_32_to_64(dst_reg); + + __update_reg_bounds(dst_reg); + __reg_deduce_bounds(dst_reg); + __reg_bound_offset(dst_reg); } } else { /* case: R = imm @@ -7225,7 +7250,7 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, new_range = dst_reg->off; if (range_right_open) - new_range--; + new_range++; /* Examples for register markings: * @@ -7700,15 +7725,15 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, { if (reg_type_may_be_null(reg->type) && reg->id == id && !WARN_ON_ONCE(!reg->id)) { - /* Old offset (both fixed and variable parts) should - * have been known-zero, because we don't allow pointer - * arithmetic on pointers that might be NULL. - */ if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0) || reg->off)) { - __mark_reg_known_zero(reg); - reg->off = 0; + /* Old offset (both fixed and variable parts) should + * have been known-zero, because we don't allow pointer + * arithmetic on pointers that might be NULL. If we + * see this happening, don't convert the register. + */ + return; } if (is_null) { reg->type = SCALAR_VALUE; @@ -12324,11 +12349,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, log->ubuf = (char __user *) (unsigned long) attr->log_buf; log->len_total = attr->log_size; - ret = -EINVAL; /* log attributes have to be sane */ - if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || - !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) + if (!bpf_verifier_log_attr_valid(log)) { + ret = -EINVAL; goto err_unlock; + } } if (IS_ERR(btf_vmlinux)) { diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index 02f32b13d9e52dfba22075499fdd2e032e1c0b75..df99cb9071471aa50526a180cac42ab3a98600b9 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -146,7 +146,6 @@ struct cgroup_mgctx { #define DEFINE_CGROUP_MGCTX(name) \ struct cgroup_mgctx name = CGROUP_MGCTX_INIT(name) -extern struct mutex cgroup_mutex; extern spinlock_t css_set_lock; extern struct cgroup_subsys *cgroup_subsys[]; extern struct list_head cgroup_roots; diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index a1e8068d17cc319fdceb5292feaa701ba99f4b15..f931f6547423ff672f8e036bec2d9e40d0ae7b9f 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -548,6 +548,14 @@ static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of, BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if ((of->file->f_cred->user_ns != &init_user_ns) || + !capable(CAP_SYS_ADMIN)) + return -EPERM; + cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) return -ENODEV; @@ -961,6 +969,12 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param) /* Specifying two release agents is forbidden */ if (ctx->release_agent) return invalfc(fc, "release_agent respecified"); + /* + * Release agent gets called with all capabilities, + * require capabilities to set release agent. + */ + if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN)) + return invalfc(fc, "Setting release_agent not allowed"); ctx->release_agent = param->string; param->string = NULL; break; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index bac43ee9570068211e98c959aaa827358310742b..e914646569b60e457c48454a0a7ce0b2c42893c5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -1730,6 +1730,7 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) struct cgroup *dcgrp = &dst_root->cgrp; struct cgroup_subsys *ss; int ssid, i, ret; + u16 dfl_disable_ss_mask = 0; lockdep_assert_held(&cgroup_mutex); @@ -1746,8 +1747,28 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) /* can't move between two non-dummy roots either */ if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root) return -EBUSY; + + /* + * Collect ssid's that need to be disabled from default + * hierarchy. + */ + if (ss->root == &cgrp_dfl_root) + dfl_disable_ss_mask |= 1 << ssid; + } while_each_subsys_mask(); + if (dfl_disable_ss_mask) { + struct cgroup *scgrp = &cgrp_dfl_root.cgrp; + + /* + * Controllers from default hierarchy that need to be rebound + * are all disabled together in one go. + */ + cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask; + WARN_ON(cgroup_apply_control(scgrp)); + cgroup_finalize_control(scgrp, 0); + } + do_each_subsys_mask(ss, ssid, ss_mask) { struct cgroup_root *src_root = ss->root; struct cgroup *scgrp = &src_root->cgrp; @@ -1756,10 +1777,12 @@ int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask) WARN_ON(!css || cgroup_css(dcgrp, ss)); - /* disable from the source */ - src_root->subsys_mask &= ~(1 << ssid); - WARN_ON(cgroup_apply_control(scgrp)); - cgroup_finalize_control(scgrp, 0); + if (src_root != &cgrp_dfl_root) { + /* disable from the source */ + src_root->subsys_mask &= ~(1 << ssid); + WARN_ON(cgroup_apply_control(scgrp)); + cgroup_finalize_control(scgrp, 0); + } /* rebind */ RCU_INIT_POINTER(scgrp->subsys[ssid], NULL); @@ -2166,8 +2189,10 @@ static void cgroup_kill_sb(struct super_block *sb) * And don't kill the default root. */ if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root && - !percpu_ref_is_dying(&root->cgrp.self.refcnt)) + !percpu_ref_is_dying(&root->cgrp.self.refcnt)) { + cgroup_bpf_offline(&root->cgrp); percpu_ref_kill(&root->cgrp.self.refcnt); + } cgroup_put(&root->cgrp); kernfs_kill_sb(sb); } @@ -3603,6 +3628,12 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, cgroup_get(cgrp); cgroup_kn_unlock(of->kn); + /* Allow only one trigger per file descriptor */ + if (of->priv) { + cgroup_put(cgrp); + return -EBUSY; + } + psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi; new = psi_trigger_create(psi, buf, nbytes, res); if (IS_ERR(new)) { @@ -3610,8 +3641,7 @@ static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf, return PTR_ERR(new); } - psi_trigger_replace(&of->priv, new); - + smp_store_release(&of->priv, new); cgroup_put(cgrp); return nbytes; @@ -3646,7 +3676,7 @@ static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of, static void cgroup_pressure_release(struct kernfs_open_file *of) { - psi_trigger_replace(&of->priv, NULL); + psi_trigger_destroy(of->priv); } bool cgroup_psi_enabled(void) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index c7f574a68cdbdb70585fc5930d88808922f43f05..3a0204d880db6dd646732d97c4e4380d12798fa0 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1505,10 +1505,15 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, struct cpuset *sibling; struct cgroup_subsys_state *pos_css; + lockdep_assert_held(&cpuset_mutex); + /* * Check all its siblings and call update_cpumasks_hier() * if their use_parent_ecpus flag is set in order for them * to use the right effective_cpus value. + * + * The update_cpumasks_hier() function may sleep. So we have to + * release the RCU read lock before calling it. */ rcu_read_lock(); cpuset_for_each_child(sibling, pos_css, parent) { @@ -1516,8 +1521,13 @@ static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs, continue; if (!sibling->use_parent_ecpus) continue; + if (!css_tryget_online(&sibling->css)) + continue; + rcu_read_unlock(); update_cpumasks_hier(sibling, tmp); + rcu_read_lock(); + css_put(&sibling->css); } rcu_read_unlock(); } @@ -1592,8 +1602,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * Make sure that subparts_cpus is a subset of cpus_allowed. */ if (cs->nr_subparts_cpus) { - cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus, - cs->cpus_allowed); + cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } spin_unlock_irq(&callback_lock); @@ -2229,6 +2238,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); + cpus_read_lock(); mutex_lock(&cpuset_mutex); guarantee_online_mems(cs, &cpuset_attach_nodemask_to); @@ -2282,6 +2292,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); + cpus_read_unlock(); } /* The various types of files and directories in a cpuset file system */ diff --git a/kernel/cgroup/rstat.c b/kernel/cgroup/rstat.c index d51175cedfca4f5abd20d879bd564485942087e9..89ca9b61aa0d9a15ead07782d683b4595019d02c 100644 --- a/kernel/cgroup/rstat.c +++ b/kernel/cgroup/rstat.c @@ -421,8 +421,6 @@ static void root_cgroup_cputime(struct task_cputime *cputime) cputime->sum_exec_runtime += user; cputime->sum_exec_runtime += sys; cputime->sum_exec_runtime += cpustat[CPUTIME_STEAL]; - cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST]; - cputime->sum_exec_runtime += cpustat[CPUTIME_GUEST_NICE]; } } diff --git a/kernel/cpu.c b/kernel/cpu.c index ef31235f1d2e78a1fa15bafc42f0fdc7cb26465d..5ee1335ebf704a35d744f4958f17fc1a51a45618 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -558,6 +559,12 @@ static int bringup_cpu(unsigned int cpu) struct task_struct *idle = idle_thread_get(cpu); int ret; + /* + * Reset stale stack state from the last time this CPU was online. + */ + scs_task_reset(idle); + kasan_unpoison_task_stack(idle); + /* * Some architectures have to walk the irq descriptors to * setup the vector space for the cpu which comes online. diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 4457545299177913e363b02d2ac0133ed550b215..10d07ace46c15d22873ee44de06b703319123987 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1300,6 +1300,12 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, if (unlikely(dma_debug_disabled())) return; + for_each_sg(sg, s, nents, i) { + check_for_stack(dev, sg_page(s), s->offset); + if (!PageHighMem(sg_page(s))) + check_for_illegal_area(dev, sg_virt(s), s->length); + } + for_each_sg(sg, s, mapped_ents, i) { entry = dma_entry_alloc(); if (!entry) @@ -1315,12 +1321,6 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->sg_call_ents = nents; entry->sg_mapped_ents = mapped_ents; - check_for_stack(dev, sg_page(s), s->offset); - - if (!PageHighMem(sg_page(s))) { - check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); - } - check_sg_segment(dev, s); add_dma_entry(entry); diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index d2695123296a300fb576b2bb8789eeb13ba6103e..5a7014eaa17680f376df4650afd8181c19b2e113 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -61,7 +61,8 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, *phys_limit = dma_to_phys(dev, dma_limit); if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits)) return GFP_DMA; - if (*phys_limit <= DMA_BIT_MASK(32)) + if (*phys_limit <= DMA_BIT_MASK(32) && + !zone_dma32_are_empty()) return GFP_DMA32; return 0; } @@ -120,7 +121,8 @@ again: if (IS_ENABLED(CONFIG_ZONE_DMA32) && phys_limit < DMA_BIT_MASK(64) && - !(gfp & (GFP_DMA32 | GFP_DMA))) { + !(gfp & (GFP_DMA32 | GFP_DMA)) && + !zone_dma32_are_empty()) { gfp |= GFP_DMA32; goto again; } diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index d4637f72239b404f4fbef822e74cb4c441062034..92aa832b24d2c2da38494e9a8c437700eec758b8 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -74,7 +74,7 @@ static bool cma_in_zone(gfp_t gfp) end = cma_get_base(cma) + size - 1; if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) return end <= DMA_BIT_MASK(zone_dma_bits); - if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) + if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32) && !zone_dma32_are_empty()) return end <= DMA_BIT_MASK(32); return true; } @@ -156,7 +156,7 @@ static void atomic_pool_work_fn(struct work_struct *work) if (IS_ENABLED(CONFIG_ZONE_DMA)) atomic_pool_resize(atomic_pool_dma, GFP_KERNEL | GFP_DMA); - if (IS_ENABLED(CONFIG_ZONE_DMA32)) + if (IS_ENABLED(CONFIG_ZONE_DMA32) && !zone_dma32_are_empty()) atomic_pool_resize(atomic_pool_dma32, GFP_KERNEL | GFP_DMA32); atomic_pool_resize(atomic_pool_kernel, GFP_KERNEL); @@ -206,13 +206,13 @@ static int __init dma_atomic_pool_init(void) GFP_KERNEL); if (!atomic_pool_kernel) ret = -ENOMEM; - if (IS_ENABLED(CONFIG_ZONE_DMA)) { + if (has_managed_dma()) { atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size, GFP_KERNEL | GFP_DMA); if (!atomic_pool_dma) ret = -ENOMEM; } - if (IS_ENABLED(CONFIG_ZONE_DMA32)) { + if (IS_ENABLED(CONFIG_ZONE_DMA32) && !zone_dma32_are_empty()) { atomic_pool_dma32 = __dma_atomic_pool_init(atomic_pool_size, GFP_KERNEL | GFP_DMA32); if (!atomic_pool_dma32) @@ -227,9 +227,9 @@ postcore_initcall(dma_atomic_pool_init); static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp) { if (prev == NULL) { - if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32)) + if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32) && !zone_dma32_are_empty()) return atomic_pool_dma32; - if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA)) + if (atomic_pool_dma && (gfp & GFP_DMA)) return atomic_pool_dma; return atomic_pool_kernel; } diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index b6678a5e3cf64807f73852445183d27e554f5c58..2a3139dab109e7ef1546a104dfa3bd9ce4b18686 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -16,8 +16,10 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work) if (ti_work & _TIF_NEED_RESCHED) schedule(); - if (ti_work & _TIF_NOTIFY_RESUME) + if (ti_work & _TIF_NOTIFY_RESUME) { tracehook_notify_resume(NULL); + rseq_handle_notify_resume(NULL, NULL); + } ret = arch_xfer_to_guest_mode_handle_work(vcpu, ti_work); if (ret) diff --git a/kernel/events/core.c b/kernel/events/core.c index 8bb7ca443005735eceac29a20aa498db62a98da4..f58a026dd2d3042eca865ca4137032f430b93dec 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -266,7 +266,7 @@ static void event_function_call(struct perf_event *event, event_f func, void *da if (!event->parent) { /* * If this is a !child event, we must hold ctx::mutex to - * stabilize the the event->ctx relation. See + * stabilize the event->ctx relation. See * perf_event_ctx_lock(). */ lockdep_assert_held(&ctx->mutex); @@ -673,6 +673,23 @@ perf_event_set_state(struct perf_event *event, enum perf_event_state state) WRITE_ONCE(event->state, state); } +/* + * UP store-release, load-acquire + */ + +#define __store_release(ptr, val) \ +do { \ + barrier(); \ + WRITE_ONCE(*(ptr), (val)); \ +} while (0) + +#define __load_acquire(ptr) \ +({ \ + __unqual_scalar_typeof(*(ptr)) ___p = READ_ONCE(*(ptr)); \ + barrier(); \ + ___p; \ +}) + #ifdef CONFIG_CGROUP_PERF static inline bool @@ -718,34 +735,51 @@ static inline u64 perf_cgroup_event_time(struct perf_event *event) return t->time; } -static inline void __update_cgrp_time(struct perf_cgroup *cgrp) +static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) { - struct perf_cgroup_info *info; - u64 now; - - now = perf_clock(); + struct perf_cgroup_info *t; - info = this_cpu_ptr(cgrp->info); + t = per_cpu_ptr(event->cgrp->info, event->cpu); + if (!__load_acquire(&t->active)) + return t->time; + now += READ_ONCE(t->timeoffset); + return now; +} - info->time += now - info->timestamp; +static inline void __update_cgrp_time(struct perf_cgroup_info *info, u64 now, bool adv) +{ + if (adv) + info->time += now - info->timestamp; info->timestamp = now; + /* + * see update_context_time() + */ + WRITE_ONCE(info->timeoffset, info->time - info->timestamp); } -static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) +static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, bool final) { struct perf_cgroup *cgrp = cpuctx->cgrp; struct cgroup_subsys_state *css; + struct perf_cgroup_info *info; if (cgrp) { + u64 now = perf_clock(); + for (css = &cgrp->css; css; css = css->parent) { cgrp = container_of(css, struct perf_cgroup, css); - __update_cgrp_time(cgrp); + info = this_cpu_ptr(cgrp->info); + + __update_cgrp_time(info, now, true); + if (final) + __store_release(&info->active, 0); } } } static inline void update_cgrp_time_from_event(struct perf_event *event) { + struct perf_cgroup_info *info; struct perf_cgroup *cgrp; /* @@ -759,8 +793,10 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) /* * Do not update time when cgroup is not active */ - if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) - __update_cgrp_time(event->cgrp); + if (cgroup_is_descendant(cgrp->css.cgroup, event->cgrp->css.cgroup)) { + info = this_cpu_ptr(event->cgrp->info); + __update_cgrp_time(info, perf_clock(), true); + } } static inline void @@ -784,7 +820,8 @@ perf_cgroup_set_timestamp(struct task_struct *task, for (css = &cgrp->css; css; css = css->parent) { cgrp = container_of(css, struct perf_cgroup, css); info = this_cpu_ptr(cgrp->info); - info->timestamp = ctx->timestamp; + __update_cgrp_time(info, ctx->timestamp, false); + __store_release(&info->active, 1); } } @@ -801,7 +838,7 @@ static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list); */ static void perf_cgroup_switch(struct task_struct *task, int mode) { - struct perf_cpu_context *cpuctx; + struct perf_cpu_context *cpuctx, *tmp; struct list_head *list; unsigned long flags; @@ -812,7 +849,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) local_irq_save(flags); list = this_cpu_ptr(&cgrp_cpuctx_list); - list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) { + list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) { WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); perf_ctx_lock(cpuctx, cpuctx->task_ctx); @@ -980,14 +1017,6 @@ out: return ret; } -static inline void -perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) -{ - struct perf_cgroup_info *t; - t = per_cpu_ptr(event->cgrp->info, event->cpu); - event->shadow_ctx_time = now - t->timestamp; -} - static inline void perf_cgroup_event_enable(struct perf_event *event, struct perf_event_context *ctx) { @@ -1065,7 +1094,8 @@ static inline void update_cgrp_time_from_event(struct perf_event *event) { } -static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx) +static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx, + bool final) { } @@ -1097,12 +1127,12 @@ perf_cgroup_switch(struct task_struct *task, struct task_struct *next) { } -static inline void -perf_cgroup_set_shadow_time(struct perf_event *event, u64 now) +static inline u64 perf_cgroup_event_time(struct perf_event *event) { + return 0; } -static inline u64 perf_cgroup_event_time(struct perf_event *event) +static inline u64 perf_cgroup_event_time_now(struct perf_event *event, u64 now) { return 0; } @@ -1300,7 +1330,7 @@ static void put_ctx(struct perf_event_context *ctx) * life-time rules separate them. That is an exiting task cannot fork, and a * spawning task cannot (yet) exit. * - * But remember that that these are parent<->child context relations, and + * But remember that these are parent<->child context relations, and * migration does not affect children, therefore these two orderings should not * interact. * @@ -1439,7 +1469,7 @@ static u64 primary_event_id(struct perf_event *event) /* * Get the perf_event_context for a task and lock it. * - * This has to cope with with the fact that until it is locked, + * This has to cope with the fact that until it is locked, * the context could get moved to another task. */ static struct perf_event_context * @@ -1524,22 +1554,59 @@ static void perf_unpin_context(struct perf_event_context *ctx) /* * Update the record of the current time in a context. */ -static void update_context_time(struct perf_event_context *ctx) +static void __update_context_time(struct perf_event_context *ctx, bool adv) { u64 now = perf_clock(); - ctx->time += now - ctx->timestamp; + if (adv) + ctx->time += now - ctx->timestamp; ctx->timestamp = now; + + /* + * The above: time' = time + (now - timestamp), can be re-arranged + * into: time` = now + (time - timestamp), which gives a single value + * offset to compute future time without locks on. + * + * See perf_event_time_now(), which can be used from NMI context where + * it's (obviously) not possible to acquire ctx->lock in order to read + * both the above values in a consistent manner. + */ + WRITE_ONCE(ctx->timeoffset, ctx->time - ctx->timestamp); +} + +static void update_context_time(struct perf_event_context *ctx) +{ + __update_context_time(ctx, true); } static u64 perf_event_time(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; + if (unlikely(!ctx)) + return 0; + if (is_cgroup_event(event)) return perf_cgroup_event_time(event); - return ctx ? ctx->time : 0; + return ctx->time; +} + +static u64 perf_event_time_now(struct perf_event *event, u64 now) +{ + struct perf_event_context *ctx = event->ctx; + + if (unlikely(!ctx)) + return 0; + + if (is_cgroup_event(event)) + return perf_cgroup_event_time_now(event, now); + + if (!(__load_acquire(&ctx->is_active) & EVENT_TIME)) + return ctx->time; + + now += READ_ONCE(ctx->timeoffset); + return now; } static enum event_type_t get_event_type(struct perf_event *event) @@ -2209,26 +2276,6 @@ out: perf_event__header_size(leader); } -static void sync_child_event(struct perf_event *child_event); - -static void perf_child_detach(struct perf_event *event) -{ - struct perf_event *parent_event = event->parent; - - if (!(event->attach_state & PERF_ATTACH_CHILD)) - return; - - event->attach_state &= ~PERF_ATTACH_CHILD; - - if (WARN_ON_ONCE(!parent_event)) - return; - - lockdep_assert_held(&parent_event->child_mutex); - - sync_child_event(event); - list_del_init(&event->child_list); -} - static bool is_orphaned_event(struct perf_event *event) { return event->state == PERF_EVENT_STATE_DEAD; @@ -2336,7 +2383,6 @@ group_sched_out(struct perf_event *group_event, } #define DETACH_GROUP 0x01UL -#define DETACH_CHILD 0x02UL /* * Cross CPU call to remove a performance event @@ -2354,17 +2400,18 @@ __perf_remove_from_context(struct perf_event *event, if (ctx->is_active & EVENT_TIME) { update_context_time(ctx); - update_cgrp_time_from_cpuctx(cpuctx); + update_cgrp_time_from_cpuctx(cpuctx, false); } event_sched_out(event, cpuctx, ctx); if (flags & DETACH_GROUP) perf_group_detach(event); - if (flags & DETACH_CHILD) - perf_child_detach(event); list_del_event(event, ctx); if (!ctx->nr_events && ctx->is_active) { + if (ctx == &cpuctx->ctx) + update_cgrp_time_from_cpuctx(cpuctx, true); + ctx->is_active = 0; ctx->rotate_necessary = 0; if (ctx->task) { @@ -2390,21 +2437,25 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla lockdep_assert_held(&ctx->mutex); + event_function_call(event, __perf_remove_from_context, (void *)flags); + /* - * Because of perf_event_exit_task(), perf_remove_from_context() ought - * to work in the face of TASK_TOMBSTONE, unlike every other - * event_function_call() user. + * The above event_function_call() can NO-OP when it hits + * TASK_TOMBSTONE. In that case we must already have been detached + * from the context (by perf_event_exit_event()) but the grouping + * might still be in-tact. */ - raw_spin_lock_irq(&ctx->lock); - if (!ctx->is_active) { - __perf_remove_from_context(event, __get_cpu_context(ctx), - ctx, (void *)flags); + WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT); + if ((flags & DETACH_GROUP) && + (event->attach_state & PERF_ATTACH_GROUP)) { + /* + * Since in that case we cannot possibly be scheduled, simply + * detach now. + */ + raw_spin_lock_irq(&ctx->lock); + perf_group_detach(event); raw_spin_unlock_irq(&ctx->lock); - return; } - raw_spin_unlock_irq(&ctx->lock); - - event_function_call(event, __perf_remove_from_context, (void *)flags); } /* @@ -2486,40 +2537,6 @@ void perf_event_disable_inatomic(struct perf_event *event) irq_work_queue(&event->pending); } -static void perf_set_shadow_time(struct perf_event *event, - struct perf_event_context *ctx) -{ - /* - * use the correct time source for the time snapshot - * - * We could get by without this by leveraging the - * fact that to get to this function, the caller - * has most likely already called update_context_time() - * and update_cgrp_time_xx() and thus both timestamp - * are identical (or very close). Given that tstamp is, - * already adjusted for cgroup, we could say that: - * tstamp - ctx->timestamp - * is equivalent to - * tstamp - cgrp->timestamp. - * - * Then, in perf_output_read(), the calculation would - * work with no changes because: - * - event is guaranteed scheduled in - * - no scheduled out in between - * - thus the timestamp would be the same - * - * But this is a bit hairy. - * - * So instead, we have an explicit cgroup call to remain - * within the time time source all along. We believe it - * is cleaner and simpler to understand. - */ - if (is_cgroup_event(event)) - perf_cgroup_set_shadow_time(event, event->tstamp); - else - event->shadow_ctx_time = event->tstamp - ctx->timestamp; -} - #define MAX_INTERRUPTS (~0ULL) static void perf_log_throttle(struct perf_event *event, int enable); @@ -2560,8 +2577,6 @@ event_sched_in(struct perf_event *event, perf_pmu_disable(event->pmu); - perf_set_shadow_time(event, ctx); - perf_log_itrace_start(event); if (event->pmu->add(event, PERF_EF_START)) { @@ -3235,16 +3250,6 @@ static void ctx_sched_out(struct perf_event_context *ctx, return; } - ctx->is_active &= ~event_type; - if (!(ctx->is_active & EVENT_ALL)) - ctx->is_active = 0; - - if (ctx->task) { - WARN_ON_ONCE(cpuctx->task_ctx != ctx); - if (!ctx->is_active) - cpuctx->task_ctx = NULL; - } - /* * Always update time if it was set; not only when it changes. * Otherwise we can 'forget' to update time for any but the last @@ -3258,7 +3263,22 @@ static void ctx_sched_out(struct perf_event_context *ctx, if (is_active & EVENT_TIME) { /* update (and stop) ctx time */ update_context_time(ctx); - update_cgrp_time_from_cpuctx(cpuctx); + update_cgrp_time_from_cpuctx(cpuctx, ctx == &cpuctx->ctx); + /* + * CPU-release for the below ->is_active store, + * see __load_acquire() in perf_event_time_now() + */ + barrier(); + } + + ctx->is_active &= ~event_type; + if (!(ctx->is_active & EVENT_ALL)) + ctx->is_active = 0; + + if (ctx->task) { + WARN_ON_ONCE(cpuctx->task_ctx != ctx); + if (!ctx->is_active) + cpuctx->task_ctx = NULL; } is_active ^= ctx->is_active; /* changed bits */ @@ -3695,6 +3715,35 @@ static noinline int visit_groups_merge(struct perf_cpu_context *cpuctx, return 0; } +/* + * Because the userpage is strictly per-event (there is no concept of context, + * so there cannot be a context indirection), every userpage must be updated + * when context time starts :-( + * + * IOW, we must not miss EVENT_TIME edges. + */ +static inline bool event_update_userpage(struct perf_event *event) +{ + if (likely(!atomic_read(&event->mmap_count))) + return false; + + perf_event_update_time(event); + perf_event_update_userpage(event); + + return true; +} + +static inline void group_update_userpage(struct perf_event *group_event) +{ + struct perf_event *event; + + if (!event_update_userpage(group_event)) + return; + + for_each_sibling_event(event, group_event) + event_update_userpage(event); +} + static int merge_sched_in(struct perf_event *event, void *data) { struct perf_event_context *ctx = event->ctx; @@ -3713,14 +3762,15 @@ static int merge_sched_in(struct perf_event *event, void *data) } if (event->state == PERF_EVENT_STATE_INACTIVE) { + *can_add_hw = 0; if (event->attr.pinned) { perf_cgroup_event_disable(event, ctx); perf_event_set_state(event, PERF_EVENT_STATE_ERROR); + } else { + ctx->rotate_necessary = 1; + perf_mux_hrtimer_restart(cpuctx); + group_update_userpage(event); } - - *can_add_hw = 0; - ctx->rotate_necessary = 1; - perf_mux_hrtimer_restart(cpuctx); } return 0; @@ -3761,13 +3811,23 @@ ctx_sched_in(struct perf_event_context *ctx, struct task_struct *task) { int is_active = ctx->is_active; - u64 now; lockdep_assert_held(&ctx->lock); if (likely(!ctx->nr_events)) return; + if (is_active ^ EVENT_TIME) { + /* start ctx time */ + __update_context_time(ctx, false); + perf_cgroup_set_timestamp(task, ctx); + /* + * CPU-release for the below ->is_active store, + * see __load_acquire() in perf_event_time_now() + */ + barrier(); + } + ctx->is_active |= (event_type | EVENT_TIME); if (ctx->task) { if (!is_active) @@ -3778,13 +3838,6 @@ ctx_sched_in(struct perf_event_context *ctx, is_active ^= ctx->is_active; /* changed bits */ - if (is_active & EVENT_TIME) { - /* start ctx time */ - now = perf_clock(); - ctx->timestamp = now; - perf_cgroup_set_timestamp(task, ctx); - } - /* * First go through the list and put on any pinned groups * in order to give them the best chance of going on. @@ -4320,6 +4373,18 @@ static inline u64 perf_event_count(struct perf_event *event) return local64_read(&event->count) + atomic64_read(&event->child_count); } +static void calc_timer_values(struct perf_event *event, + u64 *now, + u64 *enabled, + u64 *running) +{ + u64 ctx_time; + + *now = perf_clock(); + ctx_time = perf_event_time_now(event, *now); + __perf_update_times(event, ctx_time, enabled, running); +} + /* * NMI-safe method to read a local event, that is an event that * is: @@ -4379,10 +4444,9 @@ int perf_event_read_local(struct perf_event *event, u64 *value, *value = local64_read(&event->count); if (enabled || running) { - u64 now = event->shadow_ctx_time + perf_clock(); - u64 __enabled, __running; + u64 __enabled, __running, __now;; - __perf_update_times(event, now, &__enabled, &__running); + calc_timer_values(event, &__now, &__enabled, &__running); if (enabled) *enabled = __enabled; if (running) @@ -5690,18 +5754,6 @@ static int perf_event_index(struct perf_event *event) return event->pmu->event_idx(event); } -static void calc_timer_values(struct perf_event *event, - u64 *now, - u64 *enabled, - u64 *running) -{ - u64 ctx_time; - - *now = perf_clock(); - ctx_time = event->shadow_ctx_time + *now; - __perf_update_times(event, ctx_time, enabled, running); -} - static void perf_event_init_userpage(struct perf_event *event) { struct perf_event_mmap_page *userpg; @@ -5826,6 +5878,8 @@ static void ring_buffer_attach(struct perf_event *event, struct perf_buffer *old_rb = NULL; unsigned long flags; + WARN_ON_ONCE(event->parent); + if (event->rb) { /* * Should be impossible, we set this when removing @@ -5883,6 +5937,9 @@ static void ring_buffer_wakeup(struct perf_event *event) { struct perf_buffer *rb; + if (event->parent) + event = event->parent; + rcu_read_lock(); rb = rcu_dereference(event->rb); if (rb) { @@ -5896,6 +5953,9 @@ struct perf_buffer *ring_buffer_get(struct perf_event *event) { struct perf_buffer *rb; + if (event->parent) + event = event->parent; + rcu_read_lock(); rb = rcu_dereference(event->rb); if (rb) { @@ -6240,6 +6300,7 @@ accounting: ring_buffer_attach(event, rb); + perf_event_update_time(event); perf_event_init_userpage(event); perf_event_update_userpage(event); } else { @@ -6389,18 +6450,25 @@ static void perf_pending_event(struct irq_work *entry) * Later on, we might change it to a list if there is * another virtualization implementation supporting the callbacks. */ -struct perf_guest_info_callbacks *perf_guest_cbs; +struct perf_guest_info_callbacks __rcu *perf_guest_cbs; int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) { - perf_guest_cbs = cbs; + if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs))) + return -EBUSY; + + rcu_assign_pointer(perf_guest_cbs, cbs); return 0; } EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks); int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs) { - perf_guest_cbs = NULL; + if (WARN_ON_ONCE(rcu_access_pointer(perf_guest_cbs) != cbs)) + return -EINVAL; + + rcu_assign_pointer(perf_guest_cbs, NULL); + synchronize_rcu(); return 0; } EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks); @@ -6559,7 +6627,7 @@ static unsigned long perf_prepare_sample_aux(struct perf_event *event, if (WARN_ON_ONCE(READ_ONCE(sampler->oncpu) != smp_processor_id())) goto out; - rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); + rb = ring_buffer_get(sampler); if (!rb) goto out; @@ -6625,7 +6693,7 @@ static void perf_aux_sample_output(struct perf_event *event, if (WARN_ON_ONCE(!sampler || !data->aux_size)) return; - rb = ring_buffer_get(sampler->parent ? sampler->parent : sampler); + rb = ring_buffer_get(sampler); if (!rb) return; @@ -7011,7 +7079,6 @@ void perf_output_sample(struct perf_output_handle *handle, static u64 perf_virt_to_phys(u64 virt) { u64 phys_addr = 0; - struct page *p = NULL; if (!virt) return 0; @@ -7030,14 +7097,15 @@ static u64 perf_virt_to_phys(u64 virt) * If failed, leave phys_addr as 0. */ if (current->mm != NULL) { + struct page *p; + pagefault_disable(); - if (get_user_page_fast_only(virt, 0, &p)) + if (get_user_page_fast_only(virt, 0, &p)) { phys_addr = page_to_phys(p) + virt % PAGE_SIZE; + put_page(p); + } pagefault_enable(); } - - if (p) - put_page(p); } return phys_addr; @@ -12271,17 +12339,14 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) } EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); -static void sync_child_event(struct perf_event *child_event) +static void sync_child_event(struct perf_event *child_event, + struct task_struct *child) { struct perf_event *parent_event = child_event->parent; u64 child_val; - if (child_event->attr.inherit_stat) { - struct task_struct *task = child_event->ctx->task; - - if (task && task != TASK_TOMBSTONE) - perf_event_read_event(child_event, task); - } + if (child_event->attr.inherit_stat) + perf_event_read_event(child_event, child); child_val = perf_event_count(child_event); @@ -12296,53 +12361,60 @@ static void sync_child_event(struct perf_event *child_event) } static void -perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx) +perf_event_exit_event(struct perf_event *child_event, + struct perf_event_context *child_ctx, + struct task_struct *child) { - struct perf_event *parent_event = event->parent; - unsigned long detach_flags = 0; - - if (parent_event) { - /* - * Do not destroy the 'original' grouping; because of the - * context switch optimization the original events could've - * ended up in a random child task. - * - * If we were to destroy the original group, all group related - * operations would cease to function properly after this - * random child dies. - * - * Do destroy all inherited groups, we don't care about those - * and being thorough is better. - */ - detach_flags = DETACH_GROUP | DETACH_CHILD; - mutex_lock(&parent_event->child_mutex); - } + struct perf_event *parent_event = child_event->parent; - perf_remove_from_context(event, detach_flags); + /* + * Do not destroy the 'original' grouping; because of the context + * switch optimization the original events could've ended up in a + * random child task. + * + * If we were to destroy the original group, all group related + * operations would cease to function properly after this random + * child dies. + * + * Do destroy all inherited groups, we don't care about those + * and being thorough is better. + */ + raw_spin_lock_irq(&child_ctx->lock); + WARN_ON_ONCE(child_ctx->is_active); - raw_spin_lock_irq(&ctx->lock); - if (event->state > PERF_EVENT_STATE_EXIT) - perf_event_set_state(event, PERF_EVENT_STATE_EXIT); - raw_spin_unlock_irq(&ctx->lock); + if (parent_event) + perf_group_detach(child_event); + list_del_event(child_event, child_ctx); + perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */ + raw_spin_unlock_irq(&child_ctx->lock); /* - * Child events can be freed. + * Parent events are governed by their filedesc, retain them. */ - if (parent_event) { - mutex_unlock(&parent_event->child_mutex); - /* - * Kick perf_poll() for is_event_hup(); - */ - perf_event_wakeup(parent_event); - free_event(event); - put_event(parent_event); + if (!parent_event) { + perf_event_wakeup(child_event); return; } + /* + * Child events can be cleaned up. + */ + + sync_child_event(child_event, child); /* - * Parent events are governed by their filedesc, retain them. + * Remove this event from the parent's list + */ + WARN_ON_ONCE(parent_event->ctx->parent_ctx); + mutex_lock(&parent_event->child_mutex); + list_del_init(&child_event->child_list); + mutex_unlock(&parent_event->child_mutex); + + /* + * Kick perf_poll() for is_event_hup(). */ - perf_event_wakeup(event); + perf_event_wakeup(parent_event); + free_event(child_event); + put_event(parent_event); } static void perf_event_exit_task_context(struct task_struct *child, int ctxn) @@ -12399,7 +12471,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) perf_event_task(child, child_ctx, 0); list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry) - perf_event_exit_event(child_event, child_ctx); + perf_event_exit_event(child_event, child_ctx, child); mutex_unlock(&child_ctx->mutex); @@ -12659,7 +12731,6 @@ inherit_event(struct perf_event *parent_event, */ raw_spin_lock_irqsave(&child_ctx->lock, flags); add_event_to_ctx(child_event, child_ctx); - child_event->attach_state |= PERF_ATTACH_CHILD; raw_spin_unlock_irqrestore(&child_ctx->lock, flags); /* diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 00b0358739ab3b6b66b780e20f3374bbef4d4d34..e1bbb3b92921d8eb084d2844cd2ae85ebaf5e14f 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1735,7 +1735,7 @@ void uprobe_free_utask(struct task_struct *t) } /* - * Allocate a uprobe_task object for the task if if necessary. + * Allocate a uprobe_task object for the task if necessary. * Called when the thread hits a breakpoint. * * Returns: diff --git a/kernel/exit.c b/kernel/exit.c index f61ac326daabd260c57191943112fa5ca4c3a20b..24a302ecde59f4ba24cf76201c583ee9acfb64f7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -423,6 +423,7 @@ assign_new_owner: goto retry; } WRITE_ONCE(mm->owner, c); + lru_gen_migrate_mm(mm); task_unlock(c); put_task_struct(c); } diff --git a/kernel/fork.c b/kernel/fork.c index ec8c47c7664e0c19f48ffbe6fe2e7d9858086c74..965d97bec20e3de5d9164cbd1dccec698186d35c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -231,15 +232,17 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) if (!s) continue; - /* Mark stack accessible for KASAN. */ + /* Reset stack metadata. */ kasan_unpoison_range(s->addr, THREAD_SIZE); + stack = kasan_reset_tag(s->addr); + /* Clear stale pointers from reused stack. */ - memset(s->addr, 0, THREAD_SIZE); + memset(stack, 0, THREAD_SIZE); tsk->stack_vm_area = s; - tsk->stack = s->addr; - return s->addr; + tsk->stack = stack; + return stack; } /* @@ -259,6 +262,7 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) * so cache the vm_struct. */ if (stack) { + stack = kasan_reset_tag(stack); tsk->stack_vm_area = find_vm_area(stack); tsk->stack = stack; } @@ -371,12 +375,14 @@ struct vm_area_struct *vm_area_dup(struct vm_area_struct *orig) *new = data_race(*orig); INIT_VMA(new); new->vm_next = new->vm_prev = NULL; + dup_anon_vma_name(orig, new); } return new; } void vm_area_free(struct vm_area_struct *vma) { + free_anon_vma_name(vma); kmem_cache_free(vm_area_cachep, vma); } @@ -1070,7 +1076,8 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_owner(mm, p); mm_init_pasid(mm); RCU_INIT_POINTER(mm->exe_file, NULL); - mmu_notifier_subscriptions_init(mm); + if (!mmu_notifier_subscriptions_init(mm)) + goto fail_nopgd; init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; @@ -1093,6 +1100,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, goto fail_nocontext; mm->user_ns = get_user_ns(user_ns); + lru_gen_init_mm(mm); return mm; fail_nocontext: @@ -1135,6 +1143,7 @@ static inline void __mmput(struct mm_struct *mm) } if (mm->binfmt) module_put(mm->binfmt->module); + lru_gen_del_mm(mm); mmdrop(mm); } @@ -2232,6 +2241,7 @@ static __latent_entropy struct task_struct *copy_process( p->pdeath_signal = 0; INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; + clear_posix_cputimers_work(p); /* * Ensure that the cgroup subsystem policies allow the new process to be @@ -2298,10 +2308,6 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } - /* past the last point of failure */ - if (pidfile) - fd_install(pidfd, pidfile); - init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@ -2350,8 +2356,11 @@ static __latent_entropy struct task_struct *copy_process( syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); + if (pidfile) + fd_install(pidfd, pidfile); + proc_fork_connector(p); - sched_post_fork(p); + sched_post_fork(p, args); cgroup_post_fork(p, args); perf_event_fork(p); @@ -2434,7 +2443,7 @@ static inline void init_idle_pids(struct task_struct *idle) } } -struct task_struct *fork_idle(int cpu) +struct task_struct * __init fork_idle(int cpu) { struct task_struct *task; struct kernel_clone_args args = { @@ -2530,6 +2539,13 @@ pid_t kernel_clone(struct kernel_clone_args *args) get_task_struct(p); } + if (IS_ENABLED(CONFIG_LRU_GEN) && !(clone_flags & CLONE_VM)) { + /* lock the task to synchronize with memcg migration */ + task_lock(p); + lru_gen_add_mm(p->mm); + task_unlock(p); + } + wake_up_new_task(p); /* forking complete and child started to run, tell ptracer */ diff --git a/kernel/futex.c b/kernel/futex.c index 3cd0ae850fdef9f7649ca2eff78ae3d2eac7e6d8..20b27a9551e730858f72db3b4b66c16e6465f27e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -587,7 +587,7 @@ again: lock_page(page); shmem_swizzled = PageSwapCache(page) || page->mapping; unlock_page(page); - put_user_page(page); + put_page(page); if (shmem_swizzled) goto again; @@ -637,7 +637,7 @@ again: if (READ_ONCE(page->mapping) != mapping) { rcu_read_unlock(); - put_user_page(page); + put_page(page); goto again; } @@ -645,7 +645,7 @@ again: inode = READ_ONCE(mapping->host); if (!inode) { rcu_read_unlock(); - put_user_page(page); + put_page(page); goto again; } @@ -657,7 +657,7 @@ again: } out: - put_user_page(page); + put_page(page); return err; } diff --git a/kernel/gki_module.c b/kernel/gki_module.c new file mode 100644 index 0000000000000000000000000000000000000000..24651dfc34f84edd41dda676fe643a810c709fc5 --- /dev/null +++ b/kernel/gki_module.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021 Google LLC + * Author: ramjiyani@google.com (Ramji Jiyani) + */ + +#include +#include +#include +#include +#include + +/* + * Build time generated header files + * + * gki_module_exported.h -- Symbols protected from _export_ by unsigned modules + * gki_module_protected.h -- Symbols protected from _access_ by unsigned modules + */ +#include "gki_module_protected.h" +#include "gki_module_exported.h" + +#define MAX_STRCMP_LEN (max(MAX_PROTECTED_NAME_LEN, MAX_EXPORTED_NAME_LEN)) + +/* bsearch() comparision callback */ +static int cmp_name(const void *sym, const void *protected_sym) +{ + return strncmp(sym, protected_sym, MAX_STRCMP_LEN); +} + +/** + * gki_is_module_protected_symbol - Is a symbol protected from unsigned module? + * + * @name: Symbol being checked against protection from unsigned module + */ +bool gki_is_module_protected_symbol(const char *name) +{ + return bsearch(name, gki_protected_symbols, NO_OF_PROTECTED_SYMBOLS, + MAX_PROTECTED_NAME_LEN, cmp_name) != NULL; +} + +/** + * gki_is_module_exported_symbol - Is a symbol exported from a GKI module? + * + * @name: Symbol being checked against exported symbols from GKI modules + */ +bool gki_is_module_exported_symbol(const char *name) +{ + return bsearch(name, gki_exported_symbols, NO_OF_EXPORTED_SYMBOLS, + MAX_EXPORTED_NAME_LEN, cmp_name) != NULL; +} diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ec1c6e6ed7cf79e414cc770dd2dd3cd6a63dfe8b..147c5de13da78b717318267fbd2404f754fd8c4e 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1628,7 +1628,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) irqd_set(&desc->irq_data, IRQD_NO_BALANCING); } - if (irq_settings_can_autoenable(desc)) { + if (!(new->flags & IRQF_NO_AUTOEN) && + irq_settings_can_autoenable(desc)) { irq_startup(desc, IRQ_RESEND, IRQ_START_COND); } else { /* @@ -2021,10 +2022,15 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, * which interrupt is which (messes up the interrupt freeing * logic etc). * + * Also shared interrupts do not go well with disabling auto enable. + * The sharing interrupt might request it while it's still disabled + * and then wait for interrupts forever. + * * Also IRQF_COND_SUSPEND only makes sense for shared interrupts and * it cannot be set along with IRQF_NO_SUSPEND. */ if (((irqflags & IRQF_SHARED) && !dev_id) || + ((irqflags & IRQF_SHARED) && (irqflags & IRQF_NO_AUTOEN)) || (!(irqflags & IRQF_SHARED) && (irqflags & IRQF_COND_SUSPEND)) || ((irqflags & IRQF_NO_SUSPEND) && (irqflags & IRQF_COND_SUSPEND))) return -EINVAL; @@ -2180,7 +2186,8 @@ int request_nmi(unsigned int irq, irq_handler_t handler, desc = irq_to_desc(irq); - if (!desc || irq_settings_can_autoenable(desc) || + if (!desc || (irq_settings_can_autoenable(desc) && + !(irqflags & IRQF_NO_AUTOEN)) || !irq_settings_can_request(desc) || WARN_ON(irq_settings_is_per_cpu_devid(desc)) || !irq_supports_nmi(desc)) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index f590e9ff37062e488bd12f9ebaeef0a5c6c1c3ea..cdea59acd66bf33d447bb19af405fb83f95c4c82 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2137,6 +2137,9 @@ int register_kretprobe(struct kretprobe *rp) } } + if (rp->data_size > KRETPROBE_MAX_DATA_SIZE) + return -E2BIG; + rp->kp.pre_handler = pre_handler_kretprobe; rp->kp.post_handler = NULL; rp->kp.fault_handler = NULL; @@ -2943,13 +2946,12 @@ static const struct file_operations fops_kp = { static int __init debugfs_kprobe_init(void) { struct dentry *dir; - unsigned int value = 1; dir = debugfs_create_dir("kprobes", NULL); debugfs_create_file("list", 0400, dir, NULL, &kprobes_fops); - debugfs_create_file("enabled", 0600, dir, &value, &fops_kp); + debugfs_create_file("enabled", 0600, dir, NULL, &fops_kp); debugfs_create_file("blacklist", 0400, dir, NULL, &kprobe_blacklist_fops); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 5184f689681588978af83ade472c6423a807af52..af4b35450556ff9a3165307edfa54c69b6e78b32 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -887,7 +887,7 @@ look_up_lock_class(const struct lockdep_map *lock, unsigned int subclass) if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) return NULL; - hlist_for_each_entry_rcu(class, hash_head, hash_entry) { + hlist_for_each_entry_rcu_notrace(class, hash_head, hash_entry) { if (class->key == key) { /* * Huh! same key, different name? Did someone trample @@ -3387,7 +3387,7 @@ struct lock_class *lock_chain_get_class(struct lock_chain *chain, int i) u16 chain_hlock = chain_hlocks[chain->base + i]; unsigned int class_idx = chain_hlock_class_idx(chain_hlock); - return lock_classes + class_idx - 1; + return lock_classes + class_idx; } /* @@ -3455,7 +3455,7 @@ static void print_chain_keys_chain(struct lock_chain *chain) hlock_id = chain_hlocks[chain->base + i]; chain_key = print_chain_key_iteration(hlock_id, chain_key); - print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id) - 1); + print_lock_name(lock_classes + chain_hlock_class_idx(hlock_id)); printk("\n"); } } @@ -5303,7 +5303,7 @@ int __lock_is_held(const struct lockdep_map *lock, int read) struct held_lock *hlock = curr->held_locks + i; if (match_held_lock(hlock, lock)) { - if (read == -1 || hlock->read == read) + if (read == -1 || !!hlock->read == read) return 1; return 0; diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 70a32a576f3f249c7fd4ffd8f27f8d4e0cd5e3c7..a3d37bf83c60430a6bc999029559cbf694e9d8e1 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -7,6 +7,7 @@ #include #include #include +#include #include int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, @@ -268,3 +269,34 @@ void percpu_up_write(struct percpu_rw_semaphore *sem) rcu_sync_exit(&sem->rss); } EXPORT_SYMBOL_GPL(percpu_up_write); + +static LIST_HEAD(destroy_list); +static DEFINE_SPINLOCK(destroy_list_lock); + +static void destroy_list_workfn(struct work_struct *work) +{ + struct percpu_rw_semaphore *sem, *sem2; + LIST_HEAD(to_destroy); + + spin_lock(&destroy_list_lock); + list_splice_init(&destroy_list, &to_destroy); + spin_unlock(&destroy_list_lock); + + if (list_empty(&to_destroy)) + return; + + list_for_each_entry_safe(sem, sem2, &to_destroy, destroy_list_entry) { + percpu_free_rwsem(sem); + kfree(sem); + } +} + +static DECLARE_WORK(destroy_list_work, destroy_list_workfn); + +void percpu_rwsem_async_destroy(struct percpu_rw_semaphore *sem) +{ + spin_lock(&destroy_list_lock); + list_add_tail(&sem->destroy_list_entry, &destroy_list); + spin_unlock(&destroy_list_lock); + schedule_work(&destroy_list_work); +} diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 135e395d1c143ee88f1fe7cc9e7a3df2d1d27dc0..49c0fa173bdedeccdae799a45b1bd26936193b89 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1441,7 +1441,7 @@ rt_mutex_fasttrylock(struct rt_mutex *lock, } /* - * Performs the wakeup of the the top-waiter and re-enables preemption. + * Performs the wakeup of the top-waiter and re-enables preemption. */ void rt_mutex_postunlock(struct wake_q_head *wake_q) { @@ -1835,7 +1835,7 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock) * been started. * @waiter: the pre-initialized rt_mutex_waiter * - * Wait for the the lock acquisition started on our behalf by + * Wait for the lock acquisition started on our behalf by * rt_mutex_start_proxy_lock(). Upon failure, the caller must call * rt_mutex_cleanup_proxy_lock(). * diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e0505b9b8a3164dfae327f63a2a80e8afe31312b..cec73899a8252c30d9029457145f176825b5b46a 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -1182,7 +1182,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state) /* * If there were already threads queued before us and: - * 1) there are no no active locks, wake the front + * 1) there are no active locks, wake the front * queued process(es) as the handoff bit might be set. * 2) there are no active writers and some readers, the lock * must be read owned; so we try to wake any read lock diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index d9dd94defc0a928c4697b49c2071090c6105966e..9aa855a96c4ae2e1149baafa56f1d0d6e1c8717e 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -119,7 +119,7 @@ EXPORT_SYMBOL(down_killable); * @sem: the semaphore to be acquired * * Try to acquire the semaphore atomically. Returns 0 if the semaphore has - * been acquired successfully or 1 if it it cannot be acquired. + * been acquired successfully or 1 if it cannot be acquired. * * NOTE: This return value is inverted from both spin_trylock and * mutex_trylock! Be careful about this when converting code. diff --git a/kernel/module-internal.h b/kernel/module-internal.h index 33783abc377bfb5333a8da30601134f4265dbcea..9b040512536e1351a13992981203c3f621de31be 100644 --- a/kernel/module-internal.h +++ b/kernel/module-internal.h @@ -29,3 +29,17 @@ struct load_info { }; extern int mod_verify_sig(const void *mod, struct load_info *info); + +#ifdef CONFIG_MODULE_SIG_PROTECT +extern bool gki_is_module_exported_symbol(const char *name); +extern bool gki_is_module_protected_symbol(const char *name); +#else +static inline bool gki_is_module_exported_symbol(const char *name) +{ + return 0; +} +static inline bool gki_is_module_protected_symbol(const char *name) +{ + return 0; +} +#endif /* CONFIG_MODULE_SIG_PROTECT */ diff --git a/kernel/module.c b/kernel/module.c index ae977497213c6511aa1d13b9d8f0c40f6716e4e9..b81200591e6d0fff6089f1847e6a5c78719fe7e8 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -275,7 +275,7 @@ static void module_assert_mutex_or_preempt(void) #endif } -#ifdef CONFIG_MODULE_SIG +#if defined(CONFIG_MODULE_SIG) && !defined(CONFIG_MODULE_SIG_PROTECT) static bool sig_enforce = IS_ENABLED(CONFIG_MODULE_SIG_FORCE); module_param(sig_enforce, bool_enable_only, 0644); @@ -2323,6 +2323,13 @@ static int verify_exported_symbols(struct module *mod) for (i = 0; i < ARRAY_SIZE(arr); i++) { for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) { + if (!mod->sig_ok && gki_is_module_exported_symbol( + kernel_symbol_name(s))) { + pr_err("%s: exporting protected symbol(%s)\n", + mod->name, kernel_symbol_name(s)); + return -EACCES; + } + if (find_symbol(kernel_symbol_name(s), &owner, NULL, NULL, true, false)) { pr_err("%s: exports duplicate symbol %s" @@ -2389,6 +2396,13 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) break; case SHN_UNDEF: + if (!mod->sig_ok && + gki_is_module_protected_symbol(name)) { + pr_err("%s: is not an Android GKI signed module. It can not access protected symbol: %s\n", + mod->name, name); + return -EACCES; + } + ksym = resolve_symbol_wait(mod, info, name); /* Ok if resolved. */ if (ksym && !IS_ERR(ksym)) { @@ -2965,7 +2979,15 @@ static int module_sig_check(struct load_info *info, int flags) return -EKEYREJECTED; } +/* + * ANDROID: GKI: Do not prevent loading of unsigned modules; + * as all modules except GKI modules are not signed. + */ +#ifndef CONFIG_MODULE_SIG_PROTECT return security_locked_down(LOCKDOWN_MODULE_SIGNATURE); +#else + return 0; +#endif } #else /* !CONFIG_MODULE_SIG */ static int module_sig_check(struct load_info *info, int flags) @@ -3739,12 +3761,6 @@ static noinline int do_init_module(struct module *mod) } freeinit->module_init = mod->init_layout.base; - /* - * We want to find out whether @mod uses async during init. Clear - * PF_USED_ASYNC. async_schedule*() will set it. - */ - current->flags &= ~PF_USED_ASYNC; - do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) @@ -3770,22 +3786,13 @@ static noinline int do_init_module(struct module *mod) /* * We need to finish all async code before the module init sequence - * is done. This has potential to deadlock. For example, a newly - * detected block device can trigger request_module() of the - * default iosched from async probing task. Once userland helper - * reaches here, async_synchronize_full() will wait on the async - * task waiting on request_module() and deadlock. - * - * This deadlock is avoided by perfomring async_synchronize_full() - * iff module init queued any async jobs. This isn't a full - * solution as it will deadlock the same if module loading from - * async jobs nests more than once; however, due to the various - * constraints, this hack seems to be the best option for now. - * Please refer to the following thread for details. + * is done. This has potential to deadlock if synchronous module + * loading is requested from async (which is not allowed!). * - * http://thread.gmane.org/gmane.linux.kernel/1420814 + * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous + * request_module() from async workers") for more details. */ - if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) + if (!mod->async_probe_requested) async_synchronize_full(); ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base + @@ -4049,6 +4056,8 @@ static int load_module(struct load_info *info, const char __user *uargs, "kernel\n", mod->name); add_taint_module(mod, TAINT_UNSIGNED_MODULE, LOCKDEP_STILL_OK); } +#else + mod->sig_ok = 0; #endif /* To avoid stressing percpu allocator, do this once we're unique. */ diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 5caf59d6933167b4f14affa29a09bf0e5e15e120..41430128d120ff9efb3e61b985bc94bf7fdd06a6 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -107,8 +107,7 @@ static void em_debug_remove_pd(struct device *dev) {} static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, int nr_states, struct em_data_callback *cb) { - unsigned long opp_eff, prev_opp_eff = ULONG_MAX; - unsigned long power, freq, prev_freq = 0; + unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX; struct em_perf_state *table; int i, ret; u64 fmax; @@ -153,27 +152,21 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, table[i].power = power; table[i].frequency = prev_freq = freq; - - /* - * The hertz/watts efficiency ratio should decrease as the - * frequency grows on sane platforms. But this isn't always - * true in practice so warn the user if a higher OPP is more - * power efficient than a lower one. - */ - opp_eff = freq / power; - if (opp_eff >= prev_opp_eff) - dev_dbg(dev, "EM: hertz/watts ratio non-monotonically decreasing: em_perf_state %d >= em_perf_state%d\n", - i, i - 1); - prev_opp_eff = opp_eff; } /* Compute the cost of each performance state. */ fmax = (u64) table[nr_states - 1].frequency; - for (i = 0; i < nr_states; i++) { + for (i = nr_states - 1; i >= 0; i--) { unsigned long power_res = em_scale_power(table[i].power); table[i].cost = div64_u64(fmax * power_res, table[i].frequency); + if (table[i].cost >= prev_cost) { + dev_dbg(dev, "EM: OPP:%lu is inefficient\n", + table[i].frequency); + } else { + prev_cost = table[i].cost; + } } pd->table = table; diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index da0b419141779960541d743b42b7269e77cc66bf..b13fe337fa51e179c8ea51670c696038822b7b76 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -688,7 +688,7 @@ static int load_image_and_restore(void) goto Unlock; error = swsusp_read(&flags); - swsusp_close(FMODE_READ); + swsusp_close(FMODE_READ | FMODE_EXCL); if (!error) error = hibernation_restore(flags & SF_PLATFORM_MODE); @@ -978,7 +978,7 @@ static int software_resume(void) /* The snapshot device should not be opened while we're running */ if (!hibernate_acquire()) { error = -EBUSY; - swsusp_close(FMODE_READ); + swsusp_close(FMODE_READ | FMODE_EXCL); goto Unlock; } @@ -1013,7 +1013,7 @@ static int software_resume(void) pm_pr_dbg("Hibernation image not present or could not be loaded.\n"); return error; Close_Finish: - swsusp_close(FMODE_READ); + swsusp_close(FMODE_READ | FMODE_EXCL); goto Finish; } diff --git a/kernel/power/main.c b/kernel/power/main.c index 0aefd6f57e0acd0bf0fd726fcc6cd6c700ec7312..d6140ed15d0b17098e8e28ec981e33759a26790a 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -504,7 +504,10 @@ static ssize_t pm_wakeup_irq_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return pm_wakeup_irq ? sprintf(buf, "%u\n", pm_wakeup_irq) : -ENODATA; + if (!pm_wakeup_irq()) + return -ENODATA; + + return sprintf(buf, "%u\n", pm_wakeup_irq()); } power_attr_ro(pm_wakeup_irq); diff --git a/kernel/power/process.c b/kernel/power/process.c index 0170cf3147578a4a1fa630a6714184e3ae33bef8..ba51823759eac9d82c60a54920c98cfa55e74119 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -143,7 +143,7 @@ int freeze_processes(void) if (!pm_freezing) atomic_inc(&system_freezing_cnt); - pm_wakeup_clear(true); + pm_wakeup_clear(0); pr_info("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index a3491b29c5ccca4b9aa657c74c965a948c74d9d3..6a71543c85232e2d406e1faa9cd0e2cabf267743 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -944,8 +944,7 @@ static void memory_bm_recycle(struct memory_bitmap *bm) * Register a range of page frames the contents of which should not be saved * during hibernation (to be used in the early initialization code). */ -void __init __register_nosave_region(unsigned long start_pfn, - unsigned long end_pfn, int use_kmalloc) +void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn) { struct nosave_region *region; @@ -961,18 +960,12 @@ void __init __register_nosave_region(unsigned long start_pfn, goto Report; } } - if (use_kmalloc) { - /* During init, this shouldn't fail */ - region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); - BUG_ON(!region); - } else { - /* This allocation cannot fail */ - region = memblock_alloc(sizeof(struct nosave_region), - SMP_CACHE_BYTES); - if (!region) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct nosave_region)); - } + /* This allocation cannot fail */ + region = memblock_alloc(sizeof(struct nosave_region), + SMP_CACHE_BYTES); + if (!region) + panic("%s: Failed to allocate %zu bytes\n", __func__, + sizeof(struct nosave_region)); region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(®ion->list, &nosave_regions); diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 54595837730a71fbfbf7f47c20ac09f429992720..0b20b2a456a3e216179637ff04c986803b623225 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -139,9 +139,7 @@ static void s2idle_loop(void) break; } - pm_wakeup_clear(false); clear_wakeup_reasons(); - s2idle_enter(); } diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 72e33054a2e1b72d7b9c529d59105a51ea9dc9e6..25e7cb96bb8840ed7f4977d78774056fff8f55db 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -299,7 +299,7 @@ static int hib_submit_io(int op, int op_flags, pgoff_t page_off, void *addr, return error; } -static blk_status_t hib_wait_io(struct hib_bio_batch *hb) +static int hib_wait_io(struct hib_bio_batch *hb) { /* * We are relying on the behavior of blk_plug that a thread with @@ -1521,9 +1521,10 @@ end: int swsusp_check(void) { int error; + void *holder; hib_resume_bdev = blkdev_get_by_dev(swsusp_resume_device, - FMODE_READ, NULL); + FMODE_READ | FMODE_EXCL, &holder); if (!IS_ERR(hib_resume_bdev)) { set_blocksize(hib_resume_bdev, PAGE_SIZE); clear_page(swsusp_header); @@ -1545,7 +1546,7 @@ int swsusp_check(void) put: if (error) - blkdev_put(hib_resume_bdev, FMODE_READ); + blkdev_put(hib_resume_bdev, FMODE_READ | FMODE_EXCL); else pr_debug("Image signature found, resuming\n"); } else { diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 105df4dfc783971acdd1d17ee2971cb892c97d02..52571dcad768b988eaadbd3ce98a4ac42dd2f7dd 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -39,23 +39,20 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) { struct rb_node *node; struct wakelock *wl; - char *str = buf; - char *end = buf + PAGE_SIZE; + int len = 0; mutex_lock(&wakelocks_lock); for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { wl = rb_entry(node, struct wakelock, node); if (wl->ws->active == show_active) - str += scnprintf(str, end - str, "%s ", wl->name); + len += sysfs_emit_at(buf, len, "%s ", wl->name); } - if (str > buf) - str--; - str += scnprintf(str, end - str, "\n"); + len += sysfs_emit_at(buf, len, "\n"); mutex_unlock(&wakelocks_lock); - return (str - buf); + return len; } #if CONFIG_PM_WAKELOCKS_LIMIT > 0 diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index de278ccc3940d29cf0228e1fee56655398e110a9..db4a07651bc1ddc304d2f9628fa89a3621b3aa01 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2222,8 +2222,15 @@ static int __init console_setup(char *str) char *s, *options, *brl_options = NULL; int idx; - if (str[0] == 0) + /* + * console="" or console=null have been suggested as a way to + * disable console output. Use ttynull that has been created + * for exacly this purpose. + */ + if (str[0] == 0 || strcmp(str, "null") == 0) { + __add_preferred_console("ttynull", 0, NULL, NULL, true); return 1; + } if (_braille_console_setup(&str, &brl_options)) return 1; diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index b71e21f73c40364e3fb78b03a5a30cab22e192ea..97bfe2a891dbc9766329ed1f4fa61c1f2b25b83e 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -212,6 +212,20 @@ config RCU_BOOST_DELAY Accept the default if unsure. +config RCU_EXP_KTHREAD + bool "Perform RCU expedited work in a real-time kthread" + depends on RCU_BOOST && RCU_EXPERT + default !PREEMPT_RT && NR_CPUS <= 32 + help + Use this option to further reduce the latencies of expedited + grace periods at the expense of being more disruptive. + + This option is disabled by default on PREEMPT_RT=y kernels which + disable expedited grace periods after boot by unconditionally + setting rcupdate.rcu_normal_after_boot=1. + + Accept the default if unsure. + config RCU_NOCB_CPU bool "Offload RCU callback processing from boot-selected CPUs" depends on TREE_RCU diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index fcf95d1eec69ab1bb70612bae2d59a410bdc43c6..7befcfae5fb5ac84d55988f122d90bd903fdfa47 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -524,7 +524,12 @@ int rcu_get_gp_kthreads_prio(void); void rcu_fwd_progress_check(unsigned long j); void rcu_force_quiescent_state(void); extern struct workqueue_struct *rcu_gp_wq; +#ifdef CONFIG_RCU_EXP_KTHREAD +extern struct kthread_worker *rcu_exp_gp_kworker; +extern struct kthread_worker *rcu_exp_par_gp_kworker; +#else /* !CONFIG_RCU_EXP_KTHREAD */ extern struct workqueue_struct *rcu_par_gp_wq; +#endif /* CONFIG_RCU_EXP_KTHREAD */ #endif /* #else #ifdef CONFIG_TINY_RCU */ #ifdef CONFIG_RCU_NOCB_CPU diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 916ea4f66e4b2380c3cc7d3bd1c10bb0601cd3c9..6c1aea48a79a1c5938998150daf32c1ed1cca779 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -1238,28 +1238,34 @@ static void rcutorture_one_extend(int *readstate, int newstate, /* First, put new protection in place to avoid critical-section gap. */ if (statesnew & RCUTORTURE_RDR_BH) local_bh_disable(); + if (statesnew & RCUTORTURE_RDR_RBH) + rcu_read_lock_bh(); if (statesnew & RCUTORTURE_RDR_IRQ) local_irq_disable(); if (statesnew & RCUTORTURE_RDR_PREEMPT) preempt_disable(); - if (statesnew & RCUTORTURE_RDR_RBH) - rcu_read_lock_bh(); if (statesnew & RCUTORTURE_RDR_SCHED) rcu_read_lock_sched(); if (statesnew & RCUTORTURE_RDR_RCU) idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT; - /* Next, remove old protection, irq first due to bh conflict. */ + /* + * Next, remove old protection, in decreasing order of strength + * to avoid unlock paths that aren't safe in the stronger + * context. Namely: BH can not be enabled with disabled interrupts. + * Additionally PREEMPT_RT requires that BH is enabled in preemptible + * context. + */ if (statesold & RCUTORTURE_RDR_IRQ) local_irq_enable(); - if (statesold & RCUTORTURE_RDR_BH) - local_bh_enable(); if (statesold & RCUTORTURE_RDR_PREEMPT) preempt_enable(); - if (statesold & RCUTORTURE_RDR_RBH) - rcu_read_unlock_bh(); if (statesold & RCUTORTURE_RDR_SCHED) rcu_read_unlock_sched(); + if (statesold & RCUTORTURE_RDR_BH) + local_bh_enable(); + if (statesold & RCUTORTURE_RDR_RBH) + rcu_read_unlock_bh(); if (statesold & RCUTORTURE_RDR_RCU) { bool lockit = !statesnew && !(torture_random(trsp) & 0xffff); @@ -1302,6 +1308,9 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) int mask = rcutorture_extend_mask_max(); unsigned long randmask1 = torture_random(trsp) >> 8; unsigned long randmask2 = randmask1 >> 3; + unsigned long preempts = RCUTORTURE_RDR_PREEMPT | RCUTORTURE_RDR_SCHED; + unsigned long preempts_irq = preempts | RCUTORTURE_RDR_IRQ; + unsigned long bhs = RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH; WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); /* Mostly only one bit (need preemption!), sometimes lots of bits. */ @@ -1309,11 +1318,26 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp) mask = mask & randmask2; else mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS)); - /* Can't enable bh w/irq disabled. */ - if ((mask & RCUTORTURE_RDR_IRQ) && - ((!(mask & RCUTORTURE_RDR_BH) && (oldmask & RCUTORTURE_RDR_BH)) || - (!(mask & RCUTORTURE_RDR_RBH) && (oldmask & RCUTORTURE_RDR_RBH)))) - mask |= RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH; + + /* + * Can't enable bh w/irq disabled. + */ + if (mask & RCUTORTURE_RDR_IRQ) + mask |= oldmask & bhs; + + /* + * Ideally these sequences would be detected in debug builds + * (regardless of RT), but until then don't stop testing + * them on non-RT. + */ + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + /* Can't modify BH in atomic context */ + if (oldmask & preempts_irq) + mask &= ~bhs; + if ((oldmask | mask) & preempts_irq) + mask |= oldmask & bhs; + } + return mask ?: RCUTORTURE_RDR_RCU; } diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index b338f514ee5aa996b0efb8a38d63055d62c9cade..7c05c5ab78653c5f0d3a62dd7abb847ef63878ec 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -197,6 +197,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) * This loop is terminated by the system going down. ;-) */ for (;;) { + set_tasks_gp_state(rtp, RTGS_WAIT_CBS); /* Pick up any new callbacks. */ raw_spin_lock_irqsave(&rtp->cbs_lock, flags); @@ -236,8 +237,6 @@ static int __noreturn rcu_tasks_kthread(void *arg) } /* Paranoid sleep to keep this from entering a tight loop */ schedule_timeout_idle(rtp->gp_sleep); - - set_tasks_gp_state(rtp, RTGS_WAIT_CBS); } } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8c81c05c4236aa55a418f91ed683119d57dfece7..5b5f80ad0e7aaa1463d89000993ca5efe7ad4a69 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1581,10 +1581,11 @@ static void __maybe_unused rcu_advance_cbs_nowake(struct rcu_node *rnp, struct rcu_data *rdp) { rcu_lockdep_assert_cblist_protected(rdp); - if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || - !raw_spin_trylock_rcu_node(rnp)) + if (!rcu_seq_state(rcu_seq_current(&rnp->gp_seq)) || !raw_spin_trylock_rcu_node(rnp)) return; - WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp)); + // The grace period cannot end while we hold the rcu_node lock. + if (rcu_seq_state(rcu_seq_current(&rnp->gp_seq))) + WARN_ON_ONCE(rcu_advance_cbs(rnp, rdp)); raw_spin_unlock_rcu_node(rnp); } @@ -1888,7 +1889,7 @@ static void rcu_gp_fqs(bool first_time) struct rcu_node *rnp = rcu_get_root(); WRITE_ONCE(rcu_state.gp_activity, jiffies); - rcu_state.n_force_qs++; + WRITE_ONCE(rcu_state.n_force_qs, rcu_state.n_force_qs + 1); if (first_time) { /* Collect dyntick-idle snapshots. */ force_qs_rnp(dyntick_save_progress_counter); @@ -2530,7 +2531,7 @@ static void rcu_do_batch(struct rcu_data *rdp) /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ if (count == 0 && rdp->qlen_last_fqs_check != 0) { rdp->qlen_last_fqs_check = 0; - rdp->n_force_qs_snap = rcu_state.n_force_qs; + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); } else if (count < rdp->qlen_last_fqs_check - qhimark) rdp->qlen_last_fqs_check = count; @@ -2876,10 +2877,10 @@ static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, } else { /* Give the grace period a kick. */ rdp->blimit = DEFAULT_MAX_RCU_BLIMIT; - if (rcu_state.n_force_qs == rdp->n_force_qs_snap && + if (READ_ONCE(rcu_state.n_force_qs) == rdp->n_force_qs_snap && rcu_segcblist_first_pend_cb(&rdp->cblist) != head) rcu_force_quiescent_state(); - rdp->n_force_qs_snap = rcu_state.n_force_qs; + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); } } @@ -2959,7 +2960,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func) head->func = func; head->next = NULL; local_irq_save(flags); - kasan_record_aux_stack(head); + kasan_record_aux_stack_noalloc(head); rdp = this_cpu_ptr(&rcu_data); /* Add the callback to our list. */ @@ -3496,6 +3497,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) goto unlock_return; } + kasan_record_aux_stack_noalloc(ptr); success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr); if (!success) { run_page_cache_worker(krcp); @@ -3986,7 +3988,7 @@ int rcutree_prepare_cpu(unsigned int cpu) /* Set up local state, ensuring consistent view of global state. */ raw_spin_lock_irqsave_rcu_node(rnp, flags); rdp->qlen_last_fqs_check = 0; - rdp->n_force_qs_snap = rcu_state.n_force_qs; + rdp->n_force_qs_snap = READ_ONCE(rcu_state.n_force_qs); rdp->blimit = blimit; if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ !rcu_segcblist_is_offloaded(&rdp->cblist)) @@ -4232,6 +4234,51 @@ static int rcu_pm_notify(struct notifier_block *self, return NOTIFY_OK; } +#ifdef CONFIG_RCU_EXP_KTHREAD +struct kthread_worker *rcu_exp_gp_kworker; +struct kthread_worker *rcu_exp_par_gp_kworker; + +static void __init rcu_start_exp_gp_kworkers(void) +{ + const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker"; + const char *gp_kworker_name = "rcu_exp_gp_kthread_worker"; + struct sched_param param = { .sched_priority = kthread_prio }; + + rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name); + if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { + pr_err("Failed to create %s!\n", gp_kworker_name); + return; + } + + rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name); + if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) { + pr_err("Failed to create %s!\n", par_gp_kworker_name); + kthread_destroy_worker(rcu_exp_gp_kworker); + return; + } + + sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, ¶m); + sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO, + ¶m); +} + +static inline void rcu_alloc_par_gp_wq(void) +{ +} +#else /* !CONFIG_RCU_EXP_KTHREAD */ +struct workqueue_struct *rcu_par_gp_wq; + +static void __init rcu_start_exp_gp_kworkers(void) +{ +} + +static inline void rcu_alloc_par_gp_wq(void) +{ + rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); + WARN_ON(!rcu_par_gp_wq); +} +#endif /* CONFIG_RCU_EXP_KTHREAD */ + /* * Spawn the kthreads that handle RCU's grace periods. */ @@ -4277,6 +4324,8 @@ static int __init rcu_spawn_gp_kthread(void) rcu_spawn_nocb_kthreads(); rcu_spawn_boost_kthreads(); rcu_spawn_core_kthreads(); + /* Create kthread worker for expedited GPs */ + rcu_start_exp_gp_kworkers(); return 0; } early_initcall(rcu_spawn_gp_kthread); @@ -4499,7 +4548,6 @@ static void __init rcu_dump_rcu_node_tree(void) } struct workqueue_struct *rcu_gp_wq; -struct workqueue_struct *rcu_par_gp_wq; static void __init kfree_rcu_batch_init(void) { @@ -4552,8 +4600,7 @@ void __init rcu_init(void) /* Create workqueue for expedited GPs and for Tree SRCU. */ rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0); WARN_ON(!rcu_gp_wq); - rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); - WARN_ON(!rcu_par_gp_wq); + rcu_alloc_par_gp_wq(); srcu_init(); /* Fill in default value for rcutree.qovld boot parameter. */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index e4f66b8f7c470109d30ab62458b8ee5b2d736262..12432de6c92039bb6f7b5e33d72975bac430002d 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -23,7 +24,11 @@ /* Communicate arguments to a workqueue handler. */ struct rcu_exp_work { unsigned long rew_s; +#ifdef CONFIG_RCU_EXP_KTHREAD + struct kthread_work rew_work; +#else struct work_struct rew_work; +#endif /* CONFIG_RCU_EXP_KTHREAD */ }; /* RCU's kthread states for tracing. */ diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6e1ac5b41f6950882e0956c5833521f0858bc01d..bde4080a936f11aed2f36c60a81bad6acc5f9ae6 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -334,15 +334,13 @@ fastpath: * Select the CPUs within the specified rcu_node that the upcoming * expedited grace period needs to wait for. */ -static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) +static void __sync_rcu_exp_select_node_cpus(struct rcu_exp_work *rewp) { int cpu; unsigned long flags; unsigned long mask_ofl_test; unsigned long mask_ofl_ipi; int ret; - struct rcu_exp_work *rewp = - container_of(wp, struct rcu_exp_work, rew_work); struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew); raw_spin_lock_irqsave_rcu_node(rnp, flags); @@ -387,6 +385,7 @@ retry_ipi: continue; } if (get_cpu() == cpu) { + mask_ofl_test |= mask; put_cpu(); continue; } @@ -416,13 +415,119 @@ retry_ipi: rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false); } +static void rcu_exp_sel_wait_wake(unsigned long s); + +#ifdef CONFIG_RCU_EXP_KTHREAD +static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) +{ + struct rcu_exp_work *rewp = + container_of(wp, struct rcu_exp_work, rew_work); + + __sync_rcu_exp_select_node_cpus(rewp); +} + +static inline bool rcu_gp_par_worker_started(void) +{ + return !!READ_ONCE(rcu_exp_par_gp_kworker); +} + +static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) +{ + kthread_init_work(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); + /* + * Use rcu_exp_par_gp_kworker, because flushing a work item from + * another work item on the same kthread worker can result in + * deadlock. + */ + kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work); +} + +static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp) +{ + kthread_flush_work(&rnp->rew.rew_work); +} + +/* + * Work-queue handler to drive an expedited grace period forward. + */ +static void wait_rcu_exp_gp(struct kthread_work *wp) +{ + struct rcu_exp_work *rewp; + + rewp = container_of(wp, struct rcu_exp_work, rew_work); + rcu_exp_sel_wait_wake(rewp->rew_s); +} + +static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew) +{ + kthread_init_work(&rew->rew_work, wait_rcu_exp_gp); + kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work); +} + +static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew) +{ +} +#else /* !CONFIG_RCU_EXP_KTHREAD */ +static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) +{ + struct rcu_exp_work *rewp = + container_of(wp, struct rcu_exp_work, rew_work); + + __sync_rcu_exp_select_node_cpus(rewp); +} + +static inline bool rcu_gp_par_worker_started(void) +{ + return !!READ_ONCE(rcu_par_gp_wq); +} + +static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) +{ + int cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1); + + INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); + /* If all offline, queue the work on an unbound CPU. */ + if (unlikely(cpu > rnp->grphi - rnp->grplo)) + cpu = WORK_CPU_UNBOUND; + else + cpu += rnp->grplo; + queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work); +} + +static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp) +{ + flush_work(&rnp->rew.rew_work); +} + +/* + * Work-queue handler to drive an expedited grace period forward. + */ +static void wait_rcu_exp_gp(struct work_struct *wp) +{ + struct rcu_exp_work *rewp; + + rewp = container_of(wp, struct rcu_exp_work, rew_work); + rcu_exp_sel_wait_wake(rewp->rew_s); +} + +static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew) +{ + INIT_WORK_ONSTACK(&rew->rew_work, wait_rcu_exp_gp); + queue_work(rcu_gp_wq, &rew->rew_work); +} + +static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew) +{ + destroy_work_on_stack(&rew->rew_work); +} +#endif /* CONFIG_RCU_EXP_KTHREAD */ + /* * Select the nodes that the upcoming expedited grace period needs * to wait for. */ static void sync_rcu_exp_select_cpus(void) { - int cpu; struct rcu_node *rnp; trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("reset")); @@ -434,28 +539,21 @@ static void sync_rcu_exp_select_cpus(void) rnp->exp_need_flush = false; if (!READ_ONCE(rnp->expmask)) continue; /* Avoid early boot non-existent wq. */ - if (!READ_ONCE(rcu_par_gp_wq) || + if (!rcu_gp_par_worker_started() || rcu_scheduler_active != RCU_SCHEDULER_RUNNING || rcu_is_last_leaf_node(rnp)) { - /* No workqueues yet or last leaf, do direct call. */ + /* No worker started yet or last leaf, do direct call. */ sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work); continue; } - INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); - cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1); - /* If all offline, queue the work on an unbound CPU. */ - if (unlikely(cpu > rnp->grphi - rnp->grplo)) - cpu = WORK_CPU_UNBOUND; - else - cpu += rnp->grplo; - queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work); + sync_rcu_exp_select_cpus_queue_work(rnp); rnp->exp_need_flush = true; } - /* Wait for workqueue jobs (if any) to complete. */ + /* Wait for jobs (if any) to complete. */ rcu_for_each_leaf_node(rnp) if (rnp->exp_need_flush) - flush_work(&rnp->rew.rew_work); + sync_rcu_exp_select_cpus_flush_work(rnp); } /* @@ -618,17 +716,6 @@ static void rcu_exp_sel_wait_wake(unsigned long s) rcu_exp_wait_wake(s); } -/* - * Work-queue handler to drive an expedited grace period forward. - */ -static void wait_rcu_exp_gp(struct work_struct *wp) -{ - struct rcu_exp_work *rewp; - - rewp = container_of(wp, struct rcu_exp_work, rew_work); - rcu_exp_sel_wait_wake(rewp->rew_s); -} - #ifdef CONFIG_PREEMPT_RCU /* @@ -760,7 +847,7 @@ static void sync_sched_exp_online_cleanup(int cpu) my_cpu = get_cpu(); /* Quiescent state either not needed or already requested, leave. */ if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || - __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) { + rdp->cpu_no_qs.b.exp) { put_cpu(); return; } @@ -844,20 +931,19 @@ void synchronize_rcu_expedited(void) } else { /* Marshall arguments & schedule the expedited grace period. */ rew.rew_s = s; - INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); - queue_work(rcu_gp_wq, &rew.rew_work); + synchronize_rcu_expedited_queue_work(&rew); } /* Wait for expedited grace period to complete. */ rnp = rcu_get_root(); wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], sync_exp_work_done(s)); - smp_mb(); /* Workqueue actions happen before return. */ + smp_mb(); /* Work actions happen before return. */ /* Let the next expedited grace period start. */ mutex_unlock(&rcu_state.exp_mutex); if (likely(!boottime)) - destroy_work_on_stack(&rew.rew_work); + synchronize_rcu_expedited_destroy_work(&rew); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index c5091aeaa37bb620797d29e12d4602bf90d5e0f0..39e8f78182a8558f03466e7179c05ddc340b9ec3 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -531,16 +531,16 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } - /* Unboost if we were boosted. */ - if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) - rt_mutex_futex_unlock(&rnp->boost_mtx); - /* * If this was the last task on the expedited lists, * then we need to report up the rcu_node hierarchy. */ if (!empty_exp && empty_exp_now) rcu_report_exp_rnp(rnp, true); + + /* Unboost if we were boosted. */ + if (IS_ENABLED(CONFIG_RCU_BOOST) && drop_boost_mutex) + rt_mutex_futex_unlock(&rnp->boost_mtx); } else { local_irq_restore(flags); } @@ -628,7 +628,7 @@ static void rcu_read_unlock_special(struct task_struct *t) set_tsk_need_resched(current); set_preempt_need_resched(); if (IS_ENABLED(CONFIG_IRQ_WORK) && irqs_were_disabled && - !rdp->defer_qs_iw_pending && exp) { + !rdp->defer_qs_iw_pending && exp && cpu_online(rdp->cpu)) { // Get scheduler to re-evaluate and call hooks. // If !IRQ_WORK, FQS scan will eventually IPI. init_irq_work(&rdp->defer_qs_iw, @@ -1646,7 +1646,11 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force, rcu_nocb_unlock_irqrestore(rdp, flags); return; } - del_timer(&rdp->nocb_timer); + + if (READ_ONCE(rdp->nocb_defer_wakeup) > RCU_NOCB_WAKE_NOT) { + WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); + del_timer(&rdp->nocb_timer); + } rcu_nocb_unlock_irqrestore(rdp, flags); raw_spin_lock_irqsave(&rdp_gp->nocb_gp_lock, flags); if (force || READ_ONCE(rdp_gp->nocb_gp_sleep)) { @@ -2164,7 +2168,6 @@ static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp) return; } ndw = READ_ONCE(rdp->nocb_defer_wakeup); - WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake")); } @@ -2573,7 +2576,7 @@ static void rcu_bind_gp_kthread(void) } /* Record the current task on dyntick-idle entry. */ -static void noinstr rcu_dynticks_task_enter(void) +static __always_inline void rcu_dynticks_task_enter(void) { #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) WRITE_ONCE(current->rcu_tasks_idle_cpu, smp_processor_id()); @@ -2581,7 +2584,7 @@ static void noinstr rcu_dynticks_task_enter(void) } /* Record no current task on dyntick-idle exit. */ -static void noinstr rcu_dynticks_task_exit(void) +static __always_inline void rcu_dynticks_task_exit(void) { #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) WRITE_ONCE(current->rcu_tasks_idle_cpu, -1); @@ -2589,7 +2592,7 @@ static void noinstr rcu_dynticks_task_exit(void) } /* Turn on heavyweight RCU tasks trace readers on idle/user entry. */ -static void rcu_dynticks_task_trace_enter(void) +static __always_inline void rcu_dynticks_task_trace_enter(void) { #ifdef CONFIG_TASKS_TRACE_RCU if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) @@ -2598,7 +2601,7 @@ static void rcu_dynticks_task_trace_enter(void) } /* Turn off heavyweight RCU tasks trace readers on idle/user exit. */ -static void rcu_dynticks_task_trace_exit(void) +static __always_inline void rcu_dynticks_task_trace_exit(void) { #ifdef CONFIG_TASKS_TRACE_RCU if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) diff --git a/kernel/rseq.c b/kernel/rseq.c index a4f86a9d6937cdfa2f13d1dcc9be863c1943d06f..0077713bf2400126caacd3f8900143adab5edea3 100644 --- a/kernel/rseq.c +++ b/kernel/rseq.c @@ -268,9 +268,16 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs) return; if (unlikely(!access_ok(t->rseq, sizeof(*t->rseq)))) goto error; - ret = rseq_ip_fixup(regs); - if (unlikely(ret < 0)) - goto error; + /* + * regs is NULL if and only if the caller is in a syscall path. Skip + * fixup and leave rseq_cs as is so that rseq_sycall() will detect and + * kill a misbehaving userspace on debug kernels. + */ + if (regs) { + ret = rseq_ip_fixup(regs); + if (unlikely(ret < 0)) + goto error; + } if (unlikely(rseq_update_cpu_id(t))) goto error; return; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 891f85bb815b1a3b0195d948b7c486e70fcaee5e..507ed2aed0e8ee8d581ddfe8de13e867e40fb290 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1605,7 +1605,7 @@ static void __init init_uclamp_rq(struct rq *rq) }; } - rq->uclamp_flags = 0; + rq->uclamp_flags = UCLAMP_FLAG_IDLE; } static void __init init_uclamp(void) @@ -2843,6 +2843,9 @@ EXPORT_SYMBOL_GPL(wake_up_if_idle); bool cpus_share_cache(int this_cpu, int that_cpu) { + if (this_cpu == that_cpu) + return true; + return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); } @@ -3113,6 +3116,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) if (READ_ONCE(p->on_rq) && ttwu_runnable(p, wake_flags)) goto unlock; + if (p->state & TASK_UNINTERRUPTIBLE) + trace_sched_blocked_reason(p); + #ifdef CONFIG_SMP /* * Ensure we load p->on_cpu _after_ p->on_rq, otherwise it would be @@ -3448,8 +3454,6 @@ static inline void init_schedstats(void) {} */ int sched_fork(unsigned long clone_flags, struct task_struct *p) { - unsigned long flags; - trace_android_rvh_sched_fork(p); __sched_fork(clone_flags, p); @@ -3499,24 +3503,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) init_entity_runnable_average(&p->se); trace_android_rvh_finish_prio_fork(p); - /* - * The child is not yet in the pid-hash so no cgroup attach races, - * and the cgroup is pinned to this child due to cgroup_fork() - * is ran before sched_fork(). - * - * Silence PROVE_RCU. - */ - raw_spin_lock_irqsave(&p->pi_lock, flags); - rseq_migrate(p); - /* - * We're setting the CPU for the first time, we don't migrate, - * so use __set_task_cpu(). - */ - __set_task_cpu(p, smp_processor_id()); - if (p->sched_class->task_fork) - p->sched_class->task_fork(p); - raw_spin_unlock_irqrestore(&p->pi_lock, flags); - #ifdef CONFIG_SCHED_INFO if (likely(sched_info_on())) memset(&p->sched_info, 0, sizeof(p->sched_info)); @@ -3532,8 +3518,29 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) return 0; } -void sched_post_fork(struct task_struct *p) +void sched_post_fork(struct task_struct *p, struct kernel_clone_args *kargs) { + unsigned long flags; +#ifdef CONFIG_CGROUP_SCHED + struct task_group *tg; +#endif + + raw_spin_lock_irqsave(&p->pi_lock, flags); +#ifdef CONFIG_CGROUP_SCHED + tg = container_of(kargs->cset->subsys[cpu_cgrp_id], + struct task_group, css); + p->sched_task_group = autogroup_task_group(p, tg); +#endif + rseq_migrate(p); + /* + * We're setting the CPU for the first time, we don't migrate, + * so use __set_task_cpu(). + */ + __set_task_cpu(p, smp_processor_id()); + if (p->sched_class->task_fork) + p->sched_class->task_fork(p); + raw_spin_unlock_irqrestore(&p->pi_lock, flags); + uclamp_post_fork(p); } @@ -3996,6 +4003,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * finish_task_switch()'s mmdrop(). */ switch_mm_irqs_off(prev->active_mm, next->mm, next); + lru_gen_use_mm(next->mm); if (!prev->mm) { // from kernel /* will mmdrop() in finish_task_switch(). */ @@ -4611,6 +4619,23 @@ restart: BUG(); } +static bool __task_can_run(struct task_struct *prev) +{ + if (__fatal_signal_pending(prev)) + return true; + + if (!frozen_or_skipped(prev)) + return true; + + /* + * We can't safely go back on the runqueue if we're an asymmetric + * task skipping the freezer. Doing so can lead to migration failures + * later on if there aren't any suitable CPUs left around for us to + * move to. + */ + return task_cpu_possible_mask(prev) == cpu_possible_mask; +} + /* * __schedule() is the main scheduler function. * @@ -4704,7 +4729,7 @@ static void __sched notrace __schedule(bool preempt) */ prev_state = prev->state; if (!preempt && prev_state) { - if (signal_pending_state(prev_state, prev)) { + if (signal_pending_state(prev_state, prev) && __task_can_run(prev)) { prev->state = TASK_RUNNING; } else { prev->sched_contributes_to_load = @@ -5527,6 +5552,10 @@ recheck: /* Normal users shall not reset the sched_reset_on_fork flag: */ if (p->sched_reset_on_fork && !reset_on_fork) return -EPERM; + + /* Can't change util-clamps */ + if (attr->sched_flags & SCHED_FLAG_UTIL_CLAMP) + return -EPERM; } if (user) { @@ -5879,6 +5908,16 @@ err_size: return -E2BIG; } +static void get_params(struct task_struct *p, struct sched_attr *attr) +{ + if (task_has_dl_policy(p)) + __getparam_dl(p, attr); + else if (task_has_rt_policy(p)) + attr->sched_priority = p->rt_priority; + else + attr->sched_nice = task_nice(p); +} + /** * sys_sched_setscheduler - set/change the scheduler policy and RT priority * @pid: the pid in question. @@ -5940,6 +5979,8 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, rcu_read_unlock(); if (likely(p)) { + if (attr.sched_flags & SCHED_FLAG_KEEP_PARAMS) + get_params(p, &attr); retval = sched_setattr(p, &attr); put_task_struct(p); } @@ -6088,12 +6129,8 @@ SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, kattr.sched_policy = p->policy; if (p->sched_reset_on_fork) kattr.sched_flags |= SCHED_FLAG_RESET_ON_FORK; - if (task_has_dl_policy(p)) - __getparam_dl(p, &kattr); - else if (task_has_rt_policy(p)) - kattr.sched_priority = p->rt_priority; - else - kattr.sched_nice = task_nice(p); + get_params(p, &kattr); + kattr.sched_flags &= SCHED_FLAG_ALL; #ifdef CONFIG_UCLAMP_TASK /* @@ -6743,7 +6780,7 @@ void show_state_filter(unsigned long state_filter) * NOTE: this function does not set the idle thread's NEED_RESCHED * flag, to make booting more robust. */ -void init_idle(struct task_struct *idle, int cpu) +void __init init_idle(struct task_struct *idle, int cpu) { struct rq *rq = cpu_rq(cpu); unsigned long flags; @@ -6757,9 +6794,6 @@ void init_idle(struct task_struct *idle, int cpu) idle->se.exec_start = sched_clock(); idle->flags |= PF_IDLE; - scs_task_reset(idle); - kasan_unpoison_task_stack(idle); - #ifdef CONFIG_SMP /* * Its possible that init_idle() gets called multiple times on a task, diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 0f7bc658ecec16174bad197e9a17569eae6b7425..2ee872716c5f2b4d23813ce07dfa401e09459ef0 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -104,6 +104,9 @@ static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, unsigned int next_freq) { if (!sg_policy->need_freq_update) { + s64 delta_ns = time - sg_policy->last_freq_update_time; + trace_android_vh_update_next_freq(sg_policy->policy, sg_policy->next_freq, + &next_freq, delta_ns); if (sg_policy->next_freq == next_freq) return false; } else { @@ -291,6 +294,11 @@ static unsigned long sugov_get_util(struct sugov_cpu *sg_cpu) struct rq *rq = cpu_rq(sg_cpu->cpu); unsigned long util = cpu_util_cfs(rq); unsigned long max = arch_scale_cpu_capacity(sg_cpu->cpu); + unsigned long ret = 0; + + trace_android_vh_sugov_get_util(sg_cpu->cpu, &ret); + if (ret) + return ret; sg_cpu->max = max; sg_cpu->bw_dl = cpu_bw_dl(rq); @@ -618,9 +626,17 @@ static struct attribute *sugov_attrs[] = { }; ATTRIBUTE_GROUPS(sugov); +static void sugov_tunables_free(struct kobject *kobj) +{ + struct gov_attr_set *attr_set = container_of(kobj, struct gov_attr_set, kobj); + + kfree(to_sugov_tunables(attr_set)); +} + static struct kobj_type sugov_tunables_ktype = { .default_groups = sugov_groups, .sysfs_ops = &governor_sysfs_ops, + .release = &sugov_tunables_free, }; /********************** cpufreq governor interface *********************/ @@ -720,12 +736,10 @@ static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_polic return tunables; } -static void sugov_tunables_free(struct sugov_tunables *tunables) +static void sugov_clear_global_tunables(void) { if (!have_governor_per_policy()) global_tunables = NULL; - - kfree(tunables); } static int sugov_init(struct cpufreq_policy *policy) @@ -788,7 +802,7 @@ out: fail: kobject_put(&tunables->attr_set.kobj); policy->governor_data = NULL; - sugov_tunables_free(tunables); + sugov_clear_global_tunables(); stop_kthread: sugov_kthread_stop(sg_policy); @@ -815,7 +829,7 @@ static void sugov_exit(struct cpufreq_policy *policy) count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); policy->governor_data = NULL; if (!count) - sugov_tunables_free(tunables); + sugov_clear_global_tunables(); mutex_unlock(&global_tunables_lock); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 343147802e6c30820ab6e5afaf190258613cd34c..7ffe3e2be4859f9bc8efa11634c5c8de64715b9a 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -155,10 +155,10 @@ void account_guest_time(struct task_struct *p, u64 cputime) /* Add guest time to cpustat. */ if (task_nice(p) > 0) { - cpustat[CPUTIME_NICE] += cputime; + task_group_account_field(p, CPUTIME_NICE, cputime); cpustat[CPUTIME_GUEST_NICE] += cputime; } else { - cpustat[CPUTIME_USER] += cputime; + task_group_account_field(p, CPUTIME_USER, cputime); cpustat[CPUTIME_GUEST] += cputime; } } diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 16230441b47af36c61bd0c2cb67ca13d305552da..2e45335678aef7027ed701c4a0cafe8ea1659402 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -17,6 +17,7 @@ */ #include "sched.h" #include "pelt.h" +#include struct dl_bandwidth def_dl_bandwidth; @@ -1675,6 +1676,11 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags) struct task_struct *curr; bool select_rq; struct rq *rq; + int target_cpu = -1; + + trace_android_rvh_select_task_rq_dl(p, cpu, sd_flag, flags, &target_cpu); + if (target_cpu >= 0) + return target_cpu; if (sd_flag != SD_BALANCE_WAKE) goto out; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f02c75acc15c1b8c78a5c11321cb662fbf4bc213..ff561e3763f4b6ad1c4605e8460c88e713541fc2 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -999,7 +999,6 @@ update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) } trace_sched_stat_blocked(tsk, delta); - trace_sched_blocked_reason(tsk); /* * Blocking time is in units of nanosecs, so shift by @@ -3388,7 +3387,6 @@ void set_task_rq_fair(struct sched_entity *se, se->avg.last_update_time = n_last_update_time; } - /* * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to * propagate its contribution. The key to this propagation is the invariant @@ -3456,7 +3454,6 @@ void set_task_rq_fair(struct sched_entity *se, * XXX: only do this for the part of runnable > running ? * */ - static inline void update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { @@ -3510,9 +3507,10 @@ update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cf static inline void update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq) { - long delta, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum; + long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum; unsigned long load_avg; u64 load_sum = 0; + s64 delta_sum; u32 divider; if (!runnable_sum) @@ -3559,13 +3557,13 @@ update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq load_sum = (s64)se_weight(se) * runnable_sum; load_avg = div_s64(load_sum, divider); - delta = load_avg - se->avg.load_avg; + delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum; + delta_avg = load_avg - se->avg.load_avg; se->avg.load_sum = runnable_sum; se->avg.load_avg = load_avg; - - add_positive(&cfs_rq->avg.load_avg, delta); - cfs_rq->avg.load_sum = cfs_rq->avg.load_avg * divider; + add_positive(&cfs_rq->avg.load_avg, delta_avg); + add_positive(&cfs_rq->avg.load_sum, delta_sum); } static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum) @@ -3681,15 +3679,27 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq) r = removed_load; sub_positive(&sa->load_avg, r); - sa->load_sum = sa->load_avg * divider; + sub_positive(&sa->load_sum, r * divider); r = removed_util; sub_positive(&sa->util_avg, r); - sa->util_sum = sa->util_avg * divider; + sub_positive(&sa->util_sum, r * divider); + /* + * Because of rounding, se->util_sum might ends up being +1 more than + * cfs->util_sum. Although this is not a problem by itself, detaching + * a lot of tasks with the rounding problem between 2 updates of + * util_avg (~1ms) can make cfs->util_sum becoming null whereas + * cfs_util_avg is not. + * Check that util_sum is still above its lower bound for the new + * util_avg. Given that period_contrib might have moved since the last + * sync, we are only sure that util_sum must be above or equal to + * util_avg * minimum possible divider + */ + sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER); r = removed_runnable; sub_positive(&sa->runnable_avg, r); - sa->runnable_sum = sa->runnable_avg * divider; + sub_positive(&sa->runnable_sum, r * divider); /* * removed_runnable is the unweighted version of removed_load so we @@ -3753,6 +3763,8 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s div_u64(se->avg.load_avg * se->avg.load_sum, se_weight(se)); } + trace_android_rvh_attach_entity_load_avg(cfs_rq, se); + enqueue_load_avg(cfs_rq, se); cfs_rq->avg.util_avg += se->avg.util_avg; cfs_rq->avg.util_sum += se->avg.util_sum; @@ -3776,17 +3788,13 @@ static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s */ static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { - /* - * cfs_rq->avg.period_contrib can be used for both cfs_rq and se. - * See ___update_load_avg() for details. - */ - u32 divider = get_pelt_divider(&cfs_rq->avg); + trace_android_rvh_detach_entity_load_avg(cfs_rq, se); dequeue_load_avg(cfs_rq, se); sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg); - cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider; + sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum); sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg); - cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider; + sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum); add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum); @@ -3821,6 +3829,8 @@ static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *s decayed = update_cfs_rq_load_avg(now, cfs_rq); decayed |= propagate_entity_load_avg(se); + trace_android_rvh_update_load_avg(now, cfs_rq, se); + if (!se->avg.last_update_time && (flags & DO_ATTACH)) { /* @@ -3892,6 +3902,8 @@ static void remove_entity_load_avg(struct sched_entity *se) sync_entity_load_avg(se); + trace_android_rvh_remove_entity_load_avg(cfs_rq, se); + raw_spin_lock_irqsave(&cfs_rq->removed.lock, flags); ++cfs_rq->removed.nr; cfs_rq->removed.util_avg += se->avg.util_avg; @@ -3932,9 +3944,16 @@ static inline unsigned long task_util_est(struct task_struct *p) #ifdef CONFIG_UCLAMP_TASK static inline unsigned long uclamp_task_util(struct task_struct *p) { - return clamp(task_util_est(p), - uclamp_eff_value(p, UCLAMP_MIN), - uclamp_eff_value(p, UCLAMP_MAX)); + unsigned long min_util = uclamp_eff_value(p, UCLAMP_MIN); + unsigned long max_util = uclamp_eff_value(p, UCLAMP_MAX); + unsigned long task_util = task_util_est(p); + unsigned long ret = 0; + + trace_android_rvh_uclamp_task_util(task_util, min_util, max_util, &ret); + if (ret) + return ret; + + return clamp(task_util, min_util, max_util); } #else static inline unsigned long uclamp_task_util(struct task_struct *p) @@ -5182,7 +5201,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b) /* * When a group wakes up we want to make sure that its quota is not already * expired/exceeded, otherwise it may be allowed to steal additional ticks of - * runtime as update_curr() throttling can not not trigger until it's on-rq. + * runtime as update_curr() throttling can not trigger until it's on-rq. */ static void check_enqueue_throttle(struct cfs_rq *cfs_rq) { @@ -6328,8 +6347,10 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) * pattern is IO completions. */ if (is_per_cpu_kthread(current) && + in_task() && prev == smp_processor_id() && - this_rq()->nr_running <= 1) { + this_rq()->nr_running <= 1 && + asym_fits_capacity(task_util, prev)) { return prev; } @@ -8095,6 +8116,8 @@ static bool __update_blocked_fair(struct rq *rq, bool *done) bool decayed = false; int cpu = cpu_of(rq); + trace_android_rvh_update_blocked_fair(rq); + /* * Iterates the task_group tree in a bottom up fashion, see * list_add_leaf_cfs_rq() for details. diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 16f57e71f9c441ada23d071fabcc8c0e32a54ccc..cc7cd512e4e33833d781b5b6686fd6085fbf879f 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -19,11 +19,11 @@ #endif #ifdef CONFIG_RSEQ -#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK \ +#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK \ (MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ \ - | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ_BITMASK) + | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ) #else -#define MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ_BITMASK 0 +#define MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK 0 #endif #define MEMBARRIER_CMD_BITMASK \ @@ -31,7 +31,8 @@ | MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED \ | MEMBARRIER_CMD_PRIVATE_EXPEDITED \ | MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED \ - | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK) + | MEMBARRIER_PRIVATE_EXPEDITED_SYNC_CORE_BITMASK \ + | MEMBARRIER_PRIVATE_EXPEDITED_RSEQ_BITMASK) static void ipi_mb(void *info) { @@ -315,7 +316,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm) /* * For each cpu runqueue, if the task's mm match @mm, ensure that all - * @mm's membarrier state set bits are also set in in the runqueue's + * @mm's membarrier state set bits are also set in the runqueue's * membarrier state. This ensures that a runqueue scheduling * between threads which are users of @mm has its membarrier state * updated. diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c index cd54fab42cf4eb7a8794440365866fb0ce993aca..e15a2fad146ef61a6e92db5e8c4b3373fca667af 100644 --- a/kernel/sched/pelt.c +++ b/kernel/sched/pelt.c @@ -30,6 +30,7 @@ int pelt_load_avg_period = PELT32_LOAD_AVG_PERIOD; int pelt_load_avg_max = PELT32_LOAD_AVG_MAX; +EXPORT_SYMBOL_GPL(pelt_load_avg_max); const u32 *pelt_runnable_avg_yN_inv = pelt32_runnable_avg_yN_inv; static int __init set_pelt(char *str) @@ -216,9 +217,8 @@ accumulate_sum(u64 delta, struct sched_avg *sa, * load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... ) * = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}] */ -static __always_inline int -___update_load_sum(u64 now, struct sched_avg *sa, - unsigned long load, unsigned long runnable, int running) +int ___update_load_sum(u64 now, struct sched_avg *sa, + unsigned long load, unsigned long runnable, int running) { u64 delta; @@ -268,6 +268,7 @@ ___update_load_sum(u64 now, struct sched_avg *sa, return 1; } +EXPORT_SYMBOL_GPL(___update_load_sum); /* * When syncing *_avg with *_sum, we must take into account the current @@ -293,8 +294,7 @@ ___update_load_sum(u64 now, struct sched_avg *sa, * the period_contrib of cfs_rq when updating the sched_avg of a sched_entity * if it's more convenient. */ -static __always_inline void -___update_load_avg(struct sched_avg *sa, unsigned long load) +void ___update_load_avg(struct sched_avg *sa, unsigned long load) { u32 divider = get_pelt_divider(sa); @@ -305,6 +305,7 @@ ___update_load_avg(struct sched_avg *sa, unsigned long load) sa->runnable_avg = div_u64(sa->runnable_sum, divider); WRITE_ONCE(sa->util_avg, sa->util_sum / divider); } +EXPORT_SYMBOL_GPL(___update_load_avg); /* * sched_entity: diff --git a/kernel/sched/pelt.h b/kernel/sched/pelt.h index 0b9aeebb9c325f0b572125b0b91b70f065422771..45bf08e22207cb5d5ee1445dfee9b0e8d1421d20 100644 --- a/kernel/sched/pelt.h +++ b/kernel/sched/pelt.h @@ -37,9 +37,11 @@ update_irq_load_avg(struct rq *rq, u64 running) } #endif +#define PELT_MIN_DIVIDER (LOAD_AVG_MAX - 1024) + static inline u32 get_pelt_divider(struct sched_avg *avg) { - return LOAD_AVG_MAX - 1024 + avg->period_contrib; + return PELT_MIN_DIVIDER + avg->period_contrib; } static inline void cfs_se_util_change(struct sched_avg *avg) diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 5518c070d1d98737f3b59d8f2b328fa4bfdb8535..f46ac0f39777f5f51706b511940ab7b5dcc2804e 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -1167,7 +1167,6 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, t->event = 0; t->last_event_time = 0; init_waitqueue_head(&t->event_wait); - kref_init(&t->refcount); mutex_lock(&group->trigger_lock); @@ -1196,15 +1195,19 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, return t; } -static void psi_trigger_destroy(struct kref *ref) +void psi_trigger_destroy(struct psi_trigger *t) { - struct psi_trigger *t = container_of(ref, struct psi_trigger, refcount); - struct psi_group *group = t->group; + struct psi_group *group; struct task_struct *task_to_destroy = NULL; - if (static_branch_likely(&psi_disabled)) + /* + * We do not check psi_disabled since it might have been disabled after + * the trigger got created. + */ + if (!t) return; + group = t->group; /* * Wakeup waiters to stop polling. Can happen if cgroup is deleted * from under a polling process. @@ -1240,9 +1243,9 @@ static void psi_trigger_destroy(struct kref *ref) mutex_unlock(&group->trigger_lock); /* - * Wait for both *trigger_ptr from psi_trigger_replace and - * poll_task RCUs to complete their read-side critical sections - * before destroying the trigger and optionally the poll_task + * Wait for psi_schedule_poll_work RCU to complete its read-side + * critical section before destroying the trigger and optionally the + * poll_task. */ synchronize_rcu(); /* @@ -1260,18 +1263,6 @@ static void psi_trigger_destroy(struct kref *ref) kfree(t); } -void psi_trigger_replace(void **trigger_ptr, struct psi_trigger *new) -{ - struct psi_trigger *old = *trigger_ptr; - - if (static_branch_likely(&psi_disabled)) - return; - - rcu_assign_pointer(*trigger_ptr, new); - if (old) - kref_put(&old->refcount, psi_trigger_destroy); -} - __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait) { @@ -1281,24 +1272,15 @@ __poll_t psi_trigger_poll(void **trigger_ptr, if (static_branch_likely(&psi_disabled)) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - rcu_read_lock(); - - t = rcu_dereference(*(void __rcu __force **)trigger_ptr); - if (!t) { - rcu_read_unlock(); + t = smp_load_acquire(trigger_ptr); + if (!t) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - } - kref_get(&t->refcount); - - rcu_read_unlock(); poll_wait(file, &t->event_wait, wait); if (cmpxchg(&t->event, 1, 0) == 1) ret |= EPOLLPRI; - kref_put(&t->refcount, psi_trigger_destroy); - return ret; } @@ -1322,14 +1304,24 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, buf[buf_size - 1] = '\0'; - new = psi_trigger_create(&psi_system, buf, nbytes, res); - if (IS_ERR(new)) - return PTR_ERR(new); - seq = file->private_data; + /* Take seq->lock to protect seq->private from concurrent writes */ mutex_lock(&seq->lock); - psi_trigger_replace(&seq->private, new); + + /* Allow only one trigger per file descriptor */ + if (seq->private) { + mutex_unlock(&seq->lock); + return -EBUSY; + } + + new = psi_trigger_create(&psi_system, buf, nbytes, res); + if (IS_ERR(new)) { + mutex_unlock(&seq->lock); + return PTR_ERR(new); + } + + smp_store_release(&seq->private, new); mutex_unlock(&seq->lock); return nbytes; @@ -1364,7 +1356,7 @@ static int psi_fop_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; - psi_trigger_replace(&seq->private, NULL); + psi_trigger_destroy(seq->private); return single_release(inode, file); } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index da40e5f07a575e9be35cd7886407a882b7535d91..3e1866534db52091dfff129398efa4dcb4b1b30b 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -54,11 +54,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) rt_b->rt_period_timer.function = sched_rt_period_timer; } -static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b) { - if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) - return; - raw_spin_lock(&rt_b->rt_runtime_lock); if (!rt_b->rt_period_active) { rt_b->rt_period_active = 1; @@ -77,6 +74,14 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) raw_spin_unlock(&rt_b->rt_runtime_lock); } +static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +{ + if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) + return; + + do_start_rt_bandwidth(rt_b); +} + void init_rt_rq(struct rt_rq *rt_rq) { struct rt_prio_array *array; @@ -1031,13 +1036,17 @@ static void update_curr_rt(struct rq *rq) for_each_sched_rt_entity(rt_se) { struct rt_rq *rt_rq = rt_rq_of_se(rt_se); + int exceeded; if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { raw_spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_time += delta_exec; - if (sched_rt_runtime_exceeded(rt_rq)) + exceeded = sched_rt_runtime_exceeded(rt_rq); + if (exceeded) resched_curr(rq); raw_spin_unlock(&rt_rq->rt_runtime_lock); + if (exceeded) + do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq)); } } } @@ -1676,6 +1685,7 @@ static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool f */ if (rq->curr->sched_class != &rt_sched_class) update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0); + trace_android_rvh_update_rt_rq_load_avg(rq_clock_pelt(rq), rq, p, 0); rt_queue_push_tasks(rq); } @@ -1728,6 +1738,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) update_curr_rt(rq); update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1); + trace_android_rvh_update_rt_rq_load_avg(rq_clock_pelt(rq), rq, p, 1); /* * The previous task needs to be made eligible for pushing @@ -2491,6 +2502,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) update_curr_rt(rq); update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1); + trace_android_rvh_update_rt_rq_load_avg(rq_clock_pelt(rq), rq, p, 1); watchdog(rq, p); @@ -2821,8 +2833,12 @@ static int sched_rt_global_validate(void) static void sched_rt_do_global(void) { + unsigned long flags; + + raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); def_rt_bandwidth.rt_runtime = global_rt_runtime(); def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period()); + raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); } int sched_rt_handler(struct ctl_table *table, int write, void *buffer, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 518da844fe2a3e5d56238bdd7ce031d3d5b18aa8..317be806dd044be3e0b32266b3d23c7f9dddd111 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -79,6 +79,7 @@ #include "cpudeadline.h" #include +#include #ifdef CONFIG_SCHED_DEBUG # define SCHED_WARN_ON(x) WARN_ONCE(x, #x) @@ -2465,6 +2466,7 @@ unsigned long uclamp_rq_util_with(struct rq *rq, unsigned long util, { unsigned long min_util = 0; unsigned long max_util = 0; + unsigned long ret = 0; if (!static_branch_likely(&sched_uclamp_used)) return util; @@ -2492,6 +2494,10 @@ out: if (unlikely(min_util >= max_util)) return min_util; + trace_android_rvh_uclamp_rq_util_with(util, min_util, max_util, &ret); + if (ret) + return ret; + return clamp(util, min_util, max_util); } diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index b9a654bba1b7dffad8c58d2c34ddeedeb458c7b9..9227887547d60c88fd7753eb3474a9352e953b6e 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -1542,66 +1542,58 @@ static void init_numa_topology_type(void) } } + +#define NR_DISTANCE_VALUES (1 << DISTANCE_BITS) + void sched_init_numa(void) { - int next_distance, curr_distance = node_distance(0, 0); struct sched_domain_topology_level *tl; - int level = 0; - int i, j, k; - - sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL); - if (!sched_domains_numa_distance) - return; - - /* Includes NUMA identity node at level 0. */ - sched_domains_numa_distance[level++] = curr_distance; - sched_domains_numa_levels = level; + unsigned long *distance_map; + int nr_levels = 0; + int i, j; /* * O(nr_nodes^2) deduplicating selection sort -- in order to find the * unique distances in the node_distance() table. - * - * Assumes node_distance(0,j) includes all distances in - * node_distance(i,j) in order to avoid cubic time. */ - next_distance = curr_distance; + distance_map = bitmap_alloc(NR_DISTANCE_VALUES, GFP_KERNEL); + if (!distance_map) + return; + + bitmap_zero(distance_map, NR_DISTANCE_VALUES); for (i = 0; i < nr_node_ids; i++) { for (j = 0; j < nr_node_ids; j++) { - for (k = 0; k < nr_node_ids; k++) { - int distance = node_distance(i, k); - - if (distance > curr_distance && - (distance < next_distance || - next_distance == curr_distance)) - next_distance = distance; - - /* - * While not a strong assumption it would be nice to know - * about cases where if node A is connected to B, B is not - * equally connected to A. - */ - if (sched_debug() && node_distance(k, i) != distance) - sched_numa_warn("Node-distance not symmetric"); + int distance = node_distance(i, j); - if (sched_debug() && i && !find_numa_distance(distance)) - sched_numa_warn("Node-0 not representative"); + if (distance < LOCAL_DISTANCE || distance >= NR_DISTANCE_VALUES) { + sched_numa_warn("Invalid distance value range"); + return; } - if (next_distance != curr_distance) { - sched_domains_numa_distance[level++] = next_distance; - sched_domains_numa_levels = level; - curr_distance = next_distance; - } else break; + + bitmap_set(distance_map, distance, 1); } + } + /* + * We can now figure out how many unique distance values there are and + * allocate memory accordingly. + */ + nr_levels = bitmap_weight(distance_map, NR_DISTANCE_VALUES); - /* - * In case of sched_debug() we verify the above assumption. - */ - if (!sched_debug()) - break; + sched_domains_numa_distance = kcalloc(nr_levels, sizeof(int), GFP_KERNEL); + if (!sched_domains_numa_distance) { + bitmap_free(distance_map); + return; + } + + for (i = 0, j = 0; i < nr_levels; i++, j++) { + j = find_next_bit(distance_map, NR_DISTANCE_VALUES, j); + sched_domains_numa_distance[i] = j; } + bitmap_free(distance_map); + /* - * 'level' contains the number of unique distances + * 'nr_levels' contains the number of unique distances * * The sched_domains_numa_distance[] array includes the actual distance * numbers. @@ -1610,15 +1602,15 @@ void sched_init_numa(void) /* * Here, we should temporarily reset sched_domains_numa_levels to 0. * If it fails to allocate memory for array sched_domains_numa_masks[][], - * the array will contain less then 'level' members. This could be + * the array will contain less then 'nr_levels' members. This could be * dangerous when we use it to iterate array sched_domains_numa_masks[][] * in other functions. * - * We reset it to 'level' at the end of this function. + * We reset it to 'nr_levels' at the end of this function. */ sched_domains_numa_levels = 0; - sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL); + sched_domains_numa_masks = kzalloc(sizeof(void *) * nr_levels, GFP_KERNEL); if (!sched_domains_numa_masks) return; @@ -1626,7 +1618,7 @@ void sched_init_numa(void) * Now for each level, construct a mask per node which contains all * CPUs of nodes that are that many hops away from us. */ - for (i = 0; i < level; i++) { + for (i = 0; i < nr_levels; i++) { sched_domains_numa_masks[i] = kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL); if (!sched_domains_numa_masks[i]) @@ -1634,12 +1626,17 @@ void sched_init_numa(void) for (j = 0; j < nr_node_ids; j++) { struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL); + int k; + if (!mask) return; sched_domains_numa_masks[i][j] = mask; for_each_node(k) { + if (sched_debug() && (node_distance(j, k) != node_distance(k, j))) + sched_numa_warn("Node-distance not symmetric"); + if (node_distance(j, k) > sched_domains_numa_distance[i]) continue; @@ -1651,7 +1648,7 @@ void sched_init_numa(void) /* Compute default topology size */ for (i = 0; sched_domain_topology[i].mask; i++); - tl = kzalloc((i + level + 1) * + tl = kzalloc((i + nr_levels + 1) * sizeof(struct sched_domain_topology_level), GFP_KERNEL); if (!tl) return; @@ -1674,7 +1671,7 @@ void sched_init_numa(void) /* * .. and append 'j' levels of NUMA goodness. */ - for (j = 1; j < level; i++, j++) { + for (j = 1; j < nr_levels; i++, j++) { tl[i] = (struct sched_domain_topology_level){ .mask = sd_numa_mask, .sd_flags = cpu_numa_flags, @@ -1686,8 +1683,8 @@ void sched_init_numa(void) sched_domain_topology = tl; - sched_domains_numa_levels = level; - sched_max_numa_distance = sched_domains_numa_distance[level - 1]; + sched_domains_numa_levels = nr_levels; + sched_max_numa_distance = sched_domains_numa_distance[nr_levels - 1]; init_numa_topology_type(); } diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index bc694780a6ba567f526d7aae1d5da539d99a2ad8..c4f324ad035c67b3a7c6a0a136725627e02637bc 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -227,6 +227,13 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode) } EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */ +void __wake_up_pollfree(struct wait_queue_head *wq_head) +{ + __wake_up(wq_head, TASK_NORMAL, 0, poll_to_key(EPOLLHUP | POLLFREE)); + /* POLLFREE must have cleared the queue. */ + WARN_ON_ONCE(waitqueue_active(wq_head)); +} + /* * Note: we use "set_current_state()" _after_ the wait-queue add, * because we need a memory barrier there on SMP, so that any diff --git a/kernel/scs.c b/kernel/scs.c index e2a71fc82fa0686c48bb62f7d559e47445096237..b7e1b096d90601bfb2dc6a2a3646c2928d22dc1d 100644 --- a/kernel/scs.c +++ b/kernel/scs.c @@ -32,15 +32,19 @@ static void *__scs_alloc(int node) for (i = 0; i < NR_CACHED_SCS; i++) { s = this_cpu_xchg(scs_cache[i], NULL); if (s) { - kasan_unpoison_vmalloc(s, SCS_SIZE); + s = kasan_unpoison_vmalloc(s, SCS_SIZE, + KASAN_VMALLOC_PROT_NORMAL); memset(s, 0, SCS_SIZE); - return s; + goto out; } } - return __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END, + s = __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END, GFP_SCS, PAGE_KERNEL, 0, node, __builtin_return_address(0)); + +out: + return kasan_reset_tag(s); } void *scs_alloc(int node) @@ -78,6 +82,7 @@ void scs_free(void *s) if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL) return; + kasan_unpoison_vmalloc(s, SCS_SIZE, KASAN_VMALLOC_PROT_NORMAL); vfree_atomic(s); } diff --git a/kernel/signal.c b/kernel/signal.c index cae67cc698d7b1c1a75f8d8e7cce36864e9d1c81..7814560f8637e742a9ec89adea6c06449f0b05d1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2109,15 +2109,6 @@ static inline bool may_ptrace_stop(void) return true; } -/* - * Return non-zero if there is a SIGKILL that should be waking us up. - * Called with the siglock held. - */ -static bool sigkill_pending(struct task_struct *tsk) -{ - return sigismember(&tsk->pending.signal, SIGKILL) || - sigismember(&tsk->signal->shared_pending.signal, SIGKILL); -} /* * This must be called with current->sighand->siglock held. @@ -2144,17 +2135,16 @@ static void ptrace_stop(int exit_code, int why, int clear_code, kernel_siginfo_t * calling arch_ptrace_stop, so we must release it now. * To preserve proper semantics, we must do this before * any signal bookkeeping like checking group_stop_count. - * Meanwhile, a SIGKILL could come in before we retake the - * siglock. That must prevent us from sleeping in TASK_TRACED. - * So after regaining the lock, we must check for SIGKILL. */ spin_unlock_irq(¤t->sighand->siglock); arch_ptrace_stop(exit_code, info); spin_lock_irq(¤t->sighand->siglock); - if (sigkill_pending(current)) - return; } + /* + * schedule() will not sleep if there is a pending signal that + * can awaken the task. + */ set_special_state(TASK_TRACED); /* diff --git a/kernel/smpboot.c b/kernel/smpboot.c index f25208e8df8365e090cedf887638b3e0b00e92bc..e4163042c4d6658d4fcf192bfcac04e8c2ddb5cf 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -33,7 +33,6 @@ struct task_struct *idle_thread_get(unsigned int cpu) if (!tsk) return ERR_PTR(-ENOMEM); - init_idle(tsk, cpu); return tsk; } diff --git a/kernel/stackleak.c b/kernel/stackleak.c index ce161a8e8d97585c8b1152fec1ea088343cf15d8..dd07239ddff9f4ba562b7e46d9fcc4af0679fd7e 100644 --- a/kernel/stackleak.c +++ b/kernel/stackleak.c @@ -48,7 +48,7 @@ int stack_erasing_sysctl(struct ctl_table *table, int write, #define skip_erasing() false #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */ -asmlinkage void notrace stackleak_erase(void) +asmlinkage void noinstr stackleak_erase(void) { /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */ unsigned long kstack_ptr = current->lowest_stack; @@ -102,9 +102,8 @@ asmlinkage void notrace stackleak_erase(void) /* Reset the 'lowest_stack' value for the next syscall */ current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64; } -NOKPROBE_SYMBOL(stackleak_erase); -void __used __no_caller_saved_registers notrace stackleak_track_stack(void) +void __used __no_caller_saved_registers noinstr stackleak_track_stack(void) { unsigned long sp = current_stack_pointer; diff --git a/kernel/sys.c b/kernel/sys.c index 980e3abf9a850ceb4935dcc3bbae449dcdaba447..b9c690c534345df91030b6fcc3d8cc305e7e5189 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -42,8 +43,6 @@ #include #include #include -#include -#include #include #include @@ -2281,154 +2280,71 @@ int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, return -EINVAL; } -#ifdef CONFIG_MMU -static int prctl_update_vma_anon_name(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, unsigned long end, - const char __user *name_addr) -{ - struct mm_struct *mm = vma->vm_mm; - int error = 0; - pgoff_t pgoff; - - if (name_addr == vma_get_anon_name(vma)) { - *prev = vma; - goto out; - } - - pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); - *prev = vma_merge(mm, *prev, start, end, vma->vm_flags, vma->anon_vma, - vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, name_addr); - if (*prev) { - vma = *prev; - goto success; - } - - *prev = vma; - - if (start != vma->vm_start) { - error = split_vma(mm, vma, start, 1); - if (error) - goto out; - } - - if (end != vma->vm_end) { - error = split_vma(mm, vma, end, 0); - if (error) - goto out; - } +#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) -success: - if (!vma->vm_file) - vma->anon_name = name_addr; +#ifdef CONFIG_ANON_VMA_NAME -out: - if (error == -ENOMEM) - error = -EAGAIN; - return error; -} +#define ANON_VMA_NAME_MAX_LEN 256 +#define ANON_VMA_NAME_INVALID_CHARS "\\`$[]" -static int prctl_set_vma_anon_name(unsigned long start, unsigned long end, - unsigned long arg) +static inline bool is_valid_name_char(char ch) { - unsigned long tmp; - struct vm_area_struct *vma, *prev; - int unmapped_error = 0; - int error = -EINVAL; - - /* - * If the interval [start,end) covers some unmapped address - * ranges, just ignore them, but return -ENOMEM at the end. - * - this matches the handling in madvise. - */ - vma = find_vma_prev(current->mm, start, &prev); - if (vma && start > vma->vm_start) - prev = vma; - - for (;;) { - /* Still start < end. */ - error = -ENOMEM; - if (!vma) - return error; - - /* Here start < (end|vma->vm_end). */ - if (start < vma->vm_start) { - unmapped_error = -ENOMEM; - start = vma->vm_start; - if (start >= end) - return error; - } - - /* Here vma->vm_start <= start < (end|vma->vm_end) */ - tmp = vma->vm_end; - if (end < tmp) - tmp = end; - - /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ - error = prctl_update_vma_anon_name(vma, &prev, start, tmp, - (const char __user *)arg); - if (error) - return error; - start = tmp; - if (prev && start < prev->vm_end) - start = prev->vm_end; - error = unmapped_error; - if (start >= end) - return error; - if (prev) - vma = prev->vm_next; - else /* madvise_remove dropped mmap_lock */ - vma = find_vma(current->mm, start); - } + /* printable ascii characters, excluding ANON_VMA_NAME_INVALID_CHARS */ + return ch > 0x1f && ch < 0x7f && + !strchr(ANON_VMA_NAME_INVALID_CHARS, ch); } -static int prctl_set_vma(unsigned long opt, unsigned long start, - unsigned long len_in, unsigned long arg) +static int prctl_set_vma(unsigned long opt, unsigned long addr, + unsigned long size, unsigned long arg) { struct mm_struct *mm = current->mm; + const char __user *uname; + struct anon_vma_name *anon_name = NULL; int error; - unsigned long len; - unsigned long end; - - if (start & ~PAGE_MASK) - return -EINVAL; - len = (len_in + ~PAGE_MASK) & PAGE_MASK; - - /* Check to see whether len was rounded up from small -ve to zero */ - if (len_in && !len) - return -EINVAL; - - end = start + len; - if (end < start) - return -EINVAL; - - if (end == start) - return 0; - - mmap_write_lock(mm); switch (opt) { case PR_SET_VMA_ANON_NAME: - error = prctl_set_vma_anon_name(start, end, arg); + uname = (const char __user *)arg; + if (uname) { + char *name, *pch; + + name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN); + if (IS_ERR(name)) + return PTR_ERR(name); + + for (pch = name; *pch != '\0'; pch++) { + if (!is_valid_name_char(*pch)) { + kfree(name); + return -EINVAL; + } + } + /* anon_vma has its own copy */ + anon_name = anon_vma_name_alloc(name); + kfree(name); + if (!anon_name) + return -ENOMEM; + + } + + mmap_write_lock(mm); + error = madvise_set_anon_name(mm, addr, size, anon_name); + mmap_write_unlock(mm); + anon_vma_name_put(anon_name); break; default: error = -EINVAL; } - mmap_write_unlock(mm); - return error; } -#else /* CONFIG_MMU */ + +#else /* CONFIG_ANON_VMA_NAME */ static int prctl_set_vma(unsigned long opt, unsigned long start, - unsigned long len_in, unsigned long arg) + unsigned long size, unsigned long arg) { return -EINVAL; } -#endif - -#define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) +#endif /* CONFIG_ANON_VMA_NAME */ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) @@ -2642,9 +2558,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = arch_prctl_spec_ctrl_set(me, arg2, arg3); break; - case PR_SET_VMA: - error = prctl_set_vma(arg2, arg3, arg4, arg5); - break; case PR_PAC_RESET_KEYS: if (arg3 || arg4 || arg5) return -EINVAL; @@ -2693,6 +2606,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER; break; + case PR_SET_VMA: + error = prctl_set_vma(arg2, arg3, arg4, arg5); + break; default: error = -EINVAL; break; diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index f27ac94d5fa7270ba9f272be1bcba6a903149206..6b8203edf5310d5faa7a7c07adbb0dcfe3e6a56e 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -281,6 +281,7 @@ COND_SYSCALL(munlockall); COND_SYSCALL(mincore); COND_SYSCALL(madvise); COND_SYSCALL(process_madvise); +COND_SYSCALL(process_mrelease); COND_SYSCALL(remap_file_pages); COND_SYSCALL(mbind); COND_SYSCALL_COMPAT(mbind); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 7aed10c7dea962dcde94049ffdbb1c1b07a0e4c4..e68cc71b9f8f2ce35a065af44c821270c0ed56fc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -107,7 +107,6 @@ #if defined(CONFIG_SYSCTL) /* External variables not in a header file. */ -extern int extra_free_kbytes; /* Constants used for minimum and maximum */ #ifdef CONFIG_LOCKUP_DETECTOR @@ -122,7 +121,10 @@ static unsigned long one_ul = 1; static unsigned long long_max = LONG_MAX; static int one_hundred = 100; static int two_hundred = 200; +#ifdef CONFIG_PERF_EVENTS static int one_thousand = 1000; +#endif +static int three_thousand = 3000; #ifdef CONFIG_PRINTK static int ten_thousand = 10000; #endif @@ -236,7 +238,34 @@ static int bpf_stats_handler(struct ctl_table *table, int write, mutex_unlock(&bpf_stats_enabled_mutex); return ret; } -#endif + +void __weak unpriv_ebpf_notify(int new_state) +{ +} + +static int bpf_unpriv_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, unpriv_enable = *(int *)table->data; + bool locked_state = unpriv_enable == 1; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &unpriv_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + if (locked_state && unpriv_enable != 1) + return -EPERM; + *(int *)table->data = unpriv_enable; + } + + unpriv_ebpf_notify(unpriv_enable); + + return ret; +} +#endif /* CONFIG_BPF_SYSCALL && CONFIG_SYSCTL */ /* * /proc/sys support @@ -2629,10 +2658,9 @@ static struct ctl_table kern_table[] = { .data = &sysctl_unprivileged_bpf_disabled, .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), .mode = 0644, - /* only handle a transition from default "0" to "1" */ - .proc_handler = proc_dointvec_minmax, - .extra1 = SYSCTL_ONE, - .extra2 = SYSCTL_ONE, + .proc_handler = bpf_unpriv_handler, + .extra1 = SYSCTL_ZERO, + .extra2 = &two, }, { .procname = "bpf_stats_enabled", @@ -2905,15 +2933,7 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = watermark_scale_factor_sysctl_handler, .extra1 = SYSCTL_ONE, - .extra2 = &one_thousand, - }, - { - .procname = "extra_free_kbytes", - .data = &extra_free_kbytes, - .maxlen = sizeof(extra_free_kbytes), - .mode = 0644, - .proc_handler = min_free_kbytes_sysctl_handler, - .extra1 = SYSCTL_ZERO, + .extra2 = &three_thousand, }, { .procname = "percpu_pagelist_fraction", diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 1657b5e74c64cf9efd44a220dc38a4a6512e3226..6dce59def3ea8143d946ffca410b3fa419bfef58 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -93,6 +93,20 @@ static char override_name[CS_NAME_LEN]; static int finished_booting; static u64 suspend_start; +/* + * Threshold: 0.0312s, when doubled: 0.0625s. + * Also a default for cs->uncertainty_margin when registering clocks. + */ +#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5) + +/* + * Maximum permissible delay between two readouts of the watchdog + * clocksource surrounding a read of the clocksource being validated. + * This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as + * a lower bound for cs->uncertainty_margin values when registering clocks. + */ +#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC) + #ifdef CONFIG_CLOCKSOURCE_WATCHDOG static void clocksource_watchdog_work(struct work_struct *work); static void clocksource_select(void); @@ -119,17 +133,9 @@ static int clocksource_watchdog_kthread(void *data); static void __clocksource_change_rating(struct clocksource *cs, int rating); /* - * Interval: 0.5sec Threshold: 0.0625s + * Interval: 0.5sec. */ #define WATCHDOG_INTERVAL (HZ >> 1) -#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4) - -/* - * Maximum permissible delay between two readouts of the watchdog - * clocksource surrounding a read of the clocksource being validated. - * This delay could be due to SMIs, NMIs, or to VCPU preemptions. - */ -#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC) static void clocksource_watchdog_work(struct work_struct *work) { @@ -194,17 +200,24 @@ void clocksource_mark_unstable(struct clocksource *cs) static ulong max_cswd_read_retries = 3; module_param(max_cswd_read_retries, ulong, 0644); -static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) +enum wd_read_status { + WD_READ_SUCCESS, + WD_READ_UNSTABLE, + WD_READ_SKIP +}; + +static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) { unsigned int nretries; - u64 wd_end, wd_delta; - int64_t wd_delay; + u64 wd_end, wd_end2, wd_delta; + int64_t wd_delay, wd_seq_delay; for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) { local_irq_disable(); *wdnow = watchdog->read(watchdog); *csnow = cs->read(cs); wd_end = watchdog->read(watchdog); + wd_end2 = watchdog->read(watchdog); local_irq_enable(); wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask); @@ -215,13 +228,34 @@ static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow) pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n", smp_processor_id(), watchdog->name, nretries); } - return true; + return WD_READ_SUCCESS; } + + /* + * Now compute delay in consecutive watchdog read to see if + * there is too much external interferences that cause + * significant delay in reading both clocksource and watchdog. + * + * If consecutive WD read-back delay > WATCHDOG_MAX_SKEW/2, + * report system busy, reinit the watchdog and skip the current + * watchdog test. + */ + wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask); + wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift); + if (wd_seq_delay > WATCHDOG_MAX_SKEW/2) + goto skip_test; } pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n", smp_processor_id(), watchdog->name, wd_delay, nretries); - return false; + return WD_READ_UNSTABLE; + +skip_test: + pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n", + smp_processor_id(), watchdog->name, wd_seq_delay); + pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n", + cs->name, wd_delay); + return WD_READ_SKIP; } static u64 csnow_mid; @@ -284,6 +318,8 @@ static void clocksource_watchdog(struct timer_list *unused) int next_cpu, reset_pending; int64_t wd_nsec, cs_nsec; struct clocksource *cs; + enum wd_read_status read_ret; + u32 md; spin_lock(&watchdog_lock); if (!watchdog_running) @@ -300,9 +336,12 @@ static void clocksource_watchdog(struct timer_list *unused) continue; } - if (!cs_watchdog_read(cs, &csnow, &wdnow)) { - /* Clock readout unreliable, so give it up. */ - __clocksource_unstable(cs); + read_ret = cs_watchdog_read(cs, &csnow, &wdnow); + + if (read_ret != WD_READ_SUCCESS) { + if (read_ret == WD_READ_UNSTABLE) + /* Clock readout unreliable, so give it up. */ + __clocksource_unstable(cs); continue; } @@ -330,7 +369,8 @@ static void clocksource_watchdog(struct timer_list *unused) continue; /* Check the deviation from the watchdog clocksource. */ - if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) { + md = cs->uncertainty_margin + watchdog->uncertainty_margin; + if (abs(cs_nsec - wd_nsec) > md) { pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n", smp_processor_id(), cs->name); pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n", @@ -985,6 +1025,26 @@ void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq clocks_calc_mult_shift(&cs->mult, &cs->shift, freq, NSEC_PER_SEC / scale, sec * scale); } + + /* + * If the uncertainty margin is not specified, calculate it. + * If both scale and freq are non-zero, calculate the clock + * period, but bound below at 2*WATCHDOG_MAX_SKEW. However, + * if either of scale or freq is zero, be very conservative and + * take the tens-of-milliseconds WATCHDOG_THRESHOLD value for the + * uncertainty margin. Allow stupidly small uncertainty margins + * to be specified by the caller for testing purposes, but warn + * to discourage production use of this capability. + */ + if (scale && freq && !cs->uncertainty_margin) { + cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq); + if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW) + cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW; + } else if (!cs->uncertainty_margin) { + cs->uncertainty_margin = WATCHDOG_THRESHOLD; + } + WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW); + /* * Ensure clocksources that have large 'mult' values don't overflow * when adjusted. diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index eddcf497044459daa04d9370ccd1b7379db31997..65409abcca8e111368bc86983e5b0a72a191ae7d 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -49,13 +49,14 @@ static u64 jiffies_read(struct clocksource *cs) * for "tick-less" systems. */ static struct clocksource clocksource_jiffies = { - .name = "jiffies", - .rating = 1, /* lowest valid rating*/ - .read = jiffies_read, - .mask = CLOCKSOURCE_MASK(32), - .mult = TICK_NSEC << JIFFIES_SHIFT, /* details above */ - .shift = JIFFIES_SHIFT, - .max_cycles = 10, + .name = "jiffies", + .rating = 1, /* lowest valid rating*/ + .uncertainty_margin = 32 * NSEC_PER_MSEC, + .read = jiffies_read, + .mask = CLOCKSOURCE_MASK(32), + .mult = TICK_NSEC << JIFFIES_SHIFT, /* details above */ + .shift = JIFFIES_SHIFT, + .max_cycles = 10, }; __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(jiffies_lock); diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 08c033b80256972dd5bb482d80f11864052b8cbd..5d76edd0ad9c26d477e6a2035d668c882d177f61 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1100,14 +1100,29 @@ static void posix_cpu_timers_work(struct callback_head *work) handle_posix_cpu_timers(current); } +/* + * Clear existing posix CPU timers task work. + */ +void clear_posix_cputimers_work(struct task_struct *p) +{ + /* + * A copied work entry from the old task is not meaningful, clear it. + * N.B. init_task_work will not do this. + */ + memset(&p->posix_cputimers_work.work, 0, + sizeof(p->posix_cputimers_work.work)); + init_task_work(&p->posix_cputimers_work.work, + posix_cpu_timers_work); + p->posix_cputimers_work.scheduled = false; +} + /* * Initialize posix CPU timers task work in init task. Out of line to * keep the callback static and to avoid header recursion hell. */ void __init posix_cputimers_init_work(void) { - init_task_work(¤t->posix_cputimers_work.work, - posix_cpu_timers_work); + clear_posix_cputimers_work(current); } /* diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index eb04a2d749116b263cf71a62951dc6182dacecf2..22ef5cf17a48fbb5dc3e044f9a78bddbe0ee41c8 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1311,8 +1311,7 @@ int do_settimeofday64(const struct timespec64 *ts) timekeeping_forward_now(tk); xt = tk_xtime(tk); - ts_delta.tv_sec = ts->tv_sec - xt.tv_sec; - ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec; + ts_delta = timespec64_sub(*ts, xt); if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) { ret = -EINVAL; diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ed71c41410472cd7615f1d43091e5eeb30ac8cfd..1535065a7d76ad96c129e91d87f9f8909d5c1b1a 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -2053,26 +2053,44 @@ unsigned long msleep_interruptible(unsigned int msecs) EXPORT_SYMBOL(msleep_interruptible); /** - * usleep_range - Sleep for an approximate time - * @min: Minimum time in usecs to sleep - * @max: Maximum time in usecs to sleep + * usleep_range_state - Sleep for an approximate time in a given state + * @min: Minimum time in usecs to sleep + * @max: Maximum time in usecs to sleep + * @state: State of the current task that will be while sleeping * * In non-atomic context where the exact wakeup time is flexible, use - * usleep_range() instead of udelay(). The sleep improves responsiveness + * usleep_range_state() instead of udelay(). The sleep improves responsiveness * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces * power usage by allowing hrtimers to take advantage of an already- * scheduled interrupt instead of scheduling a new one just for this sleep. */ -void __sched usleep_range(unsigned long min, unsigned long max) +void __sched usleep_range_state(unsigned long min, unsigned long max, + unsigned int state) { ktime_t exp = ktime_add_us(ktime_get(), min); u64 delta = (u64)(max - min) * NSEC_PER_USEC; for (;;) { - __set_current_state(TASK_UNINTERRUPTIBLE); + __set_current_state(state); /* Do not return before the requested sleep time has elapsed */ if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) break; } } + +/** + * usleep_range - Sleep for an approximate time + * @min: Minimum time in usecs to sleep + * @max: Maximum time in usecs to sleep + * + * In non-atomic context where the exact wakeup time is flexible, use + * usleep_range() instead of udelay(). The sleep improves responsiveness + * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces + * power usage by allowing hrtimers to take advantage of an already- + * scheduled interrupt instead of scheduling a new one just for this sleep. + */ +void __sched usleep_range(unsigned long min, unsigned long max) +{ + usleep_range_state(min, max, TASK_UNINTERRUPTIBLE); +} EXPORT_SYMBOL(usleep_range); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ba644760f50767ac4f9848414239432913e2c9b6..a9e074769881f7bd92432e4cfa358a40c1d12bbf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1517,9 +1517,6 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = { BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, void *, buf, u32, size, u64, flags) { -#ifndef CONFIG_X86 - return -ENOENT; -#else static const u32 br_entry_size = sizeof(struct perf_branch_entry); struct perf_branch_stack *br_stack = ctx->data->br_stack; u32 to_copy; @@ -1528,7 +1525,7 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, return -EINVAL; if (unlikely(!br_stack)) - return -EINVAL; + return -ENOENT; if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE) return br_stack->nr * br_entry_size; @@ -1540,7 +1537,6 @@ BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, memcpy(buf, br_stack->entries, to_copy); return to_copy; -#endif } static const struct bpf_func_proto bpf_read_branch_records_proto = { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 30010614b9237a55b3e1238b9463b6aa380b0579..4a5d35dc490b2d4cbfd3685dee71fc12df18dace 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6985,7 +6985,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op; int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(TRACE_LIST_START); if (bit < 0) return; @@ -7060,7 +7060,7 @@ static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip, { int bit; - bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX); + bit = trace_test_and_set_recursion(TRACE_LIST_START); if (bit < 0) return; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 39d4d9b25d1a10e9b00c7ec1c240e144d92f0d72..6deac666ba3e4dab5efdd750d052edcf8b2bfbaa 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5000,6 +5000,9 @@ void ring_buffer_reset(struct trace_buffer *buffer) struct ring_buffer_per_cpu *cpu_buffer; int cpu; + /* prevent another thread from changing buffer sizes */ + mutex_lock(&buffer->mutex); + for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; @@ -5018,6 +5021,8 @@ void ring_buffer_reset(struct trace_buffer *buffer) atomic_dec(&cpu_buffer->record_disabled); atomic_dec(&cpu_buffer->resize_disabled); } + + mutex_unlock(&buffer->mutex); } EXPORT_SYMBOL_GPL(ring_buffer_reset); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index cdf6dd5ff88dfb834b9efea9cee99a37400ed662..5c3c4663395060b9f2c817969b3ff53193ced285 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -234,7 +234,7 @@ static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; static int __init set_trace_boot_options(char *str) { strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE); - return 0; + return 1; } __setup("trace_options=", set_trace_boot_options); @@ -245,12 +245,16 @@ static int __init set_trace_boot_clock(char *str) { strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE); trace_boot_clock = trace_boot_clock_buf; - return 0; + return 1; } __setup("trace_clock=", set_trace_boot_clock); static int __init set_tracepoint_printk(char *str) { + /* Ignore the "tp_printk_stop_on_boot" param */ + if (*str == '_') + return 0; + if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) tracepoint_printk = 1; return 1; @@ -1487,10 +1491,12 @@ static int __init set_buf_size(char *str) if (!str) return 0; buf_size = memparse(str, &str); - /* nr_entries can not be zero */ - if (buf_size == 0) - return 0; - trace_buf_size = buf_size; + /* + * nr_entries can not be zero and the startup + * tests require some buffer space. Therefore + * ensure we have at least 4096 bytes of buffer. + */ + trace_buf_size = max(4096UL, buf_size); return 1; } __setup("trace_buf_size=", set_buf_size); @@ -3135,7 +3141,7 @@ struct trace_buffer_struct { char buffer[4][TRACE_BUF_SIZE]; }; -static struct trace_buffer_struct *trace_percpu_buffer; +static struct trace_buffer_struct __percpu *trace_percpu_buffer; /* * Thise allows for lockless recording. If we're nested too deeply, then @@ -3145,7 +3151,7 @@ static char *get_trace_buf(void) { struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); - if (!buffer || buffer->nesting >= 4) + if (!trace_percpu_buffer || buffer->nesting >= 4) return NULL; buffer->nesting++; @@ -3164,7 +3170,7 @@ static void put_trace_buf(void) static int alloc_percpu_trace_buffer(void) { - struct trace_buffer_struct *buffers; + struct trace_buffer_struct __percpu *buffers; if (trace_percpu_buffer) return 0; @@ -5235,6 +5241,7 @@ static const char readme_msg[] = #ifdef CONFIG_HIST_TRIGGERS " hist trigger\t- If set, event hits are aggregated into a hash table\n" "\t Format: hist:keys=\n" + "\t [:=[,=...]]\n" "\t [:values=]\n" "\t [:sort=]\n" "\t [:size=#entries]\n" @@ -5246,6 +5253,16 @@ static const char readme_msg[] = "\t common_timestamp - to record current timestamp\n" "\t common_cpu - to record the CPU the event happened on\n" "\n" + "\t A hist trigger variable can be:\n" + "\t - a reference to a field e.g. x=current_timestamp,\n" + "\t - a reference to another variable e.g. y=$x,\n" + "\t - a numeric literal: e.g. ms_per_sec=1000,\n" + "\t - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n" + "\n" + "\t hist trigger aritmethic expressions support addition(+), subtraction(-),\n" + "\t multiplication(*) and division(/) operators. An operand can be either a\n" + "\t variable reference, field or numeric literal.\n" + "\n" "\t When a matching event is hit, an entry is added to a hash\n" "\t table using the key(s) and value(s) named, and the value of a\n" "\t sum called 'hitcount' is incremented. Keys and values\n" @@ -7258,7 +7275,8 @@ static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr) err = kzalloc(sizeof(*err), GFP_KERNEL); if (!err) err = ERR_PTR(-ENOMEM); - tr->n_err_log_entries++; + else + tr->n_err_log_entries++; return err; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 6784b572ce59743daec7b5a437532facc6229385..8d67f7f44840048ab875ef8e3eebb4acebe1c987 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -573,18 +573,6 @@ struct tracer { * then this function calls... * The function callback, which can use the FTRACE bits to * check for recursion. - * - * Now if the arch does not support a feature, and it calls - * the global list function which calls the ftrace callback - * all three of these steps will do a recursion protection. - * There's no reason to do one if the previous caller already - * did. The recursion that we are protecting against will - * go through the same steps again. - * - * To prevent the multiple recursion checks, if a recursion - * bit is set that is higher than the MAX bit of the current - * check, then we know that the check was made by the previous - * caller, and we can skip the current check. */ enum { /* Function recursion bits */ @@ -592,12 +580,14 @@ enum { TRACE_FTRACE_NMI_BIT, TRACE_FTRACE_IRQ_BIT, TRACE_FTRACE_SIRQ_BIT, + TRACE_FTRACE_TRANSITION_BIT, - /* INTERNAL_BITs must be greater than FTRACE_BITs */ + /* Internal use recursion bits */ TRACE_INTERNAL_BIT, TRACE_INTERNAL_NMI_BIT, TRACE_INTERNAL_IRQ_BIT, TRACE_INTERNAL_SIRQ_BIT, + TRACE_INTERNAL_TRANSITION_BIT, TRACE_BRANCH_BIT, /* @@ -637,12 +627,6 @@ enum { * function is called to clear it. */ TRACE_GRAPH_NOTRACE_BIT, - - /* - * When transitioning between context, the preempt_count() may - * not be correct. Allow for a single recursion to cover this case. - */ - TRACE_TRANSITION_BIT, }; #define trace_recursion_set(bit) do { (current)->trace_recursion |= (1<<(bit)); } while (0) @@ -662,12 +646,18 @@ enum { #define TRACE_CONTEXT_BITS 4 #define TRACE_FTRACE_START TRACE_FTRACE_BIT -#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1) #define TRACE_LIST_START TRACE_INTERNAL_BIT -#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) -#define TRACE_CONTEXT_MASK TRACE_LIST_MAX +#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1) + +enum { + TRACE_CTX_NMI, + TRACE_CTX_IRQ, + TRACE_CTX_SOFTIRQ, + TRACE_CTX_NORMAL, + TRACE_CTX_TRANSITION, +}; static __always_inline int trace_get_context_bit(void) { @@ -675,59 +665,48 @@ static __always_inline int trace_get_context_bit(void) if (in_interrupt()) { if (in_nmi()) - bit = 0; + bit = TRACE_CTX_NMI; else if (in_irq()) - bit = 1; + bit = TRACE_CTX_IRQ; else - bit = 2; + bit = TRACE_CTX_SOFTIRQ; } else - bit = 3; + bit = TRACE_CTX_NORMAL; return bit; } -static __always_inline int trace_test_and_set_recursion(int start, int max) +static __always_inline int trace_test_and_set_recursion(int start) { unsigned int val = current->trace_recursion; int bit; - /* A previous recursion check was made */ - if ((val & TRACE_CONTEXT_MASK) > max) - return 0; - bit = trace_get_context_bit() + start; if (unlikely(val & (1 << bit))) { /* * It could be that preempt_count has not been updated during * a switch between contexts. Allow for a single recursion. */ - bit = TRACE_TRANSITION_BIT; + bit = start + TRACE_CTX_TRANSITION; if (trace_recursion_test(bit)) return -1; trace_recursion_set(bit); barrier(); - return bit + 1; + return bit; } - /* Normal check passed, clear the transition to allow it again */ - trace_recursion_clear(TRACE_TRANSITION_BIT); - val |= 1 << bit; current->trace_recursion = val; barrier(); - return bit + 1; + return bit; } static __always_inline void trace_clear_recursion(int bit) { unsigned int val = current->trace_recursion; - if (!bit) - return; - - bit--; bit = 1 << bit; val &= ~bit; @@ -1527,14 +1506,26 @@ __event_trigger_test_discard(struct trace_event_file *file, if (eflags & EVENT_FILE_FL_TRIGGER_COND) *tt = event_triggers_call(file, entry, event); - if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) || - (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && - !filter_match_preds(file->filter, entry))) { - __trace_event_discard_commit(buffer, event); - return true; - } + if (likely(!(file->flags & (EVENT_FILE_FL_SOFT_DISABLED | + EVENT_FILE_FL_FILTERED | + EVENT_FILE_FL_PID_FILTER)))) + return false; + + if (file->flags & EVENT_FILE_FL_SOFT_DISABLED) + goto discard; + + if (file->flags & EVENT_FILE_FL_FILTERED && + !filter_match_preds(file->filter, entry)) + goto discard; + + if ((file->flags & EVENT_FILE_FL_PID_FILTER) && + trace_event_ignore_this_pid(file)) + goto discard; return false; + discard: + __trace_event_discard_commit(buffer, event); + return true; } /** diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index 0996d59750ff0f12425463237cd35665ec7854a8..ed3ab7b699a34b0f023ae3243faf3aaacf19effb 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -233,8 +233,8 @@ trace_boot_init_events(struct trace_array *tr, struct xbc_node *node) if (!node) return; /* per-event key starts with "event.GROUP.EVENT" */ - xbc_node_for_each_child(node, gnode) - xbc_node_for_each_child(gnode, enode) + xbc_node_for_each_subkey(node, gnode) + xbc_node_for_each_subkey(gnode, enode) trace_boot_init_one_event(tr, gnode, enode); } #else @@ -315,7 +315,7 @@ trace_boot_init_instances(struct xbc_node *node) if (!node) return; - xbc_node_for_each_child(node, inode) { + xbc_node_for_each_subkey(node, inode) { p = xbc_node_get_data(inode); if (!p || *p == '\0') continue; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index ab3cb67b869e5c2187a1daec221a47ec6f50eea4..7cc5f0a77c3cca7c834dfe0661f239b7103ff2f6 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2462,12 +2462,22 @@ static struct trace_event_file * trace_create_new_event(struct trace_event_call *call, struct trace_array *tr) { + struct trace_pid_list *no_pid_list; + struct trace_pid_list *pid_list; struct trace_event_file *file; file = kmem_cache_alloc(file_cachep, GFP_TRACE); if (!file) return NULL; + pid_list = rcu_dereference_protected(tr->filtered_pids, + lockdep_is_held(&event_mutex)); + no_pid_list = rcu_dereference_protected(tr->filtered_no_pids, + lockdep_is_held(&event_mutex)); + + if (pid_list || no_pid_list) + file->flags |= EVENT_FILE_FL_PID_FILTER; + file->event_call = call; file->tr = tr; atomic_set(&file->sm_ref, 0); diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 78a678eeb140935354283529066bc75c4fd4863e..a255ffbe342f3a519a75fcbd1ffe69503db10767 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -5,6 +5,7 @@ * Copyright (C) 2009 Tom Zanussi */ +#include #include #include #include @@ -654,6 +655,52 @@ DEFINE_EQUALITY_PRED(32); DEFINE_EQUALITY_PRED(16); DEFINE_EQUALITY_PRED(8); +/* user space strings temp buffer */ +#define USTRING_BUF_SIZE 1024 + +struct ustring_buffer { + char buffer[USTRING_BUF_SIZE]; +}; + +static __percpu struct ustring_buffer *ustring_per_cpu; + +static __always_inline char *test_string(char *str) +{ + struct ustring_buffer *ubuf; + char *kstr; + + if (!ustring_per_cpu) + return NULL; + + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + + /* For safety, do not trust the string pointer */ + if (!strncpy_from_kernel_nofault(kstr, str, USTRING_BUF_SIZE)) + return NULL; + return kstr; +} + +static __always_inline char *test_ustring(char *str) +{ + struct ustring_buffer *ubuf; + char __user *ustr; + char *kstr; + + if (!ustring_per_cpu) + return NULL; + + ubuf = this_cpu_ptr(ustring_per_cpu); + kstr = ubuf->buffer; + + /* user space address? */ + ustr = (char __user *)str; + if (!strncpy_from_user_nofault(kstr, ustr, USTRING_BUF_SIZE)) + return NULL; + + return kstr; +} + /* Filter predicate for fixed sized arrays of characters */ static int filter_pred_string(struct filter_pred *pred, void *event) { @@ -667,19 +714,43 @@ static int filter_pred_string(struct filter_pred *pred, void *event) return match; } -/* Filter predicate for char * pointers */ -static int filter_pred_pchar(struct filter_pred *pred, void *event) +static __always_inline int filter_pchar(struct filter_pred *pred, char *str) { - char **addr = (char **)(event + pred->offset); int cmp, match; - int len = strlen(*addr) + 1; /* including tailing '\0' */ + int len; - cmp = pred->regex.match(*addr, &pred->regex, len); + len = strlen(str) + 1; /* including tailing '\0' */ + cmp = pred->regex.match(str, &pred->regex, len); match = cmp ^ pred->not; return match; } +/* Filter predicate for char * pointers */ +static int filter_pred_pchar(struct filter_pred *pred, void *event) +{ + char **addr = (char **)(event + pred->offset); + char *str; + + str = test_string(*addr); + if (!str) + return 0; + + return filter_pchar(pred, str); +} + +/* Filter predicate for char * pointers in user space*/ +static int filter_pred_pchar_user(struct filter_pred *pred, void *event) +{ + char **addr = (char **)(event + pred->offset); + char *str; + + str = test_ustring(*addr); + if (!str) + return 0; + + return filter_pchar(pred, str); +} /* * Filter predicate for dynamic sized arrays of characters. @@ -1158,6 +1229,7 @@ static int parse_pred(const char *str, void *data, struct filter_pred *pred = NULL; char num_buf[24]; /* Big enough to hold an address */ char *field_name; + bool ustring = false; char q; u64 val; int len; @@ -1192,6 +1264,12 @@ static int parse_pred(const char *str, void *data, return -EINVAL; } + /* See if the field is a user space string */ + if ((len = str_has_prefix(str + i, ".ustring"))) { + ustring = true; + i += len; + } + while (isspace(str[i])) i++; @@ -1320,8 +1398,20 @@ static int parse_pred(const char *str, void *data, } else if (field->filter_type == FILTER_DYN_STRING) pred->fn = filter_pred_strloc; - else - pred->fn = filter_pred_pchar; + else { + + if (!ustring_per_cpu) { + /* Once allocated, keep it around for good */ + ustring_per_cpu = alloc_percpu(struct ustring_buffer); + if (!ustring_per_cpu) + goto err_mem; + } + + if (ustring) + pred->fn = filter_pred_pchar_user; + else + pred->fn = filter_pred_pchar; + } /* go past the last quote */ i++; @@ -1387,6 +1477,9 @@ static int parse_pred(const char *str, void *data, err_free: kfree(pred); return -EINVAL; +err_mem: + kfree(pred); + return -ENOMEM; } enum { diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 1b7f90e00eb052992ffe95ea9e73eabd85df4ddd..a2be2564ec0b92dfd43d9b2c2fc9542f38c8366f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -66,7 +66,10 @@ C(EMPTY_SORT_FIELD, "Empty sort field"), \ C(TOO_MANY_SORT_FIELDS, "Too many sort fields (Max = 2)"), \ C(INVALID_SORT_FIELD, "Sort field must be a key or a val"), \ - C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), + C(INVALID_STR_OPERAND, "String type can not be an operand in expression"), \ + C(EXPECT_NUMBER, "Expecting numeric literal"), \ + C(UNARY_MINUS_SUBEXPR, "Unary minus not supported in sub-expressions"), \ + C(DIVISION_BY_ZERO, "Division by zero"), #undef C #define C(a, b) HIST_ERR_##a @@ -88,12 +91,16 @@ typedef u64 (*hist_field_fn_t) (struct hist_field *field, #define HIST_FIELD_OPERANDS_MAX 2 #define HIST_FIELDS_MAX (TRACING_MAP_FIELDS_MAX + TRACING_MAP_VARS_MAX) #define HIST_ACTIONS_MAX 8 +#define HIST_CONST_DIGITS_MAX 21 +#define HIST_DIV_SHIFT 20 /* For optimizing division by constants */ enum field_op_id { FIELD_OP_NONE, FIELD_OP_PLUS, FIELD_OP_MINUS, FIELD_OP_UNARY_MINUS, + FIELD_OP_DIV, + FIELD_OP_MULT, }; /* @@ -150,6 +157,11 @@ struct hist_field { bool read_once; unsigned int var_str_idx; + + /* Numeric literals are represented as u64 */ + u64 constant; + /* Used to optimize division by constants */ + u64 div_multiplier; }; static u64 hist_field_none(struct hist_field *field, @@ -160,6 +172,15 @@ static u64 hist_field_none(struct hist_field *field, return 0; } +static u64 hist_field_const(struct hist_field *field, + struct tracing_map_elt *elt, + struct ring_buffer_event *rbe, + void *event) +{ + return field->constant; +} + + static u64 hist_field_counter(struct hist_field *field, struct tracing_map_elt *elt, struct ring_buffer_event *rbe, @@ -240,6 +261,101 @@ static u64 hist_field_minus(struct hist_field *hist_field, return val1 - val2; } +static u64 hist_field_div(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct ring_buffer_event *rbe, + void *event) +{ + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + + u64 val1 = operand1->fn(operand1, elt, rbe, event); + u64 val2 = operand2->fn(operand2, elt, rbe, event); + + /* Return -1 for the undefined case */ + if (!val2) + return -1; + + /* Use shift if the divisor is a power of 2 */ + if (!(val2 & (val2 - 1))) + return val1 >> __ffs64(val2); + + return div64_u64(val1, val2); +} + +static u64 div_by_power_of_two(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct ring_buffer_event *rbe, + void *event) +{ + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + + u64 val1 = operand1->fn(operand1, elt, rbe, event); + + return val1 >> __ffs64(operand2->constant); +} + +static u64 div_by_not_power_of_two(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct ring_buffer_event *rbe, + void *event) +{ + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + + u64 val1 = operand1->fn(operand1, elt, rbe, event); + + return div64_u64(val1, operand2->constant); +} + +static u64 div_by_mult_and_shift(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct ring_buffer_event *rbe, + void *event) +{ + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + + u64 val1 = operand1->fn(operand1, elt, rbe, event); + + /* + * If the divisor is a constant, do a multiplication and shift instead. + * + * Choose Z = some power of 2. If Y <= Z, then: + * X / Y = (X * (Z / Y)) / Z + * + * (Z / Y) is a constant (mult) which is calculated at parse time, so: + * X / Y = (X * mult) / Z + * + * The division by Z can be replaced by a shift since Z is a power of 2: + * X / Y = (X * mult) >> HIST_DIV_SHIFT + * + * As long, as X < Z the results will not be off by more than 1. + */ + if (val1 < (1 << HIST_DIV_SHIFT)) { + u64 mult = operand2->div_multiplier; + + return (val1 * mult + ((1 << HIST_DIV_SHIFT) - 1)) >> HIST_DIV_SHIFT; + } + + return div64_u64(val1, operand2->constant); +} + +static u64 hist_field_mult(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct ring_buffer_event *rbe, + void *event) +{ + struct hist_field *operand1 = hist_field->operands[0]; + struct hist_field *operand2 = hist_field->operands[1]; + + u64 val1 = operand1->fn(operand1, elt, rbe, event); + u64 val2 = operand2->fn(operand2, elt, rbe, event); + + return val1 * val2; +} + static u64 hist_field_unary_minus(struct hist_field *hist_field, struct tracing_map_elt *elt, struct ring_buffer_event *rbe, @@ -307,6 +423,7 @@ enum hist_field_flags { HIST_FIELD_FL_VAR_REF = 1 << 14, HIST_FIELD_FL_CPU = 1 << 15, HIST_FIELD_FL_ALIAS = 1 << 16, + HIST_FIELD_FL_CONST = 1 << 17, }; struct var_defs { @@ -481,6 +598,25 @@ struct snapshot_context { void *key; }; +/* + * Returns the specific division function to use if the divisor + * is constant. This avoids extra branches when the trigger is hit. + */ +static hist_field_fn_t hist_field_get_div_fn(struct hist_field *divisor) +{ + u64 div = divisor->constant; + + if (!(div & (div - 1))) + return div_by_power_of_two; + + /* If the divisor is too large, do a regular division */ + if (div > (1 << HIST_DIV_SHIFT)) + return div_by_not_power_of_two; + + divisor->div_multiplier = div64_u64((u64)(1 << HIST_DIV_SHIFT), div); + return div_by_mult_and_shift; +} + static void track_data_free(struct track_data *track_data) { struct hist_elt_data *elt_data; @@ -1464,6 +1600,12 @@ static void expr_field_str(struct hist_field *field, char *expr) { if (field->flags & HIST_FIELD_FL_VAR_REF) strcat(expr, "$"); + else if (field->flags & HIST_FIELD_FL_CONST) { + char str[HIST_CONST_DIGITS_MAX]; + + snprintf(str, HIST_CONST_DIGITS_MAX, "%llu", field->constant); + strcat(expr, str); + } strcat(expr, hist_field_name(field, 0)); @@ -1519,6 +1661,12 @@ static char *expr_str(struct hist_field *field, unsigned int level) case FIELD_OP_PLUS: strcat(expr, "+"); break; + case FIELD_OP_DIV: + strcat(expr, "/"); + break; + case FIELD_OP_MULT: + strcat(expr, "*"); + break; default: kfree(expr); return NULL; @@ -1529,34 +1677,92 @@ static char *expr_str(struct hist_field *field, unsigned int level) return expr; } -static int contains_operator(char *str) +/* + * If field_op != FIELD_OP_NONE, *sep points to the root operator + * of the expression tree to be evaluated. + */ +static int contains_operator(char *str, char **sep) { enum field_op_id field_op = FIELD_OP_NONE; - char *op; + char *minus_op, *plus_op, *div_op, *mult_op; - op = strpbrk(str, "+-"); - if (!op) - return FIELD_OP_NONE; - switch (*op) { - case '-': + /* + * Report the last occurrence of the operators first, so that the + * expression is evaluated left to right. This is important since + * subtraction and division are not associative. + * + * e.g + * 64/8/4/2 is 1, i.e 64/8/4/2 = ((64/8)/4)/2 + * 14-7-5-2 is 0, i.e 14-7-5-2 = ((14-7)-5)-2 + */ + + /* + * First, find lower precedence addition and subtraction + * since the expression will be evaluated recursively. + */ + minus_op = strrchr(str, '-'); + if (minus_op) { /* - * Unfortunately, the modifier ".sym-offset" - * can confuse things. + * Unary minus is not supported in sub-expressions. If + * present, it is always the next root operator. */ - if (op - str >= 4 && !strncmp(op - 4, ".sym-offset", 11)) - return FIELD_OP_NONE; - - if (*str == '-') + if (minus_op == str) { field_op = FIELD_OP_UNARY_MINUS; - else - field_op = FIELD_OP_MINUS; - break; - case '+': - field_op = FIELD_OP_PLUS; - break; - default: - break; + goto out; + } + + field_op = FIELD_OP_MINUS; + } + + plus_op = strrchr(str, '+'); + if (plus_op || minus_op) { + /* + * For operators of the same precedence use to rightmost as the + * root, so that the expression is evaluated left to right. + */ + if (plus_op > minus_op) + field_op = FIELD_OP_PLUS; + goto out; + } + + /* + * Multiplication and division have higher precedence than addition and + * subtraction. + */ + div_op = strrchr(str, '/'); + if (div_op) + field_op = FIELD_OP_DIV; + + mult_op = strrchr(str, '*'); + /* + * For operators of the same precedence use to rightmost as the + * root, so that the expression is evaluated left to right. + */ + if (mult_op > div_op) + field_op = FIELD_OP_MULT; + +out: + if (sep) { + switch (field_op) { + case FIELD_OP_UNARY_MINUS: + case FIELD_OP_MINUS: + *sep = minus_op; + break; + case FIELD_OP_PLUS: + *sep = plus_op; + break; + case FIELD_OP_DIV: + *sep = div_op; + break; + case FIELD_OP_MULT: + *sep = mult_op; + break; + case FIELD_OP_NONE: + default: + *sep = NULL; + break; + } } return field_op; @@ -1637,6 +1843,15 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, goto out; } + if (flags & HIST_FIELD_FL_CONST) { + hist_field->fn = hist_field_const; + hist_field->size = sizeof(u64); + hist_field->type = kstrdup("u64", GFP_KERNEL); + if (!hist_field->type) + goto free; + goto out; + } + if (flags & HIST_FIELD_FL_STACKTRACE) { hist_field->fn = hist_field_none; goto out; @@ -1684,9 +1899,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, if (!hist_field->type) goto free; - if (field->filter_type == FILTER_STATIC_STRING) + if (field->filter_type == FILTER_STATIC_STRING) { hist_field->fn = hist_field_string; - else if (field->filter_type == FILTER_DYN_STRING) + hist_field->size = field->size; + } else if (field->filter_type == FILTER_DYN_STRING) hist_field->fn = hist_field_dynstring; else hist_field->fn = hist_field_pstring; @@ -1876,7 +2092,7 @@ static char *field_name_from_var(struct hist_trigger_data *hist_data, if (strcmp(var_name, name) == 0) { field = hist_data->attrs->var_defs.expr[i]; - if (contains_operator(field) || is_var_ref(field)) + if (contains_operator(field, NULL) || is_var_ref(field)) continue; return field; } @@ -1953,7 +2169,11 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, *flags |= HIST_FIELD_FL_HEX; else if (strcmp(modifier, "sym") == 0) *flags |= HIST_FIELD_FL_SYM; - else if (strcmp(modifier, "sym-offset") == 0) + /* + * 'sym-offset' occurrences in the trigger string are modified + * to 'symXoffset' to simplify arithmetic expression parsing. + */ + else if (strcmp(modifier, "symXoffset") == 0) *flags |= HIST_FIELD_FL_SYM_OFFSET; else if ((strcmp(modifier, "execname") == 0) && (strcmp(field_name, "common_pid") == 0)) @@ -1984,9 +2204,9 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, /* * For backward compatibility, if field_name * was "cpu", then we treat this the same as - * common_cpu. + * common_cpu. This also works for "CPU". */ - if (strcmp(field_name, "cpu") == 0) { + if (field && field->filter_type == FILTER_CPU) { *flags |= HIST_FIELD_FL_CPU; } else { hist_err(tr, HIST_ERR_FIELD_NOT_FOUND, @@ -2026,6 +2246,29 @@ static struct hist_field *create_alias(struct hist_trigger_data *hist_data, return alias; } +static struct hist_field *parse_const(struct hist_trigger_data *hist_data, + char *str, char *var_name, + unsigned long *flags) +{ + struct trace_array *tr = hist_data->event_file->tr; + struct hist_field *field = NULL; + u64 constant; + + if (kstrtoull(str, 0, &constant)) { + hist_err(tr, HIST_ERR_EXPECT_NUMBER, errpos(str)); + return NULL; + } + + *flags |= HIST_FIELD_FL_CONST; + field = create_hist_field(hist_data, NULL, *flags, var_name); + if (!field) + return NULL; + + field->constant = constant; + + return field; +} + static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long *flags, char *var_name) @@ -2035,6 +2278,15 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, struct hist_field *hist_field = NULL; int ret = 0; + if (isdigit(str[0])) { + hist_field = parse_const(hist_data, str, var_name, flags); + if (!hist_field) { + ret = -EINVAL; + goto out; + } + return hist_field; + } + s = strchr(str, '.'); if (s) { s = strchr(++s, '.'); @@ -2090,21 +2342,24 @@ static struct hist_field *parse_atom(struct hist_trigger_data *hist_data, static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long flags, - char *var_name, unsigned int level); + char *var_name, unsigned int *n_subexprs); static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long flags, - char *var_name, unsigned int level) + char *var_name, unsigned int *n_subexprs) { struct hist_field *operand1, *expr = NULL; unsigned long operand_flags; int ret = 0; char *s; + /* Unary minus operator, increment n_subexprs */ + ++*n_subexprs; + /* we support only -(xxx) i.e. explicit parens required */ - if (level > 3) { + if (*n_subexprs > 3) { hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); ret = -EINVAL; goto free; @@ -2121,8 +2376,16 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, } s = strrchr(str, ')'); - if (s) + if (s) { + /* unary minus not supported in sub-expressions */ + if (*(s+1) != '\0') { + hist_err(file->tr, HIST_ERR_UNARY_MINUS_SUBEXPR, + errpos(str)); + ret = -EINVAL; + goto free; + } *s = '\0'; + } else { ret = -EINVAL; /* no closing ')' */ goto free; @@ -2136,7 +2399,7 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, } operand_flags = 0; - operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); + operand1 = parse_expr(hist_data, file, str, operand_flags, NULL, n_subexprs); if (IS_ERR(operand1)) { ret = PTR_ERR(operand1); goto free; @@ -2153,6 +2416,8 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); expr->fn = hist_field_unary_minus; expr->operands[0] = operand1; + expr->size = operand1->size; + expr->is_signed = operand1->is_signed; expr->operator = FIELD_OP_UNARY_MINUS; expr->name = expr_str(expr, 0); expr->type = kstrdup(operand1->type, GFP_KERNEL); @@ -2167,9 +2432,15 @@ static struct hist_field *parse_unary(struct hist_trigger_data *hist_data, return ERR_PTR(ret); } +/* + * If the operands are var refs, return pointers the + * variable(s) referenced in var1 and var2, else NULL. + */ static int check_expr_operands(struct trace_array *tr, struct hist_field *operand1, - struct hist_field *operand2) + struct hist_field *operand2, + struct hist_field **var1, + struct hist_field **var2) { unsigned long operand1_flags = operand1->flags; unsigned long operand2_flags = operand2->flags; @@ -2182,6 +2453,7 @@ static int check_expr_operands(struct trace_array *tr, if (!var) return -EINVAL; operand1_flags = var->flags; + *var1 = var; } if ((operand2_flags & HIST_FIELD_FL_VAR_REF) || @@ -2192,6 +2464,7 @@ static int check_expr_operands(struct trace_array *tr, if (!var) return -EINVAL; operand2_flags = var->flags; + *var2 = var; } if ((operand1_flags & HIST_FIELD_FL_TIMESTAMP_USECS) != @@ -2206,74 +2479,102 @@ static int check_expr_operands(struct trace_array *tr, static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, struct trace_event_file *file, char *str, unsigned long flags, - char *var_name, unsigned int level) + char *var_name, unsigned int *n_subexprs) { struct hist_field *operand1 = NULL, *operand2 = NULL, *expr = NULL; - unsigned long operand_flags; + struct hist_field *var1 = NULL, *var2 = NULL; + unsigned long operand_flags, operand2_flags; int field_op, ret = -EINVAL; char *sep, *operand1_str; + hist_field_fn_t op_fn; + bool combine_consts; - if (level > 3) { + if (*n_subexprs > 3) { hist_err(file->tr, HIST_ERR_TOO_MANY_SUBEXPR, errpos(str)); return ERR_PTR(-EINVAL); } - field_op = contains_operator(str); + field_op = contains_operator(str, &sep); if (field_op == FIELD_OP_NONE) return parse_atom(hist_data, file, str, &flags, var_name); if (field_op == FIELD_OP_UNARY_MINUS) - return parse_unary(hist_data, file, str, flags, var_name, ++level); + return parse_unary(hist_data, file, str, flags, var_name, n_subexprs); - switch (field_op) { - case FIELD_OP_MINUS: - sep = "-"; - break; - case FIELD_OP_PLUS: - sep = "+"; - break; - default: - goto free; - } + /* Binary operator found, increment n_subexprs */ + ++*n_subexprs; - operand1_str = strsep(&str, sep); - if (!operand1_str || !str) - goto free; + /* Split the expression string at the root operator */ + if (!sep) + return ERR_PTR(-EINVAL); + + *sep = '\0'; + operand1_str = str; + str = sep+1; + + /* Binary operator requires both operands */ + if (*operand1_str == '\0' || *str == '\0') + return ERR_PTR(-EINVAL); operand_flags = 0; - operand1 = parse_atom(hist_data, file, operand1_str, - &operand_flags, NULL); - if (IS_ERR(operand1)) { - ret = PTR_ERR(operand1); - operand1 = NULL; - goto free; - } + + /* LHS of string is an expression e.g. a+b in a+b+c */ + operand1 = parse_expr(hist_data, file, operand1_str, operand_flags, NULL, n_subexprs); + if (IS_ERR(operand1)) + return ERR_CAST(operand1); + if (operand1->flags & HIST_FIELD_FL_STRING) { hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(operand1_str)); ret = -EINVAL; - goto free; + goto free_op1; } - /* rest of string could be another expression e.g. b+c in a+b+c */ + /* RHS of string is another expression e.g. c in a+b+c */ operand_flags = 0; - operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, ++level); + operand2 = parse_expr(hist_data, file, str, operand_flags, NULL, n_subexprs); if (IS_ERR(operand2)) { ret = PTR_ERR(operand2); - operand2 = NULL; - goto free; + goto free_op1; } if (operand2->flags & HIST_FIELD_FL_STRING) { hist_err(file->tr, HIST_ERR_INVALID_STR_OPERAND, errpos(str)); ret = -EINVAL; - goto free; + goto free_operands; } - ret = check_expr_operands(file->tr, operand1, operand2); + switch (field_op) { + case FIELD_OP_MINUS: + op_fn = hist_field_minus; + break; + case FIELD_OP_PLUS: + op_fn = hist_field_plus; + break; + case FIELD_OP_DIV: + op_fn = hist_field_div; + break; + case FIELD_OP_MULT: + op_fn = hist_field_mult; + break; + default: + ret = -EINVAL; + goto free_operands; + } + + ret = check_expr_operands(file->tr, operand1, operand2, &var1, &var2); if (ret) - goto free; + goto free_operands; - flags |= HIST_FIELD_FL_EXPR; + operand_flags = var1 ? var1->flags : operand1->flags; + operand2_flags = var2 ? var2->flags : operand2->flags; + + /* + * If both operands are constant, the expression can be + * collapsed to a single constant. + */ + combine_consts = operand_flags & operand2_flags & HIST_FIELD_FL_CONST; + + flags |= combine_consts ? HIST_FIELD_FL_CONST : HIST_FIELD_FL_EXPR; flags |= operand1->flags & (HIST_FIELD_FL_TIMESTAMP | HIST_FIELD_FL_TIMESTAMP_USECS); @@ -2281,44 +2582,80 @@ static struct hist_field *parse_expr(struct hist_trigger_data *hist_data, expr = create_hist_field(hist_data, NULL, flags, var_name); if (!expr) { ret = -ENOMEM; - goto free; + goto free_operands; } operand1->read_once = true; operand2->read_once = true; + /* The operands are now owned and free'd by 'expr' */ expr->operands[0] = operand1; expr->operands[1] = operand2; - /* The operand sizes should be the same, so just pick one */ - expr->size = operand1->size; + if (field_op == FIELD_OP_DIV && + operand2_flags & HIST_FIELD_FL_CONST) { + u64 divisor = var2 ? var2->constant : operand2->constant; - expr->operator = field_op; - expr->name = expr_str(expr, 0); - expr->type = kstrdup(operand1->type, GFP_KERNEL); - if (!expr->type) { - ret = -ENOMEM; - goto free; + if (!divisor) { + hist_err(file->tr, HIST_ERR_DIVISION_BY_ZERO, errpos(str)); + ret = -EDOM; + goto free_expr; + } + + /* + * Copy the divisor here so we don't have to look it up + * later if this is a var ref + */ + operand2->constant = divisor; + op_fn = hist_field_get_div_fn(operand2); } - switch (field_op) { - case FIELD_OP_MINUS: - expr->fn = hist_field_minus; - break; - case FIELD_OP_PLUS: - expr->fn = hist_field_plus; - break; - default: - ret = -EINVAL; - goto free; + if (combine_consts) { + if (var1) + expr->operands[0] = var1; + if (var2) + expr->operands[1] = var2; + + expr->constant = op_fn(expr, NULL, NULL, NULL); + + expr->operands[0] = NULL; + expr->operands[1] = NULL; + + /* + * var refs won't be destroyed immediately + * See: destroy_hist_field() + */ + destroy_hist_field(operand2, 0); + destroy_hist_field(operand1, 0); + + expr->name = expr_str(expr, 0); + } else { + expr->fn = op_fn; + + /* The operand sizes should be the same, so just pick one */ + expr->size = operand1->size; + expr->is_signed = operand1->is_signed; + + expr->operator = field_op; + expr->type = kstrdup_const(operand1->type, GFP_KERNEL); + if (!expr->type) { + ret = -ENOMEM; + goto free_expr; + } + + expr->name = expr_str(expr, 0); } return expr; - free: - destroy_hist_field(operand1, 0); + +free_operands: destroy_hist_field(operand2, 0); - destroy_hist_field(expr, 0); +free_op1: + destroy_hist_field(operand1, 0); + return ERR_PTR(ret); +free_expr: + destroy_hist_field(expr, 0); return ERR_PTR(ret); } @@ -2623,8 +2960,10 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, if (val->flags & HIST_FIELD_FL_STRING) { char *str = elt_data->field_var_str[j++]; char *val_str = (char *)(uintptr_t)var_val; + unsigned int size; - strscpy(str, val_str, STR_VAR_LEN_MAX); + size = min(val->size, STR_VAR_LEN_MAX); + strscpy(str, val_str, size); var_val = (u64)(uintptr_t)str; } tracing_map_set_var(elt, var_idx, var_val); @@ -3341,7 +3680,7 @@ static int check_synth_field(struct synth_event *event, if (strcmp(field->type, hist_field->type) != 0) { if (field->size != hist_field->size || - field->is_signed != hist_field->is_signed) + (!field->is_string && field->is_signed != hist_field->is_signed)) return -EINVAL; } @@ -3503,6 +3842,7 @@ static int trace_action_create(struct hist_trigger_data *hist_data, var_ref_idx = find_var_ref_idx(hist_data, var_ref); if (WARN_ON(var_ref_idx < 0)) { + kfree(p); ret = var_ref_idx; goto err; } @@ -3676,9 +4016,9 @@ static int __create_val_field(struct hist_trigger_data *hist_data, unsigned long flags) { struct hist_field *hist_field; - int ret = 0; + int ret = 0, n_subexprs = 0; - hist_field = parse_expr(hist_data, file, field_str, flags, var_name, 0); + hist_field = parse_expr(hist_data, file, field_str, flags, var_name, &n_subexprs); if (IS_ERR(hist_field)) { ret = PTR_ERR(hist_field); goto out; @@ -3781,7 +4121,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, struct hist_field *hist_field = NULL; unsigned long flags = 0; unsigned int key_size; - int ret = 0; + int ret = 0, n_subexprs = 0; if (WARN_ON(key_idx >= HIST_FIELDS_MAX)) return -EINVAL; @@ -3794,7 +4134,7 @@ static int create_key_field(struct hist_trigger_data *hist_data, hist_field = create_hist_field(hist_data, NULL, flags, NULL); } else { hist_field = parse_expr(hist_data, file, field_str, flags, - NULL, 0); + NULL, &n_subexprs); if (IS_ERR(hist_field)) { ret = PTR_ERR(hist_field); goto out; @@ -4358,7 +4698,7 @@ static int create_tracing_map_fields(struct hist_trigger_data *hist_data) if (hist_field->flags & HIST_FIELD_FL_STACKTRACE) cmp_fn = tracing_map_cmp_none; - else if (!field) + else if (!field || hist_field->flags & HIST_FIELD_FL_CPU) cmp_fn = tracing_map_cmp_num(hist_field->size, hist_field->is_signed); else if (is_string_field(field)) @@ -4464,6 +4804,7 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, if (hist_field->flags & HIST_FIELD_FL_STRING) { unsigned int str_start, var_str_idx, idx; char *str, *val_str; + unsigned int size; str_start = hist_data->n_field_var_str + hist_data->n_save_var_str; @@ -4472,7 +4813,9 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, str = elt_data->field_var_str[idx]; val_str = (char *)(uintptr_t)hist_val; - strscpy(str, val_str, STR_VAR_LEN_MAX); + + size = min(hist_field->size, STR_VAR_LEN_MAX); + strscpy(str, val_str, size); hist_val = (u64)(uintptr_t)str; } @@ -4829,6 +5172,8 @@ static void hist_field_debug_show_flags(struct seq_file *m, if (flags & HIST_FIELD_FL_ALIAS) seq_puts(m, " HIST_FIELD_FL_ALIAS\n"); + else if (flags & HIST_FIELD_FL_CONST) + seq_puts(m, " HIST_FIELD_FL_CONST\n"); } static int hist_field_debug_show(struct seq_file *m, @@ -4850,6 +5195,9 @@ static int hist_field_debug_show(struct seq_file *m, field->var.idx); } + if (field->flags & HIST_FIELD_FL_CONST) + seq_printf(m, " constant: %llu\n", field->constant); + if (field->flags & HIST_FIELD_FL_ALIAS) seq_printf(m, " var_ref_idx (into hist_data->var_refs[]): %u\n", field->var_ref_idx); @@ -5092,6 +5440,8 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) if (hist_field->flags & HIST_FIELD_FL_CPU) seq_puts(m, "common_cpu"); + else if (hist_field->flags & HIST_FIELD_FL_CONST) + seq_printf(m, "%llu", hist_field->constant); else if (field_name) { if (hist_field->flags & HIST_FIELD_FL_VAR_REF || hist_field->flags & HIST_FIELD_FL_ALIAS) @@ -5671,7 +6021,7 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, struct synth_event *se; const char *se_name; bool remove = false; - char *trigger, *p; + char *trigger, *p, *start; int ret = 0; lockdep_assert_held(&event_mutex); @@ -5719,6 +6069,16 @@ static int event_hist_trigger_func(struct event_command *cmd_ops, trigger = strstrip(trigger); } + /* + * To simplify arithmetic expression parsing, replace occurrences of + * '.sym-offset' modifier with '.symXoffset' + */ + start = strstr(trigger, ".sym-offset"); + while (start) { + *(start + 4) = 'X'; + start = strstr(start + 11, ".sym-offset"); + } + attrs = parse_hist_trigger_attrs(file->tr, trigger); if (IS_ERR(attrs)) return PTR_ERR(attrs); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index f725802160c0bc05fab47c29f72234219b6612cc..d0309de2f84fea5bfce5cbe4a8321be92c547057 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -940,6 +940,16 @@ static void traceon_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { + struct trace_event_file *file = data->private_data; + + if (file) { + if (tracer_tracing_is_on(file->tr)) + return; + + tracer_tracing_on(file->tr); + return; + } + if (tracing_is_on()) return; @@ -950,8 +960,15 @@ static void traceon_count_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { - if (tracing_is_on()) - return; + struct trace_event_file *file = data->private_data; + + if (file) { + if (tracer_tracing_is_on(file->tr)) + return; + } else { + if (tracing_is_on()) + return; + } if (!data->count) return; @@ -959,13 +976,26 @@ traceon_count_trigger(struct event_trigger_data *data, void *rec, if (data->count != -1) (data->count)--; - tracing_on(); + if (file) + tracer_tracing_on(file->tr); + else + tracing_on(); } static void traceoff_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { + struct trace_event_file *file = data->private_data; + + if (file) { + if (!tracer_tracing_is_on(file->tr)) + return; + + tracer_tracing_off(file->tr); + return; + } + if (!tracing_is_on()) return; @@ -976,8 +1006,15 @@ static void traceoff_count_trigger(struct event_trigger_data *data, void *rec, struct ring_buffer_event *event) { - if (!tracing_is_on()) - return; + struct trace_event_file *file = data->private_data; + + if (file) { + if (!tracer_tracing_is_on(file->tr)) + return; + } else { + if (!tracing_is_on()) + return; + } if (!data->count) return; @@ -985,7 +1022,10 @@ traceoff_count_trigger(struct event_trigger_data *data, void *rec, if (data->count != -1) (data->count)--; - tracing_off(); + if (file) + tracer_tracing_off(file->tr); + else + tracing_off(); } static int diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 2c2126e1871d4d19e71a02408314b0badc42d013..93e20ed642e535e1bf9426d4413d8d7a9ff62e55 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -144,7 +144,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip, pc = preempt_count(); preempt_disable_notrace(); - bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX); + bit = trace_test_and_set_recursion(TRACE_FTRACE_START); if (bit < 0) goto out; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 552dbc9d522603a07fea2763af0b798a02406a8c..41dd17390c732bdfda00c1c39d04b4ae17d68efc 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -31,7 +31,7 @@ static int __init set_kprobe_boot_events(char *str) strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE); disable_tracing_selftest("running kprobe events"); - return 0; + return 1; } __setup("kprobe_event=", set_kprobe_boot_events); @@ -1183,15 +1183,18 @@ static int probes_profile_seq_show(struct seq_file *m, void *v) { struct dyn_event *ev = v; struct trace_kprobe *tk; + unsigned long nmissed; if (!is_trace_kprobe(ev)) return 0; tk = to_trace_kprobe(ev); + nmissed = trace_kprobe_is_return(tk) ? + tk->rp.kp.nmissed + tk->rp.nmissed : tk->rp.kp.nmissed; seq_printf(m, " %-44s %15lu %15lu\n", trace_probe_name(&tk->tp), trace_kprobe_nhit(tk), - tk->rp.kp.nmissed); + nmissed); return 0; } diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 0dd6e286e5196236e1f229f3a6c98c3d2b3b899e..9900d4e3808cc1d47370124916627cc51685064c 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -1312,6 +1312,7 @@ static int uprobe_perf_open(struct trace_event_call *call, return 0; list_for_each_entry(pos, trace_probe_probe_list(tp), list) { + tu = container_of(pos, struct trace_uprobe, tp); err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); if (err) { uprobe_perf_close(call, event); diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index 4b50fc0cb12c73db44e8d5829e5fe344f81364e5..51a9d1185033b981d4227a765ed00c31d9cda4ae 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "tracing_map.h" #include "trace.h" @@ -307,6 +308,7 @@ static void tracing_map_array_free(struct tracing_map_array *a) for (i = 0; i < a->n_pages; i++) { if (!a->pages[i]) break; + kmemleak_free(a->pages[i]); free_page((unsigned long)a->pages[i]); } @@ -342,6 +344,7 @@ static struct tracing_map_array *tracing_map_array_alloc(unsigned int n_elts, a->pages[i] = (void *)get_zeroed_page(GFP_KERNEL); if (!a->pages[i]) goto free; + kmemleak_alloc(a->pages[i], PAGE_SIZE, 1, GFP_KERNEL); } out: return a; @@ -834,29 +837,35 @@ int tracing_map_init(struct tracing_map *map) return err; } -static int cmp_entries_dup(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_dup(const void *A, const void *B) { + const struct tracing_map_sort_entry *a, *b; int ret = 0; - if (memcmp((*a)->key, (*b)->key, (*a)->elt->map->key_size)) + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + if (memcmp(a->key, b->key, a->elt->map->key_size)) ret = 1; return ret; } -static int cmp_entries_sum(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_sum(const void *A, const void *B) { const struct tracing_map_elt *elt_a, *elt_b; + const struct tracing_map_sort_entry *a, *b; struct tracing_map_sort_key *sort_key; struct tracing_map_field *field; tracing_map_cmp_fn_t cmp_fn; void *val_a, *val_b; int ret = 0; - elt_a = (*a)->elt; - elt_b = (*b)->elt; + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + elt_a = a->elt; + elt_b = b->elt; sort_key = &elt_a->map->sort_key; @@ -873,18 +882,21 @@ static int cmp_entries_sum(const struct tracing_map_sort_entry **a, return ret; } -static int cmp_entries_key(const struct tracing_map_sort_entry **a, - const struct tracing_map_sort_entry **b) +static int cmp_entries_key(const void *A, const void *B) { const struct tracing_map_elt *elt_a, *elt_b; + const struct tracing_map_sort_entry *a, *b; struct tracing_map_sort_key *sort_key; struct tracing_map_field *field; tracing_map_cmp_fn_t cmp_fn; void *val_a, *val_b; int ret = 0; - elt_a = (*a)->elt; - elt_b = (*b)->elt; + a = *(const struct tracing_map_sort_entry **)A; + b = *(const struct tracing_map_sort_entry **)B; + + elt_a = a->elt; + elt_b = b->elt; sort_key = &elt_a->map->sort_key; @@ -989,10 +1001,8 @@ static void sort_secondary(struct tracing_map *map, struct tracing_map_sort_key *primary_key, struct tracing_map_sort_key *secondary_key) { - int (*primary_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); - int (*secondary_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); + int (*primary_fn)(const void *, const void *); + int (*secondary_fn)(const void *, const void *); unsigned i, start = 0, n_sub = 1; if (is_key(map, primary_key->field_idx)) @@ -1061,8 +1071,7 @@ int tracing_map_sort_entries(struct tracing_map *map, unsigned int n_sort_keys, struct tracing_map_sort_entry ***sort_entries) { - int (*cmp_entries_fn)(const struct tracing_map_sort_entry **, - const struct tracing_map_sort_entry **); + int (*cmp_entries_fn)(const void *, const void *); struct tracing_map_sort_entry *sort_entry, **entries; int i, n_entries, ret; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 257ffb993ea2358bd81412f305f5e28f6982c060..fd2f7a052fdd94d6871e13420a73d27287e22c2a 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -38,11 +38,10 @@ void bacct_add_tsk(struct user_namespace *user_ns, stats->ac_btime = clamp_t(time64_t, btime, 0, U32_MAX); stats->ac_btime64 = btime; - if (thread_group_leader(tsk)) { + if (tsk->flags & PF_EXITING) stats->ac_exitcode = tsk->exit_code; - if (tsk->flags & PF_FORKNOEXEC) - stats->ac_flag |= AFORK; - } + if (thread_group_leader(tsk) && (tsk->flags & PF_FORKNOEXEC)) + stats->ac_flag |= AFORK; if (tsk->flags & PF_SUPERPRIV) stats->ac_flag |= ASU; if (tsk->flags & PF_DUMPCORE) diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index 0ef8f65bd2d71188bbe314b11eda366fe87ff889..e3f144d9602617c5b6a77d35799dc1c0d4e1988e 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -54,6 +54,7 @@ static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe, bit += page->index; set_bit(bit, wqueue->notes_bitmap); + generic_pipe_buf_release(pipe, buf); } // No try_steal function => no stealing @@ -112,7 +113,7 @@ static bool post_one_notification(struct watch_queue *wqueue, buf->offset = offset; buf->len = len; buf->flags = PIPE_BUF_FLAG_WHOLE; - pipe->head = head + 1; + smp_store_release(&pipe->head, head + 1); /* vs pipe_read() */ if (!test_and_clear_bit(note, wqueue->notes_bitmap)) { spin_unlock_irq(&pipe->rd_wait.lock); @@ -243,7 +244,8 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes) goto error; } - ret = pipe_resize_ring(pipe, nr_notes); + nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE; + ret = pipe_resize_ring(pipe, roundup_pow_of_two(nr_notes)); if (ret < 0) goto error; @@ -268,7 +270,7 @@ long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes) wqueue->notes = pages; wqueue->notes_bitmap = bitmap; wqueue->nr_pages = nr_pages; - wqueue->nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE; + wqueue->nr_notes = nr_notes; return 0; error_p: @@ -320,7 +322,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe, tf[i].info_mask & WATCH_INFO_LENGTH) goto err_filter; /* Ignore any unknown types */ - if (tf[i].type >= sizeof(wfilter->type_filter) * 8) + if (tf[i].type >= WATCH_TYPE__NR) continue; nr_filter++; } @@ -336,7 +338,7 @@ long watch_queue_set_filter(struct pipe_inode_info *pipe, q = wfilter->filters; for (i = 0; i < filter.nr_filters; i++) { - if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG) + if (tf[i].type >= WATCH_TYPE__NR) continue; q->type = tf[i].type; @@ -371,6 +373,7 @@ static void __put_watch_queue(struct kref *kref) for (i = 0; i < wqueue->nr_pages; i++) __free_page(wqueue->notes[i]); + bitmap_free(wqueue->notes_bitmap); wfilter = rcu_access_pointer(wqueue->filter); if (wfilter) @@ -566,7 +569,7 @@ void watch_queue_clear(struct watch_queue *wqueue) rcu_read_lock(); spin_lock_bh(&wqueue->lock); - /* Prevent new additions and prevent notifications from happening */ + /* Prevent new notifications from being stored. */ wqueue->defunct = true; while (!hlist_empty(&wqueue->watches)) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e29fbf40b2dd982efcfb449f40c7fcdb9e5279b6..9334aef949f5a816ec986a3f54e6783891bec27e 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -858,8 +858,17 @@ void wq_worker_running(struct task_struct *task) if (!worker->sleeping) return; + + /* + * If preempted by unbind_workers() between the WORKER_NOT_RUNNING check + * and the nr_running increment below, we may ruin the nr_running reset + * and leave with an unexpected pool->nr_running == 1 on the newly unbound + * pool. Protect against such race. + */ + preempt_disable(); if (!(worker->flags & WORKER_NOT_RUNNING)) atomic_inc(&worker->pool->nr_running); + preempt_enable(); worker->sleeping = 0; } @@ -1337,7 +1346,7 @@ static void insert_work(struct pool_workqueue *pwq, struct work_struct *work, struct worker_pool *pool = pwq->pool; /* record the work call stack in order to print it in KASAN reports */ - kasan_record_aux_stack(work); + kasan_record_aux_stack_noalloc(work); /* we own @work, set data and link */ set_work_pwq(work, pwq, extra_flags); @@ -5345,9 +5354,6 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) int ret = -EINVAL; cpumask_var_t saved_cpumask; - if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) - return -ENOMEM; - /* * Not excluding isolated cpus on purpose. * If the user wishes to include them, we allow that. @@ -5355,6 +5361,15 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) cpumask_and(cpumask, cpumask, cpu_possible_mask); if (!cpumask_empty(cpumask)) { apply_wqattrs_lock(); + if (cpumask_equal(cpumask, wq_unbound_cpumask)) { + ret = 0; + goto out_unlock; + } + + if (!zalloc_cpumask_var(&saved_cpumask, GFP_KERNEL)) { + ret = -ENOMEM; + goto out_unlock; + } /* save the old wq_unbound_cpumask. */ cpumask_copy(saved_cpumask, wq_unbound_cpumask); @@ -5367,10 +5382,11 @@ int workqueue_set_unbound_cpumask(cpumask_var_t cpumask) if (ret < 0) cpumask_copy(wq_unbound_cpumask, saved_cpumask); + free_cpumask_var(saved_cpumask); +out_unlock: apply_wqattrs_unlock(); } - free_cpumask_var(saved_cpumask); return ret; } diff --git a/lib/Kconfig b/lib/Kconfig index 96ee125ab21fe1a96eda8f0c47655df8ca11050f..0abb7975586cab06664114b5223fafbe1df4eba0 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -45,7 +45,6 @@ config BITREVERSE config HAVE_ARCH_BITREVERSE bool default n - depends on BITREVERSE help This option enables the use of hardware bit-reversal instructions on architectures which support such operations. diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index a5ba06a8635844defd46459df35e9c4024fa24b5..fbfc705e7f7982bd61f35ced600514c4facb5943 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -12,6 +12,13 @@ config HAVE_ARCH_KASAN_HW_TAGS config HAVE_ARCH_KASAN_VMALLOC bool +config ARCH_DISABLE_KASAN_INLINE + bool + help + An architecture might not support inline instrumentation. + When this option is selected, inline and stack instrumentation are + disabled. + config CC_HAS_KASAN_GENERIC def_bool $(cc-option, -fsanitize=kernel-address) @@ -59,6 +66,7 @@ choice config KASAN_GENERIC bool "Generic mode" depends on HAVE_ARCH_KASAN && CC_HAS_KASAN_GENERIC + depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS select SLUB_DEBUG if SLUB select CONSTRUCTORS help @@ -79,6 +87,7 @@ config KASAN_GENERIC config KASAN_SW_TAGS bool "Software tag-based mode" depends on HAVE_ARCH_KASAN_SW_TAGS && CC_HAS_KASAN_SW_TAGS + depends on CC_HAS_WORKING_NOSANITIZE_ADDRESS select SLUB_DEBUG if SLUB select CONSTRUCTORS help @@ -130,6 +139,7 @@ config KASAN_OUTLINE config KASAN_INLINE bool "Inline instrumentation" + depends on !ARCH_DISABLE_KASAN_INLINE help Compiler directly inserts code checking shadow memory before memory accesses. This is faster than outline (in some workloads @@ -141,6 +151,7 @@ endchoice config KASAN_STACK bool "Enable stack instrumentation (unsafe)" if CC_IS_CLANG && !COMPILE_TEST depends on KASAN_GENERIC || KASAN_SW_TAGS + depends on !ARCH_DISABLE_KASAN_INLINE default y if CC_IS_GCC help The LLVM stack address sanitizer has a know problem that @@ -154,6 +165,9 @@ config KASAN_STACK but clang users can still enable it for builds without CONFIG_COMPILE_TEST. On gcc it is assumed to always be safe to use and enabled by default. + If the architecture disables inline instrumentation, stack + instrumentation is also disabled as it adds inline-style + instrumentation that is run unconditionally. config KASAN_S390_4_LEVEL_PAGING bool "KASan: use 4-level paging" @@ -164,26 +178,26 @@ config KASAN_S390_4_LEVEL_PAGING to 3TB of RAM with KASan enabled). This options allows to force 4-level paging instead. -config KASAN_SW_TAGS_IDENTIFY +config KASAN_TAGS_IDENTIFY bool "Enable memory corruption identification" - depends on KASAN_SW_TAGS + depends on KASAN_SW_TAGS || KASAN_HW_TAGS help This option enables best-effort identification of bug type (use-after-free or out-of-bounds) at the cost of increased memory consumption. config KASAN_VMALLOC - bool "Back mappings in vmalloc space with real shadow memory" - depends on KASAN_GENERIC && HAVE_ARCH_KASAN_VMALLOC + bool "Check accesses to vmalloc allocations" + depends on HAVE_ARCH_KASAN_VMALLOC help - By default, the shadow region for vmalloc space is the read-only - zero page. This means that KASAN cannot detect errors involving - vmalloc space. - - Enabling this option will hook in to vmap/vmalloc and back those - mappings with real shadow memory allocated on demand. This allows - for KASAN to detect more sorts of errors (and to support vmapped - stacks), but at the cost of higher memory usage. + This mode makes KASAN check accesses to vmalloc allocations for + validity. + + With software KASAN modes, checking is done for all types of vmalloc + allocations. Enabling this option leads to higher memory usage. + + With hardware tag-based KASAN, only VM_ALLOC mappings are checked. + There is no additional memory usage. config KASAN_KUNIT_TEST tristate "KUnit-compatible tests of KASAN bug detection capabilities" if !KUNIT_ALL_TESTS diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 25d59a95bd6681465d9e57af06f77c3d641b0649..9004ab31aca2f8ad33f75d908317d8c5fea7a4b2 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -20,8 +20,8 @@ * * The worst case for in-place decompression is that the beginning of * the file is compressed extremely well, and the rest of the file is - * uncompressible. Thus, we must look for worst-case expansion when the - * compressor is encoding uncompressible data. + * incompressible. Thus, we must look for worst-case expansion when the + * compressor is encoding incompressible data. * * The structure of the .xz file in case of a compresed kernel is as follows. * Sizes (as bytes) of the fields are in parenthesis. @@ -58,7 +58,7 @@ * uncompressed size of the payload is in practice never less than the * payload size itself. The LZMA2 format would allow uncompressed size * to be less than the payload size, but no sane compressor creates such - * files. LZMA2 supports storing uncompressible data in uncompressed form, + * files. LZMA2 supports storing incompressible data in uncompressed form, * so there's never a need to create payloads whose uncompressed size is * smaller than the compressed size. * @@ -167,7 +167,7 @@ * memeq and memzero are not used much and any remotely sane implementation * is fast enough. memcpy/memmove speed matters in multi-call mode, but * the kernel image is decompressed in single-call mode, in which only - * memcpy speed can matter and only if there is a lot of uncompressible data + * memmove speed can matter and only if there is a lot of uncompressible data * (LZMA2 stores uncompressible chunks in uncompressed form). Thus, the * functions below should just be kept small; it's probably not worth * optimizing for speed. diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 537bfdc8cd095f724c86c7a0798bfd1a45121b13..1b0a349fbcd926b4e49e48728c545395e0ae6bf5 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -407,6 +407,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by return 0; buf->ops = &page_cache_pipe_buf_ops; + buf->flags = 0; get_page(page); buf->page = page; buf->offset = offset; @@ -543,6 +544,7 @@ static size_t push_pipe(struct iov_iter *i, size_t size, break; buf->ops = &default_pipe_buf_ops; + buf->flags = 0; buf->page = page; buf->offset = 0; buf->len = min_t(ssize_t, left, PAGE_SIZE); @@ -1343,7 +1345,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, pages); - if (unlikely(res < 0)) + if (unlikely(res <= 0)) return res; return (res == n ? len : res * PAGE_SIZE) - *start; 0;}),({ @@ -1424,8 +1426,9 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, return -ENOMEM; res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p); - if (unlikely(res < 0)) { + if (unlikely(res <= 0)) { kvfree(p); + *pages = NULL; return res; } *pages = p; diff --git a/lib/mpi/mpi-mod.c b/lib/mpi/mpi-mod.c index 47bc59edd4ff939f961389bda33ede24917f53ec..54fcc01564d9dc6f1a1ae5b58ad820e503f64fbe 100644 --- a/lib/mpi/mpi-mod.c +++ b/lib/mpi/mpi-mod.c @@ -40,6 +40,8 @@ mpi_barrett_t mpi_barrett_init(MPI m, int copy) mpi_normalize(m); ctx = kcalloc(1, sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; if (copy) { ctx->m = mpi_copy(m); diff --git a/lib/siphash.c b/lib/siphash.c index c47bb6ff21499780b125543eccc3d4406787bf5a..025f0cbf6d7a74133414079e35a9d2b9b178b690 100644 --- a/lib/siphash.c +++ b/lib/siphash.c @@ -49,6 +49,7 @@ SIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -80,8 +81,8 @@ u64 __siphash_aligned(const void *data, size_t len, const siphash_key_t *key) POSTAMBLE } EXPORT_SYMBOL(__siphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -113,7 +114,6 @@ u64 __siphash_unaligned(const void *data, size_t len, const siphash_key_t *key) POSTAMBLE } EXPORT_SYMBOL(__siphash_unaligned); -#endif /** * siphash_1u64 - compute 64-bit siphash PRF value of a u64 @@ -250,6 +250,7 @@ EXPORT_SYMBOL(siphash_3u32); HSIPROUND; \ return (v0 ^ v1) ^ (v2 ^ v3); +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u64)); @@ -280,8 +281,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { @@ -313,7 +314,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len, HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); -#endif /** * hsiphash_1u32 - compute 64-bit hsiphash PRF value of a u32 @@ -418,6 +418,7 @@ EXPORT_SYMBOL(hsiphash_4u32); HSIPROUND; \ return v1 ^ v3; +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) { const u8 *end = data + len - (len % sizeof(u32)); @@ -438,8 +439,8 @@ u32 __hsiphash_aligned(const void *data, size_t len, const hsiphash_key_t *key) HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_aligned); +#endif -#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS u32 __hsiphash_unaligned(const void *data, size_t len, const hsiphash_key_t *key) { @@ -461,7 +462,6 @@ u32 __hsiphash_unaligned(const void *data, size_t len, HPOSTAMBLE } EXPORT_SYMBOL(__hsiphash_unaligned); -#endif /** * hsiphash_1u32 - compute 32-bit hsiphash PRF value of a u32 diff --git a/lib/stackdepot.c b/lib/stackdepot.c index df9179f4f441487332eb1fc090771d79044219a7..aa3f4c9faefeeda1abca511b903d9146e79a6823 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -102,8 +102,8 @@ static bool init_stack_slab(void **prealloc) } /* Allocation of a new stack in raw storage */ -static struct stack_record *depot_alloc_stack(unsigned long *entries, int size, - u32 hash, void **prealloc, gfp_t alloc_flags) +static struct stack_record * +depot_alloc_stack(unsigned long *entries, int size, u32 hash, void **prealloc) { struct stack_record *stack; size_t required_size = struct_size(stack, entries, size); @@ -248,17 +248,31 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle, EXPORT_SYMBOL_GPL(stack_depot_fetch); /** - * stack_depot_save - Save a stack trace from an array + * __stack_depot_save - Save a stack trace from an array * * @entries: Pointer to storage array * @nr_entries: Size of the storage array * @alloc_flags: Allocation gfp flags + * @can_alloc: Allocate stack slabs (increased chance of failure if false) + * + * Saves a stack trace from @entries array of size @nr_entries. If @can_alloc is + * %true, is allowed to replenish the stack slab pool in case no space is left + * (allocates using GFP flags of @alloc_flags). If @can_alloc is %false, avoids + * any allocations and will fail if no space is left to store the stack trace. + * + * If the stack trace in @entries is from an interrupt, only the portion up to + * interrupt entry is saved. + * + * Context: Any context, but setting @can_alloc to %false is required if + * alloc_pages() cannot be used from the current context. Currently + * this is the case from contexts where neither %GFP_ATOMIC nor + * %GFP_NOWAIT can be used (NMI, raw_spin_lock). * - * Return: The handle of the stack struct stored in depot + * Return: The handle of the stack struct stored in depot, 0 on failure. */ -depot_stack_handle_t stack_depot_save(unsigned long *entries, - unsigned int nr_entries, - gfp_t alloc_flags) +depot_stack_handle_t __stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + gfp_t alloc_flags, bool can_alloc) { struct stack_record *found = NULL, **bucket; depot_stack_handle_t retval = 0; @@ -267,6 +281,16 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned long flags; u32 hash; + /* + * If this stack trace is from an interrupt, including anything before + * interrupt entry usually leads to unbounded stackdepot growth. + * + * Because use of filter_irq_stacks() is a requirement to ensure + * stackdepot can efficiently deduplicate interrupt stacks, always + * filter_irq_stacks() to simplify all callers' use of stackdepot. + */ + nr_entries = filter_irq_stacks(entries, nr_entries); + if (unlikely(nr_entries == 0) || stack_depot_disable) goto fast_exit; @@ -291,7 +315,7 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, * The smp_load_acquire() here pairs with smp_store_release() to * |next_slab_inited| in depot_alloc_stack() and init_stack_slab(). */ - if (unlikely(!smp_load_acquire(&next_slab_inited))) { + if (unlikely(can_alloc && !smp_load_acquire(&next_slab_inited))) { /* * Zero out zone modifiers, as we don't have specific zone * requirements. Keep the flags related to allocation in atomic @@ -309,9 +333,8 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, found = find_stack(*bucket, entries, nr_entries, hash); if (!found) { - struct stack_record *new = - depot_alloc_stack(entries, nr_entries, - hash, &prealloc, alloc_flags); + struct stack_record *new = depot_alloc_stack(entries, nr_entries, hash, &prealloc); + if (new) { new->next = *bucket; /* @@ -340,6 +363,26 @@ exit: fast_exit: return retval; } +EXPORT_SYMBOL_GPL(__stack_depot_save); + +/** + * stack_depot_save - Save a stack trace from an array + * + * @entries: Pointer to storage array + * @nr_entries: Size of the storage array + * @alloc_flags: Allocation gfp flags + * + * Context: Contexts where allocations via alloc_pages() are allowed. + * See __stack_depot_save() for more details. + * + * Return: The handle of the stack struct stored in depot, 0 on failure. + */ +depot_stack_handle_t stack_depot_save(unsigned long *entries, + unsigned int nr_entries, + gfp_t alloc_flags) +{ + return __stack_depot_save(entries, nr_entries, alloc_flags, true); +} EXPORT_SYMBOL_GPL(stack_depot_save); static inline int in_irqentry_text(unsigned long ptr) diff --git a/lib/test_hmm.c b/lib/test_hmm.c index 80a78877bd939714ab56d9a155b1e490e915548f..a85613068d6019215e2ee4c262a8873589e168b8 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -965,9 +965,33 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp, return 0; } +static int dmirror_fops_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned long addr; + + for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) { + struct page *page; + int ret; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + return -ENOMEM; + + ret = vm_insert_page(vma, addr, page); + if (ret) { + __free_page(page); + return ret; + } + put_page(page); + } + + return 0; +} + static const struct file_operations dmirror_fops = { .open = dmirror_fops_open, .release = dmirror_fops_release, + .mmap = dmirror_fops_mmap, .unlocked_ioctl = dmirror_fops_unlocked_ioctl, .llseek = default_llseek, .owner = THIS_MODULE, diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 87170a68f7e769a654683b2f7b9fad7d6243e3e5..4de52eb5b93b6c911edbb75c7be9c09fdb360a93 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -36,7 +37,7 @@ void *kasan_ptr_result; int kasan_int_result; static struct kunit_resource resource; -static struct kunit_kasan_expectation fail_data; +static struct kunit_kasan_status test_status; static bool multishot; /* @@ -53,58 +54,63 @@ static int kasan_test_init(struct kunit *test) } multishot = kasan_save_enable_multi_shot(); - kasan_set_tagging_report_once(false); + test_status.report_found = false; + test_status.sync_fault = false; + kunit_add_named_resource(test, NULL, NULL, &resource, + "kasan_status", &test_status); return 0; } static void kasan_test_exit(struct kunit *test) { - kasan_set_tagging_report_once(true); kasan_restore_multi_shot(multishot); + KUNIT_EXPECT_FALSE(test, test_status.report_found); } /** * KUNIT_EXPECT_KASAN_FAIL() - check that the executed expression produces a * KASAN report; causes a test failure otherwise. This relies on a KUnit - * resource named "kasan_data". Do not use this name for KUnit resources + * resource named "kasan_status". Do not use this name for KUnit resources * outside of KASAN tests. * - * For hardware tag-based KASAN in sync mode, when a tag fault happens, tag + * For hardware tag-based KASAN, when a synchronous tag fault happens, tag * checking is auto-disabled. When this happens, this test handler reenables * tag checking. As tag checking can be only disabled or enabled per CPU, * this handler disables migration (preemption). * - * Since the compiler doesn't see that the expression can change the fail_data + * Since the compiler doesn't see that the expression can change the test_status * fields, it can reorder or optimize away the accesses to those fields. * Use READ/WRITE_ONCE() for the accesses and compiler barriers around the * expression to prevent that. + * + * In between KUNIT_EXPECT_KASAN_FAIL checks, test_status.report_found is kept + * as false. This allows detecting KASAN reports that happen outside of the + * checks by asserting !test_status.report_found at the start of + * KUNIT_EXPECT_KASAN_FAIL and in kasan_test_exit. */ -#define KUNIT_EXPECT_KASAN_FAIL(test, expression) do { \ - if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ - !kasan_async_mode_enabled()) \ - migrate_disable(); \ - WRITE_ONCE(fail_data.report_expected, true); \ - WRITE_ONCE(fail_data.report_found, false); \ - kunit_add_named_resource(test, \ - NULL, \ - NULL, \ - &resource, \ - "kasan_data", &fail_data); \ - barrier(); \ - expression; \ - barrier(); \ - if (kasan_async_mode_enabled()) \ - kasan_force_async_fault(); \ - barrier(); \ - KUNIT_EXPECT_EQ(test, \ - READ_ONCE(fail_data.report_expected), \ - READ_ONCE(fail_data.report_found)); \ - if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ - !kasan_async_mode_enabled()) { \ - if (READ_ONCE(fail_data.report_found)) \ - kasan_enable_tagging_sync(); \ - migrate_enable(); \ - } \ +#define KUNIT_EXPECT_KASAN_FAIL(test, expression) do { \ + if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ + kasan_sync_fault_possible()) \ + migrate_disable(); \ + KUNIT_EXPECT_FALSE(test, READ_ONCE(test_status.report_found)); \ + barrier(); \ + expression; \ + barrier(); \ + if (kasan_async_fault_possible()) \ + kasan_force_async_fault(); \ + if (!READ_ONCE(test_status.report_found)) { \ + KUNIT_FAIL(test, KUNIT_SUBTEST_INDENT "KASAN failure " \ + "expected in \"" #expression \ + "\", but none occurred"); \ + } \ + if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) && \ + kasan_sync_fault_possible()) { \ + if (READ_ONCE(test_status.report_found) && \ + READ_ONCE(test_status.sync_fault)) \ + kasan_enable_tagging(); \ + migrate_enable(); \ + } \ + WRITE_ONCE(test_status.report_found, false); \ } while (0) #define KASAN_TEST_NEEDS_CONFIG_ON(test, config) do { \ @@ -124,12 +130,28 @@ static void kasan_test_exit(struct kunit *test) static void kmalloc_oob_right(struct kunit *test) { char *ptr; - size_t size = 123; + size_t size = 128 - KASAN_GRANULE_SIZE - 5; ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 'x'); + /* + * An unaligned access past the requested kmalloc size. + * Only generic KASAN can precisely detect these. + */ + if (IS_ENABLED(CONFIG_KASAN_GENERIC)) + KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 'x'); + + /* + * An aligned access into the first out-of-bounds granule that falls + * within the aligned kmalloc object. + */ + KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + 5] = 'y'); + + /* Out-of-bounds access past the aligned kmalloc object. */ + KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = + ptr[size + KASAN_GRANULE_SIZE + 5]); + kfree(ptr); } @@ -153,7 +175,7 @@ static void kmalloc_node_oob_right(struct kunit *test) ptr = kmalloc_node(size, GFP_KERNEL, 0); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0); + KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]); kfree(ptr); } @@ -189,7 +211,7 @@ static void kmalloc_pagealloc_uaf(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); kfree(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = 0); + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]); } static void kmalloc_pagealloc_invalid_free(struct kunit *test) @@ -223,7 +245,7 @@ static void pagealloc_oob_right(struct kunit *test) ptr = page_address(pages); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0); + KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]); free_pages((unsigned long)ptr, order); } @@ -238,7 +260,7 @@ static void pagealloc_uaf(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); free_pages((unsigned long)ptr, order); - KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = 0); + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]); } static void kmalloc_large_oob_right(struct kunit *test) @@ -414,78 +436,114 @@ static void kmalloc_uaf_16(struct kunit *test) kfree(ptr1); } +/* + * Note: in the memset tests below, the written range touches both valid and + * invalid memory. This makes sure that the instrumentation does not only check + * the starting address but the whole range. + */ + static void kmalloc_oob_memset_2(struct kunit *test) { char *ptr; - size_t size = 8; + size_t size = 128 - KASAN_GRANULE_SIZE; ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 7 + OOB_TAG_OFF, 0, 2)); + OPTIMIZER_HIDE_VAR(size); + KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 1, 0, 2)); kfree(ptr); } static void kmalloc_oob_memset_4(struct kunit *test) { char *ptr; - size_t size = 8; + size_t size = 128 - KASAN_GRANULE_SIZE; ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 5 + OOB_TAG_OFF, 0, 4)); + OPTIMIZER_HIDE_VAR(size); + KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 3, 0, 4)); kfree(ptr); } - static void kmalloc_oob_memset_8(struct kunit *test) { char *ptr; - size_t size = 8; + size_t size = 128 - KASAN_GRANULE_SIZE; ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 1 + OOB_TAG_OFF, 0, 8)); + OPTIMIZER_HIDE_VAR(size); + KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 7, 0, 8)); kfree(ptr); } static void kmalloc_oob_memset_16(struct kunit *test) { char *ptr; - size_t size = 16; + size_t size = 128 - KASAN_GRANULE_SIZE; ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 1 + OOB_TAG_OFF, 0, 16)); + OPTIMIZER_HIDE_VAR(size); + KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 15, 0, 16)); kfree(ptr); } static void kmalloc_oob_in_memset(struct kunit *test) { char *ptr; - size_t size = 666; + size_t size = 128 - KASAN_GRANULE_SIZE; ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); - KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size + 5 + OOB_TAG_OFF)); + OPTIMIZER_HIDE_VAR(ptr); + OPTIMIZER_HIDE_VAR(size); + KUNIT_EXPECT_KASAN_FAIL(test, + memset(ptr, 0, size + KASAN_GRANULE_SIZE)); kfree(ptr); } -static void kmalloc_memmove_invalid_size(struct kunit *test) +static void kmalloc_memmove_negative_size(struct kunit *test) { char *ptr; size_t size = 64; - volatile size_t invalid_size = -2; + size_t invalid_size = -2; + + /* + * Hardware tag-based mode doesn't check memmove for negative size. + * As a result, this test introduces a side-effect memory corruption, + * which can result in a crash. + */ + KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_HW_TAGS); ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset((char *)ptr, 0, 64); + OPTIMIZER_HIDE_VAR(ptr); + OPTIMIZER_HIDE_VAR(invalid_size); + KUNIT_EXPECT_KASAN_FAIL(test, + memmove((char *)ptr, (char *)ptr + 4, invalid_size)); + kfree(ptr); +} + +static void kmalloc_memmove_invalid_size(struct kunit *test) +{ + char *ptr; + size_t size = 64; + volatile size_t invalid_size = size; + ptr = kmalloc(size, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + + memset((char *)ptr, 0, 64); + OPTIMIZER_HIDE_VAR(ptr); KUNIT_EXPECT_KASAN_FAIL(test, memmove((char *)ptr, (char *)ptr + 4, invalid_size)); kfree(ptr); @@ -500,7 +558,7 @@ static void kmalloc_uaf(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); kfree(ptr); - KUNIT_EXPECT_KASAN_FAIL(test, *(ptr + 8) = 'x'); + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[8]); } static void kmalloc_uaf_memset(struct kunit *test) @@ -508,6 +566,12 @@ static void kmalloc_uaf_memset(struct kunit *test) char *ptr; size_t size = 33; + /* + * Only generic KASAN uses quarantine, which is required to avoid a + * kernel memory corruption this test causes. + */ + KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); + ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); @@ -539,7 +603,7 @@ again: goto again; } - KUNIT_EXPECT_KASAN_FAIL(test, ptr1[40] = 'x'); + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr1)[40]); KUNIT_EXPECT_PTR_NE(test, ptr1, ptr2); kfree(ptr2); @@ -649,7 +713,7 @@ static void kmem_cache_bulk(struct kunit *test) static char global_array[10]; -static void kasan_global_oob(struct kunit *test) +static void kasan_global_oob_right(struct kunit *test) { /* * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS @@ -672,6 +736,20 @@ static void kasan_global_oob(struct kunit *test) KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); } +static void kasan_global_oob_left(struct kunit *test) +{ + char *volatile array = global_array; + char *p = array - 3; + + /* + * GCC is known to fail this test, skip it. + * See https://bugzilla.kernel.org/show_bug.cgi?id=215051. + */ + KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_CC_IS_CLANG); + KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); + KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); +} + /* Check that ksize() makes the whole object accessible. */ static void ksize_unpoisons_memory(struct kunit *test) { @@ -686,7 +764,7 @@ static void ksize_unpoisons_memory(struct kunit *test) ptr[size] = 'x'; /* This one must. */ - KUNIT_EXPECT_KASAN_FAIL(test, ptr[real_size] = 'y'); + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[real_size]); kfree(ptr); } @@ -705,8 +783,8 @@ static void ksize_uaf(struct kunit *test) kfree(ptr); KUNIT_EXPECT_KASAN_FAIL(test, ksize(ptr)); - KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = *ptr); - KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = *(ptr + size)); + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]); + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size]); } static void kasan_stack_oob(struct kunit *test) @@ -801,6 +879,16 @@ static void kmem_cache_invalid_free(struct kunit *test) kmem_cache_destroy(cache); } +static void kmem_cache_double_destroy(struct kunit *test) +{ + struct kmem_cache *cache; + + cache = kmem_cache_create("test_cache", 200, 0, 0, NULL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache); + kmem_cache_destroy(cache); + KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_destroy(cache)); +} + static void kasan_memchr(struct kunit *test) { char *ptr; @@ -818,6 +906,8 @@ static void kasan_memchr(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); + OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = memchr(ptr, '1', size + 1)); @@ -843,6 +933,8 @@ static void kasan_memcmp(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset(arr, 0, sizeof(arr)); + OPTIMIZER_HIDE_VAR(ptr); + OPTIMIZER_HIDE_VAR(size); KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = memcmp(ptr, arr, size+1)); kfree(ptr); @@ -972,21 +1064,186 @@ static void kmalloc_double_kzfree(struct kunit *test) KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr)); } +static void vmalloc_helpers_tags(struct kunit *test) +{ + void *ptr; + + /* This test is intended for tag-based modes. */ + KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC); + + KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC); + + ptr = vmalloc(PAGE_SIZE); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + + /* Check that the returned pointer is tagged. */ + KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN); + KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL); + + /* Make sure exported vmalloc helpers handle tagged pointers. */ + KUNIT_ASSERT_TRUE(test, is_vmalloc_addr(ptr)); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, vmalloc_to_page(ptr)); + +#if !IS_MODULE(CONFIG_KASAN_KUNIT_TEST) + { + int rv; + + /* Make sure vmalloc'ed memory permissions can be changed. */ + rv = set_memory_ro((unsigned long)ptr, 1); + KUNIT_ASSERT_GE(test, rv, 0); + rv = set_memory_rw((unsigned long)ptr, 1); + KUNIT_ASSERT_GE(test, rv, 0); + } +#endif + + vfree(ptr); +} + static void vmalloc_oob(struct kunit *test) { - void *area; + char *v_ptr, *p_ptr; + struct page *page; + size_t size = PAGE_SIZE / 2 - KASAN_GRANULE_SIZE - 5; KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC); + v_ptr = vmalloc(size); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr); + + OPTIMIZER_HIDE_VAR(v_ptr); + /* - * We have to be careful not to hit the guard page. + * We have to be careful not to hit the guard page in vmalloc tests. * The MMU will catch that and crash us. */ - area = vmalloc(3000); - KUNIT_ASSERT_NOT_ERR_OR_NULL(test, area); - KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)area)[3100]); - vfree(area); + /* Make sure in-bounds accesses are valid. */ + v_ptr[0] = 0; + v_ptr[size - 1] = 0; + + /* + * An unaligned access past the requested vmalloc size. + * Only generic KASAN can precisely detect these. + */ + if (IS_ENABLED(CONFIG_KASAN_GENERIC)) + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)v_ptr)[size]); + + /* An aligned access into the first out-of-bounds granule. */ + KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)v_ptr)[size + 5]); + + /* Check that in-bounds accesses to the physical page are valid. */ + page = vmalloc_to_page(v_ptr); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, page); + p_ptr = page_address(page); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr); + p_ptr[0] = 0; + + vfree(v_ptr); + + /* + * We can't check for use-after-unmap bugs in this nor in the following + * vmalloc tests, as the page might be fully unmapped and accessing it + * will crash the kernel. + */ +} + +static void vmap_tags(struct kunit *test) +{ + char *p_ptr, *v_ptr; + struct page *p_page, *v_page; + + /* + * This test is specifically crafted for the software tag-based mode, + * the only tag-based mode that poisons vmap mappings. + */ + KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); + + KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC); + + p_page = alloc_pages(GFP_KERNEL, 1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_page); + p_ptr = page_address(p_page); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr); + + v_ptr = vmap(&p_page, 1, VM_MAP, PAGE_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr); + + /* + * We can't check for out-of-bounds bugs in this nor in the following + * vmalloc tests, as allocations have page granularity and accessing + * the guard page will crash the kernel. + */ + + KUNIT_EXPECT_GE(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_MIN); + KUNIT_EXPECT_LT(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_KERNEL); + + /* Make sure that in-bounds accesses through both pointers work. */ + *p_ptr = 0; + *v_ptr = 0; + + /* Make sure vmalloc_to_page() correctly recovers the page pointer. */ + v_page = vmalloc_to_page(v_ptr); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_page); + KUNIT_EXPECT_PTR_EQ(test, p_page, v_page); + + vunmap(v_ptr); + free_pages((unsigned long)p_ptr, 1); +} + +static void vm_map_ram_tags(struct kunit *test) +{ + char *p_ptr, *v_ptr; + struct page *page; + + /* + * This test is specifically crafted for the software tag-based mode, + * the only tag-based mode that poisons vm_map_ram mappings. + */ + KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); + + page = alloc_pages(GFP_KERNEL, 1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, page); + p_ptr = page_address(page); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, p_ptr); + + v_ptr = vm_map_ram(&page, 1, -1); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, v_ptr); + + KUNIT_EXPECT_GE(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_MIN); + KUNIT_EXPECT_LT(test, (u8)get_tag(v_ptr), (u8)KASAN_TAG_KERNEL); + + /* Make sure that in-bounds accesses through both pointers work. */ + *p_ptr = 0; + *v_ptr = 0; + + vm_unmap_ram(v_ptr, 1); + free_pages((unsigned long)p_ptr, 1); +} + +static void vmalloc_percpu(struct kunit *test) +{ + char __percpu *ptr; + int cpu; + + /* + * This test is specifically crafted for the software tag-based mode, + * the only tag-based mode that poisons percpu mappings. + */ + KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS); + + ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE); + + for_each_possible_cpu(cpu) { + char *c_ptr = per_cpu_ptr(ptr, cpu); + + KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN); + KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL); + + /* Make sure that in-bounds accesses don't crash the kernel. */ + *c_ptr = 0; + } + + free_percpu(ptr); } /* @@ -1020,6 +1277,18 @@ static void match_all_not_assigned(struct kunit *test) KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL); free_pages((unsigned long)ptr, order); } + + if (!IS_ENABLED(CONFIG_KASAN_VMALLOC)) + return; + + for (i = 0; i < 256; i++) { + size = (get_random_int() % 1024) + 1; + ptr = vmalloc(size); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN); + KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL); + vfree(ptr); + } } /* Check that 0xff works as a match-all pointer tag for tag-based modes. */ @@ -1099,6 +1368,7 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kmalloc_oob_memset_4), KUNIT_CASE(kmalloc_oob_memset_8), KUNIT_CASE(kmalloc_oob_memset_16), + KUNIT_CASE(kmalloc_memmove_negative_size), KUNIT_CASE(kmalloc_memmove_invalid_size), KUNIT_CASE(kmalloc_uaf), KUNIT_CASE(kmalloc_uaf_memset), @@ -1108,7 +1378,8 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kmem_cache_oob), KUNIT_CASE(kmem_cache_accounted), KUNIT_CASE(kmem_cache_bulk), - KUNIT_CASE(kasan_global_oob), + KUNIT_CASE(kasan_global_oob_right), + KUNIT_CASE(kasan_global_oob_left), KUNIT_CASE(kasan_stack_oob), KUNIT_CASE(kasan_alloca_oob_left), KUNIT_CASE(kasan_alloca_oob_right), @@ -1116,13 +1387,18 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(ksize_uaf), KUNIT_CASE(kmem_cache_double_free), KUNIT_CASE(kmem_cache_invalid_free), + KUNIT_CASE(kmem_cache_double_destroy), KUNIT_CASE(kasan_memchr), KUNIT_CASE(kasan_memcmp), KUNIT_CASE(kasan_strings), KUNIT_CASE(kasan_bitops_generic), KUNIT_CASE(kasan_bitops_tags), KUNIT_CASE(kmalloc_double_kzfree), + KUNIT_CASE(vmalloc_helpers_tags), KUNIT_CASE(vmalloc_oob), + KUNIT_CASE(vmap_tags), + KUNIT_CASE(vm_map_ram_tags), + KUNIT_CASE(vmalloc_percpu), KUNIT_CASE(match_all_not_assigned), KUNIT_CASE(match_all_ptr_tag), KUNIT_CASE(match_all_mem_tag), diff --git a/lib/test_kasan_module.c b/lib/test_kasan_module.c index eee017ff8980529c1267347fabab0dd2e662df1f..b112cbc835e902357707c906f5095835d7ca6066 100644 --- a/lib/test_kasan_module.c +++ b/lib/test_kasan_module.c @@ -15,14 +15,12 @@ #include "../mm/kasan/kasan.h" -#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE) - static noinline void __init copy_user_test(void) { char *kmem; char __user *usermem; - size_t size = 10; - int unused; + size_t size = 128 - KASAN_GRANULE_SIZE; + int __maybe_unused unused; kmem = kmalloc(size, GFP_KERNEL); if (!kmem) @@ -37,26 +35,28 @@ static noinline void __init copy_user_test(void) return; } + OPTIMIZER_HIDE_VAR(size); + pr_info("out-of-bounds in copy_from_user()\n"); - unused = copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF); + unused = copy_from_user(kmem, usermem, size + 1); pr_info("out-of-bounds in copy_to_user()\n"); - unused = copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF); + unused = copy_to_user(usermem, kmem, size + 1); pr_info("out-of-bounds in __copy_from_user()\n"); - unused = __copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF); + unused = __copy_from_user(kmem, usermem, size + 1); pr_info("out-of-bounds in __copy_to_user()\n"); - unused = __copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF); + unused = __copy_to_user(usermem, kmem, size + 1); pr_info("out-of-bounds in __copy_from_user_inatomic()\n"); - unused = __copy_from_user_inatomic(kmem, usermem, size + 1 + OOB_TAG_OFF); + unused = __copy_from_user_inatomic(kmem, usermem, size + 1); pr_info("out-of-bounds in __copy_to_user_inatomic()\n"); - unused = __copy_to_user_inatomic(usermem, kmem, size + 1 + OOB_TAG_OFF); + unused = __copy_to_user_inatomic(usermem, kmem, size + 1); pr_info("out-of-bounds in strncpy_from_user()\n"); - unused = strncpy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF); + unused = strncpy_from_user(kmem, usermem, size + 1); vm_munmap((unsigned long)usermem, PAGE_SIZE); kfree(kmem); @@ -73,7 +73,7 @@ static noinline void __init kasan_rcu_reclaim(struct rcu_head *rp) struct kasan_rcu_info, rcu); kfree(fp); - fp->i = 1; + ((volatile struct kasan_rcu_info *)fp)->i; } static noinline void __init kasan_rcu_uaf(void) diff --git a/lib/test_lockup.c b/lib/test_lockup.c index f1a020bcc763ed593b583229b39e42f7028e439a..5359e2d1a2f24cf0584fe1ae498fd0f7167b3952 100644 --- a/lib/test_lockup.c +++ b/lib/test_lockup.c @@ -595,5 +595,6 @@ static int __init test_lockup_init(void) module_init(test_lockup_init); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); MODULE_AUTHOR("Konstantin Khlebnikov "); MODULE_DESCRIPTION("Test module to generate lockups"); diff --git a/lib/test_meminit.c b/lib/test_meminit.c index e4f706a404b3a1f45b3443b4f8de0fbdd15107f8..3ca717f1139774db6b64df470dee4938bba2d7b4 100644 --- a/lib/test_meminit.c +++ b/lib/test_meminit.c @@ -337,6 +337,7 @@ static int __init do_kmem_cache_size_bulk(int size, int *total_failures) if (num) kmem_cache_free_bulk(c, num, objects); } + kmem_cache_destroy(c); *total_failures += fail; return 1; } diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index 5cb50245a878cde27e9f972cb6c89110a0c109ef..adce22ac18d660b1b433e0495dfd1c0448a27b69 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -39,6 +39,19 @@ config XZ_DEC_SPARC default y select XZ_DEC_BCJ +config XZ_DEC_MICROLZMA + bool "MicroLZMA decoder" + default n + help + MicroLZMA is a header format variant where the first byte + of a raw LZMA stream (without the end of stream marker) has + been replaced with a bitwise-negation of the lc/lp/pb + properties byte. MicroLZMA was created to be used in EROFS + but can be used by other things too where wasting minimal + amount of space for headers is important. + + Unless you know that you need this, say N. + endif config XZ_DEC_BCJ diff --git a/lib/xz/xz_dec_lzma2.c b/lib/xz/xz_dec_lzma2.c index 65a1aad8c223b95bf442519c9aa12bfd665dccc3..614ec99f62a9f19326882e22b93d981a8da89016 100644 --- a/lib/xz/xz_dec_lzma2.c +++ b/lib/xz/xz_dec_lzma2.c @@ -248,6 +248,10 @@ struct lzma2_dec { * before the first LZMA chunk. */ bool need_props; + +#ifdef XZ_DEC_MICROLZMA + bool pedantic_microlzma; +#endif }; struct xz_dec_lzma2 { @@ -387,7 +391,14 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, *left -= copy_size; - memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size); + /* + * If doing in-place decompression in single-call mode and the + * uncompressed size of the file is larger than the caller + * thought (i.e. it is invalid input!), the buffers below may + * overlap and cause undefined behavior with memcpy(). + * With valid inputs memcpy() would be fine here. + */ + memmove(dict->buf + dict->pos, b->in + b->in_pos, copy_size); dict->pos += copy_size; if (dict->full < dict->pos) @@ -397,7 +408,11 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, if (dict->pos == dict->end) dict->pos = 0; - memcpy(b->out + b->out_pos, b->in + b->in_pos, + /* + * Like above but for multi-call mode: use memmove() + * to avoid undefined behavior with invalid input. + */ + memmove(b->out + b->out_pos, b->in + b->in_pos, copy_size); } @@ -408,6 +423,12 @@ static void dict_uncompressed(struct dictionary *dict, struct xz_buf *b, } } +#ifdef XZ_DEC_MICROLZMA +# define DICT_FLUSH_SUPPORTS_SKIPPING true +#else +# define DICT_FLUSH_SUPPORTS_SKIPPING false +#endif + /* * Flush pending data from dictionary to b->out. It is assumed that there is * enough space in b->out. This is guaranteed because caller uses dict_limit() @@ -421,8 +442,19 @@ static uint32_t dict_flush(struct dictionary *dict, struct xz_buf *b) if (dict->pos == dict->end) dict->pos = 0; - memcpy(b->out + b->out_pos, dict->buf + dict->start, - copy_size); + /* + * These buffers cannot overlap even if doing in-place + * decompression because in multi-call mode dict->buf + * has been allocated by us in this file; it's not + * provided by the caller like in single-call mode. + * + * With MicroLZMA, b->out can be NULL to skip bytes that + * the caller doesn't need. This cannot be done with XZ + * because it would break BCJ filters. + */ + if (!DICT_FLUSH_SUPPORTS_SKIPPING || b->out != NULL) + memcpy(b->out + b->out_pos, dict->buf + dict->start, + copy_size); } dict->start = dict->pos; @@ -488,7 +520,7 @@ static __always_inline void rc_normalize(struct rc_dec *rc) * functions so that the compiler is supposed to be able to more easily avoid * an extra branch. In this particular version of the LZMA decoder, this * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 - * on x86). Using a non-splitted version results in nicer looking code too. + * on x86). Using a non-split version results in nicer looking code too. * * NOTE: This must return an int. Do not make it return a bool or the speed * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, @@ -774,6 +806,7 @@ static void lzma_reset(struct xz_dec_lzma2 *s) s->lzma.rep1 = 0; s->lzma.rep2 = 0; s->lzma.rep3 = 0; + s->lzma.len = 0; /* * All probabilities are initialized to the same value. This hack @@ -1157,8 +1190,6 @@ XZ_EXTERN enum xz_ret xz_dec_lzma2_reset(struct xz_dec_lzma2 *s, uint8_t props) } } - s->lzma.len = 0; - s->lzma2.sequence = SEQ_CONTROL; s->lzma2.need_dict_reset = true; @@ -1174,3 +1205,140 @@ XZ_EXTERN void xz_dec_lzma2_end(struct xz_dec_lzma2 *s) kfree(s); } + +#ifdef XZ_DEC_MICROLZMA +/* This is a wrapper struct to have a nice struct name in the public API. */ +struct xz_dec_microlzma { + struct xz_dec_lzma2 s; +}; + +enum xz_ret xz_dec_microlzma_run(struct xz_dec_microlzma *s_ptr, + struct xz_buf *b) +{ + struct xz_dec_lzma2 *s = &s_ptr->s; + + /* + * sequence is SEQ_PROPERTIES before the first input byte, + * SEQ_LZMA_PREPARE until a total of five bytes have been read, + * and SEQ_LZMA_RUN for the rest of the input stream. + */ + if (s->lzma2.sequence != SEQ_LZMA_RUN) { + if (s->lzma2.sequence == SEQ_PROPERTIES) { + /* One byte is needed for the props. */ + if (b->in_pos >= b->in_size) + return XZ_OK; + + /* + * Don't increment b->in_pos here. The same byte is + * also passed to rc_read_init() which will ignore it. + */ + if (!lzma_props(s, ~b->in[b->in_pos])) + return XZ_DATA_ERROR; + + s->lzma2.sequence = SEQ_LZMA_PREPARE; + } + + /* + * xz_dec_microlzma_reset() doesn't validate the compressed + * size so we do it here. We have to limit the maximum size + * to avoid integer overflows in lzma2_lzma(). 3 GiB is a nice + * round number and much more than users of this code should + * ever need. + */ + if (s->lzma2.compressed < RC_INIT_BYTES + || s->lzma2.compressed > (3U << 30)) + return XZ_DATA_ERROR; + + if (!rc_read_init(&s->rc, b)) + return XZ_OK; + + s->lzma2.compressed -= RC_INIT_BYTES; + s->lzma2.sequence = SEQ_LZMA_RUN; + + dict_reset(&s->dict, b); + } + + /* This is to allow increasing b->out_size between calls. */ + if (DEC_IS_SINGLE(s->dict.mode)) + s->dict.end = b->out_size - b->out_pos; + + while (true) { + dict_limit(&s->dict, min_t(size_t, b->out_size - b->out_pos, + s->lzma2.uncompressed)); + + if (!lzma2_lzma(s, b)) + return XZ_DATA_ERROR; + + s->lzma2.uncompressed -= dict_flush(&s->dict, b); + + if (s->lzma2.uncompressed == 0) { + if (s->lzma2.pedantic_microlzma) { + if (s->lzma2.compressed > 0 || s->lzma.len > 0 + || !rc_is_finished(&s->rc)) + return XZ_DATA_ERROR; + } + + return XZ_STREAM_END; + } + + if (b->out_pos == b->out_size) + return XZ_OK; + + if (b->in_pos == b->in_size + && s->temp.size < s->lzma2.compressed) + return XZ_OK; + } +} + +struct xz_dec_microlzma *xz_dec_microlzma_alloc(enum xz_mode mode, + uint32_t dict_size) +{ + struct xz_dec_microlzma *s; + + /* Restrict dict_size to the same range as in the LZMA2 code. */ + if (dict_size < 4096 || dict_size > (3U << 30)) + return NULL; + + s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->s.dict.mode = mode; + s->s.dict.size = dict_size; + + if (DEC_IS_MULTI(mode)) { + s->s.dict.end = dict_size; + + s->s.dict.buf = vmalloc(dict_size); + if (s->s.dict.buf == NULL) { + kfree(s); + return NULL; + } + } + + return s; +} + +void xz_dec_microlzma_reset(struct xz_dec_microlzma *s, uint32_t comp_size, + uint32_t uncomp_size, int uncomp_size_is_exact) +{ + /* + * comp_size is validated in xz_dec_microlzma_run(). + * uncomp_size can safely be anything. + */ + s->s.lzma2.compressed = comp_size; + s->s.lzma2.uncompressed = uncomp_size; + s->s.lzma2.pedantic_microlzma = uncomp_size_is_exact; + + s->s.lzma2.sequence = SEQ_PROPERTIES; + s->s.temp.size = 0; +} + +void xz_dec_microlzma_end(struct xz_dec_microlzma *s) +{ + if (DEC_IS_MULTI(s->s.dict.mode)) + vfree(s->s.dict.buf); + + kfree(s); +} +#endif diff --git a/lib/xz/xz_dec_stream.c b/lib/xz/xz_dec_stream.c index 32ab2a08b7cbc9ff5a874d1ffe5cfd12d221e901..a30e3308035fa5b1163cb670a3447685e9812567 100644 --- a/lib/xz/xz_dec_stream.c +++ b/lib/xz/xz_dec_stream.c @@ -402,12 +402,12 @@ static enum xz_ret dec_stream_header(struct xz_dec *s) * we will accept other check types too, but then the check won't * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given. */ + if (s->temp.buf[HEADER_MAGIC_SIZE + 1] > XZ_CHECK_MAX) + return XZ_OPTIONS_ERROR; + s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; #ifdef XZ_DEC_ANY_CHECK - if (s->check_type > XZ_CHECK_MAX) - return XZ_OPTIONS_ERROR; - if (s->check_type > XZ_CHECK_CRC32) return XZ_UNSUPPORTED_CHECK; #else diff --git a/lib/xz/xz_dec_syms.c b/lib/xz/xz_dec_syms.c index 32eb3c03aede9191bde909e9dfe07f23bea80118..61098c67a4138882873b17332b926a7b7fba8e47 100644 --- a/lib/xz/xz_dec_syms.c +++ b/lib/xz/xz_dec_syms.c @@ -15,8 +15,15 @@ EXPORT_SYMBOL(xz_dec_reset); EXPORT_SYMBOL(xz_dec_run); EXPORT_SYMBOL(xz_dec_end); +#ifdef CONFIG_XZ_DEC_MICROLZMA +EXPORT_SYMBOL(xz_dec_microlzma_alloc); +EXPORT_SYMBOL(xz_dec_microlzma_reset); +EXPORT_SYMBOL(xz_dec_microlzma_run); +EXPORT_SYMBOL(xz_dec_microlzma_end); +#endif + MODULE_DESCRIPTION("XZ decompressor"); -MODULE_VERSION("1.0"); +MODULE_VERSION("1.1"); MODULE_AUTHOR("Lasse Collin and Igor Pavlov"); /* diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h index 09360ebb510ef10bbc465b388215322a677381d2..bf1e94ec7873cf3c12273e14f0d10d61446ca7ac 100644 --- a/lib/xz/xz_private.h +++ b/lib/xz/xz_private.h @@ -37,6 +37,9 @@ # ifdef CONFIG_XZ_DEC_SPARC # define XZ_DEC_SPARC # endif +# ifdef CONFIG_XZ_DEC_MICROLZMA +# define XZ_DEC_MICROLZMA +# endif # define memeq(a, b, size) (memcmp(a, b, size) == 0) # define memzero(buf, size) memset(buf, 0, size) # endif diff --git a/mm/Kconfig b/mm/Kconfig index 884ba3860ec88846884384691ef81b2a7d92815f..27cdf23dd46ed61d0c90e241be0bec0ff9a639ff 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -756,10 +756,18 @@ config DEFERRED_STRUCT_PAGE_INIT lifetime of the system until these kthreads finish the initialisation. +config PAGE_IDLE_FLAG + bool + select PAGE_EXTENSION if !64BIT + help + This adds PG_idle and PG_young flags to 'struct page'. PTE Accessed + bit writers can set the state of the bit in the flags so that PTE + Accessed bit readers may avoid disturbance. + config IDLE_PAGE_TRACKING bool "Enable idle page tracking" depends on SYSFS && MMU - select PAGE_EXTENSION if !64BIT + select PAGE_IDLE_FLAG help This feature allows to estimate the amount of user pages that have not been touched during a given period of time. This information can @@ -888,4 +896,51 @@ config ARCH_HAS_HUGEPD config MAPPING_DIRTY_HELPERS bool +# Some architectures want callbacks for all IO mappings in order to +# track the physical addresses that get used as devices. +config ARCH_HAS_IOREMAP_PHYS_HOOKS + bool + +config ANON_VMA_NAME + bool "Anonymous VMA name support" + depends on PROC_FS && ADVISE_SYSCALLS && MMU + + help + Allow naming anonymous virtual memory areas. + + This feature allows assigning names to virtual memory areas. Assigned + names can be later retrieved from /proc/pid/maps and /proc/pid/smaps + and help identifying individual anonymous memory areas. + Assigning a name to anonymous virtual memory area might prevent that + area from being merged with adjacent virtual memory areas due to the + difference in their name. + +# multi-gen LRU { +config LRU_GEN + bool "Multi-Gen LRU" + depends on MMU + # the following options can use up the spare bits in page flags + depends on !MAXSMP && (64BIT || !SPARSEMEM || SPARSEMEM_VMEMMAP) + help + A high performance LRU implementation to overcommit memory. See + Documentation/admin-guide/mm/multigen_lru.rst for details. + +config LRU_GEN_ENABLED + bool "Enable by default" + depends on LRU_GEN + help + This option enables the multi-gen LRU by default. + +config LRU_GEN_STATS + bool "Full stats for debugging" + depends on LRU_GEN + help + Do not enable this option unless you plan to look at historical stats + from evicted generations for debugging purpose. + + This option has a per-memcg and per-node memory overhead. +# } + +source "mm/damon/Kconfig" + endmenu diff --git a/mm/Makefile b/mm/Makefile index 8de8651da06955c10562b2a6b258a05b20244267..b90583a89fb5a865e124c6724061c387de7ea703 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -116,6 +116,7 @@ obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o obj-$(CONFIG_FRAME_VECTOR) += frame_vector.o obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o +obj-$(CONFIG_DAMON) += damon/ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_ZONE_DEVICE) += memremap.o diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 408d5051d05b3d281d4e539b961a16248bd30e4f..ca770a783a9f91de7479d9ffdc1a414fcbf86b7d 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -872,6 +872,13 @@ void bdi_unregister(struct backing_dev_info *bdi) wb_shutdown(&bdi->wb); cgwb_bdi_unregister(bdi); + /* + * If this BDI's min ratio has been set, use bdi_set_min_ratio() to + * update the global bdi_min_ratio. + */ + if (bdi->min_ratio) + bdi_set_min_ratio(bdi, 0); + if (bdi->dev) { bdi_debug_unregister(bdi); device_unregister(bdi->dev); diff --git a/mm/cma.c b/mm/cma.c index 47538607045ebc10c247fff8b220d9a5d8ff60bd..0b1d69a919709bd5945a39e4191eabf32982ad5d 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -54,6 +54,7 @@ unsigned long cma_get_size(const struct cma *cma) { return cma->count << PAGE_SHIFT; } +EXPORT_SYMBOL_GPL(cma_get_size); const char *cma_get_name(const struct cma *cma) { @@ -570,7 +571,7 @@ out: if (page) { count_vm_event(CMA_ALLOC_SUCCESS); cma_sysfs_account_success_pages(cma, count); - } else { + } else if (!(gfp_mask & __GFP_NORETRY)) { count_vm_event(CMA_ALLOC_FAIL); if (cma) cma_sysfs_account_fail_pages(cma, count); diff --git a/mm/compaction.c b/mm/compaction.c index c1a02788282b3464e7c0605cd47176ab6659ed3e..6650f5968f26119a4208ff94da5bac08bf57e0b1 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1034,7 +1034,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, low_pfn += compound_nr(page) - 1; /* Successfully isolated */ - del_page_from_lru_list(page, lruvec, page_lru(page)); + del_page_from_lru_list(page, lruvec); mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_is_file_lru(page), thp_nr_pages(page)); diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..5bcf05851ad078305362f897d958e2d3f3dd0167 --- /dev/null +++ b/mm/damon/Kconfig @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: GPL-2.0-only + +menu "Data Access Monitoring" + +config DAMON + bool "DAMON: Data Access Monitoring Framework" + help + This builds a framework that allows kernel subsystems to monitor + access frequency of each memory region. The information can be useful + for performance-centric DRAM level memory management. + + See https://damonitor.github.io/doc/html/latest-damon/index.html for + more information. + +config DAMON_KUNIT_TEST + bool "Test for damon" if !KUNIT_ALL_TESTS + depends on DAMON && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_VADDR + bool "Data access monitoring primitives for virtual address spaces" + depends on DAMON && MMU + select PAGE_IDLE_FLAG + help + This builds the default data access monitoring primitives for DAMON + that work for virtual address spaces. + +config DAMON_PADDR + bool "Data access monitoring primitives for the physical address space" + depends on DAMON && MMU + select PAGE_IDLE_FLAG + help + This builds the default data access monitoring primitives for DAMON + that works for the physical address space. + +config DAMON_VADDR_KUNIT_TEST + bool "Test for DAMON primitives" if !KUNIT_ALL_TESTS + depends on DAMON_VADDR && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON virtual addresses primitives Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_DBGFS + bool "DAMON debugfs interface" + depends on DAMON_VADDR && DAMON_PADDR && DEBUG_FS + help + This builds the debugfs interface for DAMON. The user space admins + can use the interface for arbitrary data access monitoring. + + If unsure, say N. + +config DAMON_DBGFS_KUNIT_TEST + bool "Test for damon debugfs interface" if !KUNIT_ALL_TESTS + depends on DAMON_DBGFS && KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the DAMON debugfs interface Kunit test suite. + + For more information on KUnit and unit tests in general, please refer + to the KUnit documentation. + + If unsure, say N. + +config DAMON_RECLAIM + bool "Build DAMON-based reclaim (DAMON_RECLAIM)" + depends on DAMON_PADDR + help + This builds the DAMON-based reclamation subsystem. It finds pages + that not accessed for a long time (cold) using DAMON and reclaim + those. + + This is suggested to be used as a proactive and lightweight + reclamation under light memory pressure, while the traditional page + scanning-based reclamation is used for heavy pressure. + +endmenu diff --git a/mm/damon/Makefile b/mm/damon/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..f7d5ac377a2bb5551e9fe60882b961825d4f492f --- /dev/null +++ b/mm/damon/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_DAMON) := core.o +obj-$(CONFIG_DAMON_VADDR) += prmtv-common.o vaddr.o +obj-$(CONFIG_DAMON_PADDR) += prmtv-common.o paddr.o +obj-$(CONFIG_DAMON_DBGFS) += dbgfs.o +obj-$(CONFIG_DAMON_RECLAIM) += reclaim.o diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h new file mode 100644 index 0000000000000000000000000000000000000000..7008c3735e99f1d486109aeb8d9b3ccfbf68b6fb --- /dev/null +++ b/mm/damon/core-test.h @@ -0,0 +1,253 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data Access Monitor Unit Tests + * + * Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * + * Author: SeongJae Park + */ + +#ifdef CONFIG_DAMON_KUNIT_TEST + +#ifndef _DAMON_CORE_TEST_H +#define _DAMON_CORE_TEST_H + +#include + +static void damon_test_regions(struct kunit *test) +{ + struct damon_region *r; + struct damon_target *t; + + r = damon_new_region(1, 2); + KUNIT_EXPECT_EQ(test, 1ul, r->ar.start); + KUNIT_EXPECT_EQ(test, 2ul, r->ar.end); + KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses); + + t = damon_new_target(42); + KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t)); + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, 1u, damon_nr_regions(t)); + + damon_del_region(r, t); + KUNIT_EXPECT_EQ(test, 0u, damon_nr_regions(t)); + + damon_free_target(t); +} + +static unsigned int nr_damon_targets(struct damon_ctx *ctx) +{ + struct damon_target *t; + unsigned int nr_targets = 0; + + damon_for_each_target(t, ctx) + nr_targets++; + + return nr_targets; +} + +static void damon_test_target(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + + t = damon_new_target(42); + KUNIT_EXPECT_EQ(test, 42ul, t->id); + KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c)); + + damon_add_target(c, t); + KUNIT_EXPECT_EQ(test, 1u, nr_damon_targets(c)); + + damon_destroy_target(t); + KUNIT_EXPECT_EQ(test, 0u, nr_damon_targets(c)); + + damon_destroy_ctx(c); +} + +/* + * Test kdamond_reset_aggregated() + * + * DAMON checks access to each region and aggregates this information as the + * access frequency of each region. In detail, it increases '->nr_accesses' of + * regions that an access has confirmed. 'kdamond_reset_aggregated()' flushes + * the aggregated information ('->nr_accesses' of each regions) to the result + * buffer. As a result of the flushing, the '->nr_accesses' of regions are + * initialized to zero. + */ +static void damon_test_aggregate(struct kunit *test) +{ + struct damon_ctx *ctx = damon_new_ctx(); + unsigned long target_ids[] = {1, 2, 3}; + unsigned long saddr[][3] = {{10, 20, 30}, {5, 42, 49}, {13, 33, 55} }; + unsigned long eaddr[][3] = {{15, 27, 40}, {31, 45, 55}, {23, 44, 66} }; + unsigned long accesses[][3] = {{42, 95, 84}, {10, 20, 30}, {0, 1, 2} }; + struct damon_target *t; + struct damon_region *r; + int it, ir; + + damon_set_targets(ctx, target_ids, 3); + + it = 0; + damon_for_each_target(t, ctx) { + for (ir = 0; ir < 3; ir++) { + r = damon_new_region(saddr[it][ir], eaddr[it][ir]); + r->nr_accesses = accesses[it][ir]; + damon_add_region(r, t); + } + it++; + } + kdamond_reset_aggregated(ctx); + it = 0; + damon_for_each_target(t, ctx) { + ir = 0; + /* '->nr_accesses' should be zeroed */ + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, 0u, r->nr_accesses); + ir++; + } + /* regions should be preserved */ + KUNIT_EXPECT_EQ(test, 3, ir); + it++; + } + /* targets also should be preserved */ + KUNIT_EXPECT_EQ(test, 3, it); + + damon_destroy_ctx(ctx); +} + +static void damon_test_split_at(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + struct damon_region *r; + + t = damon_new_target(42); + r = damon_new_region(0, 100); + damon_add_region(r, t); + damon_split_region_at(c, t, r, 25); + KUNIT_EXPECT_EQ(test, r->ar.start, 0ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 25ul); + + r = damon_next_region(r); + KUNIT_EXPECT_EQ(test, r->ar.start, 25ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 100ul); + + damon_free_target(t); + damon_destroy_ctx(c); +} + +static void damon_test_merge_two(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r, *r2, *r3; + int i; + + t = damon_new_target(42); + r = damon_new_region(0, 100); + r->nr_accesses = 10; + damon_add_region(r, t); + r2 = damon_new_region(100, 300); + r2->nr_accesses = 20; + damon_add_region(r2, t); + + damon_merge_two_regions(t, r, r2); + KUNIT_EXPECT_EQ(test, r->ar.start, 0ul); + KUNIT_EXPECT_EQ(test, r->ar.end, 300ul); + KUNIT_EXPECT_EQ(test, r->nr_accesses, 16u); + + i = 0; + damon_for_each_region(r3, t) { + KUNIT_EXPECT_PTR_EQ(test, r, r3); + i++; + } + KUNIT_EXPECT_EQ(test, i, 1); + + damon_free_target(t); +} + +static struct damon_region *__nth_region_of(struct damon_target *t, int idx) +{ + struct damon_region *r; + unsigned int i = 0; + + damon_for_each_region(r, t) { + if (i++ == idx) + return r; + } + + return NULL; +} + +static void damon_test_merge_regions_of(struct kunit *test) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sa[] = {0, 100, 114, 122, 130, 156, 170, 184}; + unsigned long ea[] = {100, 112, 122, 130, 156, 170, 184, 230}; + unsigned int nrs[] = {0, 0, 10, 10, 20, 30, 1, 2}; + + unsigned long saddrs[] = {0, 114, 130, 156, 170}; + unsigned long eaddrs[] = {112, 130, 156, 170, 230}; + int i; + + t = damon_new_target(42); + for (i = 0; i < ARRAY_SIZE(sa); i++) { + r = damon_new_region(sa[i], ea[i]); + r->nr_accesses = nrs[i]; + damon_add_region(r, t); + } + + damon_merge_regions_of(t, 9, 9999); + /* 0-112, 114-130, 130-156, 156-170 */ + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 5u); + for (i = 0; i < 5; i++) { + r = __nth_region_of(t, i); + KUNIT_EXPECT_EQ(test, r->ar.start, saddrs[i]); + KUNIT_EXPECT_EQ(test, r->ar.end, eaddrs[i]); + } + damon_free_target(t); +} + +static void damon_test_split_regions_of(struct kunit *test) +{ + struct damon_ctx *c = damon_new_ctx(); + struct damon_target *t; + struct damon_region *r; + + t = damon_new_target(42); + r = damon_new_region(0, 22); + damon_add_region(r, t); + damon_split_regions_of(c, t, 2); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 2u); + damon_free_target(t); + + t = damon_new_target(42); + r = damon_new_region(0, 220); + damon_add_region(r, t); + damon_split_regions_of(c, t, 4); + KUNIT_EXPECT_LE(test, damon_nr_regions(t), 4u); + damon_free_target(t); + damon_destroy_ctx(c); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_test_target), + KUNIT_CASE(damon_test_regions), + KUNIT_CASE(damon_test_aggregate), + KUNIT_CASE(damon_test_split_at), + KUNIT_CASE(damon_test_merge_two), + KUNIT_CASE(damon_test_merge_regions_of), + KUNIT_CASE(damon_test_split_regions_of), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_CORE_TEST_H */ + +#endif /* CONFIG_DAMON_KUNIT_TEST */ diff --git a/mm/damon/core.c b/mm/damon/core.c new file mode 100644 index 0000000000000000000000000000000000000000..1dd153c31c9e2b8e5fd6bc7d608f830e768f9c1e --- /dev/null +++ b/mm/damon/core.c @@ -0,0 +1,1075 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Data Access Monitor + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon: " fmt + +#include +#include +#include +#include +#include +#include + +#define CREATE_TRACE_POINTS +#include + +#ifdef CONFIG_DAMON_KUNIT_TEST +#undef DAMON_MIN_REGION +#define DAMON_MIN_REGION 1 +#endif + +static DEFINE_MUTEX(damon_lock); +static int nr_running_ctxs; + +/* + * Construct a damon_region struct + * + * Returns the pointer to the new struct if success, or NULL otherwise + */ +struct damon_region *damon_new_region(unsigned long start, unsigned long end) +{ + struct damon_region *region; + + region = kmalloc(sizeof(*region), GFP_KERNEL); + if (!region) + return NULL; + + region->ar.start = start; + region->ar.end = end; + region->nr_accesses = 0; + INIT_LIST_HEAD(®ion->list); + + region->age = 0; + region->last_nr_accesses = 0; + + return region; +} + +void damon_add_region(struct damon_region *r, struct damon_target *t) +{ + list_add_tail(&r->list, &t->regions_list); + t->nr_regions++; +} + +static void damon_del_region(struct damon_region *r, struct damon_target *t) +{ + list_del(&r->list); + t->nr_regions--; +} + +static void damon_free_region(struct damon_region *r) +{ + kfree(r); +} + +void damon_destroy_region(struct damon_region *r, struct damon_target *t) +{ + damon_del_region(r, t); + damon_free_region(r); +} + +struct damos *damon_new_scheme( + unsigned long min_sz_region, unsigned long max_sz_region, + unsigned int min_nr_accesses, unsigned int max_nr_accesses, + unsigned int min_age_region, unsigned int max_age_region, + enum damos_action action, struct damos_quota *quota, + struct damos_watermarks *wmarks) +{ + struct damos *scheme; + + scheme = kmalloc(sizeof(*scheme), GFP_KERNEL); + if (!scheme) + return NULL; + scheme->min_sz_region = min_sz_region; + scheme->max_sz_region = max_sz_region; + scheme->min_nr_accesses = min_nr_accesses; + scheme->max_nr_accesses = max_nr_accesses; + scheme->min_age_region = min_age_region; + scheme->max_age_region = max_age_region; + scheme->action = action; + scheme->stat = (struct damos_stat){}; + INIT_LIST_HEAD(&scheme->list); + + scheme->quota.ms = quota->ms; + scheme->quota.sz = quota->sz; + scheme->quota.reset_interval = quota->reset_interval; + scheme->quota.weight_sz = quota->weight_sz; + scheme->quota.weight_nr_accesses = quota->weight_nr_accesses; + scheme->quota.weight_age = quota->weight_age; + scheme->quota.total_charged_sz = 0; + scheme->quota.total_charged_ns = 0; + scheme->quota.esz = 0; + scheme->quota.charged_sz = 0; + scheme->quota.charged_from = 0; + scheme->quota.charge_target_from = NULL; + scheme->quota.charge_addr_from = 0; + + scheme->wmarks.metric = wmarks->metric; + scheme->wmarks.interval = wmarks->interval; + scheme->wmarks.high = wmarks->high; + scheme->wmarks.mid = wmarks->mid; + scheme->wmarks.low = wmarks->low; + scheme->wmarks.activated = true; + + return scheme; +} + +void damon_add_scheme(struct damon_ctx *ctx, struct damos *s) +{ + list_add_tail(&s->list, &ctx->schemes); +} + +static void damon_del_scheme(struct damos *s) +{ + list_del(&s->list); +} + +static void damon_free_scheme(struct damos *s) +{ + kfree(s); +} + +void damon_destroy_scheme(struct damos *s) +{ + damon_del_scheme(s); + damon_free_scheme(s); +} + +/* + * Construct a damon_target struct + * + * Returns the pointer to the new struct if success, or NULL otherwise + */ +struct damon_target *damon_new_target(unsigned long id) +{ + struct damon_target *t; + + t = kmalloc(sizeof(*t), GFP_KERNEL); + if (!t) + return NULL; + + t->id = id; + t->nr_regions = 0; + INIT_LIST_HEAD(&t->regions_list); + + return t; +} + +void damon_add_target(struct damon_ctx *ctx, struct damon_target *t) +{ + list_add_tail(&t->list, &ctx->adaptive_targets); +} + +bool damon_targets_empty(struct damon_ctx *ctx) +{ + return list_empty(&ctx->adaptive_targets); +} + +static void damon_del_target(struct damon_target *t) +{ + list_del(&t->list); +} + +void damon_free_target(struct damon_target *t) +{ + struct damon_region *r, *next; + + damon_for_each_region_safe(r, next, t) + damon_free_region(r); + kfree(t); +} + +void damon_destroy_target(struct damon_target *t) +{ + damon_del_target(t); + damon_free_target(t); +} + +unsigned int damon_nr_regions(struct damon_target *t) +{ + return t->nr_regions; +} + +struct damon_ctx *damon_new_ctx(void) +{ + struct damon_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + ctx->sample_interval = 5 * 1000; + ctx->aggr_interval = 100 * 1000; + ctx->primitive_update_interval = 60 * 1000 * 1000; + + ktime_get_coarse_ts64(&ctx->last_aggregation); + ctx->last_primitive_update = ctx->last_aggregation; + + mutex_init(&ctx->kdamond_lock); + + ctx->min_nr_regions = 10; + ctx->max_nr_regions = 1000; + + INIT_LIST_HEAD(&ctx->adaptive_targets); + INIT_LIST_HEAD(&ctx->schemes); + + return ctx; +} + +static void damon_destroy_targets(struct damon_ctx *ctx) +{ + struct damon_target *t, *next_t; + + if (ctx->primitive.cleanup) { + ctx->primitive.cleanup(ctx); + return; + } + + damon_for_each_target_safe(t, next_t, ctx) + damon_destroy_target(t); +} + +void damon_destroy_ctx(struct damon_ctx *ctx) +{ + struct damos *s, *next_s; + + damon_destroy_targets(ctx); + + damon_for_each_scheme_safe(s, next_s, ctx) + damon_destroy_scheme(s); + + kfree(ctx); +} + +/** + * damon_set_targets() - Set monitoring targets. + * @ctx: monitoring context + * @ids: array of target ids + * @nr_ids: number of entries in @ids + * + * This function should not be called while the kdamond is running. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_set_targets(struct damon_ctx *ctx, + unsigned long *ids, ssize_t nr_ids) +{ + ssize_t i; + struct damon_target *t, *next; + + damon_destroy_targets(ctx); + + for (i = 0; i < nr_ids; i++) { + t = damon_new_target(ids[i]); + if (!t) { + /* The caller should do cleanup of the ids itself */ + damon_for_each_target_safe(t, next, ctx) + damon_destroy_target(t); + return -ENOMEM; + } + damon_add_target(ctx, t); + } + + return 0; +} + +/** + * damon_set_attrs() - Set attributes for the monitoring. + * @ctx: monitoring context + * @sample_int: time interval between samplings + * @aggr_int: time interval between aggregations + * @primitive_upd_int: time interval between monitoring primitive updates + * @min_nr_reg: minimal number of regions + * @max_nr_reg: maximum number of regions + * + * This function should not be called while the kdamond is running. + * Every time interval is in micro-seconds. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int, + unsigned long aggr_int, unsigned long primitive_upd_int, + unsigned long min_nr_reg, unsigned long max_nr_reg) +{ + if (min_nr_reg < 3) + return -EINVAL; + if (min_nr_reg > max_nr_reg) + return -EINVAL; + + ctx->sample_interval = sample_int; + ctx->aggr_interval = aggr_int; + ctx->primitive_update_interval = primitive_upd_int; + ctx->min_nr_regions = min_nr_reg; + ctx->max_nr_regions = max_nr_reg; + + return 0; +} + +/** + * damon_set_schemes() - Set data access monitoring based operation schemes. + * @ctx: monitoring context + * @schemes: array of the schemes + * @nr_schemes: number of entries in @schemes + * + * This function should not be called while the kdamond of the context is + * running. + * + * Return: 0 if success, or negative error code otherwise. + */ +int damon_set_schemes(struct damon_ctx *ctx, struct damos **schemes, + ssize_t nr_schemes) +{ + struct damos *s, *next; + ssize_t i; + + damon_for_each_scheme_safe(s, next, ctx) + damon_destroy_scheme(s); + for (i = 0; i < nr_schemes; i++) + damon_add_scheme(ctx, schemes[i]); + return 0; +} + +/** + * damon_nr_running_ctxs() - Return number of currently running contexts. + */ +int damon_nr_running_ctxs(void) +{ + int nr_ctxs; + + mutex_lock(&damon_lock); + nr_ctxs = nr_running_ctxs; + mutex_unlock(&damon_lock); + + return nr_ctxs; +} + +/* Returns the size upper limit for each monitoring region */ +static unsigned long damon_region_sz_limit(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + unsigned long sz = 0; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) + sz += r->ar.end - r->ar.start; + } + + if (ctx->min_nr_regions) + sz /= ctx->min_nr_regions; + if (sz < DAMON_MIN_REGION) + sz = DAMON_MIN_REGION; + + return sz; +} + +static int kdamond_fn(void *data); + +/* + * __damon_start() - Starts monitoring with given context. + * @ctx: monitoring context + * + * This function should be called while damon_lock is hold. + * + * Return: 0 on success, negative error code otherwise. + */ +static int __damon_start(struct damon_ctx *ctx) +{ + int err = -EBUSY; + + mutex_lock(&ctx->kdamond_lock); + if (!ctx->kdamond) { + err = 0; + ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d", + nr_running_ctxs); + if (IS_ERR(ctx->kdamond)) { + err = PTR_ERR(ctx->kdamond); + ctx->kdamond = NULL; + } + } + mutex_unlock(&ctx->kdamond_lock); + + return err; +} + +/** + * damon_start() - Starts the monitorings for a given group of contexts. + * @ctxs: an array of the pointers for contexts to start monitoring + * @nr_ctxs: size of @ctxs + * + * This function starts a group of monitoring threads for a group of monitoring + * contexts. One thread per each context is created and run in parallel. The + * caller should handle synchronization between the threads by itself. If a + * group of threads that created by other 'damon_start()' call is currently + * running, this function does nothing but returns -EBUSY. + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_start(struct damon_ctx **ctxs, int nr_ctxs) +{ + int i; + int err = 0; + + mutex_lock(&damon_lock); + if (nr_running_ctxs) { + mutex_unlock(&damon_lock); + return -EBUSY; + } + + for (i = 0; i < nr_ctxs; i++) { + err = __damon_start(ctxs[i]); + if (err) + break; + nr_running_ctxs++; + } + mutex_unlock(&damon_lock); + + return err; +} + +/* + * __damon_stop() - Stops monitoring of given context. + * @ctx: monitoring context + * + * Return: 0 on success, negative error code otherwise. + */ +static int __damon_stop(struct damon_ctx *ctx) +{ + struct task_struct *tsk; + + mutex_lock(&ctx->kdamond_lock); + tsk = ctx->kdamond; + if (tsk) { + get_task_struct(tsk); + mutex_unlock(&ctx->kdamond_lock); + kthread_stop(tsk); + put_task_struct(tsk); + return 0; + } + mutex_unlock(&ctx->kdamond_lock); + + return -EPERM; +} + +/** + * damon_stop() - Stops the monitorings for a given group of contexts. + * @ctxs: an array of the pointers for contexts to stop monitoring + * @nr_ctxs: size of @ctxs + * + * Return: 0 on success, negative error code otherwise. + */ +int damon_stop(struct damon_ctx **ctxs, int nr_ctxs) +{ + int i, err = 0; + + for (i = 0; i < nr_ctxs; i++) { + /* nr_running_ctxs is decremented in kdamond_fn */ + err = __damon_stop(ctxs[i]); + if (err) + return err; + } + + return err; +} + +/* + * damon_check_reset_time_interval() - Check if a time interval is elapsed. + * @baseline: the time to check whether the interval has elapsed since + * @interval: the time interval (microseconds) + * + * See whether the given time interval has passed since the given baseline + * time. If so, it also updates the baseline to current time for next check. + * + * Return: true if the time interval has passed, or false otherwise. + */ +static bool damon_check_reset_time_interval(struct timespec64 *baseline, + unsigned long interval) +{ + struct timespec64 now; + + ktime_get_coarse_ts64(&now); + if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) < + interval * 1000) + return false; + *baseline = now; + return true; +} + +/* + * Check whether it is time to flush the aggregated information + */ +static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx) +{ + return damon_check_reset_time_interval(&ctx->last_aggregation, + ctx->aggr_interval); +} + +/* + * Reset the aggregated monitoring results ('nr_accesses' of each region). + */ +static void kdamond_reset_aggregated(struct damon_ctx *c) +{ + struct damon_target *t; + unsigned int ti = 0; /* target's index */ + + damon_for_each_target(t, c) { + struct damon_region *r; + + damon_for_each_region(r, t) { + trace_damon_aggregated(t, ti, r, damon_nr_regions(t)); + r->last_nr_accesses = r->nr_accesses; + r->nr_accesses = 0; + } + ti++; + } +} + +static void damon_split_region_at(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + unsigned long sz_r); + +static bool __damos_valid_target(struct damon_region *r, struct damos *s) +{ + unsigned long sz; + + sz = r->ar.end - r->ar.start; + return s->min_sz_region <= sz && sz <= s->max_sz_region && + s->min_nr_accesses <= r->nr_accesses && + r->nr_accesses <= s->max_nr_accesses && + s->min_age_region <= r->age && r->age <= s->max_age_region; +} + +static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t, + struct damon_region *r, struct damos *s) +{ + bool ret = __damos_valid_target(r, s); + + if (!ret || !s->quota.esz || !c->primitive.get_scheme_score) + return ret; + + return c->primitive.get_scheme_score(c, t, r, s) >= s->quota.min_score; +} + +static void damon_do_apply_schemes(struct damon_ctx *c, + struct damon_target *t, + struct damon_region *r) +{ + struct damos *s; + + damon_for_each_scheme(s, c) { + struct damos_quota *quota = &s->quota; + unsigned long sz = r->ar.end - r->ar.start; + struct timespec64 begin, end; + unsigned long sz_applied = 0; + + if (!s->wmarks.activated) + continue; + + /* Check the quota */ + if (quota->esz && quota->charged_sz >= quota->esz) + continue; + + /* Skip previously charged regions */ + if (quota->charge_target_from) { + if (t != quota->charge_target_from) + continue; + if (r == damon_last_region(t)) { + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + continue; + } + if (quota->charge_addr_from && + r->ar.end <= quota->charge_addr_from) + continue; + + if (quota->charge_addr_from && r->ar.start < + quota->charge_addr_from) { + sz = ALIGN_DOWN(quota->charge_addr_from - + r->ar.start, DAMON_MIN_REGION); + if (!sz) { + if (r->ar.end - r->ar.start <= + DAMON_MIN_REGION) + continue; + sz = DAMON_MIN_REGION; + } + damon_split_region_at(c, t, r, sz); + r = damon_next_region(r); + sz = r->ar.end - r->ar.start; + } + quota->charge_target_from = NULL; + quota->charge_addr_from = 0; + } + + if (!damos_valid_target(c, t, r, s)) + continue; + + /* Apply the scheme */ + if (c->primitive.apply_scheme) { + if (quota->esz && + quota->charged_sz + sz > quota->esz) { + sz = ALIGN_DOWN(quota->esz - quota->charged_sz, + DAMON_MIN_REGION); + if (!sz) + goto update_stat; + damon_split_region_at(c, t, r, sz); + } + ktime_get_coarse_ts64(&begin); + sz_applied = c->primitive.apply_scheme(c, t, r, s); + ktime_get_coarse_ts64(&end); + quota->total_charged_ns += timespec64_to_ns(&end) - + timespec64_to_ns(&begin); + quota->charged_sz += sz; + if (quota->esz && quota->charged_sz >= quota->esz) { + quota->charge_target_from = t; + quota->charge_addr_from = r->ar.end + 1; + } + } + if (s->action != DAMOS_STAT) + r->age = 0; + +update_stat: + s->stat.nr_tried++; + s->stat.sz_tried += sz; + if (sz_applied) + s->stat.nr_applied++; + s->stat.sz_applied += sz_applied; + } +} + +/* Shouldn't be called if quota->ms and quota->sz are zero */ +static void damos_set_effective_quota(struct damos_quota *quota) +{ + unsigned long throughput; + unsigned long esz; + + if (!quota->ms) { + quota->esz = quota->sz; + return; + } + + if (quota->total_charged_ns) + throughput = quota->total_charged_sz * 1000000 / + quota->total_charged_ns; + else + throughput = PAGE_SIZE * 1024; + esz = throughput * quota->ms; + + if (quota->sz && quota->sz < esz) + esz = quota->sz; + quota->esz = esz; +} + +static void kdamond_apply_schemes(struct damon_ctx *c) +{ + struct damon_target *t; + struct damon_region *r, *next_r; + struct damos *s; + + damon_for_each_scheme(s, c) { + struct damos_quota *quota = &s->quota; + unsigned long cumulated_sz; + unsigned int score, max_score = 0; + + if (!s->wmarks.activated) + continue; + + if (!quota->ms && !quota->sz) + continue; + + /* New charge window starts */ + if (time_after_eq(jiffies, quota->charged_from + + msecs_to_jiffies( + quota->reset_interval))) { + if (quota->esz && quota->charged_sz >= quota->esz) + s->stat.qt_exceeds++; + quota->total_charged_sz += quota->charged_sz; + quota->charged_from = jiffies; + quota->charged_sz = 0; + damos_set_effective_quota(quota); + } + + if (!c->primitive.get_scheme_score) + continue; + + /* Fill up the score histogram */ + memset(quota->histogram, 0, sizeof(quota->histogram)); + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + if (!__damos_valid_target(r, s)) + continue; + score = c->primitive.get_scheme_score( + c, t, r, s); + quota->histogram[score] += + r->ar.end - r->ar.start; + if (score > max_score) + max_score = score; + } + } + + /* Set the min score limit */ + for (cumulated_sz = 0, score = max_score; ; score--) { + cumulated_sz += quota->histogram[score]; + if (cumulated_sz >= quota->esz || !score) + break; + } + quota->min_score = score; + } + + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next_r, t) + damon_do_apply_schemes(c, t, r); + } +} + +static inline unsigned long sz_damon_region(struct damon_region *r) +{ + return r->ar.end - r->ar.start; +} + +/* + * Merge two adjacent regions into one region + */ +static void damon_merge_two_regions(struct damon_target *t, + struct damon_region *l, struct damon_region *r) +{ + unsigned long sz_l = sz_damon_region(l), sz_r = sz_damon_region(r); + + l->nr_accesses = (l->nr_accesses * sz_l + r->nr_accesses * sz_r) / + (sz_l + sz_r); + l->age = (l->age * sz_l + r->age * sz_r) / (sz_l + sz_r); + l->ar.end = r->ar.end; + damon_destroy_region(r, t); +} + +/* + * Merge adjacent regions having similar access frequencies + * + * t target affected by this merge operation + * thres '->nr_accesses' diff threshold for the merge + * sz_limit size upper limit of each region + */ +static void damon_merge_regions_of(struct damon_target *t, unsigned int thres, + unsigned long sz_limit) +{ + struct damon_region *r, *prev = NULL, *next; + + damon_for_each_region_safe(r, next, t) { + if (abs(r->nr_accesses - r->last_nr_accesses) > thres) + r->age = 0; + else + r->age++; + + if (prev && prev->ar.end == r->ar.start && + abs(prev->nr_accesses - r->nr_accesses) <= thres && + sz_damon_region(prev) + sz_damon_region(r) <= sz_limit) + damon_merge_two_regions(t, prev, r); + else + prev = r; + } +} + +/* + * Merge adjacent regions having similar access frequencies + * + * threshold '->nr_accesses' diff threshold for the merge + * sz_limit size upper limit of each region + * + * This function merges monitoring target regions which are adjacent and their + * access frequencies are similar. This is for minimizing the monitoring + * overhead under the dynamically changeable access pattern. If a merge was + * unnecessarily made, later 'kdamond_split_regions()' will revert it. + */ +static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, + unsigned long sz_limit) +{ + struct damon_target *t; + + damon_for_each_target(t, c) + damon_merge_regions_of(t, threshold, sz_limit); +} + +/* + * Split a region in two + * + * r the region to be split + * sz_r size of the first sub-region that will be made + */ +static void damon_split_region_at(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + unsigned long sz_r) +{ + struct damon_region *new; + + new = damon_new_region(r->ar.start + sz_r, r->ar.end); + if (!new) + return; + + r->ar.end = new->ar.start; + + new->age = r->age; + new->last_nr_accesses = r->last_nr_accesses; + + damon_insert_region(new, r, damon_next_region(r), t); +} + +/* Split every region in the given target into 'nr_subs' regions */ +static void damon_split_regions_of(struct damon_ctx *ctx, + struct damon_target *t, int nr_subs) +{ + struct damon_region *r, *next; + unsigned long sz_region, sz_sub = 0; + int i; + + damon_for_each_region_safe(r, next, t) { + sz_region = r->ar.end - r->ar.start; + + for (i = 0; i < nr_subs - 1 && + sz_region > 2 * DAMON_MIN_REGION; i++) { + /* + * Randomly select size of left sub-region to be at + * least 10 percent and at most 90% of original region + */ + sz_sub = ALIGN_DOWN(damon_rand(1, 10) * + sz_region / 10, DAMON_MIN_REGION); + /* Do not allow blank region */ + if (sz_sub == 0 || sz_sub >= sz_region) + continue; + + damon_split_region_at(ctx, t, r, sz_sub); + sz_region = sz_sub; + } + } +} + +/* + * Split every target region into randomly-sized small regions + * + * This function splits every target region into random-sized small regions if + * current total number of the regions is equal or smaller than half of the + * user-specified maximum number of regions. This is for maximizing the + * monitoring accuracy under the dynamically changeable access patterns. If a + * split was unnecessarily made, later 'kdamond_merge_regions()' will revert + * it. + */ +static void kdamond_split_regions(struct damon_ctx *ctx) +{ + struct damon_target *t; + unsigned int nr_regions = 0; + static unsigned int last_nr_regions; + int nr_subregions = 2; + + damon_for_each_target(t, ctx) + nr_regions += damon_nr_regions(t); + + if (nr_regions > ctx->max_nr_regions / 2) + return; + + /* Maybe the middle of the region has different access frequency */ + if (last_nr_regions == nr_regions && + nr_regions < ctx->max_nr_regions / 3) + nr_subregions = 3; + + damon_for_each_target(t, ctx) + damon_split_regions_of(ctx, t, nr_subregions); + + last_nr_regions = nr_regions; +} + +/* + * Check whether it is time to check and apply the target monitoring regions + * + * Returns true if it is. + */ +static bool kdamond_need_update_primitive(struct damon_ctx *ctx) +{ + return damon_check_reset_time_interval(&ctx->last_primitive_update, + ctx->primitive_update_interval); +} + +/* + * Check whether current monitoring should be stopped + * + * The monitoring is stopped when either the user requested to stop, or all + * monitoring targets are invalid. + * + * Returns true if need to stop current monitoring. + */ +static bool kdamond_need_stop(struct damon_ctx *ctx) +{ + struct damon_target *t; + + if (kthread_should_stop()) + return true; + + if (!ctx->primitive.target_valid) + return false; + + damon_for_each_target(t, ctx) { + if (ctx->primitive.target_valid(t)) + return false; + } + + return true; +} + +static unsigned long damos_wmark_metric_value(enum damos_wmark_metric metric) +{ + struct sysinfo i; + + switch (metric) { + case DAMOS_WMARK_FREE_MEM_RATE: + si_meminfo(&i); + return i.freeram * 1000 / i.totalram; + default: + break; + } + return -EINVAL; +} + +/* + * Returns zero if the scheme is active. Else, returns time to wait for next + * watermark check in micro-seconds. + */ +static unsigned long damos_wmark_wait_us(struct damos *scheme) +{ + unsigned long metric; + + if (scheme->wmarks.metric == DAMOS_WMARK_NONE) + return 0; + + metric = damos_wmark_metric_value(scheme->wmarks.metric); + /* higher than high watermark or lower than low watermark */ + if (metric > scheme->wmarks.high || scheme->wmarks.low > metric) { + if (scheme->wmarks.activated) + pr_debug("deactivate a scheme (%d) for %s wmark\n", + scheme->action, + metric > scheme->wmarks.high ? + "high" : "low"); + scheme->wmarks.activated = false; + return scheme->wmarks.interval; + } + + /* inactive and higher than middle watermark */ + if ((scheme->wmarks.high >= metric && metric >= scheme->wmarks.mid) && + !scheme->wmarks.activated) + return scheme->wmarks.interval; + + if (!scheme->wmarks.activated) + pr_debug("activate a scheme (%d)\n", scheme->action); + scheme->wmarks.activated = true; + return 0; +} + +static void kdamond_usleep(unsigned long usecs) +{ + /* See Documentation/timers/timers-howto.rst for the thresholds */ + if (usecs > 20 * USEC_PER_MSEC) + schedule_timeout_idle(usecs_to_jiffies(usecs)); + else + usleep_idle_range(usecs, usecs + 1); +} + +/* Returns negative error code if it's not activated but should return */ +static int kdamond_wait_activation(struct damon_ctx *ctx) +{ + struct damos *s; + unsigned long wait_time; + unsigned long min_wait_time = 0; + + while (!kdamond_need_stop(ctx)) { + damon_for_each_scheme(s, ctx) { + wait_time = damos_wmark_wait_us(s); + if (!min_wait_time || wait_time < min_wait_time) + min_wait_time = wait_time; + } + if (!min_wait_time) + return 0; + + kdamond_usleep(min_wait_time); + } + return -EBUSY; +} + +/* + * The monitoring daemon that runs as a kernel thread + */ +static int kdamond_fn(void *data) +{ + struct damon_ctx *ctx = (struct damon_ctx *)data; + struct damon_target *t; + struct damon_region *r, *next; + unsigned int max_nr_accesses = 0; + unsigned long sz_limit = 0; + bool done = false; + + pr_debug("kdamond (%d) starts\n", current->pid); + + if (ctx->primitive.init) + ctx->primitive.init(ctx); + if (ctx->callback.before_start && ctx->callback.before_start(ctx)) + done = true; + + sz_limit = damon_region_sz_limit(ctx); + + while (!kdamond_need_stop(ctx) && !done) { + if (kdamond_wait_activation(ctx)) + continue; + + if (ctx->primitive.prepare_access_checks) + ctx->primitive.prepare_access_checks(ctx); + if (ctx->callback.after_sampling && + ctx->callback.after_sampling(ctx)) + done = true; + + kdamond_usleep(ctx->sample_interval); + + if (ctx->primitive.check_accesses) + max_nr_accesses = ctx->primitive.check_accesses(ctx); + + if (kdamond_aggregate_interval_passed(ctx)) { + kdamond_merge_regions(ctx, + max_nr_accesses / 10, + sz_limit); + if (ctx->callback.after_aggregation && + ctx->callback.after_aggregation(ctx)) + done = true; + kdamond_apply_schemes(ctx); + kdamond_reset_aggregated(ctx); + kdamond_split_regions(ctx); + if (ctx->primitive.reset_aggregated) + ctx->primitive.reset_aggregated(ctx); + } + + if (kdamond_need_update_primitive(ctx)) { + if (ctx->primitive.update) + ctx->primitive.update(ctx); + sz_limit = damon_region_sz_limit(ctx); + } + } + damon_for_each_target(t, ctx) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + + if (ctx->callback.before_terminate) + ctx->callback.before_terminate(ctx); + if (ctx->primitive.cleanup) + ctx->primitive.cleanup(ctx); + + pr_debug("kdamond (%d) finishes\n", current->pid); + mutex_lock(&ctx->kdamond_lock); + ctx->kdamond = NULL; + mutex_unlock(&ctx->kdamond_lock); + + mutex_lock(&damon_lock); + nr_running_ctxs--; + mutex_unlock(&damon_lock); + + return 0; +} + +#include "core-test.h" diff --git a/mm/damon/dbgfs-test.h b/mm/damon/dbgfs-test.h new file mode 100644 index 0000000000000000000000000000000000000000..86b9f9528231efb52bfe1b791aaaffeb01315492 --- /dev/null +++ b/mm/damon/dbgfs-test.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * DAMON Debugfs Interface Unit Tests + * + * Author: SeongJae Park + */ + +#ifdef CONFIG_DAMON_DBGFS_KUNIT_TEST + +#ifndef _DAMON_DBGFS_TEST_H +#define _DAMON_DBGFS_TEST_H + +#include + +static void damon_dbgfs_test_str_to_target_ids(struct kunit *test) +{ + char *question; + unsigned long *answers; + unsigned long expected[] = {12, 35, 46}; + ssize_t nr_integers = 0, i; + + question = "123"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers); + KUNIT_EXPECT_EQ(test, 123ul, answers[0]); + kfree(answers); + + question = "123abc"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)1, nr_integers); + KUNIT_EXPECT_EQ(test, 123ul, answers[0]); + kfree(answers); + + question = "a123"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); + kfree(answers); + + question = "12 35"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers); + for (i = 0; i < nr_integers; i++) + KUNIT_EXPECT_EQ(test, expected[i], answers[i]); + kfree(answers); + + question = "12 35 46"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)3, nr_integers); + for (i = 0; i < nr_integers; i++) + KUNIT_EXPECT_EQ(test, expected[i], answers[i]); + kfree(answers); + + question = "12 35 abc 46"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)2, nr_integers); + for (i = 0; i < 2; i++) + KUNIT_EXPECT_EQ(test, expected[i], answers[i]); + kfree(answers); + + question = ""; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); + kfree(answers); + + question = "\n"; + answers = str_to_target_ids(question, strlen(question), + &nr_integers); + KUNIT_EXPECT_EQ(test, (ssize_t)0, nr_integers); + kfree(answers); +} + +static void damon_dbgfs_test_set_targets(struct kunit *test) +{ + struct damon_ctx *ctx = dbgfs_new_ctx(); + unsigned long ids[] = {1, 2, 3}; + char buf[64]; + + /* Make DAMON consider target id as plain number */ + ctx->primitive.target_valid = NULL; + ctx->primitive.cleanup = NULL; + + damon_set_targets(ctx, ids, 3); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2 3\n"); + + damon_set_targets(ctx, NULL, 0); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "\n"); + + damon_set_targets(ctx, (unsigned long []){1, 2}, 2); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "1 2\n"); + + damon_set_targets(ctx, (unsigned long []){2}, 1); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "2\n"); + + damon_set_targets(ctx, NULL, 0); + sprint_target_ids(ctx, buf, 64); + KUNIT_EXPECT_STREQ(test, (char *)buf, "\n"); + + dbgfs_destroy_ctx(ctx); +} + +static void damon_dbgfs_test_set_init_regions(struct kunit *test) +{ + struct damon_ctx *ctx = damon_new_ctx(); + unsigned long ids[] = {1, 2, 3}; + /* Each line represents one region in `` `` */ + char * const valid_inputs[] = {"2 10 20\n 2 20 30\n2 35 45", + "2 10 20\n", + "2 10 20\n1 39 59\n1 70 134\n 2 20 25\n", + ""}; + /* Reading the file again will show sorted, clean output */ + char * const valid_expects[] = {"2 10 20\n2 20 30\n2 35 45\n", + "2 10 20\n", + "1 39 59\n1 70 134\n2 10 20\n2 20 25\n", + ""}; + char * const invalid_inputs[] = {"4 10 20\n", /* target not exists */ + "2 10 20\n 2 14 26\n", /* regions overlap */ + "1 10 20\n2 30 40\n 1 5 8"}; /* not sorted by address */ + char *input, *expect; + int i, rc; + char buf[256]; + + damon_set_targets(ctx, ids, 3); + + /* Put valid inputs and check the results */ + for (i = 0; i < ARRAY_SIZE(valid_inputs); i++) { + input = valid_inputs[i]; + expect = valid_expects[i]; + + rc = set_init_regions(ctx, input, strnlen(input, 256)); + KUNIT_EXPECT_EQ(test, rc, 0); + + memset(buf, 0, 256); + sprint_init_regions(ctx, buf, 256); + + KUNIT_EXPECT_STREQ(test, (char *)buf, expect); + } + /* Put invalid inputs and check the return error code */ + for (i = 0; i < ARRAY_SIZE(invalid_inputs); i++) { + input = invalid_inputs[i]; + pr_info("input: %s\n", input); + rc = set_init_regions(ctx, input, strnlen(input, 256)); + KUNIT_EXPECT_EQ(test, rc, -EINVAL); + + memset(buf, 0, 256); + sprint_init_regions(ctx, buf, 256); + + KUNIT_EXPECT_STREQ(test, (char *)buf, ""); + } + + damon_set_targets(ctx, NULL, 0); + damon_destroy_ctx(ctx); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_dbgfs_test_str_to_target_ids), + KUNIT_CASE(damon_dbgfs_test_set_targets), + KUNIT_CASE(damon_dbgfs_test_set_init_regions), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon-dbgfs", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_TEST_H */ + +#endif /* CONFIG_DAMON_KUNIT_TEST */ diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c new file mode 100644 index 0000000000000000000000000000000000000000..5b899601e56c3fd13b434d118ea0e50cb7ab73ff --- /dev/null +++ b/mm/damon/dbgfs.c @@ -0,0 +1,990 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Debugfs Interface + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-dbgfs: " fmt + +#include +#include +#include +#include +#include +#include +#include + +static struct damon_ctx **dbgfs_ctxs; +static int dbgfs_nr_ctxs; +static struct dentry **dbgfs_dirs; +static DEFINE_MUTEX(damon_dbgfs_lock); + +/* + * Returns non-empty string on success, negative error code otherwise. + */ +static char *user_input_str(const char __user *buf, size_t count, loff_t *ppos) +{ + char *kbuf; + ssize_t ret; + + /* We do not accept continuous write */ + if (*ppos) + return ERR_PTR(-EINVAL); + + kbuf = kmalloc(count + 1, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return ERR_PTR(-ENOMEM); + + ret = simple_write_to_buffer(kbuf, count + 1, ppos, buf, count); + if (ret != count) { + kfree(kbuf); + return ERR_PTR(-EIO); + } + kbuf[ret] = '\0'; + + return kbuf; +} + +static ssize_t dbgfs_attrs_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char kbuf[128]; + int ret; + + mutex_lock(&ctx->kdamond_lock); + ret = scnprintf(kbuf, ARRAY_SIZE(kbuf), "%lu %lu %lu %lu %lu\n", + ctx->sample_interval, ctx->aggr_interval, + ctx->primitive_update_interval, ctx->min_nr_regions, + ctx->max_nr_regions); + mutex_unlock(&ctx->kdamond_lock); + + return simple_read_from_buffer(buf, count, ppos, kbuf, ret); +} + +static ssize_t dbgfs_attrs_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + unsigned long s, a, r, minr, maxr; + char *kbuf; + ssize_t ret; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + if (sscanf(kbuf, "%lu %lu %lu %lu %lu", + &s, &a, &r, &minr, &maxr) != 5) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + ret = damon_set_attrs(ctx, s, a, r, minr, maxr); + if (!ret) + ret = count; +unlock_out: + mutex_unlock(&ctx->kdamond_lock); +out: + kfree(kbuf); + return ret; +} + +static ssize_t sprint_schemes(struct damon_ctx *c, char *buf, ssize_t len) +{ + struct damos *s; + int written = 0; + int rc; + + damon_for_each_scheme(s, c) { + rc = scnprintf(&buf[written], len - written, + "%lu %lu %u %u %u %u %d %lu %lu %lu %u %u %u %d %lu %lu %lu %lu %lu %lu %lu %lu %lu\n", + s->min_sz_region, s->max_sz_region, + s->min_nr_accesses, s->max_nr_accesses, + s->min_age_region, s->max_age_region, + s->action, + s->quota.ms, s->quota.sz, + s->quota.reset_interval, + s->quota.weight_sz, + s->quota.weight_nr_accesses, + s->quota.weight_age, + s->wmarks.metric, s->wmarks.interval, + s->wmarks.high, s->wmarks.mid, s->wmarks.low, + s->stat.nr_tried, s->stat.sz_tried, + s->stat.nr_applied, s->stat.sz_applied, + s->stat.qt_exceeds); + if (!rc) + return -ENOMEM; + + written += rc; + } + return written; +} + +static ssize_t dbgfs_schemes_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + len = sprint_schemes(ctx, kbuf, count); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: + kfree(kbuf); + return len; +} + +static void free_schemes_arr(struct damos **schemes, ssize_t nr_schemes) +{ + ssize_t i; + + for (i = 0; i < nr_schemes; i++) + kfree(schemes[i]); + kfree(schemes); +} + +static bool damos_action_valid(int action) +{ + switch (action) { + case DAMOS_WILLNEED: + case DAMOS_COLD: + case DAMOS_PAGEOUT: + case DAMOS_HUGEPAGE: + case DAMOS_NOHUGEPAGE: + case DAMOS_STAT: + return true; + default: + return false; + } +} + +/* + * Converts a string into an array of struct damos pointers + * + * Returns an array of struct damos pointers that converted if the conversion + * success, or NULL otherwise. + */ +static struct damos **str_to_schemes(const char *str, ssize_t len, + ssize_t *nr_schemes) +{ + struct damos *scheme, **schemes; + const int max_nr_schemes = 256; + int pos = 0, parsed, ret; + unsigned long min_sz, max_sz; + unsigned int min_nr_a, max_nr_a, min_age, max_age; + unsigned int action; + + schemes = kmalloc_array(max_nr_schemes, sizeof(scheme), + GFP_KERNEL); + if (!schemes) + return NULL; + + *nr_schemes = 0; + while (pos < len && *nr_schemes < max_nr_schemes) { + struct damos_quota quota = {}; + struct damos_watermarks wmarks; + + ret = sscanf(&str[pos], + "%lu %lu %u %u %u %u %u %lu %lu %lu %u %u %u %u %lu %lu %lu %lu%n", + &min_sz, &max_sz, &min_nr_a, &max_nr_a, + &min_age, &max_age, &action, "a.ms, + "a.sz, "a.reset_interval, + "a.weight_sz, "a.weight_nr_accesses, + "a.weight_age, &wmarks.metric, + &wmarks.interval, &wmarks.high, &wmarks.mid, + &wmarks.low, &parsed); + if (ret != 18) + break; + if (!damos_action_valid(action)) + goto fail; + + if (min_sz > max_sz || min_nr_a > max_nr_a || min_age > max_age) + goto fail; + + if (wmarks.high < wmarks.mid || wmarks.high < wmarks.low || + wmarks.mid < wmarks.low) + goto fail; + + pos += parsed; + scheme = damon_new_scheme(min_sz, max_sz, min_nr_a, max_nr_a, + min_age, max_age, action, "a, &wmarks); + if (!scheme) + goto fail; + + schemes[*nr_schemes] = scheme; + *nr_schemes += 1; + } + return schemes; +fail: + free_schemes_arr(schemes, *nr_schemes); + return NULL; +} + +static ssize_t dbgfs_schemes_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + struct damos **schemes; + ssize_t nr_schemes = 0, ret; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + schemes = str_to_schemes(kbuf, count, &nr_schemes); + if (!schemes) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + ret = damon_set_schemes(ctx, schemes, nr_schemes); + if (!ret) { + ret = count; + nr_schemes = 0; + } + +unlock_out: + mutex_unlock(&ctx->kdamond_lock); + free_schemes_arr(schemes, nr_schemes); +out: + kfree(kbuf); + return ret; +} + +static inline bool targetid_is_pid(const struct damon_ctx *ctx) +{ + return ctx->primitive.target_valid == damon_va_target_valid; +} + +static ssize_t sprint_target_ids(struct damon_ctx *ctx, char *buf, ssize_t len) +{ + struct damon_target *t; + unsigned long id; + int written = 0; + int rc; + + damon_for_each_target(t, ctx) { + id = t->id; + if (targetid_is_pid(ctx)) + /* Show pid numbers to debugfs users */ + id = (unsigned long)pid_vnr((struct pid *)id); + + rc = scnprintf(&buf[written], len - written, "%lu ", id); + if (!rc) + return -ENOMEM; + written += rc; + } + if (written) + written -= 1; + written += scnprintf(&buf[written], len - written, "\n"); + return written; +} + +static ssize_t dbgfs_target_ids_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + ssize_t len; + char ids_buf[320]; + + mutex_lock(&ctx->kdamond_lock); + len = sprint_target_ids(ctx, ids_buf, 320); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + return len; + + return simple_read_from_buffer(buf, count, ppos, ids_buf, len); +} + +/* + * Converts a string into an array of unsigned long integers + * + * Returns an array of unsigned long integers if the conversion success, or + * NULL otherwise. + */ +static unsigned long *str_to_target_ids(const char *str, ssize_t len, + ssize_t *nr_ids) +{ + unsigned long *ids; + const int max_nr_ids = 32; + unsigned long id; + int pos = 0, parsed, ret; + + *nr_ids = 0; + ids = kmalloc_array(max_nr_ids, sizeof(id), GFP_KERNEL); + if (!ids) + return NULL; + while (*nr_ids < max_nr_ids && pos < len) { + ret = sscanf(&str[pos], "%lu%n", &id, &parsed); + pos += parsed; + if (ret != 1) + break; + ids[*nr_ids] = id; + *nr_ids += 1; + } + + return ids; +} + +static void dbgfs_put_pids(unsigned long *ids, int nr_ids) +{ + int i; + + for (i = 0; i < nr_ids; i++) + put_pid((struct pid *)ids[i]); +} + +static ssize_t dbgfs_target_ids_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + struct damon_target *t, *next_t; + bool id_is_pid = true; + char *kbuf; + unsigned long *targets; + ssize_t nr_targets; + ssize_t ret; + int i; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + if (!strncmp(kbuf, "paddr\n", count)) { + id_is_pid = false; + /* target id is meaningless here, but we set it just for fun */ + scnprintf(kbuf, count, "42 "); + } + + targets = str_to_target_ids(kbuf, count, &nr_targets); + if (!targets) { + ret = -ENOMEM; + goto out; + } + + if (id_is_pid) { + for (i = 0; i < nr_targets; i++) { + targets[i] = (unsigned long)find_get_pid( + (int)targets[i]); + if (!targets[i]) { + dbgfs_put_pids(targets, i); + ret = -EINVAL; + goto free_targets_out; + } + } + } + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + if (id_is_pid) + dbgfs_put_pids(targets, nr_targets); + ret = -EBUSY; + goto unlock_out; + } + + /* remove previously set targets */ + damon_for_each_target_safe(t, next_t, ctx) { + if (targetid_is_pid(ctx)) + put_pid((struct pid *)t->id); + damon_destroy_target(t); + } + + /* Configure the context for the address space type */ + if (id_is_pid) + damon_va_set_primitives(ctx); + else + damon_pa_set_primitives(ctx); + + ret = damon_set_targets(ctx, targets, nr_targets); + if (ret) { + if (id_is_pid) + dbgfs_put_pids(targets, nr_targets); + } else { + ret = count; + } + +unlock_out: + mutex_unlock(&ctx->kdamond_lock); +free_targets_out: + kfree(targets); +out: + kfree(kbuf); + return ret; +} + +static ssize_t sprint_init_regions(struct damon_ctx *c, char *buf, ssize_t len) +{ + struct damon_target *t; + struct damon_region *r; + int written = 0; + int rc; + + damon_for_each_target(t, c) { + damon_for_each_region(r, t) { + rc = scnprintf(&buf[written], len - written, + "%lu %lu %lu\n", + t->id, r->ar.start, r->ar.end); + if (!rc) + return -ENOMEM; + written += rc; + } + } + return written; +} + +static ssize_t dbgfs_init_regions_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + mutex_unlock(&ctx->kdamond_lock); + len = -EBUSY; + goto out; + } + + len = sprint_init_regions(ctx, kbuf, count); + mutex_unlock(&ctx->kdamond_lock); + if (len < 0) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: + kfree(kbuf); + return len; +} + +static int add_init_region(struct damon_ctx *c, + unsigned long target_id, struct damon_addr_range *ar) +{ + struct damon_target *t; + struct damon_region *r, *prev; + unsigned long id; + int rc = -EINVAL; + + if (ar->start >= ar->end) + return -EINVAL; + + damon_for_each_target(t, c) { + id = t->id; + if (targetid_is_pid(c)) + id = (unsigned long)pid_vnr((struct pid *)id); + if (id == target_id) { + r = damon_new_region(ar->start, ar->end); + if (!r) + return -ENOMEM; + damon_add_region(r, t); + if (damon_nr_regions(t) > 1) { + prev = damon_prev_region(r); + if (prev->ar.end > r->ar.start) { + damon_destroy_region(r, t); + return -EINVAL; + } + } + rc = 0; + } + } + return rc; +} + +static int set_init_regions(struct damon_ctx *c, const char *str, ssize_t len) +{ + struct damon_target *t; + struct damon_region *r, *next; + int pos = 0, parsed, ret; + unsigned long target_id; + struct damon_addr_range ar; + int err; + + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + + while (pos < len) { + ret = sscanf(&str[pos], "%lu %lu %lu%n", + &target_id, &ar.start, &ar.end, &parsed); + if (ret != 3) + break; + err = add_init_region(c, target_id, &ar); + if (err) + goto fail; + pos += parsed; + } + + return 0; + +fail: + damon_for_each_target(t, c) { + damon_for_each_region_safe(r, next, t) + damon_destroy_region(r, t); + } + return err; +} + +static ssize_t dbgfs_init_regions_write(struct file *file, + const char __user *buf, size_t count, + loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t ret = count; + int err; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) { + ret = -EBUSY; + goto unlock_out; + } + + err = set_init_regions(ctx, kbuf, ret); + if (err) + ret = err; + +unlock_out: + mutex_unlock(&ctx->kdamond_lock); + kfree(kbuf); + return ret; +} + +static ssize_t dbgfs_kdamond_pid_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + struct damon_ctx *ctx = file->private_data; + char *kbuf; + ssize_t len; + + kbuf = kmalloc(count, GFP_KERNEL | __GFP_NOWARN); + if (!kbuf) + return -ENOMEM; + + mutex_lock(&ctx->kdamond_lock); + if (ctx->kdamond) + len = scnprintf(kbuf, count, "%d\n", ctx->kdamond->pid); + else + len = scnprintf(kbuf, count, "none\n"); + mutex_unlock(&ctx->kdamond_lock); + if (!len) + goto out; + len = simple_read_from_buffer(buf, count, ppos, kbuf, len); + +out: + kfree(kbuf); + return len; +} + +static int damon_dbgfs_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + + return nonseekable_open(inode, file); +} + +static const struct file_operations attrs_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_attrs_read, + .write = dbgfs_attrs_write, +}; + +static const struct file_operations schemes_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_schemes_read, + .write = dbgfs_schemes_write, +}; + +static const struct file_operations target_ids_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_target_ids_read, + .write = dbgfs_target_ids_write, +}; + +static const struct file_operations init_regions_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_init_regions_read, + .write = dbgfs_init_regions_write, +}; + +static const struct file_operations kdamond_pid_fops = { + .open = damon_dbgfs_open, + .read = dbgfs_kdamond_pid_read, +}; + +static void dbgfs_fill_ctx_dir(struct dentry *dir, struct damon_ctx *ctx) +{ + const char * const file_names[] = {"attrs", "schemes", "target_ids", + "init_regions", "kdamond_pid"}; + const struct file_operations *fops[] = {&attrs_fops, &schemes_fops, + &target_ids_fops, &init_regions_fops, &kdamond_pid_fops}; + int i; + + for (i = 0; i < ARRAY_SIZE(file_names); i++) + debugfs_create_file(file_names[i], 0600, dir, ctx, fops[i]); +} + +static void dbgfs_before_terminate(struct damon_ctx *ctx) +{ + struct damon_target *t, *next; + + if (!targetid_is_pid(ctx)) + return; + + mutex_lock(&ctx->kdamond_lock); + damon_for_each_target_safe(t, next, ctx) { + put_pid((struct pid *)t->id); + damon_destroy_target(t); + } + mutex_unlock(&ctx->kdamond_lock); +} + +static struct damon_ctx *dbgfs_new_ctx(void) +{ + struct damon_ctx *ctx; + + ctx = damon_new_ctx(); + if (!ctx) + return NULL; + + damon_va_set_primitives(ctx); + ctx->callback.before_terminate = dbgfs_before_terminate; + return ctx; +} + +static void dbgfs_destroy_ctx(struct damon_ctx *ctx) +{ + damon_destroy_ctx(ctx); +} + +/* + * Make a context of @name and create a debugfs directory for it. + * + * This function should be called while holding damon_dbgfs_lock. + * + * Returns 0 on success, negative error code otherwise. + */ +static int dbgfs_mk_context(char *name) +{ + struct dentry *root, **new_dirs, *new_dir; + struct damon_ctx **new_ctxs, *new_ctx; + + if (damon_nr_running_ctxs()) + return -EBUSY; + + new_ctxs = krealloc(dbgfs_ctxs, sizeof(*dbgfs_ctxs) * + (dbgfs_nr_ctxs + 1), GFP_KERNEL); + if (!new_ctxs) + return -ENOMEM; + dbgfs_ctxs = new_ctxs; + + new_dirs = krealloc(dbgfs_dirs, sizeof(*dbgfs_dirs) * + (dbgfs_nr_ctxs + 1), GFP_KERNEL); + if (!new_dirs) + return -ENOMEM; + dbgfs_dirs = new_dirs; + + root = dbgfs_dirs[0]; + if (!root) + return -ENOENT; + + new_dir = debugfs_create_dir(name, root); + dbgfs_dirs[dbgfs_nr_ctxs] = new_dir; + + new_ctx = dbgfs_new_ctx(); + if (!new_ctx) { + debugfs_remove(new_dir); + dbgfs_dirs[dbgfs_nr_ctxs] = NULL; + return -ENOMEM; + } + + dbgfs_ctxs[dbgfs_nr_ctxs] = new_ctx; + dbgfs_fill_ctx_dir(dbgfs_dirs[dbgfs_nr_ctxs], + dbgfs_ctxs[dbgfs_nr_ctxs]); + dbgfs_nr_ctxs++; + + return 0; +} + +static ssize_t dbgfs_mk_context_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + char *kbuf; + char *ctx_name; + ssize_t ret; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + ctx_name = kmalloc(count + 1, GFP_KERNEL); + if (!ctx_name) { + kfree(kbuf); + return -ENOMEM; + } + + /* Trim white space */ + if (sscanf(kbuf, "%s", ctx_name) != 1) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&damon_dbgfs_lock); + ret = dbgfs_mk_context(ctx_name); + if (!ret) + ret = count; + mutex_unlock(&damon_dbgfs_lock); + +out: + kfree(kbuf); + kfree(ctx_name); + return ret; +} + +/* + * Remove a context of @name and its debugfs directory. + * + * This function should be called while holding damon_dbgfs_lock. + * + * Return 0 on success, negative error code otherwise. + */ +static int dbgfs_rm_context(char *name) +{ + struct dentry *root, *dir, **new_dirs; + struct damon_ctx **new_ctxs; + int i, j; + + if (damon_nr_running_ctxs()) + return -EBUSY; + + root = dbgfs_dirs[0]; + if (!root) + return -ENOENT; + + dir = debugfs_lookup(name, root); + if (!dir) + return -ENOENT; + + new_dirs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_dirs), + GFP_KERNEL); + if (!new_dirs) + return -ENOMEM; + + new_ctxs = kmalloc_array(dbgfs_nr_ctxs - 1, sizeof(*dbgfs_ctxs), + GFP_KERNEL); + if (!new_ctxs) { + kfree(new_dirs); + return -ENOMEM; + } + + for (i = 0, j = 0; i < dbgfs_nr_ctxs; i++) { + if (dbgfs_dirs[i] == dir) { + debugfs_remove(dbgfs_dirs[i]); + dbgfs_destroy_ctx(dbgfs_ctxs[i]); + continue; + } + new_dirs[j] = dbgfs_dirs[i]; + new_ctxs[j++] = dbgfs_ctxs[i]; + } + + kfree(dbgfs_dirs); + kfree(dbgfs_ctxs); + + dbgfs_dirs = new_dirs; + dbgfs_ctxs = new_ctxs; + dbgfs_nr_ctxs--; + + return 0; +} + +static ssize_t dbgfs_rm_context_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + char *kbuf; + ssize_t ret; + char *ctx_name; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + ctx_name = kmalloc(count + 1, GFP_KERNEL); + if (!ctx_name) { + kfree(kbuf); + return -ENOMEM; + } + + /* Trim white space */ + if (sscanf(kbuf, "%s", ctx_name) != 1) { + ret = -EINVAL; + goto out; + } + + mutex_lock(&damon_dbgfs_lock); + ret = dbgfs_rm_context(ctx_name); + if (!ret) + ret = count; + mutex_unlock(&damon_dbgfs_lock); + +out: + kfree(kbuf); + kfree(ctx_name); + return ret; +} + +static ssize_t dbgfs_monitor_on_read(struct file *file, + char __user *buf, size_t count, loff_t *ppos) +{ + char monitor_on_buf[5]; + bool monitor_on = damon_nr_running_ctxs() != 0; + int len; + + len = scnprintf(monitor_on_buf, 5, monitor_on ? "on\n" : "off\n"); + + return simple_read_from_buffer(buf, count, ppos, monitor_on_buf, len); +} + +static ssize_t dbgfs_monitor_on_write(struct file *file, + const char __user *buf, size_t count, loff_t *ppos) +{ + ssize_t ret; + char *kbuf; + + kbuf = user_input_str(buf, count, ppos); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); + + /* Remove white space */ + if (sscanf(kbuf, "%s", kbuf) != 1) { + kfree(kbuf); + return -EINVAL; + } + + mutex_lock(&damon_dbgfs_lock); + if (!strncmp(kbuf, "on", count)) { + int i; + + for (i = 0; i < dbgfs_nr_ctxs; i++) { + if (damon_targets_empty(dbgfs_ctxs[i])) { + kfree(kbuf); + mutex_unlock(&damon_dbgfs_lock); + return -EINVAL; + } + } + ret = damon_start(dbgfs_ctxs, dbgfs_nr_ctxs); + } else if (!strncmp(kbuf, "off", count)) { + ret = damon_stop(dbgfs_ctxs, dbgfs_nr_ctxs); + } else { + ret = -EINVAL; + } + mutex_unlock(&damon_dbgfs_lock); + + if (!ret) + ret = count; + kfree(kbuf); + return ret; +} + +static const struct file_operations mk_contexts_fops = { + .write = dbgfs_mk_context_write, +}; + +static const struct file_operations rm_contexts_fops = { + .write = dbgfs_rm_context_write, +}; + +static const struct file_operations monitor_on_fops = { + .read = dbgfs_monitor_on_read, + .write = dbgfs_monitor_on_write, +}; + +static int __init __damon_dbgfs_init(void) +{ + struct dentry *dbgfs_root; + const char * const file_names[] = {"mk_contexts", "rm_contexts", + "monitor_on"}; + const struct file_operations *fops[] = {&mk_contexts_fops, + &rm_contexts_fops, &monitor_on_fops}; + int i; + + dbgfs_root = debugfs_create_dir("damon", NULL); + + for (i = 0; i < ARRAY_SIZE(file_names); i++) + debugfs_create_file(file_names[i], 0600, dbgfs_root, NULL, + fops[i]); + dbgfs_fill_ctx_dir(dbgfs_root, dbgfs_ctxs[0]); + + dbgfs_dirs = kmalloc_array(1, sizeof(dbgfs_root), GFP_KERNEL); + if (!dbgfs_dirs) { + debugfs_remove(dbgfs_root); + return -ENOMEM; + } + dbgfs_dirs[0] = dbgfs_root; + + return 0; +} + +/* + * Functions for the initialization + */ + +static int __init damon_dbgfs_init(void) +{ + int rc = -ENOMEM; + + mutex_lock(&damon_dbgfs_lock); + dbgfs_ctxs = kmalloc(sizeof(*dbgfs_ctxs), GFP_KERNEL); + if (!dbgfs_ctxs) + goto out; + dbgfs_ctxs[0] = dbgfs_new_ctx(); + if (!dbgfs_ctxs[0]) { + kfree(dbgfs_ctxs); + goto out; + } + dbgfs_nr_ctxs = 1; + + rc = __damon_dbgfs_init(); + if (rc) { + kfree(dbgfs_ctxs[0]); + kfree(dbgfs_ctxs); + pr_err("%s: dbgfs init failed\n", __func__); + } + +out: + mutex_unlock(&damon_dbgfs_lock); + return rc; +} + +module_init(damon_dbgfs_init); + +#include "dbgfs-test.h" diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c new file mode 100644 index 0000000000000000000000000000000000000000..5e8244f65a1a243291cc4c060366589c43486522 --- /dev/null +++ b/mm/damon/paddr.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Primitives for The Physical Address Space + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-pa: " fmt + +#include +#include +#include +#include +#include + +#include "../internal.h" +#include "prmtv-common.h" + +static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + struct page_vma_mapped_walk pvmw = { + .page = page, + .vma = vma, + .address = addr, + }; + + while (page_vma_mapped_walk(&pvmw)) { + addr = pvmw.address; + if (pvmw.pte) + damon_ptep_mkold(pvmw.pte, vma->vm_mm, addr); + else + damon_pmdp_mkold(pvmw.pmd, vma->vm_mm, addr); + } + return true; +} + +static void damon_pa_mkold(unsigned long paddr) +{ + struct page *page = damon_get_page(PHYS_PFN(paddr)); + struct rmap_walk_control rwc = { + .rmap_one = __damon_pa_mkold, + .anon_lock = page_lock_anon_vma_read, + }; + bool need_lock; + + if (!page) + return; + + if (!page_mapped(page) || !page_rmapping(page)) { + set_page_idle(page); + goto out; + } + + need_lock = !PageAnon(page) || PageKsm(page); + if (need_lock && !trylock_page(page)) + goto out; + + rmap_walk(page, &rwc); + + if (need_lock) + unlock_page(page); + +out: + put_page(page); +} + +static void __damon_pa_prepare_access_check(struct damon_ctx *ctx, + struct damon_region *r) +{ + r->sampling_addr = damon_rand(r->ar.start, r->ar.end); + + damon_pa_mkold(r->sampling_addr); +} + +static void damon_pa_prepare_access_checks(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) + __damon_pa_prepare_access_check(ctx, r); + } +} + +struct damon_pa_access_chk_result { + unsigned long page_sz; + bool accessed; +}; + +static bool __damon_pa_young(struct page *page, struct vm_area_struct *vma, + unsigned long addr, void *arg) +{ + struct damon_pa_access_chk_result *result = arg; + struct page_vma_mapped_walk pvmw = { + .page = page, + .vma = vma, + .address = addr, + }; + + result->accessed = false; + result->page_sz = PAGE_SIZE; + while (page_vma_mapped_walk(&pvmw)) { + addr = pvmw.address; + if (pvmw.pte) { + result->accessed = pte_young(*pvmw.pte) || + !page_is_idle(page) || + mmu_notifier_test_young(vma->vm_mm, addr); + } else { +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + result->accessed = pmd_young(*pvmw.pmd) || + !page_is_idle(page) || + mmu_notifier_test_young(vma->vm_mm, addr); + result->page_sz = ((1UL) << HPAGE_PMD_SHIFT); +#else + WARN_ON_ONCE(1); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + } + if (result->accessed) { + page_vma_mapped_walk_done(&pvmw); + break; + } + } + + /* If accessed, stop walking */ + return !result->accessed; +} + +static bool damon_pa_young(unsigned long paddr, unsigned long *page_sz) +{ + struct page *page = damon_get_page(PHYS_PFN(paddr)); + struct damon_pa_access_chk_result result = { + .page_sz = PAGE_SIZE, + .accessed = false, + }; + struct rmap_walk_control rwc = { + .arg = &result, + .rmap_one = __damon_pa_young, + .anon_lock = page_lock_anon_vma_read, + }; + bool need_lock; + + if (!page) + return false; + + if (!page_mapped(page) || !page_rmapping(page)) { + if (page_is_idle(page)) + result.accessed = false; + else + result.accessed = true; + put_page(page); + goto out; + } + + need_lock = !PageAnon(page) || PageKsm(page); + if (need_lock && !trylock_page(page)) { + put_page(page); + return NULL; + } + + rmap_walk(page, &rwc); + + if (need_lock) + unlock_page(page); + put_page(page); + +out: + *page_sz = result.page_sz; + return result.accessed; +} + +static void __damon_pa_check_access(struct damon_ctx *ctx, + struct damon_region *r) +{ + static unsigned long last_addr; + static unsigned long last_page_sz = PAGE_SIZE; + static bool last_accessed; + + /* If the region is in the last checked page, reuse the result */ + if (ALIGN_DOWN(last_addr, last_page_sz) == + ALIGN_DOWN(r->sampling_addr, last_page_sz)) { + if (last_accessed) + r->nr_accesses++; + return; + } + + last_accessed = damon_pa_young(r->sampling_addr, &last_page_sz); + if (last_accessed) + r->nr_accesses++; + + last_addr = r->sampling_addr; +} + +static unsigned int damon_pa_check_accesses(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct damon_region *r; + unsigned int max_nr_accesses = 0; + + damon_for_each_target(t, ctx) { + damon_for_each_region(r, t) { + __damon_pa_check_access(ctx, r); + max_nr_accesses = max(r->nr_accesses, max_nr_accesses); + } + } + + return max_nr_accesses; +} + +bool damon_pa_target_valid(void *t) +{ + return true; +} + +static unsigned long damon_pa_apply_scheme(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + unsigned long addr, applied; + LIST_HEAD(page_list); + + if (scheme->action != DAMOS_PAGEOUT) + return 0; + + for (addr = r->ar.start; addr < r->ar.end; addr += PAGE_SIZE) { + struct page *page = damon_get_page(PHYS_PFN(addr)); + + if (!page) + continue; + + ClearPageReferenced(page); + test_and_clear_page_young(page); + if (isolate_lru_page(page)) { + put_page(page); + continue; + } + if (PageUnevictable(page)) { + putback_lru_page(page); + } else { + list_add(&page->lru, &page_list); + put_page(page); + } + } + applied = reclaim_pages(&page_list); + cond_resched(); + return applied * PAGE_SIZE; +} + +static int damon_pa_scheme_score(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + switch (scheme->action) { + case DAMOS_PAGEOUT: + return damon_pageout_score(context, r, scheme); + default: + break; + } + + return DAMOS_MAX_SCORE; +} + +void damon_pa_set_primitives(struct damon_ctx *ctx) +{ + ctx->primitive.init = NULL; + ctx->primitive.update = NULL; + ctx->primitive.prepare_access_checks = damon_pa_prepare_access_checks; + ctx->primitive.check_accesses = damon_pa_check_accesses; + ctx->primitive.reset_aggregated = NULL; + ctx->primitive.target_valid = damon_pa_target_valid; + ctx->primitive.cleanup = NULL; + ctx->primitive.apply_scheme = damon_pa_apply_scheme; + ctx->primitive.get_scheme_score = damon_pa_scheme_score; +} diff --git a/mm/damon/prmtv-common.c b/mm/damon/prmtv-common.c new file mode 100644 index 0000000000000000000000000000000000000000..92a04f5831d6bdab339e472c4fafa1da932c4630 --- /dev/null +++ b/mm/damon/prmtv-common.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common Primitives for Data Access Monitoring + * + * Author: SeongJae Park + */ + +#include +#include +#include +#include + +#include "prmtv-common.h" + +/* + * Get an online page for a pfn if it's in the LRU list. Otherwise, returns + * NULL. + * + * The body of this function is stolen from the 'page_idle_get_page()'. We + * steal rather than reuse it because the code is quite simple. + */ +struct page *damon_get_page(unsigned long pfn) +{ + struct page *page = pfn_to_online_page(pfn); + + if (!page || !PageLRU(page) || !get_page_unless_zero(page)) + return NULL; + + if (unlikely(!PageLRU(page))) { + put_page(page); + page = NULL; + } + return page; +} + +void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, unsigned long addr) +{ + bool referenced = false; + struct page *page = damon_get_page(pte_pfn(*pte)); + + if (!page) + return; + + if (pte_young(*pte)) { + referenced = true; + *pte = pte_mkold(*pte); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, addr + PAGE_SIZE)) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +} + +void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + bool referenced = false; + struct page *page = damon_get_page(pmd_pfn(*pmd)); + + if (!page) + return; + + if (pmd_young(*pmd)) { + referenced = true; + *pmd = pmd_mkold(*pmd); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, + addr + ((1UL) << HPAGE_PMD_SHIFT))) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +} + +#define DAMON_MAX_SUBSCORE (100) +#define DAMON_MAX_AGE_IN_LOG (32) + +int damon_pageout_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s) +{ + unsigned int max_nr_accesses; + int freq_subscore; + unsigned int age_in_sec; + int age_in_log, age_subscore; + unsigned int freq_weight = s->quota.weight_nr_accesses; + unsigned int age_weight = s->quota.weight_age; + int hotness; + + max_nr_accesses = c->aggr_interval / c->sample_interval; + freq_subscore = r->nr_accesses * DAMON_MAX_SUBSCORE / max_nr_accesses; + + age_in_sec = (unsigned long)r->age * c->aggr_interval / 1000000; + for (age_in_log = 0; age_in_log < DAMON_MAX_AGE_IN_LOG && age_in_sec; + age_in_log++, age_in_sec >>= 1) + ; + + /* If frequency is 0, higher age means it's colder */ + if (freq_subscore == 0) + age_in_log *= -1; + + /* + * Now age_in_log is in [-DAMON_MAX_AGE_IN_LOG, DAMON_MAX_AGE_IN_LOG]. + * Scale it to be in [0, 100] and set it as age subscore. + */ + age_in_log += DAMON_MAX_AGE_IN_LOG; + age_subscore = age_in_log * DAMON_MAX_SUBSCORE / + DAMON_MAX_AGE_IN_LOG / 2; + + hotness = (freq_weight * freq_subscore + age_weight * age_subscore); + if (freq_weight + age_weight) + hotness /= freq_weight + age_weight; + /* + * Transform it to fit in [0, DAMOS_MAX_SCORE] + */ + hotness = hotness * DAMOS_MAX_SCORE / DAMON_MAX_SUBSCORE; + + /* Return coldness of the region */ + return DAMOS_MAX_SCORE - hotness; +} diff --git a/mm/damon/prmtv-common.h b/mm/damon/prmtv-common.h new file mode 100644 index 0000000000000000000000000000000000000000..e790cb5f8fe05913ef868bd41743604c91d4aacf --- /dev/null +++ b/mm/damon/prmtv-common.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common Primitives for Data Access Monitoring + * + * Author: SeongJae Park + */ + +#include + +struct page *damon_get_page(unsigned long pfn); + +void damon_ptep_mkold(pte_t *pte, struct mm_struct *mm, unsigned long addr); +void damon_pmdp_mkold(pmd_t *pmd, struct mm_struct *mm, unsigned long addr); + +int damon_pageout_score(struct damon_ctx *c, struct damon_region *r, + struct damos *s); diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c new file mode 100644 index 0000000000000000000000000000000000000000..183d4f3b4fde7f4e78b97c445d9fed0c1533d908 --- /dev/null +++ b/mm/damon/reclaim.c @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON-based page reclamation + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-reclaim: " fmt + +#include +#include +#include +#include +#include + +#ifdef MODULE_PARAM_PREFIX +#undef MODULE_PARAM_PREFIX +#endif +#define MODULE_PARAM_PREFIX "damon_reclaim." + +/* + * Enable or disable DAMON_RECLAIM. + * + * You can enable DAMON_RCLAIM by setting the value of this parameter as ``Y``. + * Setting it as ``N`` disables DAMON_RECLAIM. Note that DAMON_RECLAIM could + * do no real monitoring and reclamation due to the watermarks-based activation + * condition. Refer to below descriptions for the watermarks parameter for + * this. + */ +static bool enabled __read_mostly; + +/* + * Time threshold for cold memory regions identification in microseconds. + * + * If a memory region is not accessed for this or longer time, DAMON_RECLAIM + * identifies the region as cold, and reclaims. 120 seconds by default. + */ +static unsigned long min_age __read_mostly = 120000000; +module_param(min_age, ulong, 0600); + +/* + * Limit of time for trying the reclamation in milliseconds. + * + * DAMON_RECLAIM tries to use only up to this time within a time window + * (quota_reset_interval_ms) for trying reclamation of cold pages. This can be + * used for limiting CPU consumption of DAMON_RECLAIM. If the value is zero, + * the limit is disabled. + * + * 10 ms by default. + */ +static unsigned long quota_ms __read_mostly = 10; +module_param(quota_ms, ulong, 0600); + +/* + * Limit of size of memory for the reclamation in bytes. + * + * DAMON_RECLAIM charges amount of memory which it tried to reclaim within a + * time window (quota_reset_interval_ms) and makes no more than this limit is + * tried. This can be used for limiting consumption of CPU and IO. If this + * value is zero, the limit is disabled. + * + * 128 MiB by default. + */ +static unsigned long quota_sz __read_mostly = 128 * 1024 * 1024; +module_param(quota_sz, ulong, 0600); + +/* + * The time/size quota charge reset interval in milliseconds. + * + * The charge reset interval for the quota of time (quota_ms) and size + * (quota_sz). That is, DAMON_RECLAIM does not try reclamation for more than + * quota_ms milliseconds or quota_sz bytes within quota_reset_interval_ms + * milliseconds. + * + * 1 second by default. + */ +static unsigned long quota_reset_interval_ms __read_mostly = 1000; +module_param(quota_reset_interval_ms, ulong, 0600); + +/* + * The watermarks check time interval in microseconds. + * + * Minimal time to wait before checking the watermarks, when DAMON_RECLAIM is + * enabled but inactive due to its watermarks rule. 5 seconds by default. + */ +static unsigned long wmarks_interval __read_mostly = 5000000; +module_param(wmarks_interval, ulong, 0600); + +/* + * Free memory rate (per thousand) for the high watermark. + * + * If free memory of the system in bytes per thousand bytes is higher than + * this, DAMON_RECLAIM becomes inactive, so it does nothing but periodically + * checks the watermarks. 500 (50%) by default. + */ +static unsigned long wmarks_high __read_mostly = 500; +module_param(wmarks_high, ulong, 0600); + +/* + * Free memory rate (per thousand) for the middle watermark. + * + * If free memory of the system in bytes per thousand bytes is between this and + * the low watermark, DAMON_RECLAIM becomes active, so starts the monitoring + * and the reclaiming. 400 (40%) by default. + */ +static unsigned long wmarks_mid __read_mostly = 400; +module_param(wmarks_mid, ulong, 0600); + +/* + * Free memory rate (per thousand) for the low watermark. + * + * If free memory of the system in bytes per thousand bytes is lower than this, + * DAMON_RECLAIM becomes inactive, so it does nothing but periodically checks + * the watermarks. In the case, the system falls back to the LRU-based page + * granularity reclamation logic. 200 (20%) by default. + */ +static unsigned long wmarks_low __read_mostly = 200; +module_param(wmarks_low, ulong, 0600); + +/* + * Sampling interval for the monitoring in microseconds. + * + * The sampling interval of DAMON for the cold memory monitoring. Please refer + * to the DAMON documentation for more detail. 5 ms by default. + */ +static unsigned long sample_interval __read_mostly = 5000; +module_param(sample_interval, ulong, 0600); + +/* + * Aggregation interval for the monitoring in microseconds. + * + * The aggregation interval of DAMON for the cold memory monitoring. Please + * refer to the DAMON documentation for more detail. 100 ms by default. + */ +static unsigned long aggr_interval __read_mostly = 100000; +module_param(aggr_interval, ulong, 0600); + +/* + * Minimum number of monitoring regions. + * + * The minimal number of monitoring regions of DAMON for the cold memory + * monitoring. This can be used to set lower-bound of the monitoring quality. + * But, setting this too high could result in increased monitoring overhead. + * Please refer to the DAMON documentation for more detail. 10 by default. + */ +static unsigned long min_nr_regions __read_mostly = 10; +module_param(min_nr_regions, ulong, 0600); + +/* + * Maximum number of monitoring regions. + * + * The maximum number of monitoring regions of DAMON for the cold memory + * monitoring. This can be used to set upper-bound of the monitoring overhead. + * However, setting this too low could result in bad monitoring quality. + * Please refer to the DAMON documentation for more detail. 1000 by default. + */ +static unsigned long max_nr_regions __read_mostly = 1000; +module_param(max_nr_regions, ulong, 0600); + +/* + * Start of the target memory region in physical address. + * + * The start physical address of memory region that DAMON_RECLAIM will do work + * against. By default, biggest System RAM is used as the region. + */ +static unsigned long monitor_region_start __read_mostly; +module_param(monitor_region_start, ulong, 0600); + +/* + * End of the target memory region in physical address. + * + * The end physical address of memory region that DAMON_RECLAIM will do work + * against. By default, biggest System RAM is used as the region. + */ +static unsigned long monitor_region_end __read_mostly; +module_param(monitor_region_end, ulong, 0600); + +/* + * PID of the DAMON thread + * + * If DAMON_RECLAIM is enabled, this becomes the PID of the worker thread. + * Else, -1. + */ +static int kdamond_pid __read_mostly = -1; +module_param(kdamond_pid, int, 0400); + +/* + * Number of memory regions that tried to be reclaimed. + */ +static unsigned long nr_reclaim_tried_regions __read_mostly; +module_param(nr_reclaim_tried_regions, ulong, 0400); + +/* + * Total bytes of memory regions that tried to be reclaimed. + */ +static unsigned long bytes_reclaim_tried_regions __read_mostly; +module_param(bytes_reclaim_tried_regions, ulong, 0400); + +/* + * Number of memory regions that successfully be reclaimed. + */ +static unsigned long nr_reclaimed_regions __read_mostly; +module_param(nr_reclaimed_regions, ulong, 0400); + +/* + * Total bytes of memory regions that successfully be reclaimed. + */ +static unsigned long bytes_reclaimed_regions __read_mostly; +module_param(bytes_reclaimed_regions, ulong, 0400); + +/* + * Number of times that the time/space quota limits have exceeded + */ +static unsigned long nr_quota_exceeds __read_mostly; +module_param(nr_quota_exceeds, ulong, 0400); + +static struct damon_ctx *ctx; +static struct damon_target *target; + +struct damon_reclaim_ram_walk_arg { + unsigned long start; + unsigned long end; +}; + +static int walk_system_ram(struct resource *res, void *arg) +{ + struct damon_reclaim_ram_walk_arg *a = arg; + + if (a->end - a->start < res->end - res->start) { + a->start = res->start; + a->end = res->end; + } + return 0; +} + +/* + * Find biggest 'System RAM' resource and store its start and end address in + * @start and @end, respectively. If no System RAM is found, returns false. + */ +static bool get_monitoring_region(unsigned long *start, unsigned long *end) +{ + struct damon_reclaim_ram_walk_arg arg = {}; + + walk_system_ram_res(0, ULONG_MAX, &arg, walk_system_ram); + if (arg.end <= arg.start) + return false; + + *start = arg.start; + *end = arg.end; + return true; +} + +static struct damos *damon_reclaim_new_scheme(void) +{ + struct damos_watermarks wmarks = { + .metric = DAMOS_WMARK_FREE_MEM_RATE, + .interval = wmarks_interval, + .high = wmarks_high, + .mid = wmarks_mid, + .low = wmarks_low, + }; + struct damos_quota quota = { + /* + * Do not try reclamation for more than quota_ms milliseconds + * or quota_sz bytes within quota_reset_interval_ms. + */ + .ms = quota_ms, + .sz = quota_sz, + .reset_interval = quota_reset_interval_ms, + /* Within the quota, page out older regions first. */ + .weight_sz = 0, + .weight_nr_accesses = 0, + .weight_age = 1 + }; + struct damos *scheme = damon_new_scheme( + /* Find regions having PAGE_SIZE or larger size */ + PAGE_SIZE, ULONG_MAX, + /* and not accessed at all */ + 0, 0, + /* for min_age or more micro-seconds, and */ + min_age / aggr_interval, UINT_MAX, + /* page out those, as soon as found */ + DAMOS_PAGEOUT, + /* under the quota. */ + "a, + /* (De)activate this according to the watermarks. */ + &wmarks); + + return scheme; +} + +static int damon_reclaim_turn(bool on) +{ + struct damon_region *region; + struct damos *scheme; + int err; + + if (!on) { + err = damon_stop(&ctx, 1); + if (!err) + kdamond_pid = -1; + return err; + } + + err = damon_set_attrs(ctx, sample_interval, aggr_interval, 0, + min_nr_regions, max_nr_regions); + if (err) + return err; + + if (monitor_region_start > monitor_region_end) + return -EINVAL; + if (!monitor_region_start && !monitor_region_end && + !get_monitoring_region(&monitor_region_start, + &monitor_region_end)) + return -EINVAL; + /* DAMON will free this on its own when finish monitoring */ + region = damon_new_region(monitor_region_start, monitor_region_end); + if (!region) + return -ENOMEM; + damon_add_region(region, target); + + /* Will be freed by 'damon_set_schemes()' below */ + scheme = damon_reclaim_new_scheme(); + if (!scheme) { + err = -ENOMEM; + goto free_region_out; + } + err = damon_set_schemes(ctx, &scheme, 1); + if (err) + goto free_scheme_out; + + err = damon_start(&ctx, 1); + if (!err) { + kdamond_pid = ctx->kdamond->pid; + return 0; + } + +free_scheme_out: + damon_destroy_scheme(scheme); +free_region_out: + damon_destroy_region(region, target); + return err; +} + +#define ENABLE_CHECK_INTERVAL_MS 1000 +static struct delayed_work damon_reclaim_timer; +static void damon_reclaim_timer_fn(struct work_struct *work) +{ + static bool last_enabled; + bool now_enabled; + + now_enabled = enabled; + if (last_enabled != now_enabled) { + if (!damon_reclaim_turn(now_enabled)) + last_enabled = now_enabled; + else + enabled = last_enabled; + } + + if (enabled) + schedule_delayed_work(&damon_reclaim_timer, + msecs_to_jiffies(ENABLE_CHECK_INTERVAL_MS)); +} +static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn); + +static int enabled_store(const char *val, + const struct kernel_param *kp) +{ + int rc = param_set_bool(val, kp); + + if (rc < 0) + return rc; + + if (enabled) + schedule_delayed_work(&damon_reclaim_timer, 0); + + return 0; +} + +static const struct kernel_param_ops enabled_param_ops = { + .set = enabled_store, + .get = param_get_bool, +}; + +module_param_cb(enabled, &enabled_param_ops, &enabled, 0600); +MODULE_PARM_DESC(enabled, + "Enable or disable DAMON_RECLAIM (default: disabled)"); + +static int damon_reclaim_after_aggregation(struct damon_ctx *c) +{ + struct damos *s; + + /* update the stats parameter */ + damon_for_each_scheme(s, c) { + nr_reclaim_tried_regions = s->stat.nr_tried; + bytes_reclaim_tried_regions = s->stat.sz_tried; + nr_reclaimed_regions = s->stat.nr_applied; + bytes_reclaimed_regions = s->stat.sz_applied; + nr_quota_exceeds = s->stat.qt_exceeds; + } + return 0; +} + +static int __init damon_reclaim_init(void) +{ + ctx = damon_new_ctx(); + if (!ctx) + return -ENOMEM; + + damon_pa_set_primitives(ctx); + ctx->callback.after_aggregation = damon_reclaim_after_aggregation; + + /* 4242 means nothing but fun */ + target = damon_new_target(4242); + if (!target) { + damon_destroy_ctx(ctx); + return -ENOMEM; + } + damon_add_target(ctx, target); + + schedule_delayed_work(&damon_reclaim_timer, 0); + return 0; +} + +module_init(damon_reclaim_init); diff --git a/mm/damon/vaddr-test.h b/mm/damon/vaddr-test.h new file mode 100644 index 0000000000000000000000000000000000000000..6a1b9272ea123a1f2991ad632daf17ef0b173c76 --- /dev/null +++ b/mm/damon/vaddr-test.h @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Data Access Monitor Unit Tests + * + * Copyright 2019 Amazon.com, Inc. or its affiliates. All rights reserved. + * + * Author: SeongJae Park + */ + +#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST + +#ifndef _DAMON_VADDR_TEST_H +#define _DAMON_VADDR_TEST_H + +#include + +static void __link_vmas(struct vm_area_struct *vmas, ssize_t nr_vmas) +{ + int i, j; + unsigned long largest_gap, gap; + + if (!nr_vmas) + return; + + for (i = 0; i < nr_vmas - 1; i++) { + vmas[i].vm_next = &vmas[i + 1]; + + vmas[i].vm_rb.rb_left = NULL; + vmas[i].vm_rb.rb_right = &vmas[i + 1].vm_rb; + + largest_gap = 0; + for (j = i; j < nr_vmas; j++) { + if (j == 0) + continue; + gap = vmas[j].vm_start - vmas[j - 1].vm_end; + if (gap > largest_gap) + largest_gap = gap; + } + vmas[i].rb_subtree_gap = largest_gap; + } + vmas[i].vm_next = NULL; + vmas[i].vm_rb.rb_right = NULL; + vmas[i].rb_subtree_gap = 0; +} + +/* + * Test __damon_va_three_regions() function + * + * In case of virtual memory address spaces monitoring, DAMON converts the + * complex and dynamic memory mappings of each target task to three + * discontiguous regions which cover every mapped areas. However, the three + * regions should not include the two biggest unmapped areas in the original + * mapping, because the two biggest areas are normally the areas between 1) + * heap and the mmap()-ed regions, and 2) the mmap()-ed regions and stack. + * Because these two unmapped areas are very huge but obviously never accessed, + * covering the region is just a waste. + * + * '__damon_va_three_regions() receives an address space of a process. It + * first identifies the start of mappings, end of mappings, and the two biggest + * unmapped areas. After that, based on the information, it constructs the + * three regions and returns. For more detail, refer to the comment of + * 'damon_init_regions_of()' function definition in 'mm/damon.c' file. + * + * For example, suppose virtual address ranges of 10-20, 20-25, 200-210, + * 210-220, 300-305, and 307-330 (Other comments represent this mappings in + * more short form: 10-20-25, 200-210-220, 300-305, 307-330) of a process are + * mapped. To cover every mappings, the three regions should start with 10, + * and end with 305. The process also has three unmapped areas, 25-200, + * 220-300, and 305-307. Among those, 25-200 and 220-300 are the biggest two + * unmapped areas, and thus it should be converted to three regions of 10-25, + * 200-220, and 300-330. + */ +static void damon_test_three_regions_in_vmas(struct kunit *test) +{ + struct damon_addr_range regions[3] = {0,}; + /* 10-20-25, 200-210-220, 300-305, 307-330 */ + struct vm_area_struct vmas[] = { + (struct vm_area_struct) {.vm_start = 10, .vm_end = 20}, + (struct vm_area_struct) {.vm_start = 20, .vm_end = 25}, + (struct vm_area_struct) {.vm_start = 200, .vm_end = 210}, + (struct vm_area_struct) {.vm_start = 210, .vm_end = 220}, + (struct vm_area_struct) {.vm_start = 300, .vm_end = 305}, + (struct vm_area_struct) {.vm_start = 307, .vm_end = 330}, + }; + + __link_vmas(vmas, 6); + + __damon_va_three_regions(&vmas[0], regions); + + KUNIT_EXPECT_EQ(test, 10ul, regions[0].start); + KUNIT_EXPECT_EQ(test, 25ul, regions[0].end); + KUNIT_EXPECT_EQ(test, 200ul, regions[1].start); + KUNIT_EXPECT_EQ(test, 220ul, regions[1].end); + KUNIT_EXPECT_EQ(test, 300ul, regions[2].start); + KUNIT_EXPECT_EQ(test, 330ul, regions[2].end); +} + +static struct damon_region *__nth_region_of(struct damon_target *t, int idx) +{ + struct damon_region *r; + unsigned int i = 0; + + damon_for_each_region(r, t) { + if (i++ == idx) + return r; + } + + return NULL; +} + +/* + * Test 'damon_va_apply_three_regions()' + * + * test kunit object + * regions an array containing start/end addresses of current + * monitoring target regions + * nr_regions the number of the addresses in 'regions' + * three_regions The three regions that need to be applied now + * expected start/end addresses of monitoring target regions that + * 'three_regions' are applied + * nr_expected the number of addresses in 'expected' + * + * The memory mapping of the target processes changes dynamically. To follow + * the change, DAMON periodically reads the mappings, simplifies it to the + * three regions, and updates the monitoring target regions to fit in the three + * regions. The update of current target regions is the role of + * 'damon_va_apply_three_regions()'. + * + * This test passes the given target regions and the new three regions that + * need to be applied to the function and check whether it updates the regions + * as expected. + */ +static void damon_do_test_apply_three_regions(struct kunit *test, + unsigned long *regions, int nr_regions, + struct damon_addr_range *three_regions, + unsigned long *expected, int nr_expected) +{ + struct damon_target *t; + struct damon_region *r; + int i; + + t = damon_new_target(42); + for (i = 0; i < nr_regions / 2; i++) { + r = damon_new_region(regions[i * 2], regions[i * 2 + 1]); + damon_add_region(r, t); + } + + damon_va_apply_three_regions(t, three_regions); + + for (i = 0; i < nr_expected / 2; i++) { + r = __nth_region_of(t, i); + KUNIT_EXPECT_EQ(test, r->ar.start, expected[i * 2]); + KUNIT_EXPECT_EQ(test, r->ar.end, expected[i * 2 + 1]); + } +} + +/* + * This function test most common case where the three big regions are only + * slightly changed. Target regions should adjust their boundary (10-20-30, + * 50-55, 70-80, 90-100) to fit with the new big regions or remove target + * regions (57-79) that now out of the three regions. + */ +static void damon_test_apply_three_regions1(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 45-55, 73-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 45, .end = 55}, + (struct damon_addr_range){.start = 73, .end = 104} }; + /* 5-20-27, 45-55, 73-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 45, 55, + 73, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test slightly bigger change. Similar to above, but the second big region + * now require two target regions (50-55, 57-59) to be removed. + */ +static void damon_test_apply_three_regions2(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 56-57, 65-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 56, .end = 57}, + (struct damon_addr_range){.start = 65, .end = 104} }; + /* 5-20-27, 56-57, 65-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 56, 57, + 65, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test a big change. The second big region has totally freed and mapped to + * different area (50-59 -> 61-63). The target regions which were in the old + * second big region (50-55-57-59) should be removed and new target region + * covering the second big region (61-63) should be created. + */ +static void damon_test_apply_three_regions3(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-27, 61-63, 65-104 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 27}, + (struct damon_addr_range){.start = 61, .end = 63}, + (struct damon_addr_range){.start = 65, .end = 104} }; + /* 5-20-27, 61-63, 65-80-90-104 */ + unsigned long expected[] = {5, 20, 20, 27, 61, 63, + 65, 80, 80, 90, 90, 104}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +/* + * Test another big change. Both of the second and third big regions (50-59 + * and 70-100) has totally freed and mapped to different area (30-32 and + * 65-68). The target regions which were in the old second and third big + * regions should now be removed and new target regions covering the new second + * and third big regions should be created. + */ +static void damon_test_apply_three_regions4(struct kunit *test) +{ + /* 10-20-30, 50-55-57-59, 70-80-90-100 */ + unsigned long regions[] = {10, 20, 20, 30, 50, 55, 55, 57, 57, 59, + 70, 80, 80, 90, 90, 100}; + /* 5-7, 30-32, 65-68 */ + struct damon_addr_range new_three_regions[3] = { + (struct damon_addr_range){.start = 5, .end = 7}, + (struct damon_addr_range){.start = 30, .end = 32}, + (struct damon_addr_range){.start = 65, .end = 68} }; + /* expect 5-7, 30-32, 65-68 */ + unsigned long expected[] = {5, 7, 30, 32, 65, 68}; + + damon_do_test_apply_three_regions(test, regions, ARRAY_SIZE(regions), + new_three_regions, expected, ARRAY_SIZE(expected)); +} + +static void damon_test_split_evenly_fail(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(42); + struct damon_region *r = damon_new_region(start, end); + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), -EINVAL); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), 1u); + + damon_for_each_region(r, t) { + KUNIT_EXPECT_EQ(test, r->ar.start, start); + KUNIT_EXPECT_EQ(test, r->ar.end, end); + } + + damon_free_target(t); +} + +static void damon_test_split_evenly_succ(struct kunit *test, + unsigned long start, unsigned long end, unsigned int nr_pieces) +{ + struct damon_target *t = damon_new_target(42); + struct damon_region *r = damon_new_region(start, end); + unsigned long expected_width = (end - start) / nr_pieces; + unsigned long i = 0; + + damon_add_region(r, t); + KUNIT_EXPECT_EQ(test, + damon_va_evenly_split_region(t, r, nr_pieces), 0); + KUNIT_EXPECT_EQ(test, damon_nr_regions(t), nr_pieces); + + damon_for_each_region(r, t) { + if (i == nr_pieces - 1) + break; + KUNIT_EXPECT_EQ(test, + r->ar.start, start + i++ * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, start + i * expected_width); + } + KUNIT_EXPECT_EQ(test, r->ar.start, start + i * expected_width); + KUNIT_EXPECT_EQ(test, r->ar.end, end); + damon_free_target(t); +} + +static void damon_test_split_evenly(struct kunit *test) +{ + KUNIT_EXPECT_EQ(test, damon_va_evenly_split_region(NULL, NULL, 5), + -EINVAL); + + damon_test_split_evenly_fail(test, 0, 100, 0); + damon_test_split_evenly_succ(test, 0, 100, 10); + damon_test_split_evenly_succ(test, 5, 59, 5); + damon_test_split_evenly_fail(test, 5, 6, 2); +} + +static struct kunit_case damon_test_cases[] = { + KUNIT_CASE(damon_test_three_regions_in_vmas), + KUNIT_CASE(damon_test_apply_three_regions1), + KUNIT_CASE(damon_test_apply_three_regions2), + KUNIT_CASE(damon_test_apply_three_regions3), + KUNIT_CASE(damon_test_apply_three_regions4), + KUNIT_CASE(damon_test_split_evenly), + {}, +}; + +static struct kunit_suite damon_test_suite = { + .name = "damon-primitives", + .test_cases = damon_test_cases, +}; +kunit_test_suite(damon_test_suite); + +#endif /* _DAMON_VADDR_TEST_H */ + +#endif /* CONFIG_DAMON_VADDR_KUNIT_TEST */ diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c new file mode 100644 index 0000000000000000000000000000000000000000..89b6468da2b9b0d595a34bbcf4ded9231f80c2ad --- /dev/null +++ b/mm/damon/vaddr.c @@ -0,0 +1,761 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DAMON Primitives for Virtual Address Spaces + * + * Author: SeongJae Park + */ + +#define pr_fmt(fmt) "damon-va: " fmt + +#include +#include +#include +#include +#include +#include +#include + +#include "prmtv-common.h" + +#ifdef CONFIG_DAMON_VADDR_KUNIT_TEST +#undef DAMON_MIN_REGION +#define DAMON_MIN_REGION 1 +#endif + +/* + * 't->id' should be the pointer to the relevant 'struct pid' having reference + * count. Caller must put the returned task, unless it is NULL. + */ +static inline struct task_struct *damon_get_task_struct(struct damon_target *t) +{ + return get_pid_task((struct pid *)t->id, PIDTYPE_PID); +} + +/* + * Get the mm_struct of the given target + * + * Caller _must_ put the mm_struct after use, unless it is NULL. + * + * Returns the mm_struct of the target on success, NULL on failure + */ +static struct mm_struct *damon_get_mm(struct damon_target *t) +{ + struct task_struct *task; + struct mm_struct *mm; + + task = damon_get_task_struct(t); + if (!task) + return NULL; + + mm = get_task_mm(task); + put_task_struct(task); + return mm; +} + +/* + * Functions for the initial monitoring target regions construction + */ + +/* + * Size-evenly split a region into 'nr_pieces' small regions + * + * Returns 0 on success, or negative error code otherwise. + */ +static int damon_va_evenly_split_region(struct damon_target *t, + struct damon_region *r, unsigned int nr_pieces) +{ + unsigned long sz_orig, sz_piece, orig_end; + struct damon_region *n = NULL, *next; + unsigned long start; + + if (!r || !nr_pieces) + return -EINVAL; + + orig_end = r->ar.end; + sz_orig = r->ar.end - r->ar.start; + sz_piece = ALIGN_DOWN(sz_orig / nr_pieces, DAMON_MIN_REGION); + + if (!sz_piece) + return -EINVAL; + + r->ar.end = r->ar.start + sz_piece; + next = damon_next_region(r); + for (start = r->ar.end; start + sz_piece <= orig_end; + start += sz_piece) { + n = damon_new_region(start, start + sz_piece); + if (!n) + return -ENOMEM; + damon_insert_region(n, r, next, t); + r = n; + } + /* complement last region for possible rounding error */ + if (n) + n->ar.end = orig_end; + + return 0; +} + +static unsigned long sz_range(struct damon_addr_range *r) +{ + return r->end - r->start; +} + +/* + * Find three regions separated by two biggest unmapped regions + * + * vma the head vma of the target address space + * regions an array of three address ranges that results will be saved + * + * This function receives an address space and finds three regions in it which + * separated by the two biggest unmapped regions in the space. Please refer to + * below comments of '__damon_va_init_regions()' function to know why this is + * necessary. + * + * Returns 0 if success, or negative error code otherwise. + */ +static int __damon_va_three_regions(struct vm_area_struct *vma, + struct damon_addr_range regions[3]) +{ + struct damon_addr_range gap = {0}, first_gap = {0}, second_gap = {0}; + struct vm_area_struct *last_vma = NULL; + unsigned long start = 0; + struct rb_root rbroot; + + /* Find two biggest gaps so that first_gap > second_gap > others */ + for (; vma; vma = vma->vm_next) { + if (!last_vma) { + start = vma->vm_start; + goto next; + } + + if (vma->rb_subtree_gap <= sz_range(&second_gap)) { + rbroot.rb_node = &vma->vm_rb; + vma = rb_entry(rb_last(&rbroot), + struct vm_area_struct, vm_rb); + goto next; + } + + gap.start = last_vma->vm_end; + gap.end = vma->vm_start; + if (sz_range(&gap) > sz_range(&second_gap)) { + swap(gap, second_gap); + if (sz_range(&second_gap) > sz_range(&first_gap)) + swap(second_gap, first_gap); + } +next: + last_vma = vma; + } + + if (!sz_range(&second_gap) || !sz_range(&first_gap)) + return -EINVAL; + + /* Sort the two biggest gaps by address */ + if (first_gap.start > second_gap.start) + swap(first_gap, second_gap); + + /* Store the result */ + regions[0].start = ALIGN(start, DAMON_MIN_REGION); + regions[0].end = ALIGN(first_gap.start, DAMON_MIN_REGION); + regions[1].start = ALIGN(first_gap.end, DAMON_MIN_REGION); + regions[1].end = ALIGN(second_gap.start, DAMON_MIN_REGION); + regions[2].start = ALIGN(second_gap.end, DAMON_MIN_REGION); + regions[2].end = ALIGN(last_vma->vm_end, DAMON_MIN_REGION); + + return 0; +} + +/* + * Get the three regions in the given target (task) + * + * Returns 0 on success, negative error code otherwise. + */ +static int damon_va_three_regions(struct damon_target *t, + struct damon_addr_range regions[3]) +{ + struct mm_struct *mm; + int rc; + + mm = damon_get_mm(t); + if (!mm) + return -EINVAL; + + mmap_read_lock(mm); + rc = __damon_va_three_regions(mm->mmap, regions); + mmap_read_unlock(mm); + + mmput(mm); + return rc; +} + +/* + * Initialize the monitoring target regions for the given target (task) + * + * t the given target + * + * Because only a number of small portions of the entire address space + * is actually mapped to the memory and accessed, monitoring the unmapped + * regions is wasteful. That said, because we can deal with small noises, + * tracking every mapping is not strictly required but could even incur a high + * overhead if the mapping frequently changes or the number of mappings is + * high. The adaptive regions adjustment mechanism will further help to deal + * with the noise by simply identifying the unmapped areas as a region that + * has no access. Moreover, applying the real mappings that would have many + * unmapped areas inside will make the adaptive mechanism quite complex. That + * said, too huge unmapped areas inside the monitoring target should be removed + * to not take the time for the adaptive mechanism. + * + * For the reason, we convert the complex mappings to three distinct regions + * that cover every mapped area of the address space. Also the two gaps + * between the three regions are the two biggest unmapped areas in the given + * address space. In detail, this function first identifies the start and the + * end of the mappings and the two biggest unmapped areas of the address space. + * Then, it constructs the three regions as below: + * + * [mappings[0]->start, big_two_unmapped_areas[0]->start) + * [big_two_unmapped_areas[0]->end, big_two_unmapped_areas[1]->start) + * [big_two_unmapped_areas[1]->end, mappings[nr_mappings - 1]->end) + * + * As usual memory map of processes is as below, the gap between the heap and + * the uppermost mmap()-ed region, and the gap between the lowermost mmap()-ed + * region and the stack will be two biggest unmapped regions. Because these + * gaps are exceptionally huge areas in usual address space, excluding these + * two biggest unmapped regions will be sufficient to make a trade-off. + * + * + * + * + * (other mmap()-ed regions and small unmapped regions) + * + * + * + */ +static void __damon_va_init_regions(struct damon_ctx *ctx, + struct damon_target *t) +{ + struct damon_target *ti; + struct damon_region *r; + struct damon_addr_range regions[3]; + unsigned long sz = 0, nr_pieces; + int i, tidx = 0; + + if (damon_va_three_regions(t, regions)) { + damon_for_each_target(ti, ctx) { + if (ti == t) + break; + tidx++; + } + pr_debug("Failed to get three regions of %dth target\n", tidx); + return; + } + + for (i = 0; i < 3; i++) + sz += regions[i].end - regions[i].start; + if (ctx->min_nr_regions) + sz /= ctx->min_nr_regions; + if (sz < DAMON_MIN_REGION) + sz = DAMON_MIN_REGION; + + /* Set the initial three regions of the target */ + for (i = 0; i < 3; i++) { + r = damon_new_region(regions[i].start, regions[i].end); + if (!r) { + pr_err("%d'th init region creation failed\n", i); + return; + } + damon_add_region(r, t); + + nr_pieces = (regions[i].end - regions[i].start) / sz; + damon_va_evenly_split_region(t, r, nr_pieces); + } +} + +/* Initialize '->regions_list' of every target (task) */ +static void damon_va_init(struct damon_ctx *ctx) +{ + struct damon_target *t; + + damon_for_each_target(t, ctx) { + /* the user may set the target regions as they want */ + if (!damon_nr_regions(t)) + __damon_va_init_regions(ctx, t); + } +} + +/* + * Functions for the dynamic monitoring target regions update + */ + +/* + * Check whether a region is intersecting an address range + * + * Returns true if it is. + */ +static bool damon_intersect(struct damon_region *r, + struct damon_addr_range *re) +{ + return !(r->ar.end <= re->start || re->end <= r->ar.start); +} + +/* + * Update damon regions for the three big regions of the given target + * + * t the given target + * bregions the three big regions of the target + */ +static void damon_va_apply_three_regions(struct damon_target *t, + struct damon_addr_range bregions[3]) +{ + struct damon_region *r, *next; + unsigned int i; + + /* Remove regions which are not in the three big regions now */ + damon_for_each_region_safe(r, next, t) { + for (i = 0; i < 3; i++) { + if (damon_intersect(r, &bregions[i])) + break; + } + if (i == 3) + damon_destroy_region(r, t); + } + + /* Adjust intersecting regions to fit with the three big regions */ + for (i = 0; i < 3; i++) { + struct damon_region *first = NULL, *last; + struct damon_region *newr; + struct damon_addr_range *br; + + br = &bregions[i]; + /* Get the first and last regions which intersects with br */ + damon_for_each_region(r, t) { + if (damon_intersect(r, br)) { + if (!first) + first = r; + last = r; + } + if (r->ar.start >= br->end) + break; + } + if (!first) { + /* no damon_region intersects with this big region */ + newr = damon_new_region( + ALIGN_DOWN(br->start, + DAMON_MIN_REGION), + ALIGN(br->end, DAMON_MIN_REGION)); + if (!newr) + continue; + damon_insert_region(newr, damon_prev_region(r), r, t); + } else { + first->ar.start = ALIGN_DOWN(br->start, + DAMON_MIN_REGION); + last->ar.end = ALIGN(br->end, DAMON_MIN_REGION); + } + } +} + +/* + * Update regions for current memory mappings + */ +static void damon_va_update(struct damon_ctx *ctx) +{ + struct damon_addr_range three_regions[3]; + struct damon_target *t; + + damon_for_each_target(t, ctx) { + if (damon_va_three_regions(t, three_regions)) + continue; + damon_va_apply_three_regions(t, three_regions); + } +} + +static int damon_mkold_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t *pte; + spinlock_t *ptl; + + if (pmd_huge(*pmd)) { + ptl = pmd_lock(walk->mm, pmd); + if (pmd_huge(*pmd)) { + damon_pmdp_mkold(pmd, walk->mm, addr); + spin_unlock(ptl); + return 0; + } + spin_unlock(ptl); + } + + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + return 0; + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte_present(*pte)) + goto out; + damon_ptep_mkold(pte, walk->mm, addr); +out: + pte_unmap_unlock(pte, ptl); + return 0; +} + +#ifdef CONFIG_HUGETLB_PAGE +static void damon_hugetlb_mkold(pte_t *pte, struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long addr) +{ + bool referenced = false; + pte_t entry = huge_ptep_get(pte); + struct page *page = pte_page(entry); + + if (!page) + return; + + get_page(page); + + if (pte_young(entry)) { + referenced = true; + entry = pte_mkold(entry); + huge_ptep_set_access_flags(vma, addr, pte, entry, + vma->vm_flags & VM_WRITE); + } + +#ifdef CONFIG_MMU_NOTIFIER + if (mmu_notifier_clear_young(mm, addr, + addr + huge_page_size(hstate_vma(vma)))) + referenced = true; +#endif /* CONFIG_MMU_NOTIFIER */ + + if (referenced) + set_page_young(page); + + set_page_idle(page); + put_page(page); +} + +static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct hstate *h = hstate_vma(walk->vma); + spinlock_t *ptl; + pte_t entry; + + ptl = huge_pte_lock(h, walk->mm, pte); + entry = huge_ptep_get(pte); + if (!pte_present(entry)) + goto out; + + damon_hugetlb_mkold(pte, walk->mm, walk->vma, addr); + +out: + spin_unlock(ptl); + return 0; +} +#else +#define damon_mkold_hugetlb_entry NULL +#endif /* CONFIG_HUGETLB_PAGE */ + +static const struct mm_walk_ops damon_mkold_ops = { + .pmd_entry = damon_mkold_pmd_entry, + .hugetlb_entry = damon_mkold_hugetlb_entry, +}; + +static void damon_va_mkold(struct mm_struct *mm, unsigned long addr) +{ + mmap_read_lock(mm); + walk_page_range(mm, addr, addr + 1, &damon_mkold_ops, NULL); + mmap_read_unlock(mm); +} + +/* + * Functions for the access checking of the regions + */ + +static void __damon_va_prepare_access_check(struct damon_ctx *ctx, + struct mm_struct *mm, struct damon_region *r) +{ + r->sampling_addr = damon_rand(r->ar.start, r->ar.end); + + damon_va_mkold(mm, r->sampling_addr); +} + +static void damon_va_prepare_access_checks(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct mm_struct *mm; + struct damon_region *r; + + damon_for_each_target(t, ctx) { + mm = damon_get_mm(t); + if (!mm) + continue; + damon_for_each_region(r, t) + __damon_va_prepare_access_check(ctx, mm, r); + mmput(mm); + } +} + +struct damon_young_walk_private { + unsigned long *page_sz; + bool young; +}; + +static int damon_young_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t *pte; + spinlock_t *ptl; + struct page *page; + struct damon_young_walk_private *priv = walk->private; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_huge(*pmd)) { + ptl = pmd_lock(walk->mm, pmd); + if (!pmd_huge(*pmd)) { + spin_unlock(ptl); + goto regular_page; + } + page = damon_get_page(pmd_pfn(*pmd)); + if (!page) + goto huge_out; + if (pmd_young(*pmd) || !page_is_idle(page) || + mmu_notifier_test_young(walk->mm, + addr)) { + *priv->page_sz = ((1UL) << HPAGE_PMD_SHIFT); + priv->young = true; + } + put_page(page); +huge_out: + spin_unlock(ptl); + return 0; + } + +regular_page: +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + return -EINVAL; + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); + if (!pte_present(*pte)) + goto out; + page = damon_get_page(pte_pfn(*pte)); + if (!page) + goto out; + if (pte_young(*pte) || !page_is_idle(page) || + mmu_notifier_test_young(walk->mm, addr)) { + *priv->page_sz = PAGE_SIZE; + priv->young = true; + } + put_page(page); +out: + pte_unmap_unlock(pte, ptl); + return 0; +} + +#ifdef CONFIG_HUGETLB_PAGE +static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask, + unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct damon_young_walk_private *priv = walk->private; + struct hstate *h = hstate_vma(walk->vma); + struct page *page; + spinlock_t *ptl; + pte_t entry; + + ptl = huge_pte_lock(h, walk->mm, pte); + entry = huge_ptep_get(pte); + if (!pte_present(entry)) + goto out; + + page = pte_page(entry); + if (!page) + goto out; + + get_page(page); + + if (pte_young(entry) || !page_is_idle(page) || + mmu_notifier_test_young(walk->mm, addr)) { + *priv->page_sz = huge_page_size(h); + priv->young = true; + } + + put_page(page); + +out: + spin_unlock(ptl); + return 0; +} +#else +#define damon_young_hugetlb_entry NULL +#endif /* CONFIG_HUGETLB_PAGE */ + +static const struct mm_walk_ops damon_young_ops = { + .pmd_entry = damon_young_pmd_entry, + .hugetlb_entry = damon_young_hugetlb_entry, +}; + +static bool damon_va_young(struct mm_struct *mm, unsigned long addr, + unsigned long *page_sz) +{ + struct damon_young_walk_private arg = { + .page_sz = page_sz, + .young = false, + }; + + mmap_read_lock(mm); + walk_page_range(mm, addr, addr + 1, &damon_young_ops, &arg); + mmap_read_unlock(mm); + return arg.young; +} + +/* + * Check whether the region was accessed after the last preparation + * + * mm 'mm_struct' for the given virtual address space + * r the region to be checked + */ +static void __damon_va_check_access(struct damon_ctx *ctx, + struct mm_struct *mm, struct damon_region *r) +{ + static struct mm_struct *last_mm; + static unsigned long last_addr; + static unsigned long last_page_sz = PAGE_SIZE; + static bool last_accessed; + + /* If the region is in the last checked page, reuse the result */ + if (mm == last_mm && (ALIGN_DOWN(last_addr, last_page_sz) == + ALIGN_DOWN(r->sampling_addr, last_page_sz))) { + if (last_accessed) + r->nr_accesses++; + return; + } + + last_accessed = damon_va_young(mm, r->sampling_addr, &last_page_sz); + if (last_accessed) + r->nr_accesses++; + + last_mm = mm; + last_addr = r->sampling_addr; +} + +static unsigned int damon_va_check_accesses(struct damon_ctx *ctx) +{ + struct damon_target *t; + struct mm_struct *mm; + struct damon_region *r; + unsigned int max_nr_accesses = 0; + + damon_for_each_target(t, ctx) { + mm = damon_get_mm(t); + if (!mm) + continue; + damon_for_each_region(r, t) { + __damon_va_check_access(ctx, mm, r); + max_nr_accesses = max(r->nr_accesses, max_nr_accesses); + } + mmput(mm); + } + + return max_nr_accesses; +} + +/* + * Functions for the target validity check and cleanup + */ + +bool damon_va_target_valid(void *target) +{ + struct damon_target *t = target; + struct task_struct *task; + + task = damon_get_task_struct(t); + if (task) { + put_task_struct(task); + return true; + } + + return false; +} + +#ifndef CONFIG_ADVISE_SYSCALLS +static unsigned long damos_madvise(struct damon_target *target, + struct damon_region *r, int behavior) +{ + return 0; +} +#else +static unsigned long damos_madvise(struct damon_target *target, + struct damon_region *r, int behavior) +{ + struct mm_struct *mm; + unsigned long start = PAGE_ALIGN(r->ar.start); + unsigned long len = PAGE_ALIGN(r->ar.end - r->ar.start); + unsigned long applied; + + mm = damon_get_mm(target); + if (!mm) + return 0; + + applied = do_madvise(mm, start, len, behavior) ? 0 : len; + mmput(mm); + + return applied; +} +#endif /* CONFIG_ADVISE_SYSCALLS */ + +static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + int madv_action; + + switch (scheme->action) { + case DAMOS_WILLNEED: + madv_action = MADV_WILLNEED; + break; + case DAMOS_COLD: + madv_action = MADV_COLD; + break; + case DAMOS_PAGEOUT: + madv_action = MADV_PAGEOUT; + break; + case DAMOS_HUGEPAGE: + madv_action = MADV_HUGEPAGE; + break; + case DAMOS_NOHUGEPAGE: + madv_action = MADV_NOHUGEPAGE; + break; + case DAMOS_STAT: + return 0; + default: + return 0; + } + + return damos_madvise(t, r, madv_action); +} + +static int damon_va_scheme_score(struct damon_ctx *context, + struct damon_target *t, struct damon_region *r, + struct damos *scheme) +{ + + switch (scheme->action) { + case DAMOS_PAGEOUT: + return damon_pageout_score(context, r, scheme); + default: + break; + } + + return DAMOS_MAX_SCORE; +} + +void damon_va_set_primitives(struct damon_ctx *ctx) +{ + ctx->primitive.init = damon_va_init; + ctx->primitive.update = damon_va_update; + ctx->primitive.prepare_access_checks = damon_va_prepare_access_checks; + ctx->primitive.check_accesses = damon_va_check_accesses; + ctx->primitive.reset_aggregated = NULL; + ctx->primitive.target_valid = damon_va_target_valid; + ctx->primitive.cleanup = NULL; + ctx->primitive.apply_scheme = damon_va_apply_scheme; + ctx->primitive.get_scheme_score = damon_va_scheme_score; +} + +#include "vaddr-test.h" diff --git a/mm/debug.c b/mm/debug.c index aa44dea5276f54fec9e7204b6c169468624b5971..061e2eb08981e47fcfc96ca884e253f10453ef41 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -192,7 +192,6 @@ void dump_page(struct page *page, const char *reason) { __dump_page(page, reason); dump_page_owner(page); - dump_page_pinner(page); } EXPORT_SYMBOL(dump_page); diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 12ebc97e8b4354c72edf35f96239e3b53919dc76..d6fbf28ebf72c7fcd30c720d1f45ee2968bc847d 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -128,6 +128,8 @@ static void __init pte_advanced_tests(struct mm_struct *mm, ptep_test_and_clear_young(vma, vaddr, ptep); pte = ptep_get(ptep); WARN_ON(pte_young(pte)); + + ptep_get_and_clear_full(mm, vaddr, ptep, 1); } static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot) diff --git a/mm/gup.c b/mm/gup.c index b56d9748a10285fa770f016be8b69d5f627120ab..e942c53c8ceafd865e9727aac10eec75e68a929a 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -25,6 +25,9 @@ #include "internal.h" +#undef CREATE_TRACE_POINTS +#include + struct follow_page_context { struct dev_pagemap *pgmap; unsigned int page_mask; @@ -116,11 +119,14 @@ static __maybe_unused struct page *try_grab_compound_head(struct page *page, int refs, unsigned int flags) { + bool vendor_ret = false; + + trace_android_vh_try_grab_compound_head(page, refs, flags, &vendor_ret); + if (vendor_ret) + return NULL; + if (flags & FOLL_GET) { - struct page *head = try_get_compound_head(page, refs); - if (head) - set_page_pinner(head, compound_order(head)); - return head; + return try_get_compound_head(page, refs); } else if (flags & FOLL_PIN) { int orig_refs = refs; @@ -175,8 +181,6 @@ static void put_compound_head(struct page *page, int refs, unsigned int flags) refs *= GUP_PIN_COUNTING_BIAS; } - if (flags & FOLL_GET) - reset_page_pinner(page, compound_order(page)); put_page_refs(page, refs); } @@ -206,13 +210,7 @@ bool __must_check try_grab_page(struct page *page, unsigned int flags) WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN)); if (flags & FOLL_GET) { - bool ret = try_get_page(page); - - if (ret) { - page = compound_head(page); - set_page_pinner(page, compound_order(page)); - } - return ret; + return try_get_page(page); } else if (flags & FOLL_PIN) { int refs = 1; @@ -255,24 +253,6 @@ void unpin_user_page(struct page *page) } EXPORT_SYMBOL(unpin_user_page); -/* - * put_user_page() - release a page obtained using get_user_pages() or - * follow_page(FOLL_GET) - * @page: pointer to page to be released - * - * Pages that were obtained via get_user_pages()/follow_page(FOLL_GET) must be - * released via put_user_page. - * note: If it's not a page from GUP or follow_page(FOLL_GET), it's harmless. - */ -void put_user_page(struct page *page) -{ - struct page *head = compound_head(page); - - reset_page_pinner(head, compound_order(head)); - put_page(page); -} -EXPORT_SYMBOL(put_user_page); - /** * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages * @pages: array of pages to be maybe marked dirty, and definitely released. @@ -948,6 +928,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma)) return -EFAULT; + if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma)) + return -EOPNOTSUPP; + if (write) { if (!(vm_flags & VM_WRITE)) { if (!(gup_flags & FOLL_FORCE)) @@ -1085,10 +1068,14 @@ static long __get_user_pages(struct mm_struct *mm, goto next_page; } - if (!vma || check_vma_flags(vma, gup_flags)) { + if (!vma) { ret = -EFAULT; goto out; } + ret = check_vma_flags(vma, gup_flags); + if (ret) + goto out; + if (is_vm_hugetlb_page(vma)) { i = follow_hugetlb_page(mm, vma, pages, vmas, &start, &nr_pages, i, @@ -1592,26 +1579,6 @@ struct page *get_dump_page(unsigned long addr) } #endif /* CONFIG_ELF_CORE */ -#if defined(CONFIG_FS_DAX) || defined (CONFIG_CMA) -static bool check_dax_vmas(struct vm_area_struct **vmas, long nr_pages) -{ - long i; - struct vm_area_struct *vma_prev = NULL; - - for (i = 0; i < nr_pages; i++) { - struct vm_area_struct *vma = vmas[i]; - - if (vma == vma_prev) - continue; - - vma_prev = vma; - - if (vma_is_fsdax(vma)) - return true; - } - return false; -} - #ifdef CONFIG_CMA static long check_and_migrate_cma_pages(struct mm_struct *mm, unsigned long start, @@ -1730,63 +1697,23 @@ static long __gup_longterm_locked(struct mm_struct *mm, struct vm_area_struct **vmas, unsigned int gup_flags) { - struct vm_area_struct **vmas_tmp = vmas; unsigned long flags = 0; - long rc, i; - - if (gup_flags & FOLL_LONGTERM) { - if (!pages) - return -EINVAL; + long rc; - if (!vmas_tmp) { - vmas_tmp = kcalloc(nr_pages, - sizeof(struct vm_area_struct *), - GFP_KERNEL); - if (!vmas_tmp) - return -ENOMEM; - } + if (gup_flags & FOLL_LONGTERM) flags = memalloc_nocma_save(); - } - rc = __get_user_pages_locked(mm, start, nr_pages, pages, - vmas_tmp, NULL, gup_flags); + rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL, + gup_flags); if (gup_flags & FOLL_LONGTERM) { - if (rc < 0) - goto out; - - if (check_dax_vmas(vmas_tmp, rc)) { - if (gup_flags & FOLL_PIN) - unpin_user_pages(pages, rc); - else - for (i = 0; i < rc; i++) - put_page(pages[i]); - rc = -EOPNOTSUPP; - goto out; - } - - rc = check_and_migrate_cma_pages(mm, start, rc, pages, - vmas_tmp, gup_flags); -out: + if (rc > 0) + rc = check_and_migrate_cma_pages(mm, start, rc, pages, + vmas, gup_flags); memalloc_nocma_restore(flags); } - - if (vmas_tmp != vmas) - kfree(vmas_tmp); return rc; } -#else /* !CONFIG_FS_DAX && !CONFIG_CMA */ -static __always_inline long __gup_longterm_locked(struct mm_struct *mm, - unsigned long start, - unsigned long nr_pages, - struct page **pages, - struct vm_area_struct **vmas, - unsigned int flags) -{ - return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, - NULL, flags); -} -#endif /* CONFIG_FS_DAX || CONFIG_CMA */ static bool is_valid_gup_flags(unsigned int gup_flags) { @@ -1813,6 +1740,10 @@ static long __get_user_pages_remote(struct mm_struct *mm, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas, int *locked) { + unsigned int orig_gup_flags = gup_flags; + + trace_android_vh___get_user_pages_remote(locked, &gup_flags, pages); + /* * Parts of FOLL_LONGTERM behavior are incompatible with * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on @@ -1820,16 +1751,23 @@ static long __get_user_pages_remote(struct mm_struct *mm, * callers that do request FOLL_LONGTERM, but do not set locked. So, * allow what we can. */ +retry: if (gup_flags & FOLL_LONGTERM) { + long ret; + if (WARN_ON_ONCE(locked)) return -EINVAL; /* * This will check the vmas (even if our vmas arg is NULL) * and return -ENOTSUPP if DAX isn't allowed in this case: */ - return __gup_longterm_locked(mm, start, nr_pages, pages, + ret = __gup_longterm_locked(mm, start, nr_pages, pages, vmas, gup_flags | FOLL_TOUCH | FOLL_REMOTE); + if (ret < 0 && orig_gup_flags != gup_flags) { + gup_flags = orig_gup_flags; + goto retry; + } } return __get_user_pages_locked(mm, start, nr_pages, pages, vmas, @@ -1948,11 +1886,23 @@ long get_user_pages(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, struct vm_area_struct **vmas) { + long ret; + unsigned int orig_gup_flags; + if (!is_valid_gup_flags(gup_flags)) return -EINVAL; - return __gup_longterm_locked(current->mm, start, nr_pages, + orig_gup_flags = gup_flags; + trace_android_vh_get_user_pages(&gup_flags, pages); +retry: + ret = __gup_longterm_locked(current->mm, start, nr_pages, pages, vmas, gup_flags | FOLL_TOUCH); + if (ret < 0 && orig_gup_flags != gup_flags) { + gup_flags = orig_gup_flags; + goto retry; + } + + return ret; } EXPORT_SYMBOL(get_user_pages); @@ -2760,6 +2710,7 @@ static int internal_get_user_pages_fast(unsigned long start, /* Slow path: try to get the remaining pages with get_user_pages */ start += nr_pinned << PAGE_SHIFT; pages += nr_pinned; + trace_android_vh_internal_get_user_pages_fast(&gup_flags, pages); ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags, pages); if (ret < 0) { @@ -2985,6 +2936,7 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages, return -EINVAL; gup_flags |= FOLL_PIN; + trace_android_vh_pin_user_pages(&gup_flags, pages); return __gup_longterm_locked(current->mm, start, nr_pages, pages, vmas, gup_flags); } diff --git a/mm/hmm.c b/mm/hmm.c index fb617054f96316e79ad22007379aab8582f7d72b..cbe9d0c66650452ba3e4a57b6af6c7f8f0940f56 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -296,7 +296,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, * Since each architecture defines a struct page for the zero page, just * fall through and treat it like a normal page. */ - if (pte_special(pte) && !pte_devmap(pte) && + if (!vm_normal_page(walk->vma, addr, pte) && + !pte_devmap(pte) && !is_zero_pfn(pte_pfn(pte))) { if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) { pte_unmap(ptep); @@ -514,7 +515,7 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end, struct hmm_range *range = hmm_vma_walk->range; struct vm_area_struct *vma = walk->vma; - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) && + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) && vma->vm_flags & VM_READ) return 0; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1eea5787fd63940cbac9fe9f67d636c366e6bba2..123f3e03287124be46c3a3d6cfc088e3f026d61b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2402,7 +2402,8 @@ static void __split_huge_page_tail(struct page *head, int tail, #ifdef CONFIG_64BIT (1L << PG_arch_2) | #endif - (1L << PG_dirty))); + (1L << PG_dirty) | + LRU_GEN_MASK | LRU_REFS_MASK)); /* ->mapping in first tail page is compound_mapcount */ VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING, diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7f0aac8ac80a1dff3cab5535626941d63e191514..fb145839f299e2c5b89af9f6b5b11d20fc198285 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3914,6 +3914,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); struct mmu_notifier_range range; + bool force_flush = false; WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); @@ -3942,10 +3943,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, vma, &address, ptep)) { spin_unlock(ptl); - /* - * We just unmapped a page of PMDs by clearing a PUD. - * The caller's TLB flush range should cover this area. - */ + tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE); + force_flush = true; continue; } @@ -4002,6 +4001,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, } mmu_notifier_invalidate_range_end(&range); tlb_end_vma(tlb, vma); + + /* + * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We + * could defer the flush until now, since by holding i_mmap_rwsem we + * guaranteed that the last refernece would not be dropped. But we must + * do the flushing before we return, as otherwise i_mmap_rwsem will be + * dropped and the last reference to the shared PMDs page might be + * dropped as well. + * + * In theory we could defer the freeing of the PMD pages as well, but + * huge_pmd_unshare() relies on the exact page_count for the PMD page to + * detect sharing, so we cannot defer the release of the page either. + * Instead, do flush now. + */ + if (force_flush) + tlb_flush_mmu_tlbonly(tlb); } void __unmap_hugepage_range_final(struct mmu_gather *tlb, diff --git a/mm/ioremap.c b/mm/ioremap.c index 5fa1ab41d152605768bddd5edc5dfd63df839d15..17b1d49f79c54ca59a5735a20209464a0a9c3058 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -225,6 +225,7 @@ int ioremap_page_range(unsigned long addr, pgd_t *pgd; unsigned long start; unsigned long next; + phys_addr_t phys_start = phys_addr; int err; pgtbl_mod_mask mask = 0; @@ -246,6 +247,9 @@ int ioremap_page_range(unsigned long addr, if (mask & ARCH_PAGE_TABLE_SYNC_MASK) arch_sync_kernel_mappings(start, end); + if (IS_ENABLED(CONFIG_ARCH_HAS_IOREMAP_PHYS_HOOKS) && !err) + ioremap_phys_range_hook(phys_start, end - start, prot); + return err; } diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile index 9fe39a66388a85e022750c702142d778d71284c3..adcd9acaef61507e6c9fc8449a0ff8b6831b7052 100644 --- a/mm/kasan/Makefile +++ b/mm/kasan/Makefile @@ -37,5 +37,5 @@ CFLAGS_sw_tags.o := $(CC_FLAGS_KASAN_RUNTIME) obj-$(CONFIG_KASAN) := common.o report.o obj-$(CONFIG_KASAN_GENERIC) += init.o generic.o report_generic.o shadow.o quarantine.o -obj-$(CONFIG_KASAN_HW_TAGS) += hw_tags.o report_hw_tags.o -obj-$(CONFIG_KASAN_SW_TAGS) += init.o report_sw_tags.o shadow.o sw_tags.o +obj-$(CONFIG_KASAN_HW_TAGS) += hw_tags.o report_hw_tags.o tags.o report_tags.o +obj-$(CONFIG_KASAN_SW_TAGS) += init.o report_sw_tags.o shadow.o sw_tags.o tags.o report_tags.o diff --git a/mm/kasan/common.c b/mm/kasan/common.c index 0ecd293af344317b0f188245be6160f49340ebd2..efb5726a14cd65e6130b765db0261eb6de49eb73 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -30,20 +30,19 @@ #include "kasan.h" #include "../slab.h" -depot_stack_handle_t kasan_save_stack(gfp_t flags) +depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc) { unsigned long entries[KASAN_STACK_DEPTH]; unsigned int nr_entries; nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0); - nr_entries = filter_irq_stacks(entries, nr_entries); - return stack_depot_save(entries, nr_entries, flags); + return __stack_depot_save(entries, nr_entries, flags, can_alloc); } void kasan_set_track(struct kasan_track *track, gfp_t flags) { track->pid = current->pid; - track->stack = kasan_save_stack(flags); + track->stack = kasan_save_stack(flags, true); } #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) @@ -295,7 +294,7 @@ static inline u8 assign_tag(struct kmem_cache *cache, /* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */ #ifdef CONFIG_SLAB /* For SLAB assign tags based on the object index in the freelist. */ - return (u8)obj_to_index(cache, virt_to_page(object), (void *)object); + return (u8)obj_to_index(cache, virt_to_head_page(object), (void *)object); #else /* * For SLUB assign a random tag during slab creation, otherwise reuse @@ -328,6 +327,9 @@ static inline bool ____kasan_slab_free(struct kmem_cache *cache, void *object, u8 tag; void *tagged_object; + if (!kasan_arch_is_ready()) + return false; + tag = get_tag(object); tagged_object = object; object = kasan_reset_tag(object); @@ -381,7 +383,7 @@ static inline bool ____kasan_kfree_large(void *ptr, unsigned long ip) } /* - * The object will be poisoned by kasan_free_pages() or + * The object will be poisoned by kasan_poison_pages() or * kasan_slab_free_mempool(). */ @@ -530,7 +532,7 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size, return NULL; /* - * The object has already been unpoisoned by kasan_alloc_pages() for + * The object has already been unpoisoned by kasan_unpoison_pages() for * alloc_pages() or by kasan_krealloc() for krealloc(). */ diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 53cbf28859b5a82e251c286201cf713ab8ddc620..84a038b07c6fe06c43713969cb041e330bee9951 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -163,6 +163,9 @@ static __always_inline bool check_region_inline(unsigned long addr, size_t size, bool write, unsigned long ret_ip) { + if (!kasan_arch_is_ready()) + return true; + if (unlikely(size == 0)) return true; @@ -325,7 +328,7 @@ DEFINE_ASAN_SET_SHADOW(f3); DEFINE_ASAN_SET_SHADOW(f5); DEFINE_ASAN_SET_SHADOW(f8); -void kasan_record_aux_stack(void *addr) +static void __kasan_record_aux_stack(void *addr, bool can_alloc) { struct page *page = kasan_addr_to_page(addr); struct kmem_cache *cache; @@ -342,7 +345,17 @@ void kasan_record_aux_stack(void *addr) return; alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0]; - alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT); + alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT, can_alloc); +} + +void kasan_record_aux_stack(void *addr) +{ + return __kasan_record_aux_stack(addr, true); +} + +void kasan_record_aux_stack_noalloc(void *addr) +{ + return __kasan_record_aux_stack(addr, false); } void kasan_set_free_info(struct kmem_cache *cache, diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index ed5e5b833d61017551193a3fcb603a746cfd933a..9e1b6544bfa8e6263fc6a9db58d68b345692e591 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -29,6 +29,13 @@ enum kasan_arg_mode { KASAN_ARG_MODE_DEFAULT, KASAN_ARG_MODE_SYNC, KASAN_ARG_MODE_ASYNC, + KASAN_ARG_MODE_ASYMM, +}; + +enum kasan_arg_vmalloc { + KASAN_ARG_VMALLOC_DEFAULT, + KASAN_ARG_VMALLOC_OFF, + KASAN_ARG_VMALLOC_ON, }; enum kasan_arg_stacktrace { @@ -37,30 +44,30 @@ enum kasan_arg_stacktrace { KASAN_ARG_STACKTRACE_ON, }; -enum kasan_arg_fault { - KASAN_ARG_FAULT_DEFAULT, - KASAN_ARG_FAULT_REPORT, - KASAN_ARG_FAULT_PANIC, -}; - static enum kasan_arg kasan_arg __ro_after_init; static enum kasan_arg_mode kasan_arg_mode __ro_after_init; -static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init; -static enum kasan_arg_fault kasan_arg_fault __ro_after_init; +static enum kasan_arg_vmalloc kasan_arg_vmalloc __initdata; +static enum kasan_arg_stacktrace kasan_arg_stacktrace __initdata; -/* Whether KASAN is enabled at all. */ +/* + * Whether KASAN is enabled at all. + * The value remains false until KASAN is initialized by kasan_init_hw_tags(). + */ DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled); EXPORT_SYMBOL(kasan_flag_enabled); -/* Whether the asynchronous mode is enabled. */ -bool kasan_flag_async __ro_after_init; -EXPORT_SYMBOL_GPL(kasan_flag_async); +/* + * Whether the selected mode is synchronous, asynchronous, or asymmetric. + * Defaults to KASAN_MODE_SYNC. + */ +enum kasan_mode kasan_mode __ro_after_init; +EXPORT_SYMBOL_GPL(kasan_mode); -/* Whether to collect alloc/free stack traces. */ -DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace); +/* Whether to enable vmalloc tagging. */ +DEFINE_STATIC_KEY_TRUE(kasan_flag_vmalloc); -/* Whether to panic or print a report and disable tag checking on fault. */ -bool kasan_flag_panic __ro_after_init; +/* Whether to collect alloc/free stack traces. */ +DEFINE_STATIC_KEY_TRUE(kasan_flag_stacktrace); /* kasan=off/on */ static int __init early_kasan_flag(char *arg) @@ -79,7 +86,7 @@ static int __init early_kasan_flag(char *arg) } early_param("kasan", early_kasan_flag); -/* kasan.mode=sync/async */ +/* kasan.mode=sync/async/asymm */ static int __init early_kasan_mode(char *arg) { if (!arg) @@ -89,6 +96,8 @@ static int __init early_kasan_mode(char *arg) kasan_arg_mode = KASAN_ARG_MODE_SYNC; else if (!strcmp(arg, "async")) kasan_arg_mode = KASAN_ARG_MODE_ASYNC; + else if (!strcmp(arg, "asymm")) + kasan_arg_mode = KASAN_ARG_MODE_ASYMM; else return -EINVAL; @@ -96,41 +105,54 @@ static int __init early_kasan_mode(char *arg) } early_param("kasan.mode", early_kasan_mode); -/* kasan.stacktrace=off/on */ -static int __init early_kasan_flag_stacktrace(char *arg) +/* kasan.vmalloc=off/on */ +static int __init early_kasan_flag_vmalloc(char *arg) { if (!arg) return -EINVAL; if (!strcmp(arg, "off")) - kasan_arg_stacktrace = KASAN_ARG_STACKTRACE_OFF; + kasan_arg_vmalloc = KASAN_ARG_VMALLOC_OFF; else if (!strcmp(arg, "on")) - kasan_arg_stacktrace = KASAN_ARG_STACKTRACE_ON; + kasan_arg_vmalloc = KASAN_ARG_VMALLOC_ON; else return -EINVAL; return 0; } -early_param("kasan.stacktrace", early_kasan_flag_stacktrace); +early_param("kasan.vmalloc", early_kasan_flag_vmalloc); -/* kasan.fault=report/panic */ -static int __init early_kasan_fault(char *arg) +/* kasan.stacktrace=off/on */ +static int __init early_kasan_flag_stacktrace(char *arg) { if (!arg) return -EINVAL; - if (!strcmp(arg, "report")) - kasan_arg_fault = KASAN_ARG_FAULT_REPORT; - else if (!strcmp(arg, "panic")) - kasan_arg_fault = KASAN_ARG_FAULT_PANIC; + if (!strcmp(arg, "off")) + kasan_arg_stacktrace = KASAN_ARG_STACKTRACE_OFF; + else if (!strcmp(arg, "on")) + kasan_arg_stacktrace = KASAN_ARG_STACKTRACE_ON; else return -EINVAL; return 0; } -early_param("kasan.fault", early_kasan_fault); +early_param("kasan.stacktrace", early_kasan_flag_stacktrace); + +static inline const char *kasan_mode_info(void) +{ + if (kasan_mode == KASAN_MODE_ASYNC) + return "async"; + else if (kasan_mode == KASAN_MODE_ASYMM) + return "asymm"; + else + return "sync"; +} -/* kasan_init_hw_tags_cpu() is called for each CPU. */ +/* + * kasan_init_hw_tags_cpu() is called for each CPU. + * Not marked as __init as a CPU can be hot-plugged after boot. + */ void kasan_init_hw_tags_cpu(void) { /* @@ -138,20 +160,19 @@ void kasan_init_hw_tags_cpu(void) * as this function is only called for MTE-capable hardware. */ - /* If KASAN is disabled via command line, don't initialize it. */ + /* + * If KASAN is disabled via command line, don't initialize it. + * When this function is called, kasan_flag_enabled is not yet + * set by kasan_init_hw_tags(). Thus, check kasan_arg instead. + */ if (kasan_arg == KASAN_ARG_OFF) return; - hw_init_tags(KASAN_TAG_MAX); - /* - * Enable async mode only when explicitly requested through - * the command line. + * Enable async or asymm modes only when explicitly requested + * through the command line. */ - if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC) - hw_enable_tagging_async(); - else - hw_enable_tagging_sync(); + kasan_enable_tagging(); } /* kasan_init_hw_tags() is called once on boot CPU. */ @@ -165,124 +186,169 @@ void __init kasan_init_hw_tags(void) if (kasan_arg == KASAN_ARG_OFF) return; - /* Enable KASAN. */ - static_branch_enable(&kasan_flag_enabled); - switch (kasan_arg_mode) { case KASAN_ARG_MODE_DEFAULT: - /* - * Default to sync mode. - * Do nothing, kasan_flag_async keeps its default value. - */ + /* Default is specified by kasan_mode definition. */ break; case KASAN_ARG_MODE_SYNC: - /* Do nothing, kasan_flag_async keeps its default value. */ + kasan_mode = KASAN_MODE_SYNC; break; case KASAN_ARG_MODE_ASYNC: - /* Async mode enabled. */ - kasan_flag_async = true; + kasan_mode = KASAN_MODE_ASYNC; + break; + case KASAN_ARG_MODE_ASYMM: + kasan_mode = KASAN_MODE_ASYMM; + break; + } + + switch (kasan_arg_vmalloc) { + case KASAN_ARG_VMALLOC_DEFAULT: + /* Default is specified by kasan_flag_vmalloc definition. */ + break; + case KASAN_ARG_VMALLOC_OFF: + static_branch_disable(&kasan_flag_vmalloc); + break; + case KASAN_ARG_VMALLOC_ON: + static_branch_enable(&kasan_flag_vmalloc); break; } switch (kasan_arg_stacktrace) { case KASAN_ARG_STACKTRACE_DEFAULT: - /* Default to enabling stack trace collection. */ - static_branch_enable(&kasan_flag_stacktrace); + /* Default is specified by kasan_flag_stacktrace definition. */ break; case KASAN_ARG_STACKTRACE_OFF: - /* Do nothing, kasan_flag_stacktrace keeps its default value. */ + static_branch_disable(&kasan_flag_stacktrace); break; case KASAN_ARG_STACKTRACE_ON: static_branch_enable(&kasan_flag_stacktrace); break; } - switch (kasan_arg_fault) { - case KASAN_ARG_FAULT_DEFAULT: - /* - * Default to no panic on report. - * Do nothing, kasan_flag_panic keeps its default value. - */ - break; - case KASAN_ARG_FAULT_REPORT: - /* Do nothing, kasan_flag_panic keeps its default value. */ - break; - case KASAN_ARG_FAULT_PANIC: - /* Enable panic on report. */ - kasan_flag_panic = true; - break; - } + /* KASAN is now initialized, enable it. */ + static_branch_enable(&kasan_flag_enabled); - pr_info("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized (hw-tags, mode=%s, vmalloc=%s, stacktrace=%s)\n", + kasan_mode_info(), + kasan_vmalloc_enabled() ? "on" : "off", + kasan_stack_collection_enabled() ? "on" : "off"); } -void kasan_set_free_info(struct kmem_cache *cache, - void *object, u8 tag) +#ifdef CONFIG_KASAN_VMALLOC + +static void unpoison_vmalloc_pages(const void *addr, u8 tag) { - struct kasan_alloc_meta *alloc_meta; + struct vm_struct *area; + int i; + + /* + * As hardware tag-based KASAN only tags VM_ALLOC vmalloc allocations + * (see the comment in __kasan_unpoison_vmalloc), all of the pages + * should belong to a single area. + */ + area = find_vm_area((void *)addr); + if (WARN_ON(!area)) + return; - alloc_meta = kasan_get_alloc_meta(cache, object); - if (alloc_meta) - kasan_set_track(&alloc_meta->free_track[0], GFP_NOWAIT); + for (i = 0; i < area->nr_pages; i++) { + struct page *page = area->pages[i]; + + page_kasan_tag_set(page, tag); + } } -struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, - void *object, u8 tag) +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, + kasan_vmalloc_flags_t flags) { - struct kasan_alloc_meta *alloc_meta; + u8 tag; + unsigned long redzone_start, redzone_size; - alloc_meta = kasan_get_alloc_meta(cache, object); - if (!alloc_meta) - return NULL; + if (!kasan_vmalloc_enabled()) + return (void *)start; - return &alloc_meta->free_track[0]; -} + if (!is_vmalloc_or_module_addr(start)) + return (void *)start; + + /* + * Skip unpoisoning and assigning a pointer tag for non-VM_ALLOC + * mappings as: + * + * 1. Unlike the software KASAN modes, hardware tag-based KASAN only + * supports tagging physical memory. Therefore, it can only tag a + * single mapping of normal physical pages. + * 2. Hardware tag-based KASAN can only tag memory mapped with special + * mapping protection bits, see arch_vmalloc_pgprot_modify(). + * As non-VM_ALLOC mappings can be mapped outside of vmalloc code, + * providing these bits would require tracking all non-VM_ALLOC + * mappers. + * + * Thus, for VM_ALLOC mappings, hardware tag-based KASAN only tags + * the first virtual mapping, which is created by vmalloc(). + * Tagging the page_alloc memory backing that vmalloc() allocation is + * skipped, see ___GFP_SKIP_KASAN_UNPOISON. + * + * For non-VM_ALLOC allocations, page_alloc memory is tagged as usual. + */ + if (!(flags & KASAN_VMALLOC_VM_ALLOC)) + return (void *)start; -void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags) -{ /* - * This condition should match the one in post_alloc_hook() in - * page_alloc.c. + * Don't tag executable memory. + * The kernel doesn't tolerate having the PC register tagged. */ - bool init = !want_init_on_free() && want_init_on_alloc(flags); + if (!(flags & KASAN_VMALLOC_PROT_NORMAL)) + return (void *)start; - if (flags & __GFP_SKIP_KASAN_POISON) - SetPageSkipKASanPoison(page); + tag = kasan_random_tag(); + start = set_tag(start, tag); - if (flags & __GFP_ZEROTAGS) { - int i; + /* Unpoison and initialize memory up to size. */ + kasan_unpoison(start, size, flags & KASAN_VMALLOC_INIT); - for (i = 0; i != 1 << order; ++i) - tag_clear_highpage(page + i); - } else { - kasan_unpoison_pages(page, order, init); - } -} + /* + * Explicitly poison and initialize the in-page vmalloc() redzone. + * Unlike software KASAN modes, hardware tag-based KASAN doesn't + * unpoison memory when populating shadow for vmalloc() space. + */ + redzone_start = round_up((unsigned long)start + size, + KASAN_GRANULE_SIZE); + redzone_size = round_up(redzone_start, PAGE_SIZE) - redzone_start; + kasan_poison((void *)redzone_start, redzone_size, KASAN_TAG_INVALID, + flags & KASAN_VMALLOC_INIT); -void kasan_free_pages(struct page *page, unsigned int order) -{ /* - * This condition should match the one in free_pages_prepare() in - * page_alloc.c. + * Set per-page tag flags to allow accessing physical memory for the + * vmalloc() mapping through page_address(vmalloc_to_page()). */ - bool init = want_init_on_free(); + unpoison_vmalloc_pages(start, tag); - kasan_poison_pages(page, order, init); + return (void *)start; } -#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) - -void kasan_set_tagging_report_once(bool state) +void __kasan_poison_vmalloc(const void *start, unsigned long size) { - hw_set_tagging_report_once(state); + /* + * No tagging here. + * The physical pages backing the vmalloc() allocation are poisoned + * through the usual page_alloc paths. + */ } -EXPORT_SYMBOL_GPL(kasan_set_tagging_report_once); -void kasan_enable_tagging_sync(void) +#endif + +void kasan_enable_tagging(void) { - hw_enable_tagging_sync(); + if (kasan_arg_mode == KASAN_ARG_MODE_ASYNC) + hw_enable_tagging_async(); + else if (kasan_arg_mode == KASAN_ARG_MODE_ASYMM) + hw_enable_tagging_asymm(); + else + hw_enable_tagging_sync(); } -EXPORT_SYMBOL_GPL(kasan_enable_tagging_sync); + +#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) + +EXPORT_SYMBOL_GPL(kasan_enable_tagging); void kasan_force_async_fault(void) { diff --git a/mm/kasan/init.c b/mm/kasan/init.c index c4605ac9837b011460d21c000fdc3c76db764348..348f31d15a971f54035e7ab51b535d4ba9e6fdcd 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -220,8 +220,8 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, /** * kasan_populate_early_shadow - populate shadow memory region with * kasan_early_shadow_page - * @shadow_start - start of the memory range to populate - * @shadow_end - end of the memory range to populate + * @shadow_start: start of the memory range to populate + * @shadow_end: end of the memory range to populate */ int __ref kasan_populate_early_shadow(const void *shadow_start, const void *shadow_end) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index ed9c45a63f9a879f65a6347fef6be791026b23cd..e6417aca1ebf05cb98ff9da9a5576f360318d446 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -3,25 +3,54 @@ #define __MM_KASAN_KASAN_H #include +#include #include #include +#if IS_ENABLED(CONFIG_KUNIT) + +/* Used in KUnit-compatible KASAN tests. */ +struct kunit_kasan_status { + bool report_found; + bool sync_fault; +}; + +#endif + #ifdef CONFIG_KASAN_HW_TAGS #include #include "../slab.h" -DECLARE_STATIC_KEY_FALSE(kasan_flag_stacktrace); -extern bool kasan_flag_async __ro_after_init; +DECLARE_STATIC_KEY_TRUE(kasan_flag_vmalloc); +DECLARE_STATIC_KEY_TRUE(kasan_flag_stacktrace); + +enum kasan_mode { + KASAN_MODE_SYNC, + KASAN_MODE_ASYNC, + KASAN_MODE_ASYMM, +}; + +extern enum kasan_mode kasan_mode __ro_after_init; + +static inline bool kasan_vmalloc_enabled(void) +{ + return static_branch_likely(&kasan_flag_vmalloc); +} static inline bool kasan_stack_collection_enabled(void) { return static_branch_unlikely(&kasan_flag_stacktrace); } -static inline bool kasan_async_mode_enabled(void) +static inline bool kasan_async_fault_possible(void) { - return kasan_flag_async; + return kasan_mode == KASAN_MODE_ASYNC || kasan_mode == KASAN_MODE_ASYMM; +} + +static inline bool kasan_sync_fault_possible(void) +{ + return kasan_mode == KASAN_MODE_SYNC || kasan_mode == KASAN_MODE_ASYMM; } #else @@ -30,15 +59,17 @@ static inline bool kasan_stack_collection_enabled(void) return true; } -static inline bool kasan_async_mode_enabled(void) +static inline bool kasan_async_fault_possible(void) { return false; } -#endif +static inline bool kasan_sync_fault_possible(void) +{ + return true; +} -extern bool kasan_flag_panic __ro_after_init; -extern bool kasan_flag_async __ro_after_init; +#endif #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) #define KASAN_GRANULE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT) @@ -51,32 +82,24 @@ extern bool kasan_flag_async __ro_after_init; #define KASAN_MEMORY_PER_SHADOW_PAGE (KASAN_GRANULE_SIZE << PAGE_SHIFT) -#define KASAN_TAG_KERNEL 0xFF /* native kernel pointers tag */ -#define KASAN_TAG_INVALID 0xFE /* inaccessible memory tag */ -#define KASAN_TAG_MAX 0xFD /* maximum value for random tags */ - -#ifdef CONFIG_KASAN_HW_TAGS -#define KASAN_TAG_MIN 0xF0 /* mimimum value for random tags */ -#else -#define KASAN_TAG_MIN 0x00 /* mimimum value for random tags */ -#endif - #ifdef CONFIG_KASAN_GENERIC #define KASAN_FREE_PAGE 0xFF /* page was freed */ #define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */ #define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */ #define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */ -#define KASAN_KMALLOC_FREETRACK 0xFA /* object was freed and has free track set */ +#define KASAN_VMALLOC_INVALID 0xF8 /* unallocated space in vmapped page */ #else #define KASAN_FREE_PAGE KASAN_TAG_INVALID #define KASAN_PAGE_REDZONE KASAN_TAG_INVALID #define KASAN_KMALLOC_REDZONE KASAN_TAG_INVALID #define KASAN_KMALLOC_FREE KASAN_TAG_INVALID -#define KASAN_KMALLOC_FREETRACK KASAN_TAG_INVALID +#define KASAN_VMALLOC_INVALID KASAN_TAG_INVALID /* only for SW_TAGS */ #endif +#ifdef CONFIG_KASAN_GENERIC + +#define KASAN_KMALLOC_FREETRACK 0xFA /* object was freed and has free track set */ #define KASAN_GLOBAL_REDZONE 0xF9 /* redzone for global variable */ -#define KASAN_VMALLOC_INVALID 0xF8 /* unallocated space in vmapped page */ /* * Stack redzone shadow values @@ -105,6 +128,8 @@ extern bool kasan_flag_async __ro_after_init; #define KASAN_ABI_VERSION 1 #endif +#endif /* CONFIG_KASAN_GENERIC */ + /* Metadata layout customization. */ #define META_BYTES_PER_BLOCK 1 #define META_BLOCKS_PER_ROW 16 @@ -154,7 +179,7 @@ struct kasan_track { depot_stack_handle_t stack; }; -#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY +#if defined(CONFIG_KASAN_TAGS_IDENTIFY) && defined(CONFIG_KASAN_SW_TAGS) #define KASAN_NR_FREE_STACKS 5 #else #define KASAN_NR_FREE_STACKS 1 @@ -171,7 +196,7 @@ struct kasan_alloc_meta { #else struct kasan_track free_track[KASAN_NR_FREE_STACKS]; #endif -#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY +#ifdef CONFIG_KASAN_TAGS_IDENTIFY u8 free_pointer_tag[KASAN_NR_FREE_STACKS]; u8 free_track_idx; #endif @@ -261,7 +286,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip); struct page *kasan_addr_to_page(const void *addr); -depot_stack_handle_t kasan_save_stack(gfp_t flags); +depot_stack_handle_t kasan_save_stack(gfp_t flags, bool can_alloc); void kasan_set_track(struct kasan_track *track, gfp_t flags); void kasan_set_free_info(struct kmem_cache *cache, void *object, u8 tag); struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, @@ -299,11 +324,8 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #ifndef arch_enable_tagging_async #define arch_enable_tagging_async() #endif -#ifndef arch_init_tags -#define arch_init_tags(max_tag) -#endif -#ifndef arch_set_tagging_report_once -#define arch_set_tagging_report_once(state) +#ifndef arch_enable_tagging_asymm +#define arch_enable_tagging_asymm() #endif #ifndef arch_force_async_tag_fault #define arch_force_async_tag_fault() @@ -320,35 +342,34 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #define hw_enable_tagging_sync() arch_enable_tagging_sync() #define hw_enable_tagging_async() arch_enable_tagging_async() -#define hw_init_tags(max_tag) arch_init_tags(max_tag) -#define hw_set_tagging_report_once(state) arch_set_tagging_report_once(state) +#define hw_enable_tagging_asymm() arch_enable_tagging_asymm() #define hw_force_async_tag_fault() arch_force_async_tag_fault() #define hw_get_random_tag() arch_get_random_tag() #define hw_get_mem_tag(addr) arch_get_mem_tag(addr) #define hw_set_mem_tag_range(addr, size, tag, init) \ arch_set_mem_tag_range((addr), (size), (tag), (init)) +void kasan_enable_tagging(void); + #else /* CONFIG_KASAN_HW_TAGS */ #define hw_enable_tagging_sync() #define hw_enable_tagging_async() -#define hw_set_tagging_report_once(state) +#define hw_enable_tagging_asymm() + +static inline void kasan_enable_tagging(void) { } #endif /* CONFIG_KASAN_HW_TAGS */ #if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) -void kasan_set_tagging_report_once(bool state); -void kasan_enable_tagging_sync(void); void kasan_force_async_fault(void); -#else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */ +#else /* CONFIG_KASAN_HW_TAGS && CONFIG_KASAN_KUNIT_TEST */ -static inline void kasan_set_tagging_report_once(bool state) { } -static inline void kasan_enable_tagging_sync(void) { } static inline void kasan_force_async_fault(void) { } -#endif /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */ +#endif /* CONFIG_KASAN_HW_TAGS && CONFIG_KASAN_KUNIT_TEST */ #ifdef CONFIG_KASAN_SW_TAGS u8 kasan_random_tag(void); @@ -461,6 +482,12 @@ static inline void kasan_poison_last_granule(const void *address, size_t size) { #endif /* CONFIG_KASAN_GENERIC */ +#ifndef kasan_arch_is_ready +static inline bool kasan_arch_is_ready(void) { return true; } +#elif !defined(CONFIG_KASAN_GENERIC) || !defined(CONFIG_KASAN_OUTLINE) +#error kasan_arch_is_ready only works in KASAN generic outline mode! +#endif + /* * Exported functions for interfaces called from assembly or from generated * code. Declarations here to avoid warning about missing declarations. diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 728fb24c5683d7cb533f924b8b0e973483b6bef7..04dc2cfaa4ae2e66ebe0058257edcad1d666c6b7 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -132,11 +132,22 @@ static void *qlink_to_object(struct qlist_node *qlink, struct kmem_cache *cache) static void qlink_free(struct qlist_node *qlink, struct kmem_cache *cache) { void *object = qlink_to_object(qlink, cache); + struct kasan_free_meta *meta = kasan_get_free_meta(cache, object); unsigned long flags; if (IS_ENABLED(CONFIG_SLAB)) local_irq_save(flags); + /* + * If init_on_free is enabled and KASAN's free metadata is stored in + * the object, zero the metadata. Otherwise, the object's memory will + * not be properly zeroed, as KASAN saves the metadata after the slab + * allocator zeroes the object. + */ + if (slab_want_init_on_free(cache) && + cache->kasan_info.free_meta_offset == 0) + memzero_explicit(meta, sizeof(*meta)); + /* * As the object now gets freed from the quaratine, assume that its * free track is no longer valid. diff --git a/mm/kasan/report.c b/mm/kasan/report.c index 8fff1825b22cda5a0dfe265faec66a7e1a6817de..98c186f9d950348a8fbf1c5d19a5eea09a2d9497 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -39,6 +39,31 @@ static unsigned long kasan_flags; #define KASAN_BIT_REPORTED 0 #define KASAN_BIT_MULTI_SHOT 1 +enum kasan_arg_fault { + KASAN_ARG_FAULT_DEFAULT, + KASAN_ARG_FAULT_REPORT, + KASAN_ARG_FAULT_PANIC, +}; + +static enum kasan_arg_fault kasan_arg_fault __ro_after_init = KASAN_ARG_FAULT_DEFAULT; + +/* kasan.fault=report/panic */ +static int __init early_kasan_fault(char *arg) +{ + if (!arg) + return -EINVAL; + + if (!strcmp(arg, "report")) + kasan_arg_fault = KASAN_ARG_FAULT_REPORT; + else if (!strcmp(arg, "panic")) + kasan_arg_fault = KASAN_ARG_FAULT_PANIC; + else + return -EINVAL; + + return 0; +} +early_param("kasan.fault", early_kasan_fault); + bool kasan_save_enable_multi_shot(void) { return test_and_set_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags); @@ -87,7 +112,7 @@ static void start_report(unsigned long *flags) static void end_report(unsigned long *flags, unsigned long addr) { - if (!kasan_async_mode_enabled()) + if (!kasan_async_fault_possible()) trace_error_report_end(ERROR_DETECTOR_KASAN, addr); pr_err("==================================================================\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); @@ -102,10 +127,8 @@ static void end_report(unsigned long *flags, unsigned long addr) panic_on_warn = 0; panic("panic_on_warn set ...\n"); } -#ifdef CONFIG_KASAN_HW_TAGS - if (kasan_flag_panic) + if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC) panic("kasan.fault=panic set ...\n"); -#endif kasan_enable_current(); } @@ -212,7 +235,7 @@ static void describe_object(struct kmem_cache *cache, void *object, static inline bool kernel_or_module_addr(const void *addr) { - if (addr >= (void *)_stext && addr < (void *)_end) + if (is_kernel((unsigned long)addr)) return true; if (is_module_address((unsigned long)addr)) return true; @@ -321,20 +344,21 @@ static bool report_enabled(void) } #if IS_ENABLED(CONFIG_KUNIT) -static void kasan_update_kunit_status(struct kunit *cur_test) +static void kasan_update_kunit_status(struct kunit *cur_test, bool sync) { struct kunit_resource *resource; - struct kunit_kasan_expectation *kasan_data; + struct kunit_kasan_status *status; - resource = kunit_find_named_resource(cur_test, "kasan_data"); + resource = kunit_find_named_resource(cur_test, "kasan_status"); if (!resource) { kunit_set_failure(cur_test); return; } - kasan_data = (struct kunit_kasan_expectation *)resource->data; - WRITE_ONCE(kasan_data->report_found, true); + status = (struct kunit_kasan_status *)resource->data; + WRITE_ONCE(status->report_found, true); + WRITE_ONCE(status->sync_fault, sync); kunit_put_resource(resource); } #endif /* IS_ENABLED(CONFIG_KUNIT) */ @@ -348,7 +372,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip) #if IS_ENABLED(CONFIG_KUNIT) if (current->kunit_test) - kasan_update_kunit_status(current->kunit_test); + kasan_update_kunit_status(current->kunit_test, true); #endif /* IS_ENABLED(CONFIG_KUNIT) */ start_report(&flags); @@ -368,7 +392,7 @@ void kasan_report_async(void) #if IS_ENABLED(CONFIG_KUNIT) if (current->kunit_test) - kasan_update_kunit_status(current->kunit_test); + kasan_update_kunit_status(current->kunit_test, false); #endif /* IS_ENABLED(CONFIG_KUNIT) */ start_report(&flags); @@ -390,7 +414,7 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, #if IS_ENABLED(CONFIG_KUNIT) if (current->kunit_test) - kasan_update_kunit_status(current->kunit_test); + kasan_update_kunit_status(current->kunit_test, true); #endif /* IS_ENABLED(CONFIG_KUNIT) */ disable_trace_on_warning(); diff --git a/mm/kasan/report_hw_tags.c b/mm/kasan/report_hw_tags.c index 42b2168755d62d7ca0e90aad5b58a13d11b4b908..5dbbbb930e7a7a9fd5dda6999833a32cfcdb4be7 100644 --- a/mm/kasan/report_hw_tags.c +++ b/mm/kasan/report_hw_tags.c @@ -15,11 +15,6 @@ #include "kasan.h" -const char *kasan_get_bug_type(struct kasan_access_info *info) -{ - return "invalid-access"; -} - void *kasan_find_first_bad_addr(void *addr, size_t size) { return kasan_reset_tag(addr); diff --git a/mm/kasan/report_sw_tags.c b/mm/kasan/report_sw_tags.c index 3d20d3451d9eaabb611060b1aad19af8d3330d3b..d2298c357834963807a423c32bc666413cc33da7 100644 --- a/mm/kasan/report_sw_tags.c +++ b/mm/kasan/report_sw_tags.c @@ -29,49 +29,6 @@ #include "kasan.h" #include "../slab.h" -const char *kasan_get_bug_type(struct kasan_access_info *info) -{ -#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY - struct kasan_alloc_meta *alloc_meta; - struct kmem_cache *cache; - struct page *page; - const void *addr; - void *object; - u8 tag; - int i; - - tag = get_tag(info->access_addr); - addr = kasan_reset_tag(info->access_addr); - page = kasan_addr_to_page(addr); - if (page && PageSlab(page)) { - cache = page->slab_cache; - object = nearest_obj(cache, page, (void *)addr); - alloc_meta = kasan_get_alloc_meta(cache, object); - - if (alloc_meta) { - for (i = 0; i < KASAN_NR_FREE_STACKS; i++) { - if (alloc_meta->free_pointer_tag[i] == tag) - return "use-after-free"; - } - } - return "out-of-bounds"; - } - -#endif - /* - * If access_size is a negative number, then it has reason to be - * defined as out-of-bounds bug type. - * - * Casting negative numbers to size_t would indeed turn up as - * a large size_t and its value will be larger than ULONG_MAX/2, - * so that this can qualify as out-of-bounds. - */ - if (info->access_addr + info->access_size < info->access_addr) - return "out-of-bounds"; - - return "invalid-access"; -} - void *kasan_find_first_bad_addr(void *addr, size_t size) { u8 tag = get_tag(addr); diff --git a/mm/kasan/report_tags.c b/mm/kasan/report_tags.c new file mode 100644 index 0000000000000000000000000000000000000000..8a319fc16dab9742606c78099e774697c416bcca --- /dev/null +++ b/mm/kasan/report_tags.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Copyright (c) 2020 Google, Inc. + */ + +#include "kasan.h" +#include "../slab.h" + +const char *kasan_get_bug_type(struct kasan_access_info *info) +{ +#ifdef CONFIG_KASAN_TAGS_IDENTIFY + struct kasan_alloc_meta *alloc_meta; + struct kmem_cache *cache; + struct page *page; + const void *addr; + void *object; + u8 tag; + int i; + + tag = get_tag(info->access_addr); + addr = kasan_reset_tag(info->access_addr); + page = kasan_addr_to_page(addr); + if (page && PageSlab(page)) { + cache = page->slab_cache; + object = nearest_obj(cache, page, (void *)addr); + alloc_meta = kasan_get_alloc_meta(cache, object); + + if (alloc_meta) { + for (i = 0; i < KASAN_NR_FREE_STACKS; i++) { + if (alloc_meta->free_pointer_tag[i] == tag) + return "use-after-free"; + } + } + return "out-of-bounds"; + } +#endif + + /* + * If access_size is a negative number, then it has reason to be + * defined as out-of-bounds bug type. + * + * Casting negative numbers to size_t would indeed turn up as + * a large size_t and its value will be larger than ULONG_MAX/2, + * so that this can qualify as out-of-bounds. + */ + if (info->access_addr + info->access_size < info->access_addr) + return "out-of-bounds"; + + return "invalid-access"; +} diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 727ad46291731fafb95f717fcd15529fbe353ae9..a3438661c777b85f786cd40aa3f6d7fd53f3f1b6 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -73,6 +73,9 @@ void kasan_poison(const void *addr, size_t size, u8 value, bool init) { void *shadow_start, *shadow_end; + if (!kasan_arch_is_ready()) + return; + /* * Perform shadow offset calculation based on untagged address, as * some of the callers (e.g. kasan_poison_object_data) pass tagged @@ -99,6 +102,9 @@ EXPORT_SYMBOL(kasan_poison); #ifdef CONFIG_KASAN_GENERIC void kasan_poison_last_granule(const void *addr, size_t size) { + if (!kasan_arch_is_ready()) + return; + if (size & KASAN_GRANULE_MASK) { u8 *shadow = (u8 *)kasan_mem_to_shadow(addr + size); *shadow = size & KASAN_GRANULE_MASK; @@ -334,27 +340,6 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) return 0; } -/* - * Poison the shadow for a vmalloc region. Called as part of the - * freeing process at the time the region is freed. - */ -void kasan_poison_vmalloc(const void *start, unsigned long size) -{ - if (!is_vmalloc_or_module_addr(start)) - return; - - size = round_up(size, KASAN_GRANULE_SIZE); - kasan_poison(start, size, KASAN_VMALLOC_INVALID, false); -} - -void kasan_unpoison_vmalloc(const void *start, unsigned long size) -{ - if (!is_vmalloc_or_module_addr(start)) - return; - - kasan_unpoison(start, size, false); -} - static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, void *unused) { @@ -485,9 +470,48 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end, } } +void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, + kasan_vmalloc_flags_t flags) +{ + /* + * Software KASAN modes unpoison both VM_ALLOC and non-VM_ALLOC + * mappings, so the KASAN_VMALLOC_VM_ALLOC flag is ignored. + * Software KASAN modes can't optimize zeroing memory by combining it + * with setting memory tags, so the KASAN_VMALLOC_INIT flag is ignored. + */ + + if (!is_vmalloc_or_module_addr(start)) + return (void *)start; + + /* + * Don't tag executable memory with the tag-based mode. + * The kernel doesn't tolerate having the PC register tagged. + */ + if (IS_ENABLED(CONFIG_KASAN_SW_TAGS) && + !(flags & KASAN_VMALLOC_PROT_NORMAL)) + return (void *)start; + + start = set_tag(start, kasan_random_tag()); + kasan_unpoison(start, size, false); + return (void *)start; +} + +/* + * Poison the shadow for a vmalloc region. Called as part of the + * freeing process at the time the region is freed. + */ +void __kasan_poison_vmalloc(const void *start, unsigned long size) +{ + if (!is_vmalloc_or_module_addr(start)) + return; + + size = round_up(size, KASAN_GRANULE_SIZE); + kasan_poison(start, size, KASAN_VMALLOC_INVALID, false); +} + #else /* CONFIG_KASAN_VMALLOC */ -int kasan_module_alloc(void *addr, size_t size) +int kasan_alloc_module_shadow(void *addr, size_t size, gfp_t gfp_mask) { void *ret; size_t scaled_size; @@ -509,16 +533,21 @@ int kasan_module_alloc(void *addr, size_t size) __builtin_return_address(0)); if (ret) { + struct vm_struct *vm = find_vm_area(addr); __memset(ret, KASAN_SHADOW_INIT, shadow_size); - find_vm_area(addr)->flags |= VM_KASAN; + vm->flags |= VM_KASAN; kmemleak_ignore(ret); + + if (vm->flags & VM_DEFER_KMEMLEAK) + kmemleak_vmalloc(vm, size, gfp_mask); + return 0; } return -ENOMEM; } -void kasan_free_shadow(const struct vm_struct *vm) +void kasan_free_module_shadow(const struct vm_struct *vm) { if (vm->flags & VM_KASAN) vfree(kasan_mem_to_shadow(vm->addr)); diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index 9df8e7f69e870990d670647282e3396bbe45565e..77f13f391b577a539de520f1ef2d6e801c750ab3 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -42,7 +42,7 @@ void __init kasan_init_sw_tags(void) for_each_possible_cpu(cpu) per_cpu(prng_state, cpu) = (u32)get_cycles(); - pr_info("KernelAddressSanitizer initialized\n"); + pr_info("KernelAddressSanitizer initialized (sw-tags)\n"); } /* @@ -167,43 +167,9 @@ void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size) } EXPORT_SYMBOL(__hwasan_tag_memory); -void kasan_set_free_info(struct kmem_cache *cache, - void *object, u8 tag) +void kasan_tag_mismatch(unsigned long addr, unsigned long access_info, + unsigned long ret_ip) { - struct kasan_alloc_meta *alloc_meta; - u8 idx = 0; - - alloc_meta = kasan_get_alloc_meta(cache, object); - if (!alloc_meta) - return; - -#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY - idx = alloc_meta->free_track_idx; - alloc_meta->free_pointer_tag[idx] = tag; - alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS; -#endif - - kasan_set_track(&alloc_meta->free_track[idx], GFP_NOWAIT); -} - -struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, - void *object, u8 tag) -{ - struct kasan_alloc_meta *alloc_meta; - int i = 0; - - alloc_meta = kasan_get_alloc_meta(cache, object); - if (!alloc_meta) - return NULL; - -#ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY - for (i = 0; i < KASAN_NR_FREE_STACKS; i++) { - if (alloc_meta->free_pointer_tag[i] == tag) - break; - } - if (i == KASAN_NR_FREE_STACKS) - i = alloc_meta->free_track_idx; -#endif - - return &alloc_meta->free_track[i]; + kasan_report(addr, 1 << (access_info & 0xf), access_info & 0x10, + ret_ip); } diff --git a/mm/kasan/tags.c b/mm/kasan/tags.c new file mode 100644 index 0000000000000000000000000000000000000000..8f48b9502a177dab50a66b47cff527bd8fb8ec83 --- /dev/null +++ b/mm/kasan/tags.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file contains common tag-based KASAN code. + * + * Copyright (c) 2018 Google, Inc. + * Copyright (c) 2020 Google, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kasan.h" + +void kasan_set_free_info(struct kmem_cache *cache, + void *object, u8 tag) +{ + struct kasan_alloc_meta *alloc_meta; + u8 idx = 0; + + alloc_meta = kasan_get_alloc_meta(cache, object); + if (!alloc_meta) + return; + +#ifdef CONFIG_KASAN_TAGS_IDENTIFY + idx = alloc_meta->free_track_idx; + alloc_meta->free_pointer_tag[idx] = tag; + alloc_meta->free_track_idx = (idx + 1) % KASAN_NR_FREE_STACKS; +#endif + + kasan_set_track(&alloc_meta->free_track[idx], GFP_NOWAIT); +} + +struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, + void *object, u8 tag) +{ + struct kasan_alloc_meta *alloc_meta; + int i = 0; + + alloc_meta = kasan_get_alloc_meta(cache, object); + if (!alloc_meta) + return NULL; + +#ifdef CONFIG_KASAN_TAGS_IDENTIFY + for (i = 0; i < KASAN_NR_FREE_STACKS; i++) { + if (alloc_meta->free_pointer_tag[i] == tag) + break; + } + if (i == KASAN_NR_FREE_STACKS) + i = alloc_meta->free_track_idx; +#endif + + return &alloc_meta->free_track[i]; +} diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 575c685aa642291bca3418828df953233f0ffe5e..56bbacba7db2c61e99863bebe32224ecd2c579ae 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -574,6 +574,7 @@ static const struct file_operations objects_fops = { .open = open_objects, .read = seq_read, .llseek = seq_lseek, + .release = seq_release, }; static int __init kfence_debugfs_init(void) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 59d8caab584a5cccf7c3c7d421f161fa8316eab5..8008e6c2714ed5b423f5d856ec1ce3d5368e678e 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -443,22 +443,25 @@ static bool hugepage_vma_check(struct vm_area_struct *vma, if (!transhuge_vma_enabled(vma, vm_flags)) return false; + if (vma->vm_file && !IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - + vma->vm_pgoff, HPAGE_PMD_NR)) + return false; + /* Enabled via shmem mount options or sysfs settings. */ - if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) { - return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, - HPAGE_PMD_NR); - } + if (shmem_file(vma->vm_file)) + return shmem_huge_enabled(vma); /* THP settings require madvise. */ if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) return false; - /* Read-only file mappings need to be aligned for THP to work. */ + /* Only regular file is valid */ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file && !inode_is_open_for_write(vma->vm_file->f_inode) && (vm_flags & VM_EXEC)) { - return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, - HPAGE_PMD_NR); + struct inode *inode = vma->vm_file->f_inode; + + return S_ISREG(inode->i_mode); } if (!vma->anon_vma || vma->vm_ops) @@ -1763,6 +1766,10 @@ static void collapse_file(struct mm_struct *mm, filemap_flush(mapping); result = SCAN_FAIL; goto xa_unlocked; + } else if (PageWriteback(page)) { + xas_unlock_irq(&xas); + result = SCAN_FAIL; + goto xa_unlocked; } else if (trylock_page(page)) { get_page(page); xas_unlock_irq(&xas); @@ -1798,7 +1805,8 @@ static void collapse_file(struct mm_struct *mm, goto out_unlock; } - if (!is_shmem && PageDirty(page)) { + if (!is_shmem && (PageDirty(page) || + PageWriteback(page))) { /* * khugepaged only works on read-only fd, so this * page is dirty because it hasn't been flushed diff --git a/mm/kmemleak.c b/mm/kmemleak.c index fe6e3ae8e8c6719f7ae6bead5954e2f43109a523..c8dc0470efa6ded2b55929b3cdedc9b526a420c6 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -290,7 +290,7 @@ static void hex_dump_object(struct seq_file *seq, warn_or_seq_printf(seq, " hex dump (first %zu bytes):\n", len); kasan_disable_current(); warn_or_seq_hex_dump(seq, DUMP_PREFIX_NONE, HEX_ROW_SIZE, - HEX_GROUP_SIZE, ptr, len, HEX_ASCII); + HEX_GROUP_SIZE, kasan_reset_tag((void *)ptr), len, HEX_ASCII); kasan_enable_current(); } @@ -380,15 +380,20 @@ static void dump_object_info(struct kmemleak_object *object) static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) { struct rb_node *rb = object_tree_root.rb_node; + unsigned long untagged_ptr = (unsigned long)kasan_reset_tag((void *)ptr); while (rb) { - struct kmemleak_object *object = - rb_entry(rb, struct kmemleak_object, rb_node); - if (ptr < object->pointer) + struct kmemleak_object *object; + unsigned long untagged_objp; + + object = rb_entry(rb, struct kmemleak_object, rb_node); + untagged_objp = (unsigned long)kasan_reset_tag((void *)object->pointer); + + if (untagged_ptr < untagged_objp) rb = object->rb_node.rb_left; - else if (object->pointer + object->size <= ptr) + else if (untagged_objp + object->size <= untagged_ptr) rb = object->rb_node.rb_right; - else if (object->pointer == ptr || alias) + else if (untagged_objp == untagged_ptr || alias) return object; else { kmemleak_warn("Found object by alias at 0x%08lx\n", @@ -575,6 +580,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, struct kmemleak_object *object, *parent; struct rb_node **link, *rb_parent; unsigned long untagged_ptr; + unsigned long untagged_objp; object = mem_pool_alloc(gfp); if (!object) { @@ -628,9 +634,10 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, while (*link) { rb_parent = *link; parent = rb_entry(rb_parent, struct kmemleak_object, rb_node); - if (ptr + size <= parent->pointer) + untagged_objp = (unsigned long)kasan_reset_tag((void *)parent->pointer); + if (untagged_ptr + size <= untagged_objp) link = &parent->rb_node.rb_left; - else if (parent->pointer + parent->size <= ptr) + else if (untagged_objp + parent->size <= untagged_ptr) link = &parent->rb_node.rb_right; else { kmemleak_stop("Cannot insert 0x%lx into the object search tree (overlaps existing)\n", @@ -1171,7 +1178,7 @@ static bool update_checksum(struct kmemleak_object *object) kasan_disable_current(); kcsan_disable_current(); - object->checksum = crc32(0, (void *)object->pointer, object->size); + object->checksum = crc32(0, kasan_reset_tag((void *)object->pointer), object->size); kasan_enable_current(); kcsan_enable_current(); @@ -1246,7 +1253,7 @@ static void scan_block(void *_start, void *_end, break; kasan_disable_current(); - pointer = *ptr; + pointer = *(unsigned long *)kasan_reset_tag((void *)ptr); kasan_enable_current(); untagged_ptr = (unsigned long)kasan_reset_tag((void *)pointer); @@ -1402,7 +1409,8 @@ static void kmemleak_scan(void) { unsigned long flags; struct kmemleak_object *object; - int i; + struct zone *zone; + int __maybe_unused i; int new_leaks = 0; jiffies_last_scan = jiffies; @@ -1442,9 +1450,9 @@ static void kmemleak_scan(void) * Struct page scanning for each node. */ get_online_mems(); - for_each_online_node(i) { - unsigned long start_pfn = node_start_pfn(i); - unsigned long end_pfn = node_end_pfn(i); + for_each_populated_zone(zone) { + unsigned long start_pfn = zone->zone_start_pfn; + unsigned long end_pfn = zone_end_pfn(zone); unsigned long pfn; for (pfn = start_pfn; pfn < end_pfn; pfn++) { @@ -1453,8 +1461,8 @@ static void kmemleak_scan(void) if (!page) continue; - /* only scan pages belonging to this node */ - if (page_to_nid(page) != i) + /* only scan pages belonging to this zone */ + if (page_zone(page) != zone) continue; /* only scan if page is in use */ if (page_count(page) == 0) diff --git a/mm/ksm.c b/mm/ksm.c index e2464c04ede28e487d4c4a61605eb65f4646ed9c..25b8362a4f89537abe7c03156d7407d9a18383c1 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -484,7 +484,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) NULL); else ret = VM_FAULT_WRITE; - put_user_page(page); + put_page(page); } while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM))); /* * We must loop because handle_mm_fault() may back out if there's @@ -569,7 +569,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item) flush_anon_page(vma, page, addr); flush_dcache_page(page); } else { - put_user_page(page); + put_page(page); out: page = NULL; } @@ -1950,7 +1950,7 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, * Don't substitute a ksm page for a forked page. */ if (page == tree_page) { - put_user_page(tree_page); + put_page(tree_page); return NULL; } @@ -1958,10 +1958,10 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, parent = *new; if (ret < 0) { - put_user_page(tree_page); + put_page(tree_page); new = &parent->rb_left; } else if (ret > 0) { - put_user_page(tree_page); + put_page(tree_page); new = &parent->rb_right; } else if (!ksm_merge_across_nodes && page_to_nid(tree_page) != nid) { @@ -1970,7 +1970,7 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item, * it will be flushed out and put in the right unstable * tree next time: only merge with it when across_nodes. */ - put_user_page(tree_page); + put_page(tree_page); return NULL; } else { *tree_pagep = tree_page; @@ -2151,7 +2151,7 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item) */ split = PageTransCompound(page) && compound_head(page) == compound_head(tree_page); - put_user_page(tree_page); + put_page(tree_page); if (kpage) { /* * The pages were successfully merged: insert new @@ -2320,11 +2320,11 @@ next_mm: &rmap_item->rmap_list; ksm_scan.address += PAGE_SIZE; } else - put_user_page(*page); + put_page(*page); mmap_read_unlock(mm); return rmap_item; } - put_user_page(*page); + put_page(*page); ksm_scan.address += PAGE_SIZE; cond_resched(); } diff --git a/mm/madvise.c b/mm/madvise.c index 9ec5037bf297b75d64b6a057be1c3a42efda76ee..8920a7125389fa41cf1c76c7d5faa1937273e703 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include #include @@ -60,83 +62,94 @@ static int madvise_need_mmap_write(int behavior) } } +#ifdef CONFIG_ANON_VMA_NAME +struct anon_vma_name *anon_vma_name_alloc(const char *name) +{ + struct anon_vma_name *anon_name; + size_t count; + + /* Add 1 for NUL terminator at the end of the anon_name->name */ + count = strlen(name) + 1; + anon_name = kmalloc(struct_size(anon_name, name, count), GFP_KERNEL); + if (anon_name) { + kref_init(&anon_name->kref); + memcpy(anon_name->name, name, count); + } + + return anon_name; +} + +void anon_vma_name_free(struct kref *kref) +{ + struct anon_vma_name *anon_name = + container_of(kref, struct anon_vma_name, kref); + kfree(anon_name); +} + +struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma) +{ + mmap_assert_locked(vma->vm_mm); + + if (vma->vm_file) + return NULL; + + return vma->anon_name; +} + +/* mmap_lock should be write-locked */ +static int replace_anon_vma_name(struct vm_area_struct *vma, + struct anon_vma_name *anon_name) +{ + struct anon_vma_name *orig_name = anon_vma_name(vma); + + if (!anon_name) { + vma->anon_name = NULL; + anon_vma_name_put(orig_name); + return 0; + } + + if (anon_vma_name_eq(orig_name, anon_name)) + return 0; + + vma->anon_name = anon_vma_name_reuse(anon_name); + anon_vma_name_put(orig_name); + + return 0; +} +#else /* CONFIG_ANON_VMA_NAME */ +static int replace_anon_vma_name(struct vm_area_struct *vma, + struct anon_vma_name *anon_name) +{ + if (anon_name) + return -EINVAL; + + return 0; +} +#endif /* CONFIG_ANON_VMA_NAME */ /* - * We can potentially split a vm area into separate - * areas, each area with its own behavior. + * Update the vm_flags on region of a vma, splitting it or merging it as + * necessary. Must be called with mmap_sem held for writing; + * Caller should ensure anon_name stability by raising its refcount even when + * anon_name belongs to a valid vma because this function might free that vma. */ -static long madvise_behavior(struct vm_area_struct *vma, - struct vm_area_struct **prev, - unsigned long start, unsigned long end, int behavior) +static int madvise_update_vma(struct vm_area_struct *vma, + struct vm_area_struct **prev, unsigned long start, + unsigned long end, unsigned long new_flags, + struct anon_vma_name *anon_name) { struct mm_struct *mm = vma->vm_mm; - int error = 0; + int error; pgoff_t pgoff; - unsigned long new_flags = vma->vm_flags; - switch (behavior) { - case MADV_NORMAL: - new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; - break; - case MADV_SEQUENTIAL: - new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ; - break; - case MADV_RANDOM: - new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ; - break; - case MADV_DONTFORK: - new_flags |= VM_DONTCOPY; - break; - case MADV_DOFORK: - if (vma->vm_flags & VM_IO) { - error = -EINVAL; - goto out; - } - new_flags &= ~VM_DONTCOPY; - break; - case MADV_WIPEONFORK: - /* MADV_WIPEONFORK is only supported on anonymous memory. */ - if (vma->vm_file || vma->vm_flags & VM_SHARED) { - error = -EINVAL; - goto out; - } - new_flags |= VM_WIPEONFORK; - break; - case MADV_KEEPONFORK: - new_flags &= ~VM_WIPEONFORK; - break; - case MADV_DONTDUMP: - new_flags |= VM_DONTDUMP; - break; - case MADV_DODUMP: - if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) { - error = -EINVAL; - goto out; - } - new_flags &= ~VM_DONTDUMP; - break; - case MADV_MERGEABLE: - case MADV_UNMERGEABLE: - error = ksm_madvise(vma, start, end, behavior, &new_flags); - if (error) - goto out_convert_errno; - break; - case MADV_HUGEPAGE: - case MADV_NOHUGEPAGE: - error = hugepage_madvise(vma, &new_flags, behavior); - if (error) - goto out_convert_errno; - break; - } - - if (new_flags == vma->vm_flags) { + if (new_flags == vma->vm_flags && anon_vma_name_eq(anon_vma_name(vma), anon_name)) { *prev = vma; - goto out; + return 0; } pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, new_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, vma_get_anon_name(vma)); + vma->vm_userfaultfd_ctx, anon_name); if (*prev) { vma = *prev; goto success; @@ -145,23 +158,19 @@ static long madvise_behavior(struct vm_area_struct *vma, *prev = vma; if (start != vma->vm_start) { - if (unlikely(mm->map_count >= sysctl_max_map_count)) { - error = -ENOMEM; - goto out; - } + if (unlikely(mm->map_count >= sysctl_max_map_count)) + return -ENOMEM; error = __split_vma(mm, vma, start, 1); if (error) - goto out_convert_errno; + return error; } if (end != vma->vm_end) { - if (unlikely(mm->map_count >= sysctl_max_map_count)) { - error = -ENOMEM; - goto out; - } + if (unlikely(mm->map_count >= sysctl_max_map_count)) + return -ENOMEM; error = __split_vma(mm, vma, end, 0); if (error) - goto out_convert_errno; + return error; } success: @@ -171,16 +180,13 @@ success: vm_write_begin(vma); WRITE_ONCE(vma->vm_flags, new_flags); vm_write_end(vma); + if (!vma->vm_file) { + error = replace_anon_vma_name(vma, anon_name); + if (error) + return error; + } -out_convert_errno: - /* - * madvise() returns EAGAIN if kernel resources, such as - * slab, are temporarily unavailable. - */ - if (error == -ENOMEM) - error = -EAGAIN; -out: - return error; + return 0; } #ifdef CONFIG_SWAP @@ -878,6 +884,96 @@ static long madvise_remove(struct vm_area_struct *vma, return error; } +/* + * Apply an madvise behavior to a region of a vma. madvise_update_vma + * will handle splitting a vm area into separate areas, each area with its own + * behavior. + */ +static int madvise_vma_behavior(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end, + unsigned long behavior) +{ + int error; + struct anon_vma_name *anon_name; + unsigned long new_flags = vma->vm_flags; + + switch (behavior) { + case MADV_REMOVE: + return madvise_remove(vma, prev, start, end); + case MADV_WILLNEED: + return madvise_willneed(vma, prev, start, end); + case MADV_COLD: + return madvise_cold(vma, prev, start, end); + case MADV_PAGEOUT: + return madvise_pageout(vma, prev, start, end); + case MADV_FREE: + case MADV_DONTNEED: + return madvise_dontneed_free(vma, prev, start, end, behavior); + case MADV_NORMAL: + new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ; + break; + case MADV_SEQUENTIAL: + new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ; + break; + case MADV_RANDOM: + new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ; + break; + case MADV_DONTFORK: + new_flags |= VM_DONTCOPY; + break; + case MADV_DOFORK: + if (vma->vm_flags & VM_IO) + return -EINVAL; + new_flags &= ~VM_DONTCOPY; + break; + case MADV_WIPEONFORK: + /* MADV_WIPEONFORK is only supported on anonymous memory. */ + if (vma->vm_file || vma->vm_flags & VM_SHARED) + return -EINVAL; + new_flags |= VM_WIPEONFORK; + break; + case MADV_KEEPONFORK: + new_flags &= ~VM_WIPEONFORK; + break; + case MADV_DONTDUMP: + new_flags |= VM_DONTDUMP; + break; + case MADV_DODUMP: + if (!is_vm_hugetlb_page(vma) && new_flags & VM_SPECIAL) + return -EINVAL; + new_flags &= ~VM_DONTDUMP; + break; + case MADV_MERGEABLE: + case MADV_UNMERGEABLE: + error = ksm_madvise(vma, start, end, behavior, &new_flags); + if (error) + goto out; + break; + case MADV_HUGEPAGE: + case MADV_NOHUGEPAGE: + error = hugepage_madvise(vma, &new_flags, behavior); + if (error) + goto out; + break; + } + + anon_name = anon_vma_name(vma); + anon_vma_name_get(anon_name); + error = madvise_update_vma(vma, prev, start, end, new_flags, + anon_name); + anon_vma_name_put(anon_name); + +out: + /* + * madvise() returns EAGAIN if kernel resources, such as + * slab, are temporarily unavailable. + */ + if (error == -ENOMEM) + error = -EAGAIN; + return error; +} + #ifdef CONFIG_MEMORY_FAILURE /* * Error injection support for memory error handling. @@ -931,27 +1027,6 @@ static int madvise_inject_error(int behavior, } #endif -static long -madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, - unsigned long start, unsigned long end, int behavior) -{ - switch (behavior) { - case MADV_REMOVE: - return madvise_remove(vma, prev, start, end); - case MADV_WILLNEED: - return madvise_willneed(vma, prev, start, end); - case MADV_COLD: - return madvise_cold(vma, prev, start, end); - case MADV_PAGEOUT: - return madvise_pageout(vma, prev, start, end); - case MADV_FREE: - case MADV_DONTNEED: - return madvise_dontneed_free(vma, prev, start, end, behavior); - default: - return madvise_behavior(vma, prev, start, end, behavior); - } -} - static bool madvise_behavior_valid(int behavior) { @@ -1002,6 +1077,122 @@ process_madvise_behavior_valid(int behavior) } } +/* + * Walk the vmas in range [start,end), and call the visit function on each one. + * The visit function will get start and end parameters that cover the overlap + * between the current vma and the original range. Any unmapped regions in the + * original range will result in this function returning -ENOMEM while still + * calling the visit function on all of the existing vmas in the range. + * Must be called with the mmap_lock held for reading or writing. + */ +static +int madvise_walk_vmas(struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned long arg, + int (*visit)(struct vm_area_struct *vma, + struct vm_area_struct **prev, unsigned long start, + unsigned long end, unsigned long arg)) +{ + struct vm_area_struct *vma; + struct vm_area_struct *prev; + unsigned long tmp; + int unmapped_error = 0; + + /* + * If the interval [start,end) covers some unmapped address + * ranges, just ignore them, but return -ENOMEM at the end. + * - different from the way of handling in mlock etc. + */ + vma = find_vma_prev(mm, start, &prev); + if (vma && start > vma->vm_start) + prev = vma; + + for (;;) { + int error; + + /* Still start < end. */ + if (!vma) + return -ENOMEM; + + /* Here start < (end|vma->vm_end). */ + if (start < vma->vm_start) { + unmapped_error = -ENOMEM; + start = vma->vm_start; + if (start >= end) + break; + } + + /* Here vma->vm_start <= start < (end|vma->vm_end) */ + tmp = vma->vm_end; + if (end < tmp) + tmp = end; + + /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ + error = visit(vma, &prev, start, tmp, arg); + if (error) + return error; + start = tmp; + if (prev && start < prev->vm_end) + start = prev->vm_end; + if (start >= end) + break; + if (prev) + vma = prev->vm_next; + else /* madvise_remove dropped mmap_lock */ + vma = find_vma(mm, start); + } + + return unmapped_error; +} + +#ifdef CONFIG_ANON_VMA_NAME +static int madvise_vma_anon_name(struct vm_area_struct *vma, + struct vm_area_struct **prev, + unsigned long start, unsigned long end, + unsigned long anon_name) +{ + int error; + + /* Only anonymous mappings can be named */ + if (vma->vm_file) + return -EBADF; + + error = madvise_update_vma(vma, prev, start, end, vma->vm_flags, + (struct anon_vma_name *)anon_name); + + /* + * madvise() returns EAGAIN if kernel resources, such as + * slab, are temporarily unavailable. + */ + if (error == -ENOMEM) + error = -EAGAIN; + return error; +} + +int madvise_set_anon_name(struct mm_struct *mm, unsigned long start, + unsigned long len_in, struct anon_vma_name *anon_name) +{ + unsigned long end; + unsigned long len; + + if (start & ~PAGE_MASK) + return -EINVAL; + len = (len_in + ~PAGE_MASK) & PAGE_MASK; + + /* Check to see whether len was rounded up from small -ve to zero */ + if (len_in && !len) + return -EINVAL; + + end = start + len; + if (end < start) + return -EINVAL; + + if (end == start) + return 0; + + return madvise_walk_vmas(mm, start, end, (unsigned long)anon_name, + madvise_vma_anon_name); +} +#endif /* CONFIG_ANON_VMA_NAME */ /* * The madvise(2) system call. * @@ -1070,10 +1261,8 @@ process_madvise_behavior_valid(int behavior) */ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior) { - unsigned long end, tmp; - struct vm_area_struct *vma, *prev; - int unmapped_error = 0; - int error = -EINVAL; + unsigned long end; + int error; int write; size_t len; struct blk_plug plug; @@ -1081,23 +1270,22 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh start = untagged_addr(start); if (!madvise_behavior_valid(behavior)) - return error; + return -EINVAL; if (!PAGE_ALIGNED(start)) - return error; + return -EINVAL; len = PAGE_ALIGN(len_in); /* Check to see whether len was rounded up from small -ve to zero */ if (len_in && !len) - return error; + return -EINVAL; end = start + len; if (end < start) - return error; + return -EINVAL; - error = 0; if (end == start) - return error; + return 0; #ifdef CONFIG_MEMORY_FAILURE if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE) @@ -1112,51 +1300,9 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh mmap_read_lock(mm); } - /* - * If the interval [start,end) covers some unmapped address - * ranges, just ignore them, but return -ENOMEM at the end. - * - different from the way of handling in mlock etc. - */ - vma = find_vma_prev(mm, start, &prev); - if (vma && start > vma->vm_start) - prev = vma; - blk_start_plug(&plug); - for (;;) { - /* Still start < end. */ - error = -ENOMEM; - if (!vma) - goto out; - - /* Here start < (end|vma->vm_end). */ - if (start < vma->vm_start) { - unmapped_error = -ENOMEM; - start = vma->vm_start; - if (start >= end) - goto out; - } - - /* Here vma->vm_start <= start < (end|vma->vm_end) */ - tmp = vma->vm_end; - if (end < tmp) - tmp = end; - - /* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */ - error = madvise_vma(vma, &prev, start, tmp, behavior); - if (error) - goto out; - start = tmp; - if (prev && start < prev->vm_end) - start = prev->vm_end; - error = unmapped_error; - if (start >= end) - goto out; - if (prev) - vma = prev->vm_next; - else /* madvise_remove dropped mmap_lock */ - vma = find_vma(mm, start); - } -out: + error = madvise_walk_vmas(mm, start, end, behavior, + madvise_vma_behavior); blk_finish_plug(&plug); if (write) mmap_write_unlock(mm); @@ -1237,8 +1383,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, iov_iter_advance(&iter, iovec.iov_len); } - if (ret == 0) - ret = total_len - iov_iter_count(&iter); + ret = (total_len - iov_iter_count(&iter)) ? : ret; release_mm: mmput(mm); diff --git a/mm/memblock.c b/mm/memblock.c index 2270e3c96cb9c72cf936339f30e2a5378ce333bc..82c3d8f4122272ea56a3f36fa3ebe639d9f23d30 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -182,6 +182,8 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type, { unsigned long i; + memblock_cap_size(base, &size); + for (i = 0; i < type->cnt; i++) if (memblock_addrs_overlap(base, size, type->regions[i].base, type->regions[i].size)) @@ -285,7 +287,7 @@ static phys_addr_t __init_memblock memblock_find_in_range_node(phys_addr_t size, { /* pump up @end */ if (end == MEMBLOCK_ALLOC_ACCESSIBLE || - end == MEMBLOCK_ALLOC_KASAN) + end == MEMBLOCK_ALLOC_NOLEAKTRACE) end = memblock.current_limit; /* avoid allocating the first page */ @@ -364,14 +366,20 @@ void __init memblock_discard(void) addr = __pa(memblock.reserved.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.reserved.max); - __memblock_free_late(addr, size); + if (memblock_reserved_in_slab) + kfree(memblock.reserved.regions); + else + __memblock_free_late(addr, size); } if (memblock.memory.regions != memblock_memory_init_regions) { addr = __pa(memblock.memory.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.memory.max); - __memblock_free_late(addr, size); + if (memblock_memory_in_slab) + kfree(memblock.memory.regions); + else + __memblock_free_late(addr, size); } memblock_memory = NULL; @@ -1356,8 +1364,11 @@ again: return 0; done: - /* Skip kmemleak for kasan_init() due to high volume. */ - if (end != MEMBLOCK_ALLOC_KASAN) + /* + * Skip kmemleak for those places like kasan_init() and + * early_pgtable_alloc() due to high volume. + */ + if (end != MEMBLOCK_ALLOC_NOLEAKTRACE) /* * The min_count is set to 0 so that memblock allocated * blocks are never reported as leaks. This is because many @@ -1799,7 +1810,6 @@ bool __init_memblock memblock_is_region_memory(phys_addr_t base, phys_addr_t siz */ bool __init_memblock memblock_is_region_reserved(phys_addr_t base, phys_addr_t size) { - memblock_cap_size(base, &size); return memblock_overlaps_region(&memblock.reserved, base, size); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c4876df0e1803d68831fa71eced776a78aebe088..eed533d29bd77d28032fc611f7910a5e2fe6c752 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -231,7 +231,7 @@ enum res_type { iter != NULL; \ iter = mem_cgroup_iter(NULL, iter, NULL)) -static inline bool should_force_charge(void) +static inline bool task_is_dying(void) { return tsk_is_oom_victim(current) || fatal_signal_pending(current) || (current->flags & PF_EXITING); @@ -251,7 +251,7 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) } #ifdef CONFIG_MEMCG_KMEM -extern spinlock_t css_set_lock; +static DEFINE_SPINLOCK(objcg_lock); static void obj_cgroup_release(struct percpu_ref *ref) { @@ -285,13 +285,13 @@ static void obj_cgroup_release(struct percpu_ref *ref) WARN_ON_ONCE(nr_bytes & (PAGE_SIZE - 1)); nr_pages = nr_bytes >> PAGE_SHIFT; - spin_lock_irqsave(&css_set_lock, flags); + spin_lock_irqsave(&objcg_lock, flags); memcg = obj_cgroup_memcg(objcg); if (nr_pages) __memcg_kmem_uncharge(memcg, nr_pages); list_del(&objcg->list); mem_cgroup_put(memcg); - spin_unlock_irqrestore(&css_set_lock, flags); + spin_unlock_irqrestore(&objcg_lock, flags); percpu_ref_exit(ref); kfree_rcu(objcg, rcu); @@ -323,7 +323,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg, objcg = rcu_replace_pointer(memcg->objcg, NULL, true); - spin_lock_irq(&css_set_lock); + spin_lock_irq(&objcg_lock); /* Move active objcg to the parent's list */ xchg(&objcg->memcg, parent); @@ -338,7 +338,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg, } list_splice(&memcg->objcg_list, &parent->objcg_list); - spin_unlock_irq(&css_set_lock); + spin_unlock_irq(&objcg_lock); percpu_ref_kill(&objcg->refcnt); } @@ -1730,7 +1730,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, * A few threads which were not waiting at mutex_lock_killable() can * fail to bail out. Therefore, check again after holding oom_lock. */ - ret = should_force_charge() || out_of_memory(&oc); + ret = task_is_dying() || out_of_memory(&oc); unlock: mutex_unlock(&oom_lock); @@ -2684,6 +2684,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, struct page_counter *counter; enum oom_status oom_status; unsigned long nr_reclaimed; + bool passed_oom = false; bool may_swap = true; bool drained = false; unsigned long pflags; @@ -2720,15 +2721,6 @@ retry: if (gfp_mask & __GFP_ATOMIC) goto force; - /* - * Unlike in global OOM situations, memcg is not in a physical - * memory shortage. Allow dying and OOM-killed tasks to - * bypass the last charges so that they can exit quickly and - * free their memory. - */ - if (unlikely(should_force_charge())) - goto force; - /* * Prevent unbounded recursion when reclaim operations need to * allocate memory. This might exceed the limits temporarily, @@ -2789,8 +2781,9 @@ retry: if (gfp_mask & __GFP_NOFAIL) goto force; - if (fatal_signal_pending(current)) - goto force; + /* Avoid endless loop for tasks bypassed by the oom killer */ + if (passed_oom && task_is_dying()) + goto nomem; /* * keep retrying as long as the memcg oom killer is able to make @@ -2799,14 +2792,10 @@ retry: */ oom_status = mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages * PAGE_SIZE)); - switch (oom_status) { - case OOM_SUCCESS: + if (oom_status == OOM_SUCCESS) { + passed_oom = true; nr_retries = MAX_RECLAIM_RETRIES; goto retry; - case OOM_FAILED: - goto force; - default: - goto nomem; } nomem: if (!(gfp_mask & __GFP_NOFAIL)) @@ -2894,6 +2883,7 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg) * - LRU isolation * - lock_page_memcg() * - exclusive reference + * - mem_cgroup_trylock_pages() */ page->mem_cgroup = memcg; } @@ -5265,6 +5255,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) static void mem_cgroup_free(struct mem_cgroup *memcg) { + lru_gen_exit_memcg(memcg); memcg_wb_domain_exit(memcg); /* * Flush percpu vmstats and vmevents to guarantee the value correctness @@ -5340,6 +5331,7 @@ static struct mem_cgroup *mem_cgroup_alloc(void) #endif idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); trace_android_vh_mem_cgroup_alloc(memcg); + lru_gen_init_memcg(memcg); return memcg; fail: mem_cgroup_id_remove(memcg); @@ -6234,6 +6226,29 @@ static void mem_cgroup_move_task(void) } #endif +#ifdef CONFIG_LRU_GEN +static void mem_cgroup_attach(struct cgroup_taskset *tset) +{ + struct cgroup_subsys_state *css; + struct task_struct *task = NULL; + + cgroup_taskset_for_each_leader(task, css, tset) + break; + + if (!task) + return; + + task_lock(task); + if (task->mm && task->mm->owner == task) + lru_gen_migrate_mm(task->mm); + task_unlock(task); +} +#else +static void mem_cgroup_attach(struct cgroup_taskset *tset) +{ +} +#endif /* CONFIG_LRU_GEN */ + /* * Cgroup retains root cgroups across [un]mount cycles making it necessary * to verify whether we're attached to the default hierarchy on each mount @@ -6586,6 +6601,7 @@ struct cgroup_subsys memory_cgrp_subsys = { .css_free = mem_cgroup_css_free, .css_reset = mem_cgroup_css_reset, .can_attach = mem_cgroup_can_attach, + .attach = mem_cgroup_attach, .cancel_attach = mem_cgroup_cancel_attach, .post_attach = mem_cgroup_move_task, .bind = mem_cgroup_bind, diff --git a/mm/memfd.c b/mm/memfd.c index 2647c898990c80491b512944a890d47c90f23aca..fae4142f7d25451e048d43a54bb8debcda4d64c4 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -31,20 +31,28 @@ static void memfd_tag_pins(struct xa_state *xas) { struct page *page; - unsigned int tagged = 0; + int latency = 0; + int cache_count; lru_add_drain(); xas_lock_irq(xas); xas_for_each(xas, page, ULONG_MAX) { - if (xa_is_value(page)) - continue; - page = find_subpage(page, xas->xa_index); - if (page_count(page) - page_mapcount(page) > 1) + cache_count = 1; + if (!xa_is_value(page) && + PageTransHuge(page) && !PageHuge(page)) + cache_count = HPAGE_PMD_NR; + + if (!xa_is_value(page) && + page_count(page) - total_mapcount(page) != cache_count) xas_set_mark(xas, MEMFD_TAG_PINNED); + if (cache_count != 1) + xas_set(xas, page->index + cache_count); - if (++tagged % XA_CHECK_SCHED) + latency += cache_count; + if (latency < XA_CHECK_SCHED) continue; + latency = 0; xas_pause(xas); xas_unlock_irq(xas); @@ -73,7 +81,8 @@ static int memfd_wait_for_pins(struct address_space *mapping) error = 0; for (scan = 0; scan <= LAST_SCAN; scan++) { - unsigned int tagged = 0; + int latency = 0; + int cache_count; if (!xas_marked(&xas, MEMFD_TAG_PINNED)) break; @@ -87,10 +96,14 @@ static int memfd_wait_for_pins(struct address_space *mapping) xas_lock_irq(&xas); xas_for_each_marked(&xas, page, ULONG_MAX, MEMFD_TAG_PINNED) { bool clear = true; - if (xa_is_value(page)) - continue; - page = find_subpage(page, xas.xa_index); - if (page_count(page) - page_mapcount(page) != 1) { + + cache_count = 1; + if (!xa_is_value(page) && + PageTransHuge(page) && !PageHuge(page)) + cache_count = HPAGE_PMD_NR; + + if (!xa_is_value(page) && cache_count != + page_count(page) - total_mapcount(page)) { /* * On the last scan, we clean up all those tags * we inserted; but make a note that we still @@ -103,8 +116,11 @@ static int memfd_wait_for_pins(struct address_space *mapping) } if (clear) xas_clear_mark(&xas, MEMFD_TAG_PINNED); - if (++tagged % XA_CHECK_SCHED) + + latency += cache_count; + if (latency < XA_CHECK_SCHED) continue; + latency = 0; xas_pause(&xas); xas_unlock_irq(&xas); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 01445ddff58d80e6be202ef4a342763e2afb6ccf..aef267c6a7246369a799b296162a37a9f69a0f87 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1938,6 +1938,7 @@ retry: else if (ret == 0) if (soft_offline_free_page(page) && try_again) { try_again = false; + flags &= ~MF_COUNT_INCREASED; goto retry; } diff --git a/mm/memory.c b/mm/memory.c index 097bf967a0479796eb069293dc666a036188dec1..a038d72a8110600b59722f62cbb4d85d49dad7cf 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -126,18 +126,6 @@ int randomize_va_space __read_mostly = 2; #endif -#ifndef arch_faults_on_old_pte -static inline bool arch_faults_on_old_pte(void) -{ - /* - * Those arches which don't have hw access flag feature need to - * implement their own helper. By default, "true" means pagefault - * will be hit on old pte. - */ - return true; -} -#endif - #ifndef arch_wants_old_prefaulted_pte static inline bool arch_wants_old_prefaulted_pte(void) { @@ -172,21 +160,9 @@ static int __init init_zero_pfn(void) } early_initcall(init_zero_pfn); -/* - * Only trace rss_stat when there is a 512kb cross over. - * Smaller changes may be lost unless every small change is - * crossing into or returning to a 512kb boundary. - */ -#define TRACE_MM_COUNTER_THRESHOLD 128 - -void mm_trace_rss_stat(struct mm_struct *mm, int member, long count, - long value) +void mm_trace_rss_stat(struct mm_struct *mm, int member, long count) { - long thresh_mask = ~(TRACE_MM_COUNTER_THRESHOLD - 1); - - /* Threshold roll-over, trace it */ - if ((count & thresh_mask) != ((count - value) & thresh_mask)) - trace_rss_stat(mm, member, count); + trace_rss_stat(mm, member, count); } EXPORT_SYMBOL_GPL(mm_trace_rss_stat); @@ -2892,7 +2868,7 @@ static inline bool cow_user_page(struct page *dst, struct page *src, * On architectures with software "accessed" bits, we would * take a double page fault, so mark it accessed here. */ - if (arch_faults_on_old_pte() && !pte_young(vmf->orig_pte)) { + if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { pte_t entry; vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); @@ -3159,7 +3135,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) */ if (!pte_map_lock(vmf)) { ret = VM_FAULT_RETRY; - goto out_free_new; + goto out_invalidate_end; } if (likely(pte_same(*vmf->pte, vmf->orig_pte))) { if (old_page) { @@ -3247,6 +3223,8 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) put_page(old_page); } return page_copied ? VM_FAULT_WRITE : 0; +out_invalidate_end: + mmu_notifier_invalidate_range_only_end(&range); out_free_new: put_page(new_page); out: @@ -3622,7 +3600,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) { /* skip swapcache */ - gfp_t flags = GFP_HIGHUSER_MOVABLE; + gfp_t flags = GFP_HIGHUSER_MOVABLE | __GFP_CMA; trace_android_rvh_set_skip_swapcache_flags(&flags); page = alloc_page_vma(flags, vma, vmf->address); @@ -3662,7 +3640,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) ret = VM_FAULT_RETRY; goto out; } else { - page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, + page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE | __GFP_CMA, vmf); swapcache = page; } @@ -4159,8 +4137,17 @@ vm_fault_t finish_fault(struct vm_fault *vmf) return ret; } - if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) + if (vmf->prealloc_pte) { + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (likely(pmd_none(*vmf->pmd))) { + mm_inc_nr_ptes(vma->vm_mm); + pmd_populate(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); + vmf->prealloc_pte = NULL; + } + spin_unlock(vmf->ptl); + } else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) { return VM_FAULT_OOM; + } } /* See comment in handle_pte_fault() */ @@ -4706,8 +4693,19 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) goto unlock; } if (vmf->flags & FAULT_FLAG_WRITE) { - if (!pte_write(entry)) - return do_wp_page(vmf); + if (!pte_write(entry)) { + if (!(vmf->flags & FAULT_FLAG_SPECULATIVE)) + return do_wp_page(vmf); + + if (!mmu_notifier_trylock(vmf->vma->vm_mm)) { + ret = VM_FAULT_RETRY; + goto unlock; + } + + ret = do_wp_page(vmf); + mmu_notifier_unlock(vmf->vma->vm_mm); + return ret; + } entry = pte_mkdirty(entry); } entry = pte_mkyoung(entry); @@ -4894,6 +4892,10 @@ static inline void mm_account_fault(struct pt_regs *regs, else perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } + +static void lru_gen_enter_fault(struct vm_area_struct *vma); +static void lru_gen_exit_fault(void); + #ifdef CONFIG_SPECULATIVE_PAGE_FAULT #ifndef CONFIG_ARCH_HAS_PTE_SPECIAL @@ -5011,11 +5013,15 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm, goto out_walk; p4d = p4d_offset(pgd, address); + if (pgd_val(READ_ONCE(*pgd)) != pgd_val(pgdval)) + goto out_walk; p4dval = READ_ONCE(*p4d); if (p4d_none(p4dval) || unlikely(p4d_bad(p4dval))) goto out_walk; vmf.pud = pud_offset(p4d, address); + if (p4d_val(READ_ONCE(*p4d)) != p4d_val(p4dval)) + goto out_walk; pudval = READ_ONCE(*vmf.pud); if (pud_none(pudval) || unlikely(pud_bad(pudval))) goto out_walk; @@ -5025,6 +5031,8 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm, goto out_walk; vmf.pmd = pmd_offset(vmf.pud, address); + if (pud_val(READ_ONCE(*vmf.pud)) != pud_val(pudval)) + goto out_walk; vmf.orig_pmd = READ_ONCE(*vmf.pmd); /* * pmd_none could mean that a hugepage collapse is in progress @@ -5052,6 +5060,11 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm, */ vmf.pte = pte_offset_map(vmf.pmd, address); + if (pmd_val(READ_ONCE(*vmf.pmd)) != pmd_val(vmf.orig_pmd)) { + pte_unmap(vmf.pte); + vmf.pte = NULL; + goto out_walk; + } vmf.orig_pte = READ_ONCE(*vmf.pte); barrier(); /* See comment in handle_pte_fault() */ if (pte_none(vmf.orig_pte)) { @@ -5074,7 +5087,9 @@ static vm_fault_t ___handle_speculative_fault(struct mm_struct *mm, } mem_cgroup_enter_user_fault(); + lru_gen_enter_fault(vmf.vma); ret = handle_pte_fault(&vmf); + lru_gen_exit_fault(); mem_cgroup_exit_user_fault(); if (ret != VM_FAULT_RETRY) { @@ -5150,6 +5165,27 @@ bool can_reuse_spf_vma(struct vm_area_struct *vma, unsigned long address) } #endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ +#ifdef CONFIG_LRU_GEN +static void lru_gen_enter_fault(struct vm_area_struct *vma) +{ + /* the LRU algorithm doesn't apply to sequential or random reads */ + current->in_lru_fault = !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ)); +} + +static void lru_gen_exit_fault(void) +{ + current->in_lru_fault = false; +} +#else +static void lru_gen_enter_fault(struct vm_area_struct *vma) +{ +} + +static void lru_gen_exit_fault(void) +{ +} +#endif /* CONFIG_LRU_GEN */ + /* * By the time we get here, we already hold the mm semaphore * @@ -5181,11 +5217,15 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address, if (flags & FAULT_FLAG_USER) mem_cgroup_enter_user_fault(); + lru_gen_enter_fault(vma); + if (unlikely(is_vm_hugetlb_page(vma))) ret = hugetlb_fault(vma->vm_mm, vma, address, flags); else ret = __handle_mm_fault(vma, address, flags); + lru_gen_exit_fault(); + if (flags & FAULT_FLAG_USER) { mem_cgroup_exit_user_fault(); /* @@ -5514,7 +5554,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, buf, maddr + offset, bytes); } kunmap(page); - put_user_page(page); + put_page(page); } len -= bytes; buf += bytes; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 3803f2d8fab8ae6d9f06a6411392f73cdc1d4821..3fd619ab92e973679ae385955018c43afb067cc5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1544,7 +1544,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) MEMORY_OFFLINE | REPORT_FAILURE, NULL); if (ret) { reason = "failure to isolate range"; - goto failed_removal; + goto failed_removal_lru_cache_disabled; } drain_all_pages(zone); @@ -1659,6 +1659,8 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages) failed_removal_isolated: undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); memory_notify(MEM_CANCEL_OFFLINE, &arg); +failed_removal_lru_cache_disabled: + lru_cache_enable(); failed_removal: pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n", (unsigned long long) start_pfn << PAGE_SHIFT, diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 58abb6d96f43601a419c63d0f0946d01733699a1..3b2584088d2813a6da00857a9867b070b2b2b5b7 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -842,7 +842,7 @@ static int mbind_range(struct mm_struct *mm, unsigned long start, prev = vma_merge(mm, prev, vmstart, vmend, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, new_pol, vma->vm_userfaultfd_ctx, - vma_get_anon_name(vma)); + anon_vma_name(vma)); if (prev) { vma = prev; next = vma->vm_next; @@ -2240,8 +2240,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, * memory with both reclaim and compact as well. */ if (!page && (gfp & __GFP_DIRECT_RECLAIM)) - page = __alloc_pages_node(hpage_node, - gfp, order); + page = __alloc_pages_nodemask(gfp, order, + hpage_node, nmask); goto out; } diff --git a/mm/migrate.c b/mm/migrate.c index 7137465caf4047c5e4ab77f581288902c2aedb9e..4d25efd7fca47dec6bf1633141ba3d9e3b71d5b2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -105,7 +105,7 @@ int isolate_movable_page(struct page *page, isolate_mode_t mode) /* Driver shouldn't use PG_isolated bit of page->flags */ WARN_ON_ONCE(PageIsolated(page)); - __SetPageIsolated(page); + SetPageIsolated(page); unlock_page(page); return 0; @@ -129,7 +129,7 @@ void putback_movable_page(struct page *page) mapping = page_mapping(page); mapping->a_ops->putback_page(page); - __ClearPageIsolated(page); + ClearPageIsolated(page); } /* @@ -162,7 +162,7 @@ void putback_movable_pages(struct list_head *l) if (PageMovable(page)) putback_movable_page(page); else - __ClearPageIsolated(page); + ClearPageIsolated(page); unlock_page(page); put_page(page); } else { @@ -952,7 +952,7 @@ static int move_to_new_page(struct page *newpage, struct page *page, VM_BUG_ON_PAGE(!PageIsolated(page), page); if (!PageMovable(page)) { rc = MIGRATEPAGE_SUCCESS; - __ClearPageIsolated(page); + ClearPageIsolated(page); goto out; } @@ -974,7 +974,7 @@ static int move_to_new_page(struct page *newpage, struct page *page, * We clear PG_movable under page_lock so any compactor * cannot try to migrate this page. */ - __ClearPageIsolated(page); + ClearPageIsolated(page); } /* @@ -1160,7 +1160,7 @@ static int unmap_and_move(new_page_t get_new_page, if (unlikely(__PageMovable(page))) { lock_page(page); if (!PageMovable(page)) - __ClearPageIsolated(page); + ClearPageIsolated(page); unlock_page(page); } goto out; @@ -1215,7 +1215,7 @@ out: if (PageMovable(page)) putback_movable_page(page); else - __ClearPageIsolated(page); + ClearPageIsolated(page); unlock_page(page); put_page(page); } @@ -1655,7 +1655,7 @@ out_putpage: * isolate_lru_page() or drop the page ref if it was * not isolated. */ - put_user_page(page); + put_page(page); out: mmap_read_unlock(mm); return err; diff --git a/mm/mlock.c b/mm/mlock.c index 188aa7458c990c93a734f3c4e903b873cea8bf0c..49fe3d9ca2846e6178540057a9c4e0b17ff4ecfa 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -119,7 +119,7 @@ static bool __munlock_isolate_lru_page(struct page *page, bool getpage) if (getpage) get_page(page); ClearPageLRU(page); - del_page_from_lru_list(page, lruvec, page_lru(page)); + del_page_from_lru_list(page, lruvec); return true; } @@ -472,14 +472,6 @@ void munlock_vma_pages_range(struct vm_area_struct *vma, */ page = follow_page(vma, start, FOLL_GET | FOLL_DUMP); if (page && !IS_ERR(page)) { - /* - * munlock_vma_pages_range uses follow_page(FOLL_GET) - * so it need to use put_user_page but the munlock - * path is quite complicated to deal with each put - * sites correctly so just unattribute them to avoid - * false positive at this moment. - */ - reset_page_pinner(page, compound_order(page)); if (PageTransTail(page)) { VM_BUG_ON_PAGE(PageMlocked(page), page); put_page(page); /* follow_page_mask() */ @@ -550,7 +542,7 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, vma_get_anon_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma)); if (*prev) { vma = *prev; goto success; diff --git a/mm/mm_init.c b/mm/mm_init.c index b06a30fbedff7f3432ffe4ef82f23ed213d54d14..9351e8a25a80ddf9477e216947b7414a08973c93 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -19,10 +19,6 @@ #ifdef CONFIG_DEBUG_MEMORY_INIT int __meminitdata mminit_loglevel; -#ifndef SECTIONS_SHIFT -#define SECTIONS_SHIFT 0 -#endif - /* The zonelists are simply reported, validation is manual. */ void __init mminit_verify_zonelist(void) { @@ -69,14 +65,16 @@ void __init mminit_verify_pageflags_layout(void) shift = 8 * sizeof(unsigned long); width = shift - SECTIONS_WIDTH - NODES_WIDTH - ZONES_WIDTH - - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH; + - LAST_CPUPID_SHIFT - KASAN_TAG_WIDTH - LRU_GEN_WIDTH - LRU_REFS_WIDTH; mminit_dprintk(MMINIT_TRACE, "pageflags_layout_widths", - "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Flags %d\n", + "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d Gen %d Tier %d Flags %d\n", SECTIONS_WIDTH, NODES_WIDTH, ZONES_WIDTH, LAST_CPUPID_WIDTH, KASAN_TAG_WIDTH, + LRU_GEN_WIDTH, + LRU_REFS_WIDTH, NR_PAGEFLAGS); mminit_dprintk(MMINIT_TRACE, "pageflags_layout_shifts", "Section %d Node %d Zone %d Lastcpupid %d Kasantag %d\n", diff --git a/mm/mmap.c b/mm/mmap.c index b1ca729a11f237fa5dc13176c4e328844f620fb3..cb5fded3aa0b41b9f19f107399f794d233bf7277 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1083,7 +1084,7 @@ again: static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char __user *anon_name) + struct anon_vma_name *anon_name) { /* * VM_SOFTDIRTY should not prevent from VMA merging, if we @@ -1101,7 +1102,7 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma, return 0; if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx)) return 0; - if (vma_get_anon_name(vma) != anon_name) + if (!anon_vma_name_eq(anon_vma_name(vma), anon_name)) return 0; return 1; } @@ -1136,7 +1137,7 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char __user *anon_name) + struct anon_vma_name *anon_name) { if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { @@ -1158,7 +1159,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags, struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char __user *anon_name) + struct anon_vma_name *anon_name) { if (is_mergeable_vma(vma, file, vm_flags, vm_userfaultfd_ctx, anon_name) && is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) { @@ -1219,7 +1220,7 @@ struct vm_area_struct *__vma_merge(struct mm_struct *mm, struct anon_vma *anon_vma, struct file *file, pgoff_t pgoff, struct mempolicy *policy, struct vm_userfaultfd_ctx vm_userfaultfd_ctx, - const char __user *anon_name, bool keep_locked) + struct anon_vma_name *anon_name, bool keep_locked) { pgoff_t pglen = (end - addr) >> PAGE_SHIFT; struct vm_area_struct *area, *next; @@ -1249,8 +1250,7 @@ struct vm_area_struct *__vma_merge(struct mm_struct *mm, mpol_equal(vma_policy(prev), policy) && can_vma_merge_after(prev, vm_flags, anon_vma, file, pgoff, - vm_userfaultfd_ctx, - anon_name)) { + vm_userfaultfd_ctx, anon_name)) { /* * OK, it can. Can we now merge in the successor as well? */ @@ -1259,8 +1259,7 @@ struct vm_area_struct *__vma_merge(struct mm_struct *mm, can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, - vm_userfaultfd_ctx, - anon_name) && + vm_userfaultfd_ctx, anon_name) && is_mergeable_anon_vma(prev->anon_vma, next->anon_vma, NULL)) { /* cases 1, 6 */ @@ -1284,8 +1283,7 @@ struct vm_area_struct *__vma_merge(struct mm_struct *mm, mpol_equal(policy, vma_policy(next)) && can_vma_merge_before(next, vm_flags, anon_vma, file, pgoff+pglen, - vm_userfaultfd_ctx, - anon_name)) { + vm_userfaultfd_ctx, anon_name)) { if (prev && addr < prev->vm_end) /* case 4 */ err = __vma_adjust(prev, prev->vm_start, addr, prev->vm_pgoff, NULL, next, @@ -1890,8 +1888,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, */ if (unlikely(vm_flags != vma->vm_flags && prev)) { merge = vma_merge(mm, prev, vma->vm_start, vma->vm_end, vma->vm_flags, - NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, - vma_get_anon_name(vma)); + NULL, vma->vm_file, vma->vm_pgoff, NULL, NULL_VM_UFFD_CTX, NULL); if (merge) { /* ->mmap() can change vma->vm_file and fput the original file. So * fput the vma->vm_file here or we would add an extra fput for file @@ -3295,10 +3292,9 @@ void exit_mmap(struct mm_struct *mm) (void)__oom_reap_task_mm(mm); set_bit(MMF_OOM_SKIP, &mm->flags); - mmap_write_lock(mm); - mmap_write_unlock(mm); } + mmap_write_lock(mm); if (mm->locked_vm) { vma = mm->mmap; while (vma) { @@ -3311,8 +3307,11 @@ void exit_mmap(struct mm_struct *mm) arch_exit_mmap(mm); vma = mm->mmap; - if (!vma) /* Can happen if dup_mmap() received an OOM */ + if (!vma) { + /* Can happen if dup_mmap() received an OOM */ + mmap_write_unlock(mm); return; + } lru_add_drain(); flush_cache_mm(mm); @@ -3323,16 +3322,14 @@ void exit_mmap(struct mm_struct *mm) free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, 0, -1); - /* - * Walk the list again, actually closing and freeing it, - * with preemption enabled, without holding any MM locks. - */ + /* Walk the list again, actually closing and freeing it. */ while (vma) { if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); vma = remove_vma(vma); cond_resched(); } + mmap_write_unlock(mm); vm_unacct_memory(nr_accounted); } @@ -3413,7 +3410,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap, new_vma = __vma_merge(mm, prev, addr, addr + len, vma->vm_flags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), vma->vm_userfaultfd_ctx, - vma_get_anon_name(vma), true); + anon_vma_name(vma), true); if (new_vma) { /* * Source vma may have been merged into new_vma diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 07f42a7a60657adf069483c6981c642d67ba78f1..ce161cd0c8a1051c831165bf353588a0e4d67a31 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -621,6 +621,25 @@ void __mmu_notifier_invalidate_range(struct mm_struct *mm, srcu_read_unlock(&srcu, id); } +#ifdef CONFIG_SPECULATIVE_PAGE_FAULT + +static inline void mmu_notifier_write_lock(struct mm_struct *mm) +{ + percpu_down_write(mm->mmu_notifier_lock); +} + +static inline void mmu_notifier_write_unlock(struct mm_struct *mm) +{ + percpu_up_write(mm->mmu_notifier_lock); +} + +#else /* CONFIG_SPECULATIVE_PAGE_FAULT */ + +static inline void mmu_notifier_write_lock(struct mm_struct *mm) {} +static inline void mmu_notifier_write_unlock(struct mm_struct *mm) {} + +#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */ + /* * Same as mmu_notifier_register but here the caller must hold the mmap_lock in * write mode. A NULL mn signals the notifier is being registered for itree @@ -661,9 +680,13 @@ int __mmu_notifier_register(struct mmu_notifier *subscription, INIT_HLIST_HEAD(&subscriptions->deferred_list); } + mmu_notifier_write_lock(mm); + ret = mm_take_all_locks(mm); - if (unlikely(ret)) + if (unlikely(ret)) { + mmu_notifier_write_unlock(mm); goto out_clean; + } /* * Serialize the update against mmu_notifier_unregister. A @@ -698,6 +721,7 @@ int __mmu_notifier_register(struct mmu_notifier *subscription, mm->notifier_subscriptions->has_itree = true; mm_drop_all_locks(mm); + mmu_notifier_write_unlock(mm); BUG_ON(atomic_read(&mm->mm_users) <= 0); return 0; diff --git a/mm/mmzone.c b/mm/mmzone.c index 2241281ffe1677289a553eec5cbc7c21f769fdc9..1470eea5454aee86616a0788bccc740a39501fe8 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -96,6 +96,8 @@ void lruvec_init(struct lruvec *lruvec) for_each_lru(lru) INIT_LIST_HEAD(&lruvec->lists[lru]); + + lru_gen_init_lruvec(lruvec); } #if defined(CONFIG_NUMA_BALANCING) && !defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) diff --git a/mm/mprotect.c b/mm/mprotect.c index 05073d7abff48c2504c4621a4f128d49d6385b46..b7f744ce029537ff84577bb6146a1c5469e6fefa 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -35,51 +35,6 @@ #include "internal.h" -/* Determine whether we can avoid taking write faults for known dirty pages. */ -static bool may_avoid_write_fault(pte_t pte, struct vm_area_struct *vma, - unsigned long cp_flags) -{ - /* - * The dirty accountable bit indicates that we can always make the page - * writable regardless of the number of references. - */ - if (!(cp_flags & MM_CP_DIRTY_ACCT)) { - /* Otherwise, we must have exclusive access to the page. */ - if (!(vma_is_anonymous(vma) && (vma->vm_flags & VM_WRITE))) - return false; - - if (page_count(pte_page(pte)) != 1) - return false; - } - - /* - * Don't do this optimization for clean pages as we need to be notified - * of the transition from clean to dirty. - */ - if (!pte_dirty(pte)) - return false; - - /* Same for softdirty. */ - if (!pte_soft_dirty(pte) && (vma->vm_flags & VM_SOFTDIRTY)) - return false; - - /* - * For userfaultfd the user program needs to monitor write faults so we - * can't do this optimization. - */ - if (pte_uffd_wp(pte)) - return false; - - /* - * It is unclear whether this optimization can be done safely for NUMA - * pages. - */ - if (cp_flags & MM_CP_PROT_NUMA) - return false; - - return true; -} - static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t newprot, unsigned long cp_flags) @@ -88,6 +43,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, spinlock_t *ptl; unsigned long pages = 0; int target_node = NUMA_NO_NODE; + bool dirty_accountable = cp_flags & MM_CP_DIRTY_ACCT; bool prot_numa = cp_flags & MM_CP_PROT_NUMA; bool uffd_wp = cp_flags & MM_CP_UFFD_WP; bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE; @@ -138,7 +94,7 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, /* Also skip shared copy-on-write pages */ if (is_cow_mapping(vma->vm_flags) && - page_mapcount(page) != 1) + page_count(page) != 1) continue; /* @@ -175,8 +131,12 @@ static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd, ptent = pte_clear_uffd_wp(ptent); } - if (may_avoid_write_fault(ptent, vma, cp_flags)) + /* Avoid taking write faults for known dirty pages */ + if (dirty_accountable && pte_dirty(ptent) && + (pte_soft_dirty(ptent) || + !(vma->vm_flags & VM_SOFTDIRTY))) { ptent = pte_mkwrite(ptent); + } ptep_modify_prot_commit(vma, addr, pte, oldpte, ptent); pages++; } else if (is_swap_pte(oldpte)) { @@ -494,7 +454,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev, pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); *pprev = vma_merge(mm, *pprev, start, end, newflags, vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma), - vma->vm_userfaultfd_ctx, vma_get_anon_name(vma)); + vma->vm_userfaultfd_ctx, anon_vma_name(vma)); if (*pprev) { vma = *pprev; VM_WARN_ON((vma->vm_flags ^ newflags) & ~VM_SOFTDIRTY); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index acf064f3fc4eba74c878175b560fa650411f83af..e722c6877faf39e1d900eb9792ff2c3813021b3c 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1172,27 +1173,103 @@ bool out_of_memory(struct oom_control *oc) } /* - * The pagefault handler calls here because it is out of memory, so kill a - * memory-hogging task. If oom_lock is held by somebody else, a parallel oom - * killing is already in progress so do nothing. + * The pagefault handler calls here because some allocation has failed. We have + * to take care of the memcg OOM here because this is the only safe context without + * any locks held but let the oom killer triggered from the allocation context care + * about the global OOM. */ void pagefault_out_of_memory(void) { - struct oom_control oc = { - .zonelist = NULL, - .nodemask = NULL, - .memcg = NULL, - .gfp_mask = 0, - .order = 0, - }; + static DEFINE_RATELIMIT_STATE(pfoom_rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); if (mem_cgroup_oom_synchronize(true)) return; - if (!mutex_trylock(&oom_lock)) + if (fatal_signal_pending(current)) return; - out_of_memory(&oc); - mutex_unlock(&oom_lock); + + if (__ratelimit(&pfoom_rs)) + pr_warn("Huh VM_FAULT_OOM leaked out to the #PF handler. Retrying PF\n"); +} + +SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) +{ +#ifdef CONFIG_MMU + struct mm_struct *mm = NULL; + struct task_struct *task; + struct task_struct *p; + unsigned int f_flags; + bool reap = false; + struct pid *pid; + long ret = 0; + + if (flags) + return -EINVAL; + + pid = pidfd_get_pid(pidfd, &f_flags); + if (IS_ERR(pid)) + return PTR_ERR(pid); + + task = get_pid_task(pid, PIDTYPE_TGID); + if (!task) { + ret = -ESRCH; + goto put_pid; + } + + /* + * Make sure to choose a thread which still has a reference to mm + * during the group exit + */ + p = find_lock_task_mm(task); + if (!p) { + ret = -ESRCH; + goto put_task; + } + + mm = p->mm; + mmgrab(mm); + + /* + * If we are too late and exit_mmap already checked mm_is_oom_victim + * then will block on mmap_read_lock until exit_mmap releases mmap_lock + */ + set_bit(MMF_OOM_VICTIM, &mm->flags); + + if (task_will_free_mem(p)) + reap = true; + else { + /* Error only if the work has not been done already */ + if (!test_bit(MMF_OOM_SKIP, &mm->flags)) + ret = -EINVAL; + } + task_unlock(p); + + if (!reap) + goto drop_mm; + + if (mmap_read_lock_killable(mm)) { + ret = -EINTR; + goto drop_mm; + } + /* + * Check MMF_OOM_SKIP again under mmap_read_lock protection to ensure + * possible change in exit_mmap is seen + */ + if (!test_bit(MMF_OOM_SKIP, &mm->flags) && !__oom_reap_task_mm(mm)) + ret = -EAGAIN; + mmap_read_unlock(mm); + +drop_mm: + mmdrop(mm); +put_task: + put_task_struct(task); +put_pid: + put_pid(pid); + return ret; +#else + return -ENOSYS; +#endif /* CONFIG_MMU */ } void add_to_oom_reaper(struct task_struct *p) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e18561a8c5cd7951292e00aa3fb4d900e63d47a6..7041645cfe55d76ac87e1d76bf75eefaf1cef9f3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -330,11 +330,6 @@ compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS] = { #endif }; -/* - * Try to keep at least this much lowmem free. Do not allow normal - * allocations below this point, only high priority ones. Automatically - * tuned according to the amount of memory in the system. - */ int min_free_kbytes = 1024; int user_min_free_kbytes = -1; #ifdef CONFIG_DISCONTIGMEM @@ -353,13 +348,6 @@ int watermark_boost_factor __read_mostly = 15000; #endif int watermark_scale_factor = 10; -/* - * Extra memory for the system to try freeing. Used to temporarily - * free memory, to make space for new workloads. Anyone can allocate - * down to the min watermarks controlled by min_free_kbytes above. - */ -int extra_free_kbytes = 0; - static unsigned long nr_kernel_pages __initdata; static unsigned long nr_all_pages __initdata; static unsigned long dma_reserve __initdata; @@ -394,25 +382,9 @@ int page_group_by_mobility_disabled __read_mostly; */ static DEFINE_STATIC_KEY_TRUE(deferred_pages); -/* - * Calling kasan_poison_pages() only after deferred memory initialization - * has completed. Poisoning pages during deferred memory init will greatly - * lengthen the process and cause problem in large memory systems as the - * deferred pages initialization is done with interrupt disabled. - * - * Assuming that there will be no reference to those newly initialized - * pages before they are ever allocated, this should have no effect on - * KASAN memory tracking as the poison will be properly inserted at page - * allocation time. The only corner case is when pages are allocated by - * on-demand allocation and then freed again before the deferred pages - * initialization is done, but this is not likely to happen. - */ -static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) +static inline bool deferred_pages_enabled(void) { - return static_branch_unlikely(&deferred_pages) || - (!IS_ENABLED(CONFIG_KASAN_GENERIC) && - (fpi_flags & FPI_SKIP_KASAN_POISON)) || - PageSkipKASanPoison(page); + return static_branch_unlikely(&deferred_pages); } /* Returns true if the struct page for the pfn is uninitialised */ @@ -463,11 +435,9 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn) return false; } #else -static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) +static inline bool deferred_pages_enabled(void) { - return (!IS_ENABLED(CONFIG_KASAN_GENERIC) && - (fpi_flags & FPI_SKIP_KASAN_POISON)) || - PageSkipKASanPoison(page); + return false; } static inline bool early_page_uninitialised(unsigned long pfn) @@ -1252,16 +1222,39 @@ out: return ret; } -static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags) +/* + * Skip KASAN memory poisoning when either: + * + * 1. Deferred memory initialization has not yet completed, + * see the explanation below. + * 2. Skipping poisoning is requested via FPI_SKIP_KASAN_POISON, + * see the comment next to it. + * 3. Skipping poisoning is requested via __GFP_SKIP_KASAN_POISON, + * see the comment next to it. + * + * Poisoning pages during deferred memory init will greatly lengthen the + * process and cause problem in large memory systems as the deferred pages + * initialization is done with interrupt disabled. + * + * Assuming that there will be no reference to those newly initialized + * pages before they are ever allocated, this should have no effect on + * KASAN memory tracking as the poison will be properly inserted at page + * allocation time. The only corner case is when pages are allocated by + * on-demand allocation and then freed again before the deferred pages + * initialization is done, but this is not likely to happen. + */ +static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags) +{ + return deferred_pages_enabled() || + (!IS_ENABLED(CONFIG_KASAN_GENERIC) && + (fpi_flags & FPI_SKIP_KASAN_POISON)) || + PageSkipKASanPoison(page); +} + +static void kernel_init_free_pages(struct page *page, int numpages) { int i; - if (zero_tags) { - for (i = 0; i < numpages; i++) - tag_clear_highpage(page + i); - return; - } - /* s390's use of memset() could override KASAN redzones. */ kasan_disable_current(); for (i = 0; i < numpages; i++) { @@ -1277,7 +1270,7 @@ static __always_inline bool free_pages_prepare(struct page *page, unsigned int order, bool check_free, fpi_t fpi_flags) { int bad = 0; - bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags); + bool init = want_init_on_free(); VM_BUG_ON_PAGE(PageTail(page), page); @@ -1342,23 +1335,21 @@ static __always_inline bool free_pages_prepare(struct page *page, /* * As memory initialization might be integrated into KASAN, - * kasan_free_pages and kernel_init_free_pages must be + * KASAN poisoning and memory initialization code must be * kept together to avoid discrepancies in behavior. * * With hardware tag-based KASAN, memory tags must be set before the * page becomes unavailable via debug_pagealloc or arch_free_page. */ - if (kasan_has_integrated_init()) { - if (!skip_kasan_poison) - kasan_free_pages(page, order); - } else { - bool init = want_init_on_free(); + if (!should_skip_kasan_poison(page, fpi_flags)) { + kasan_poison_pages(page, order, init); - if (init) - kernel_init_free_pages(page, 1 << order, false); - if (!skip_kasan_poison) - kasan_poison_pages(page, order, init); + /* Memory is already initialized if KASAN did it internally. */ + if (kasan_has_integrated_init()) + init = false; } + if (init) + kernel_init_free_pages(page, 1 << order); /* * arch_free_page() can make the page's contents inaccessible. s390 @@ -2350,9 +2341,43 @@ static bool check_new_pages(struct page *page, unsigned int order) return false; } +static inline bool should_skip_kasan_unpoison(gfp_t flags, bool init_tags) +{ + /* Don't skip if a software KASAN mode is enabled. */ + if (IS_ENABLED(CONFIG_KASAN_GENERIC) || + IS_ENABLED(CONFIG_KASAN_SW_TAGS)) + return false; + + /* Skip, if hardware tag-based KASAN is not enabled. */ + if (!kasan_hw_tags_enabled()) + return true; + + /* + * With hardware tag-based KASAN enabled, skip if either: + * + * 1. Memory tags have already been cleared via tag_clear_highpage(). + * 2. Skipping has been requested via __GFP_SKIP_KASAN_UNPOISON. + */ + return init_tags || (flags & __GFP_SKIP_KASAN_UNPOISON); +} + +static inline bool should_skip_init(gfp_t flags) +{ + /* Don't skip, if hardware tag-based KASAN is not enabled. */ + if (!kasan_hw_tags_enabled()) + return false; + + /* For hardware tag-based KASAN, skip if requested. */ + return (flags & __GFP_SKIP_ZERO); +} + inline void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags) { + bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags) && + !should_skip_init(gfp_flags); + bool init_tags = init && (gfp_flags & __GFP_ZEROTAGS); + set_page_private(page, 0); set_page_refcounted(page); @@ -2368,19 +2393,38 @@ inline void post_alloc_hook(struct page *page, unsigned int order, /* * As memory initialization might be integrated into KASAN, - * kasan_alloc_pages and kernel_init_free_pages must be + * KASAN unpoisoning and memory initializion code must be * kept together to avoid discrepancies in behavior. */ - if (kasan_has_integrated_init()) { - kasan_alloc_pages(page, order, gfp_flags); - } else { - bool init = !want_init_on_free() && want_init_on_alloc(gfp_flags); + /* + * If memory tags should be zeroed (which happens only when memory + * should be initialized as well). + */ + if (init_tags) { + int i; + + /* Initialize both memory and tags. */ + for (i = 0; i != 1 << order; ++i) + tag_clear_highpage(page + i); + + /* Note that memory is already initialized by the loop above. */ + init = false; + } + if (!should_skip_kasan_unpoison(gfp_flags, init_tags)) { + /* Unpoison shadow memory or set memory tags. */ kasan_unpoison_pages(page, order, init); - if (init) - kernel_init_free_pages(page, 1 << order, - gfp_flags & __GFP_ZEROTAGS); + + /* Note that memory is already initialized by KASAN. */ + if (kasan_has_integrated_init()) + init = false; } + /* If memory is still not initialized, do it now. */ + if (init) + kernel_init_free_pages(page, 1 << order); + /* Propagate __GFP_SKIP_KASAN_POISON to page flags. */ + if (kasan_hw_tags_enabled() && (gfp_flags & __GFP_SKIP_KASAN_POISON)) + SetPageSkipKASanPoison(page); set_page_owner(page, order, gfp_flags); } @@ -3306,19 +3350,20 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn) __count_vm_event(PGFREE); /* - * We only track unmovable, reclaimable and movable on pcp lists. + * We only track unmovable, reclaimable, movable and cma on pcp lists. * Free ISOLATE pages back to the allocator because they are being * offlined but treat HIGHATOMIC as movable pages so we can get those * areas back if necessary. Otherwise, we may have to free * excessively into the page allocator */ - if (migratetype >= MIGRATE_PCPTYPES) { + if (migratetype > MIGRATE_RECLAIMABLE) { if (unlikely(is_migrate_isolate(migratetype))) { free_one_page(zone, page, pfn, 0, migratetype, FPI_NONE); return; } - migratetype = MIGRATE_MOVABLE; + if (migratetype == MIGRATE_HIGHATOMIC) + migratetype = MIGRATE_MOVABLE; } pcp = &this_cpu_ptr(zone->pageset)->pcp; @@ -4118,7 +4163,9 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...) va_list args; static DEFINE_RATELIMIT_STATE(nopage_rs, 10*HZ, 1); - if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs)) + if ((gfp_mask & __GFP_NOWARN) || + !__ratelimit(&nopage_rs) || + ((gfp_mask & __GFP_DMA) && !has_managed_dma())) return; va_start(args, fmt); @@ -4476,13 +4523,12 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, const struct alloc_context *ac) { unsigned int noreclaim_flag; - unsigned long pflags, progress; + unsigned long progress; cond_resched(); /* We now go into synchronous reclaim */ cpuset_memory_pressure_bump(); - psi_memstall_enter(&pflags); fs_reclaim_acquire(gfp_mask); noreclaim_flag = memalloc_noreclaim_save(); @@ -4491,7 +4537,6 @@ __perform_reclaim(gfp_t gfp_mask, unsigned int order, memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(gfp_mask); - psi_memstall_leave(&pflags); cond_resched(); @@ -4505,11 +4550,13 @@ __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order, unsigned long *did_some_progress) { struct page *page = NULL; + unsigned long pflags; bool drained = false; + psi_memstall_enter(&pflags); *did_some_progress = __perform_reclaim(gfp_mask, order, ac); if (unlikely(!(*did_some_progress))) - return NULL; + goto out; retry: page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac); @@ -4525,6 +4572,8 @@ retry: drained = true; goto retry; } +out: + psi_memstall_leave(&pflags); return page; } @@ -8005,7 +8054,6 @@ static void setup_per_zone_lowmem_reserve(void) static void __setup_per_zone_wmarks(void) { unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10); - unsigned long pages_low = extra_free_kbytes >> (PAGE_SHIFT - 10); unsigned long lowmem_pages = 0; struct zone *zone; unsigned long flags; @@ -8017,13 +8065,11 @@ static void __setup_per_zone_wmarks(void) } for_each_zone(zone) { - u64 tmp, low; + u64 tmp; spin_lock_irqsave(&zone->lock, flags); tmp = (u64)pages_min * zone_managed_pages(zone); do_div(tmp, lowmem_pages); - low = (u64)pages_low * zone_managed_pages(zone); - do_div(low, nr_free_zone_pages(gfp_zone(GFP_HIGHUSER_MOVABLE))); if (is_highmem(zone)) { /* * __GFP_HIGH and PF_MEMALLOC allocations usually don't @@ -8057,8 +8103,8 @@ static void __setup_per_zone_wmarks(void) watermark_scale_factor, 10000)); zone->watermark_boost = 0; - zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + low + tmp; - zone->_watermark[WMARK_HIGH] = min_wmark_pages(zone) + low + tmp * 2; + zone->_watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp; + zone->_watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2; spin_unlock_irqrestore(&zone->lock, flags); } @@ -8143,7 +8189,7 @@ postcore_initcall(init_per_zone_wmark_min) /* * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so * that we can call two helper functions whenever min_free_kbytes - * or extra_free_kbytes changes. + * changes. */ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write, void *buffer, size_t *length, loff_t *ppos) @@ -8670,7 +8716,12 @@ static int __alloc_contig_migrate_range(struct compact_control *cc, if (ret < 0) { if (ret == -EBUSY) { alloc_contig_dump_pages(&cc->migratepages); - page_pinner_mark_migration_failed_pages(&cc->migratepages); + list_for_each_entry(page, &cc->migratepages, lru) { + /* The page will be freed by putback_movable_pages soon */ + if (page_count(page) == 1) + continue; + page_pinner_failure_detect(page); + } } if (!list_empty(&cc->migratepages)) { @@ -9128,3 +9179,18 @@ bool take_page_off_buddy(struct page *page) return ret; } #endif + +#ifdef CONFIG_ZONE_DMA +bool has_managed_dma(void) +{ + struct pglist_data *pgdat; + + for_each_online_pgdat(pgdat) { + struct zone *zone = &pgdat->node_zones[ZONE_DMA]; + + if (managed_zone(zone)) + return true; + } + return false; +} +#endif /* CONFIG_ZONE_DMA */ diff --git a/mm/page_ext.c b/mm/page_ext.c index 7e44726b3549511bda09ef2d66cb35efd3c0208a..e5e31ff1adba5cf1751b420e4ab634c50fa22b1d 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -58,11 +58,21 @@ * can utilize this callback to initialize the state of it correctly. */ +#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) +static bool need_page_idle(void) +{ + return true; +} +struct page_ext_operations page_idle_ops = { + .need = need_page_idle, +}; +#endif + static struct page_ext_operations *page_ext_ops[] = { #ifdef CONFIG_PAGE_OWNER &page_owner_ops, #endif -#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT) +#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT) &page_idle_ops, #endif #ifdef CONFIG_PAGE_PINNER diff --git a/mm/page_idle.c b/mm/page_idle.c index 057c61df12dba277af673ee2c5e68ab6232fa9a6..144fb4ed961d79f3a6377d37ebabab69e1cf2398 100644 --- a/mm/page_idle.c +++ b/mm/page_idle.c @@ -211,16 +211,6 @@ static const struct attribute_group page_idle_attr_group = { .name = "page_idle", }; -#ifndef CONFIG_64BIT -static bool need_page_idle(void) -{ - return true; -} -struct page_ext_operations page_idle_ops = { - .need = need_page_idle, -}; -#endif - static int __init page_idle_init(void) { int err; diff --git a/mm/page_pinner.c b/mm/page_pinner.c index 3f5c48af3135271f3e6109d94e002eb6b22a65ba..b81d48fa65fdb9dc6ce414931b353737029e74bd 100644 --- a/mm/page_pinner.c +++ b/mm/page_pinner.c @@ -15,19 +15,25 @@ #include "internal.h" #define PAGE_PINNER_STACK_DEPTH 16 -#define LONGTERM_PIN_BUCKETS 4096 +static unsigned long pp_buf_size = 4096; struct page_pinner { depot_stack_handle_t handle; - s64 ts_usec; + u64 ts_usec; atomic_t count; }; +enum pp_state { + PP_PUT, + PP_FREE, + PP_FAIL_DETECTED, +}; + struct captured_pinner { depot_stack_handle_t handle; union { - s64 ts_usec; - s64 elapsed; + u64 ts_usec; + u64 elapsed; }; /* struct page fields */ @@ -36,24 +42,17 @@ struct captured_pinner { int mapcount; struct address_space *mapping; unsigned long flags; + enum pp_state state; }; -struct longterm_pinner { +struct page_pinner_buffer { spinlock_t lock; - unsigned int index; - struct captured_pinner pinner[LONGTERM_PIN_BUCKETS]; + unsigned long index; + struct captured_pinner *buffer; }; -static struct longterm_pinner lt_pinner = { - .lock = __SPIN_LOCK_UNLOCKED(lt_pinner.lock), -}; - -static s64 threshold_usec = 300000; - /* alloc_contig failed pinner */ -static struct longterm_pinner acf_pinner = { - .lock = __SPIN_LOCK_UNLOCKED(acf_pinner.lock), -}; +static struct page_pinner_buffer pp_buffer; static bool page_pinner_enabled; DEFINE_STATIC_KEY_FALSE(page_pinner_inited); @@ -128,96 +127,61 @@ static void capture_page_state(struct page *page, record->mapcount = page_mapcount(page); } -static void check_longterm_pin(struct page_pinner *page_pinner, - struct page *page) +static void add_record(struct page_pinner_buffer *pp_buf, + struct captured_pinner *record) { - s64 now, delta = 0; unsigned long flags; unsigned int idx; - struct captured_pinner record; - - now = ktime_to_us(ktime_get_boottime()); - - /* get/put_page can be raced. Ignore that case */ - if (page_pinner->ts_usec < now) - delta = now - page_pinner->ts_usec; - if (delta <= threshold_usec) - return; - - record.handle = page_pinner->handle; - record.elapsed = delta; - capture_page_state(page, &record); - - spin_lock_irqsave(<_pinner.lock, flags); - idx = lt_pinner.index++; - lt_pinner.index %= LONGTERM_PIN_BUCKETS; - lt_pinner.pinner[idx] = record; - spin_unlock_irqrestore(<_pinner.lock, flags); + spin_lock_irqsave(&pp_buf->lock, flags); + idx = pp_buf->index++; + pp_buf->index %= pp_buf_size; + pp_buf->buffer[idx] = *record; + spin_unlock_irqrestore(&pp_buf->lock, flags); } -void __reset_page_pinner(struct page *page, unsigned int order, bool free) +void __free_page_pinner(struct page *page, unsigned int order) { struct page_pinner *page_pinner; struct page_ext *page_ext; int i; + /* free_page could be called before buffer is initialized */ + if (!pp_buffer.buffer) + return; + page_ext = lookup_page_ext(page); if (unlikely(!page_ext)) return; for (i = 0; i < (1 << order); i++) { - if (!test_bit(PAGE_EXT_GET, &page_ext->flags) && - !test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, - &page_ext->flags)) + struct captured_pinner record; + + if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags)) continue; page_pinner = get_page_pinner(page_ext); - if (free) { - /* record page free call path */ - __page_pinner_migration_failed(page); - atomic_set(&page_pinner->count, 0); - __clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags); - } else { - check_longterm_pin(page_pinner, page); - } - clear_bit(PAGE_EXT_GET, &page_ext->flags); - page_ext = page_ext_next(page_ext); - } -} + /* record page free call path */ + page_ext = lookup_page_ext(page); + if (unlikely(!page_ext)) + continue; -static inline void __set_page_pinner_handle(struct page *page, - struct page_ext *page_ext, depot_stack_handle_t handle, - unsigned int order) -{ - struct page_pinner *page_pinner; - int i; - s64 usec = ktime_to_us(ktime_get_boottime()); + record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN); + record.ts_usec = (u64)ktime_to_us(ktime_get_boottime()); + record.state = PP_FREE; + capture_page_state(page, &record); - for (i = 0; i < (1 << order); i++) { - page_pinner = get_page_pinner(page_ext); - page_pinner->handle = handle; - page_pinner->ts_usec = usec; - set_bit(PAGE_EXT_GET, &page_ext->flags); - atomic_inc(&page_pinner->count); + add_record(&pp_buffer, &record); + + atomic_set(&page_pinner->count, 0); + page_pinner->ts_usec = 0; + clear_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags); page_ext = page_ext_next(page_ext); } } -noinline void __set_page_pinner(struct page *page, unsigned int order) -{ - struct page_ext *page_ext = lookup_page_ext(page); - depot_stack_handle_t handle; - - if (unlikely(!page_ext)) - return; - - handle = save_stack(GFP_NOWAIT|__GFP_NOWARN); - __set_page_pinner_handle(page, page_ext, handle, order); -} - static ssize_t -print_page_pinner(bool longterm, char __user *buf, size_t count, struct captured_pinner *record) +print_page_pinner(char __user *buf, size_t count, struct captured_pinner *record) { int ret; unsigned long *entries; @@ -229,15 +193,17 @@ print_page_pinner(bool longterm, char __user *buf, size_t count, struct captured if (!kbuf) return -ENOMEM; - if (longterm) { - ret = snprintf(kbuf, count, "Page pinned for %lld us\n", + if (record->state == PP_PUT) { + ret = snprintf(kbuf, count, "At least, pinned for %llu us\n", record->elapsed); } else { - s64 ts_usec = record->ts_usec; + u64 ts_usec = record->ts_usec; unsigned long rem_usec = do_div(ts_usec, 1000000); ret = snprintf(kbuf, count, - "Page pinned ts [%5lu.%06lu]\n", + "%s [%5lu.%06lu]\n", + record->state == PP_FREE ? "Freed at" : + "Failure detected at", (unsigned long)ts_usec, rem_usec); } @@ -277,137 +243,66 @@ err: return -ENOMEM; } -void __dump_page_pinner(struct page *page) +void __page_pinner_failure_detect(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page); struct page_pinner *page_pinner; - depot_stack_handle_t handle; - unsigned long *entries; - unsigned int nr_entries; - int pageblock_mt; - unsigned long pfn; - int count; - unsigned long rem_usec; - s64 ts_usec; + struct captured_pinner record; + u64 now; - if (unlikely(!page_ext)) { - pr_alert("There is not page extension available.\n"); + if (unlikely(!page_ext)) return; - } - page_pinner = get_page_pinner(page_ext); - - count = atomic_read(&page_pinner->count); - if (!count) { - pr_alert("page_pinner info is not present (never set?)\n"); + if (test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags)) return; - } - pfn = page_to_pfn(page); - ts_usec = page_pinner->ts_usec; - rem_usec = do_div(ts_usec, 1000000); - pr_alert("page last pinned %5lu.%06lu] count %d\n", - (unsigned long)ts_usec, rem_usec, count); - - pageblock_mt = get_pageblock_migratetype(page); - pr_alert("PFN %lu Block %lu type %s Flags %#lx(%pGp)\n", - pfn, - pfn >> pageblock_order, - migratetype_names[pageblock_mt], - page->flags, &page->flags); - - handle = READ_ONCE(page_pinner->handle); - if (!handle) { - pr_alert("page_pinner allocation stack trace missing\n"); - } else { - nr_entries = stack_depot_fetch(handle, &entries); - stack_trace_print(entries, nr_entries, 0); - } + now = (u64)ktime_to_us(ktime_get_boottime()); + page_pinner = get_page_pinner(page_ext); + if (!page_pinner->ts_usec) + page_pinner->ts_usec = now; + set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags); + record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN); + record.ts_usec = now; + record.state = PP_FAIL_DETECTED; + capture_page_state(page, &record); + + add_record(&pp_buffer, &record); } +EXPORT_SYMBOL_GPL(__page_pinner_failure_detect); -void __page_pinner_migration_failed(struct page *page) +void __page_pinner_put_page(struct page *page) { struct page_ext *page_ext = lookup_page_ext(page); struct page_pinner *page_pinner; struct captured_pinner record; - unsigned long flags; - unsigned int idx; + u64 now, ts_usec; if (unlikely(!page_ext)) return; - page_pinner = get_page_pinner(page_ext); if (!test_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags)) return; + page_pinner = get_page_pinner(page_ext); record.handle = save_stack(GFP_NOWAIT|__GFP_NOWARN); - record.ts_usec = ktime_to_us(ktime_get_boottime()); - capture_page_state(page, &record); - - spin_lock_irqsave(&acf_pinner.lock, flags); - idx = acf_pinner.index++; - acf_pinner.index %= LONGTERM_PIN_BUCKETS; - acf_pinner.pinner[idx] = record; - spin_unlock_irqrestore(&acf_pinner.lock, flags); -} -EXPORT_SYMBOL_GPL(__page_pinner_migration_failed); - -void __page_pinner_mark_migration_failed_pages(struct list_head *page_list) -{ - struct page *page; - struct page_ext *page_ext; - - list_for_each_entry(page, page_list, lru) { - /* The page will be freed by putback_movable_pages soon */ - if (page_count(page) == 1) - continue; - page_ext = lookup_page_ext(page); - if (unlikely(!page_ext)) - continue; - __set_bit(PAGE_EXT_PINNER_MIGRATION_FAILED, &page_ext->flags); - __page_pinner_migration_failed(page); - } -} - -static ssize_t -read_longterm_page_pinner(struct file *file, char __user *buf, size_t count, - loff_t *ppos) -{ - loff_t i, idx; - struct captured_pinner record; - unsigned long flags; - - if (!static_branch_unlikely(&page_pinner_inited)) - return -EINVAL; - - if (*ppos >= LONGTERM_PIN_BUCKETS) - return 0; - - i = *ppos; - *ppos = i + 1; + now = (u64)ktime_to_us(ktime_get_boottime()); + ts_usec = page_pinner->ts_usec; - /* - * reading the records in the reverse order with newest one - * being read first followed by older ones - */ - idx = (lt_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) % - LONGTERM_PIN_BUCKETS; - spin_lock_irqsave(<_pinner.lock, flags); - record = lt_pinner.pinner[idx]; - spin_unlock_irqrestore(<_pinner.lock, flags); - if (!record.handle) - return 0; + if (now > ts_usec) + record.elapsed = now - ts_usec; + else + record.elapsed = 0; + record.state = PP_PUT; + capture_page_state(page, &record); - return print_page_pinner(true, buf, count, &record); + add_record(&pp_buffer, &record); } +EXPORT_SYMBOL_GPL(__page_pinner_put_page); -static const struct file_operations proc_longterm_pinner_operations = { - .read = read_longterm_page_pinner, -}; - -static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf, +static ssize_t read_buffer(struct file *file, char __user *buf, size_t count, loff_t *ppos) { + u64 tmp; loff_t i, idx; struct captured_pinner record; unsigned long flags; @@ -415,7 +310,7 @@ static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf, if (!static_branch_unlikely(&failure_tracking)) return -EINVAL; - if (*ppos >= LONGTERM_PIN_BUCKETS) + if (*ppos >= pp_buf_size) return 0; i = *ppos; @@ -425,45 +320,22 @@ static ssize_t read_alloc_contig_failed(struct file *file, char __user *buf, * reading the records in the reverse order with newest one * being read first followed by older ones */ - idx = (acf_pinner.index - 1 - i + LONGTERM_PIN_BUCKETS) % - LONGTERM_PIN_BUCKETS; + tmp = pp_buffer.index - 1 - i + pp_buf_size; + idx = do_div(tmp, pp_buf_size); - spin_lock_irqsave(&acf_pinner.lock, flags); - record = acf_pinner.pinner[idx]; - spin_unlock_irqrestore(&acf_pinner.lock, flags); + spin_lock_irqsave(&pp_buffer.lock, flags); + record = pp_buffer.buffer[idx]; + spin_unlock_irqrestore(&pp_buffer.lock, flags); if (!record.handle) return 0; - return print_page_pinner(false, buf, count, &record); + return print_page_pinner(buf, count, &record); } -static const struct file_operations proc_alloc_contig_failed_operations = { - .read = read_alloc_contig_failed, +static const struct file_operations proc_buffer_operations = { + .read = read_buffer, }; -static int pp_threshold_set(void *data, unsigned long long val) -{ - unsigned long flags; - - threshold_usec = (s64)val; - - spin_lock_irqsave(<_pinner.lock, flags); - memset(lt_pinner.pinner, 0, - sizeof(struct captured_pinner) * LONGTERM_PIN_BUCKETS); - lt_pinner.index = 0; - spin_unlock_irqrestore(<_pinner.lock, flags); - return 0; -} - -static int pp_threshold_get(void *data, unsigned long long *val) -{ - *val = (unsigned long long)threshold_usec; - - return 0; -} -DEFINE_DEBUGFS_ATTRIBUTE(pp_threshold_fops, pp_threshold_get, - pp_threshold_set, "%lld\n"); - static int failure_tracking_set(void *data, u64 val) { bool on; @@ -485,6 +357,35 @@ DEFINE_DEBUGFS_ATTRIBUTE(failure_tracking_fops, failure_tracking_get, failure_tracking_set, "%llu\n"); +static int buffer_size_set(void *data, u64 val) +{ + unsigned long flags; + struct captured_pinner *new, *old; + + new = kvmalloc_array(val, sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + + spin_lock_irqsave(&pp_buffer.lock, flags); + old = pp_buffer.buffer; + pp_buffer.buffer = new; + pp_buffer.index = 0; + pp_buf_size = val; + spin_unlock_irqrestore(&pp_buffer.lock, flags); + kvfree(old); + + return 0; +} + +static int buffer_size_get(void *data, u64 *val) +{ + *val = pp_buf_size; + return 0; +} +DEFINE_DEBUGFS_ATTRIBUTE(buffer_size_fops, + buffer_size_get, + buffer_size_set, "%llu\n"); + static int __init page_pinner_init(void) { struct dentry *pp_debugfs_root; @@ -492,23 +393,31 @@ static int __init page_pinner_init(void) if (!static_branch_unlikely(&page_pinner_inited)) return 0; - pr_info("page_pinner enabled\n"); + pp_buffer.buffer = kvmalloc_array(pp_buf_size, sizeof(*pp_buffer.buffer), + GFP_KERNEL); + if (!pp_buffer.buffer) { + pr_info("page_pinner disabled due to \n"); + return 1; + } - pp_debugfs_root = debugfs_create_dir("page_pinner", NULL); + spin_lock_init(&pp_buffer.lock); + pp_buffer.index = 0; - debugfs_create_file("longterm_pinner", 0444, pp_debugfs_root, NULL, - &proc_longterm_pinner_operations); + pr_info("page_pinner enabled\n"); - debugfs_create_file("threshold", 0644, pp_debugfs_root, NULL, - &pp_threshold_fops); + pp_debugfs_root = debugfs_create_dir("page_pinner", NULL); - debugfs_create_file("alloc_contig_failed", 0444, + debugfs_create_file("buffer", 0444, pp_debugfs_root, NULL, - &proc_alloc_contig_failed_operations); + &proc_buffer_operations); debugfs_create_file("failure_tracking", 0644, pp_debugfs_root, NULL, &failure_tracking_fops); + + debugfs_create_file("buffer_size", 0644, + pp_debugfs_root, NULL, + &buffer_size_fops); return 0; } late_initcall(page_pinner_init) diff --git a/mm/rmap.c b/mm/rmap.c index 4fdbda090c2c2948260ca3e9cf79d149efbb79fa..52291f59b0b45ee5ab3908a157ebbb80d336cf8a 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -72,6 +72,7 @@ #include #include #include +#include #include @@ -782,6 +783,12 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma, } if (pvmw.pte) { + if (lru_gen_enabled() && pte_young(*pvmw.pte) && + !(vma->vm_flags & (VM_SEQ_READ | VM_RAND_READ))) { + lru_gen_look_around(&pvmw); + referenced++; + } + if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) { /* diff --git a/mm/shmem.c b/mm/shmem.c index dc551017eed6affeb4ff7598977e8a186e13a075..364703bd305ed1a4b6a1fb58f4fa6a956fc3605a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -533,7 +533,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, struct shmem_inode_info *info; struct page *page; unsigned long batch = sc ? sc->nr_to_scan : 128; - int removed = 0, split = 0; + int split = 0; if (list_empty(&sbinfo->shrinklist)) return SHRINK_STOP; @@ -548,7 +548,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, /* inode is about to be evicted */ if (!inode) { list_del_init(&info->shrinklist); - removed++; goto next; } @@ -556,12 +555,12 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, if (round_up(inode->i_size, PAGE_SIZE) == round_up(inode->i_size, HPAGE_PMD_SIZE)) { list_move(&info->shrinklist, &to_remove); - removed++; goto next; } list_move(&info->shrinklist, &list); next: + sbinfo->shrinklist_len--; if (!--batch) break; } @@ -581,7 +580,7 @@ next: inode = &info->vfs_inode; if (nr_to_split && split >= nr_to_split) - goto leave; + goto move_back; page = find_get_page(inode->i_mapping, (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT); @@ -595,38 +594,44 @@ next: } /* - * Leave the inode on the list if we failed to lock - * the page at this time. + * Move the inode on the list back to shrinklist if we failed + * to lock the page at this time. * * Waiting for the lock may lead to deadlock in the * reclaim path. */ if (!trylock_page(page)) { put_page(page); - goto leave; + goto move_back; } ret = split_huge_page(page); unlock_page(page); put_page(page); - /* If split failed leave the inode on the list */ + /* If split failed move the inode on the list back to shrinklist */ if (ret) - goto leave; + goto move_back; split++; drop: list_del_init(&info->shrinklist); - removed++; -leave: + goto put; +move_back: + /* + * Make sure the inode is either on the global list or deleted + * from any local list before iput() since it could be deleted + * in another thread once we put the inode (then the local list + * is corrupted). + */ + spin_lock(&sbinfo->shrinklist_lock); + list_move(&info->shrinklist, &sbinfo->shrinklist); + sbinfo->shrinklist_len++; + spin_unlock(&sbinfo->shrinklist_lock); +put: iput(inode); } - spin_lock(&sbinfo->shrinklist_lock); - list_splice_tail(&list, &sbinfo->shrinklist); - sbinfo->shrinklist_len -= removed; - spin_unlock(&sbinfo->shrinklist_lock); - return split; } diff --git a/mm/slab.h b/mm/slab.h index 5de57f34a0e6bc483679483de2a6768ccddc5ba4..4738d19b6df6753fdf2ce4ac1ff2a36b361b946e 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -169,7 +169,7 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size, #define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \ SLAB_TEMPORARY | SLAB_ACCOUNT) #else -#define SLAB_CACHE_FLAGS (0) +#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE) #endif /* Common flags available with current configuration */ diff --git a/mm/slab_common.c b/mm/slab_common.c index 54a4be9a862a66833e9c95c0757b6ff737ececde..d7550c9efb1a2a0ae9c4590ff6e5a77059afae4b 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -488,7 +488,7 @@ void kmem_cache_destroy(struct kmem_cache *s) { int err; - if (unlikely(!s)) + if (unlikely(!s) || !kasan_check_byte(s)) return; get_online_cpus(); diff --git a/mm/slub.c b/mm/slub.c index e7ec933a6de76f31524872d5dd71a104beec4aa7..3db21d3ed9a28dc8a8b778ebfb97a66cf6d2c6be 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -535,8 +535,8 @@ static void print_section(char *level, char *text, u8 *addr, unsigned int length) { metadata_access_enable(); - print_hex_dump(level, kasan_reset_tag(text), DUMP_PREFIX_ADDRESS, - 16, 1, addr, length, 1); + print_hex_dump(level, text, DUMP_PREFIX_ADDRESS, + 16, 1, kasan_reset_tag((void *)addr), length, 1); metadata_access_disable(); } @@ -593,7 +593,9 @@ unsigned long get_each_object_track(struct kmem_cache *s, slab_lock(page); for_each_object(p, s, page_address(page), page->objects) { t = get_track(s, p, alloc); + metadata_access_enable(); ret = fn(s, p, t, private); + metadata_access_disable(); if (ret < 0) break; num_track += 1; @@ -1594,7 +1596,8 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, } static inline bool slab_free_freelist_hook(struct kmem_cache *s, - void **head, void **tail) + void **head, void **tail, + int *cnt) { void *object; @@ -1621,6 +1624,12 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s, *head = object; if (!*tail) *tail = object; + } else { + /* + * Adjust the reconstructed freelist depth + * accordingly if object's reuse is delayed. + */ + --(*cnt); } } while (object != old_tail); @@ -3145,7 +3154,9 @@ static __always_inline void do_slab_free(struct kmem_cache *s, struct kmem_cache_cpu *c; unsigned long tid; - memcg_slab_free_hook(s, &head, 1); + /* memcg_slab_free_hook() is already called for bulk free. */ + if (!tail) + memcg_slab_free_hook(s, &head, 1); redo: /* * Determine the currently cpus per cpu slab. @@ -3189,7 +3200,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page, * With KASAN enabled slab_free_freelist_hook modifies the freelist * to remove objects, whose reuse must be delayed. */ - if (slab_free_freelist_hook(s, &head, &tail)) + if (slab_free_freelist_hook(s, &head, &tail, &cnt)) do_slab_free(s, page, head, tail, cnt, addr); } @@ -3885,8 +3896,8 @@ static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags) if (alloc_kmem_cache_cpus(s)) return 0; - free_kmem_cache_nodes(s); error: + __kmem_cache_release(s); return -EINVAL; } @@ -4498,13 +4509,15 @@ int __kmem_cache_create(struct kmem_cache *s, slab_flags_t flags) return 0; err = sysfs_slab_add(s); - if (err) + if (err) { __kmem_cache_release(s); + return err; + } if (s->flags & SLAB_STORE_USER) debugfs_slab_add(s); - return err; + return 0; } void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) @@ -4667,6 +4680,7 @@ struct loc_track { unsigned long max; unsigned long count; struct location *loc; + loff_t idx; }; static struct dentry *slab_debugfs_root; @@ -5698,11 +5712,11 @@ __initcall(slab_sysfs_init); #if defined(CONFIG_SLUB_DEBUG) && defined(CONFIG_DEBUG_FS) static int slab_debugfs_show(struct seq_file *seq, void *v) { - - struct location *l; - unsigned int idx = *(unsigned int *)v; struct loc_track *t = seq->private; + struct location *l; + unsigned long idx; + idx = (unsigned long) t->idx; if (idx < t->count) { l = &t->loc[idx]; @@ -5751,16 +5765,18 @@ static void *slab_debugfs_next(struct seq_file *seq, void *v, loff_t *ppos) { struct loc_track *t = seq->private; - v = ppos; - ++*ppos; + t->idx = ++(*ppos); if (*ppos <= t->count) - return v; + return ppos; return NULL; } static void *slab_debugfs_start(struct seq_file *seq, loff_t *ppos) { + struct loc_track *t = seq->private; + + t->idx = *ppos; return ppos; } @@ -5781,13 +5797,18 @@ static int slab_debug_trace_open(struct inode *inode, struct file *filep) sizeof(struct loc_track)); struct kmem_cache *s = file_inode(filep)->i_private; + if (!t) + return -ENOMEM; + if (strcmp(filep->f_path.dentry->d_name.name, "alloc_traces") == 0) alloc = TRACK_ALLOC; else alloc = TRACK_FREE; - if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) + if (!alloc_loc_track(t, PAGE_SIZE / sizeof(struct location), GFP_KERNEL)) { + seq_release_private(inode, filep); return -ENOMEM; + } /* Push back cpu slabs */ flush_all(s); diff --git a/mm/swap.c b/mm/swap.c index 8d5c61de5a6ebe8c15707d4df576934ded33a1a0..2f69e44ca7374bed550d888a601b98d557047d70 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -87,9 +87,8 @@ static void __page_cache_release(struct page *page) spin_lock_irqsave(&pgdat->lru_lock, flags); lruvec = mem_cgroup_page_lruvec(page, pgdat); - VM_BUG_ON_PAGE(!PageLRU(page), page); - __ClearPageLRU(page); - del_page_from_lru_list(page, lruvec, page_off_lru(page)); + del_page_from_lru_list(page, lruvec); + __clear_page_lru_flags(page); spin_unlock_irqrestore(&pgdat->lru_lock, flags); } __ClearPageWaiters(page); @@ -240,9 +239,9 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec, int *pgmoved = arg; if (PageLRU(page) && !PageUnevictable(page)) { - del_page_from_lru_list(page, lruvec, page_lru(page)); + del_page_from_lru_list(page, lruvec); ClearPageActive(page); - add_page_to_lru_list_tail(page, lruvec, page_lru(page)); + add_page_to_lru_list_tail(page, lruvec); (*pgmoved) += thp_nr_pages(page); } } @@ -333,13 +332,11 @@ static void __activate_page(struct page *page, struct lruvec *lruvec, void *arg) { if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { - int lru = page_lru_base_type(page); int nr_pages = thp_nr_pages(page); - del_page_from_lru_list(page, lruvec, lru); + del_page_from_lru_list(page, lruvec); SetPageActive(page); - lru += LRU_ACTIVE; - add_page_to_lru_list(page, lruvec, lru); + add_page_to_lru_list(page, lruvec); trace_mm_lru_activate(page); __count_vm_events(PGACTIVATE, nr_pages); @@ -362,7 +359,7 @@ static bool need_activate_page_drain(int cpu) return pagevec_count(&per_cpu(lru_pvecs.activate_page, cpu)) != 0; } -static void activate_page(struct page *page) +void activate_page(struct page *page) { page = compound_head(page); if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) { @@ -382,7 +379,7 @@ static inline void activate_page_drain(int cpu) { } -static void activate_page(struct page *page) +void activate_page(struct page *page) { pg_data_t *pgdat = page_pgdat(page); @@ -423,6 +420,43 @@ static void __lru_cache_activate_page(struct page *page) local_unlock(&lru_pvecs.lock); } +#ifdef CONFIG_LRU_GEN +static void page_inc_refs(struct page *page) +{ + unsigned long refs; + unsigned long old_flags, new_flags; + + if (PageUnevictable(page)) + return; + + /* see the comment on MAX_NR_TIERS */ + do { + new_flags = old_flags = READ_ONCE(page->flags); + + if (!(new_flags & BIT(PG_referenced))) { + new_flags |= BIT(PG_referenced); + continue; + } + + if (!(new_flags & BIT(PG_workingset))) { + new_flags |= BIT(PG_workingset); + continue; + } + + refs = new_flags & LRU_REFS_MASK; + refs = min(refs + BIT(LRU_REFS_PGOFF), LRU_REFS_MASK); + + new_flags &= ~LRU_REFS_MASK; + new_flags |= refs; + } while (new_flags != old_flags && + cmpxchg(&page->flags, old_flags, new_flags) != old_flags); +} +#else +static void page_inc_refs(struct page *page) +{ +} +#endif /* CONFIG_LRU_GEN */ + /* * Mark a page as having seen activity. * @@ -437,6 +471,11 @@ void mark_page_accessed(struct page *page) { page = compound_head(page); + if (lru_gen_enabled()) { + page_inc_refs(page); + return; + } + if (!PageReferenced(page)) { SetPageReferenced(page); } else if (PageUnevictable(page)) { @@ -480,6 +519,11 @@ void lru_cache_add(struct page *page) VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); VM_BUG_ON_PAGE(PageLRU(page), page); + /* see the comment in lru_gen_add_page() */ + if (lru_gen_enabled() && !PageUnevictable(page) && + lru_gen_in_fault() && !(current->flags & PF_MEMALLOC)) + SetPageActive(page); + get_page(page); local_lock(&lru_pvecs.lock); pvec = this_cpu_ptr(&lru_pvecs.lru_add); @@ -542,8 +586,7 @@ void __lru_cache_add_inactive_or_unevictable(struct page *page, static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, void *arg) { - int lru; - bool active; + bool active = PageActive(page); int nr_pages = thp_nr_pages(page); if (!PageLRU(page)) @@ -556,10 +599,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, if (page_mapped(page)) return; - active = PageActive(page); - lru = page_lru_base_type(page); - - del_page_from_lru_list(page, lruvec, lru + active); + del_page_from_lru_list(page, lruvec); ClearPageActive(page); ClearPageReferenced(page); @@ -569,14 +609,14 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, * It can make readahead confusing. But race window * is _really_ small and it's non-critical problem. */ - add_page_to_lru_list(page, lruvec, lru); + add_page_to_lru_list(page, lruvec); SetPageReclaim(page); } else { /* * The page's writeback ends up during pagevec * We moves tha page into tail of inactive. */ - add_page_to_lru_list_tail(page, lruvec, lru); + add_page_to_lru_list_tail(page, lruvec); __count_vm_events(PGROTATED, nr_pages); } @@ -590,14 +630,13 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec, static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec, void *arg) { - if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { - int lru = page_lru_base_type(page); + if (PageLRU(page) && !PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) { int nr_pages = thp_nr_pages(page); - del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE); + del_page_from_lru_list(page, lruvec); ClearPageActive(page); ClearPageReferenced(page); - add_page_to_lru_list(page, lruvec, lru); + add_page_to_lru_list(page, lruvec); __count_vm_events(PGDEACTIVATE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE, @@ -610,11 +649,9 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, { if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page) && !PageUnevictable(page)) { - bool active = PageActive(page); int nr_pages = thp_nr_pages(page); - del_page_from_lru_list(page, lruvec, - LRU_INACTIVE_ANON + active); + del_page_from_lru_list(page, lruvec); ClearPageActive(page); ClearPageReferenced(page); /* @@ -623,7 +660,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec, * anonymous pages */ ClearPageSwapBacked(page); - add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE); + add_page_to_lru_list(page, lruvec); __count_vm_events(PGLAZYFREE, nr_pages); __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE, @@ -638,16 +675,13 @@ static void lru_lazyfree_movetail_fn(struct page *page, struct lruvec *lruvec, if (PageLRU(page) && !PageUnevictable(page) && PageSwapBacked(page) && !PageSwapCache(page)) { - bool active = PageActive(page); - - del_page_from_lru_list(page, lruvec, - LRU_INACTIVE_ANON + active); + del_page_from_lru_list(page, lruvec); ClearPageActive(page); ClearPageReferenced(page); if (add_to_tail && *add_to_tail) - add_page_to_lru_list_tail(page, lruvec, LRU_INACTIVE_FILE); + add_page_to_lru_list_tail(page, lruvec); else - add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE); + add_page_to_lru_list(page, lruvec); } } @@ -733,7 +767,7 @@ void deactivate_file_page(struct page *page) */ void deactivate_page(struct page *page) { - if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) { + if (PageLRU(page) && !PageUnevictable(page) && (PageActive(page) || lru_gen_enabled())) { struct pagevec *pvec; local_lock(&lru_pvecs.lock); @@ -1052,9 +1086,8 @@ void release_pages(struct page **pages, int nr) } lruvec = mem_cgroup_page_lruvec(page, locked_pgdat); - VM_BUG_ON_PAGE(!PageLRU(page), page); - __ClearPageLRU(page); - del_page_from_lru_list(page, lruvec, page_off_lru(page)); + del_page_from_lru_list(page, lruvec); + __clear_page_lru_flags(page); } __ClearPageWaiters(page); @@ -1117,8 +1150,7 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, * Put page_tail on the list at the correct position * so they all end up in order. */ - add_page_to_lru_list_tail(page_tail, lruvec, - page_lru(page_tail)); + add_page_to_lru_list_tail(page_tail, lruvec); } } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -1126,7 +1158,6 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, void *arg) { - enum lru_list lru; int was_unevictable = TestClearPageUnevictable(page); int nr_pages = thp_nr_pages(page); @@ -1162,19 +1193,17 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec, smp_mb__after_atomic(); if (page_evictable(page)) { - lru = page_lru(page); if (was_unevictable) __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages); } else { - lru = LRU_UNEVICTABLE; ClearPageActive(page); SetPageUnevictable(page); if (!was_unevictable) __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages); } - add_page_to_lru_list(page, lruvec, lru); - trace_mm_lru_insertion(page, lru); + add_page_to_lru_list(page, lruvec); + trace_mm_lru_insertion(page); } /* diff --git a/mm/swap_state.c b/mm/swap_state.c index e797804dcc4d0cc533cbbe99880c7005f3c76c05..d13d58fe48c1fc6851bbf59001d16abfc0e90f8f 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -538,7 +538,6 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, workingset_refault(page, shadow); /* Caller will initiate read into locked page */ - SetPageWorkingset(page); lru_cache_add(page); *new_page_allocated = true; return page; diff --git a/mm/util.c b/mm/util.c index 3d05a88e0fd68e526463f15a700ce664b4cf9c34..8a859bd9d41f93cbc16f0f41d2f18404709d139c 100644 --- a/mm/util.c +++ b/mm/util.c @@ -586,6 +586,12 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node) if (ret || size <= PAGE_SIZE) return ret; + /* Don't even allow crazy sizes */ + if (unlikely(size > INT_MAX)) { + WARN_ON_ONCE(!(flags & __GFP_NOWARN)); + return NULL; + } + return __vmalloc_node(size, 1, flags, node, __builtin_return_address(0)); } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 0d1e0b97d1c9e7c4eb20be7c0218f35795190e80..bf3896abd9d5b0c1d626aad9629a27ead57318c1 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -37,6 +37,7 @@ #include #include +#include #include #include @@ -45,7 +46,7 @@ bool is_vmalloc_addr(const void *x) { - unsigned long addr = (unsigned long)x; + unsigned long addr = (unsigned long)kasan_reset_tag(x); return addr >= VMALLOC_START && addr < VMALLOC_END; } @@ -69,7 +70,6 @@ static void free_work(struct work_struct *w) } /*** Page table manipulation functions ***/ - static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pgtbl_mod_mask *mask) { @@ -337,7 +337,7 @@ int is_vmalloc_or_module_addr(const void *x) * just put it in the vmalloc space. */ #if defined(CONFIG_MODULES) && defined(MODULES_VADDR) - unsigned long addr = (unsigned long)x; + unsigned long addr = (unsigned long)kasan_reset_tag(x); if (addr >= MODULES_VADDR && addr < MODULES_END) return 1; #endif @@ -497,6 +497,8 @@ static struct vmap_area *__find_vmap_area(unsigned long addr) { struct rb_node *n = vmap_area_root.rb_node; + addr = (unsigned long)kasan_reset_tag((void *)addr); + while (n) { struct vmap_area *va; @@ -1749,7 +1751,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush) rcu_read_lock(); list_for_each_entry_rcu(vb, &vbq->free, free_list) { spin_lock(&vb->lock); - if (vb->dirty) { + if (vb->dirty && vb->dirty != VMAP_BBMAP_BITS) { unsigned long va_start = vb->va->va_start; unsigned long s, e; @@ -1803,7 +1805,7 @@ EXPORT_SYMBOL_GPL(vm_unmap_aliases); void vm_unmap_ram(const void *mem, unsigned int count) { unsigned long size = (unsigned long)count << PAGE_SHIFT; - unsigned long addr = (unsigned long)mem; + unsigned long addr = (unsigned long)kasan_reset_tag(mem); struct vmap_area *va; might_sleep(); @@ -1864,12 +1866,18 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node) mem = (void *)addr; } - kasan_unpoison_vmalloc(mem, size); - if (map_kernel_range(addr, size, PAGE_KERNEL, pages) < 0) { vm_unmap_ram(mem, count); return NULL; } + + /* + * Mark the pages as accessible, now that they are mapped. + * With hardware tag-based KASAN, marking is skipped for + * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). + */ + mem = kasan_unpoison_vmalloc(mem, size, KASAN_VMALLOC_PROT_NORMAL); + return mem; } EXPORT_SYMBOL(vm_map_ram); @@ -2058,15 +2066,16 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm) } static struct vm_struct *__get_vm_area_node(unsigned long size, - unsigned long align, unsigned long flags, unsigned long start, - unsigned long end, int node, gfp_t gfp_mask, const void *caller) + unsigned long align, unsigned long shift, unsigned long flags, + unsigned long start, unsigned long end, int node, + gfp_t gfp_mask, const void *caller) { struct vmap_area *va; struct vm_struct *area; unsigned long requested_size = size; BUG_ON(in_interrupt()); - size = PAGE_ALIGN(size); + size = ALIGN(size, 1ul << shift); if (unlikely(!size)) return NULL; @@ -2087,10 +2096,20 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, return NULL; } - kasan_unpoison_vmalloc((void *)va->va_start, requested_size); - setup_vmalloc_vm(area, va, flags, caller); + /* + * Mark pages for non-VM_ALLOC mappings as accessible. Do it now as a + * best-effort approach, as they can be mapped outside of vmalloc code. + * For VM_ALLOC mappings, the pages are marked as accessible after + * getting mapped in __vmalloc_node_range(). + * With hardware tag-based KASAN, marking is skipped for + * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). + */ + if (!(flags & VM_ALLOC)) + area->addr = kasan_unpoison_vmalloc(area->addr, requested_size, + KASAN_VMALLOC_PROT_NORMAL); + return area; } @@ -2098,8 +2117,8 @@ struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller) { - return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE, - GFP_KERNEL, caller); + return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, start, end, + NUMA_NO_NODE, GFP_KERNEL, caller); } EXPORT_SYMBOL_GPL(__get_vm_area_caller); @@ -2116,7 +2135,8 @@ EXPORT_SYMBOL_GPL(__get_vm_area_caller); */ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) { - return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, + return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, + VMALLOC_START, VMALLOC_END, NUMA_NO_NODE, GFP_KERNEL, __builtin_return_address(0)); } @@ -2124,7 +2144,8 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, const void *caller) { - return __get_vm_area_node(size, 1, flags, VMALLOC_START, VMALLOC_END, + return __get_vm_area_node(size, 1, PAGE_SHIFT, flags, + VMALLOC_START, VMALLOC_END, NUMA_NO_NODE, GFP_KERNEL, caller); } @@ -2173,7 +2194,7 @@ struct vm_struct *remove_vm_area(const void *addr) va->vm = NULL; spin_unlock(&vmap_area_lock); - kasan_free_shadow(vm); + kasan_free_module_shadow(vm); free_unmap_vmap_area(va); return vm; @@ -2263,6 +2284,10 @@ static void __vunmap(const void *addr, int deallocate_pages) kasan_poison_vmalloc(area->addr, get_vm_area_size(area)); + if (IS_ENABLED(CONFIG_ARCH_HAS_IOREMAP_PHYS_HOOKS) && + area->flags & VM_IOREMAP) + iounmap_phys_range_hook(area->phys_addr, get_vm_area_size(area)); + vm_remove_mappings(area, deallocate_pages); if (deallocate_pages) { @@ -2550,22 +2575,64 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, const void *caller) { struct vm_struct *area; - void *addr; + void *ret; + kasan_vmalloc_flags_t kasan_flags = KASAN_VMALLOC_NONE; unsigned long real_size = size; + unsigned int shift = PAGE_SHIFT; - size = PAGE_ALIGN(size); if (!size || (size >> PAGE_SHIFT) > totalram_pages()) goto fail; - area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED | - vm_flags, start, end, node, gfp_mask, caller); + area = __get_vm_area_node(real_size, align, shift, VM_ALLOC | + VM_UNINITIALIZED | vm_flags, start, end, node, + gfp_mask, caller); if (!area) goto fail; - addr = __vmalloc_area_node(area, gfp_mask, prot, node); - if (!addr) + /* + * Prepare arguments for __vmalloc_area_node() and + * kasan_unpoison_vmalloc(). + */ + if (pgprot_val(prot) == pgprot_val(PAGE_KERNEL)) { + if (kasan_hw_tags_enabled()) { + /* + * Modify protection bits to allow tagging. + * This must be done before mapping. + */ + prot = arch_vmap_pgprot_tagged(prot); + + /* + * Skip page_alloc poisoning and zeroing for physical + * pages backing VM_ALLOC mapping. Memory is instead + * poisoned and zeroed by kasan_unpoison_vmalloc(). + */ + gfp_mask |= __GFP_SKIP_KASAN_UNPOISON | __GFP_SKIP_ZERO; + } + + /* Take note that the mapping is PAGE_KERNEL. */ + kasan_flags |= KASAN_VMALLOC_PROT_NORMAL; + } + + /* Allocate physical pages and map them into vmalloc space. */ + ret = __vmalloc_area_node(area, gfp_mask, prot, node); + if (!ret) return NULL; + /* + * Mark the pages as accessible, now that they are mapped. + * The init condition should match the one in post_alloc_hook() + * (except for the should_skip_init() check) to make sure that memory + * is initialized under the same conditions regardless of the enabled + * KASAN mode. + * Tag-based KASAN modes only assign tags to normal non-executable + * allocations, see __kasan_unpoison_vmalloc(). + */ + kasan_flags |= KASAN_VMALLOC_VM_ALLOC; + if (!want_init_on_free() && want_init_on_alloc(gfp_mask)) + kasan_flags |= KASAN_VMALLOC_INIT; + /* KASAN_VMALLOC_PROT_NORMAL already set if required. */ + area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags); + /* * In this function, newly allocated vm_struct has VM_UNINITIALIZED * flag. It means that vm_struct is not fully initialized. @@ -2573,9 +2640,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, */ clear_vm_uninitialized_flag(area); - kmemleak_vmalloc(area, size, gfp_mask); + size = PAGE_ALIGN(size); + if (!(vm_flags & VM_DEFER_KMEMLEAK)) + kmemleak_vmalloc(area, size, gfp_mask); - return addr; + return area->addr; fail: warn_alloc(gfp_mask, NULL, @@ -2878,6 +2947,8 @@ long vread(char *buf, char *addr, unsigned long count) unsigned long buflen = count; unsigned long n; + addr = kasan_reset_tag(addr); + /* Don't allow overflow */ if ((unsigned long) addr + count < count) count = -(unsigned long) addr; @@ -3329,9 +3400,6 @@ retry: for (area = 0; area < nr_vms; area++) { if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area])) goto err_free_shadow; - - kasan_unpoison_vmalloc((void *)vas[area]->va_start, - sizes[area]); } /* insert all vm's */ @@ -3344,6 +3412,16 @@ retry: } spin_unlock(&vmap_area_lock); + /* + * Mark allocated areas as accessible. Do it now as a best-effort + * approach, as they can be mapped outside of vmalloc code. + * With hardware tag-based KASAN, marking is skipped for + * non-VM_ALLOC mappings, see __kasan_unpoison_vmalloc(). + */ + for (area = 0; area < nr_vms; area++) + vms[area]->addr = kasan_unpoison_vmalloc(vms[area]->addr, + vms[area]->size, KASAN_VMALLOC_PROT_NORMAL); + kfree(vas); return vms; diff --git a/mm/vmscan.c b/mm/vmscan.c index d49cd9cb79e0eca6a611abedf549a50da8f73340..cb1a9b5b0729af5121fba5dcae272438d8a12c75 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -51,6 +51,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -129,6 +133,13 @@ struct scan_control { /* The file pages on the current node are dangerously low */ unsigned int file_is_tiny:1; +#ifdef CONFIG_LRU_GEN + /* help make better choices when multiple memcgs are available */ + unsigned int memcgs_need_aging:1; + unsigned int memcgs_need_swapping:1; + unsigned int memcgs_avoid_swapping:1; +#endif + /* Allocation order */ s8 order; @@ -931,9 +942,11 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, if (PageSwapCache(page)) { swp_entry_t swap = { .val = page_private(page) }; - mem_cgroup_swapout(page, swap); + + /* get a shadow entry before mem_cgroup_swapout() clears page_memcg() */ if (reclaimed && !mapping_exiting(mapping)) shadow = workingset_eviction(page, target_memcg); + mem_cgroup_swapout(page, swap); __delete_from_swap_cache(page, swap, shadow); xa_unlock_irqrestore(&mapping->i_pages, flags); put_swap_page(page, swap); @@ -1144,6 +1157,11 @@ static unsigned int shrink_page_list(struct list_head *page_list, if (!sc->may_unmap && page_mapped(page)) goto keep_locked; + /* page_update_gen() tried to promote this page? */ + if (lru_gen_enabled() && !ignore_references && + page_mapped(page) && PageReferenced(page)) + goto keep_locked; + may_enter_fs = (sc->gfp_mask & __GFP_FS) || (PageSwapCache(page) && (sc->gfp_mask & __GFP_IO)); @@ -1837,10 +1855,9 @@ int isolate_lru_page(struct page *page) spin_lock_irq(&pgdat->lru_lock); lruvec = mem_cgroup_page_lruvec(page, pgdat); if (PageLRU(page)) { - int lru = page_lru(page); get_page(page); ClearPageLRU(page); - del_page_from_lru_list(page, lruvec, lru); + del_page_from_lru_list(page, lruvec); ret = 0; } spin_unlock_irq(&pgdat->lru_lock); @@ -1912,13 +1929,12 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec, int nr_pages, nr_moved = 0; LIST_HEAD(pages_to_free); struct page *page; - enum lru_list lru; while (!list_empty(list)) { page = lru_to_page(list); VM_BUG_ON_PAGE(PageLRU(page), page); + list_del(&page->lru); if (unlikely(!page_evictable(page))) { - list_del(&page->lru); spin_unlock_irq(&pgdat->lru_lock); putback_lru_page(page); spin_lock_irq(&pgdat->lru_lock); @@ -1927,16 +1943,11 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec, lruvec = mem_cgroup_page_lruvec(page, pgdat); SetPageLRU(page); - lru = page_lru(page); - - nr_pages = thp_nr_pages(page); - update_lru_size(lruvec, lru, page_zonenum(page), nr_pages); - list_move(&page->lru, &lruvec->lists[lru]); + add_page_to_lru_list(page, lruvec); if (put_page_testzero(page)) { - __ClearPageLRU(page); - __ClearPageActive(page); - del_page_from_lru_list(page, lruvec, lru); + del_page_from_lru_list(page, lruvec); + __clear_page_lru_flags(page); if (unlikely(PageCompound(page))) { spin_unlock_irq(&pgdat->lru_lock); @@ -1945,6 +1956,7 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec, } else list_add(&page->lru, &pages_to_free); } else { + nr_pages = thp_nr_pages(page); nr_moved += nr_pages; if (PageActive(page)) workingset_age_nonresident(lruvec, nr_pages); @@ -2288,6 +2300,106 @@ enum scan_balance { SCAN_FILE, }; +static void prepare_scan_count(pg_data_t *pgdat, struct scan_control *sc) +{ + unsigned long file; + struct lruvec *target_lruvec; + + if (lru_gen_enabled()) + return; + + target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); + + /* + * Determine the scan balance between anon and file LRUs. + */ + spin_lock_irq(&pgdat->lru_lock); + sc->anon_cost = target_lruvec->anon_cost; + sc->file_cost = target_lruvec->file_cost; + spin_unlock_irq(&pgdat->lru_lock); + + /* + * Target desirable inactive:active list ratios for the anon + * and file LRU lists. + */ + if (!sc->force_deactivate) { + unsigned long refaults; + + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_ANON); + if (refaults != target_lruvec->refaults[0] || + inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) + sc->may_deactivate |= DEACTIVATE_ANON; + else + sc->may_deactivate &= ~DEACTIVATE_ANON; + + /* + * When refaults are being observed, it means a new + * workingset is being established. Deactivate to get + * rid of any stale active pages quickly. + */ + refaults = lruvec_page_state(target_lruvec, + WORKINGSET_ACTIVATE_FILE); + if (refaults != target_lruvec->refaults[1] || + inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) + sc->may_deactivate |= DEACTIVATE_FILE; + else + sc->may_deactivate &= ~DEACTIVATE_FILE; + } else + sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; + + /* + * If we have plenty of inactive file pages that aren't + * thrashing, try to reclaim those first before touching + * anonymous pages. + */ + file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); + if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) + sc->cache_trim_mode = 1; + else + sc->cache_trim_mode = 0; + + /* + * Prevent the reclaimer from falling into the cache trap: as + * cache pages start out inactive, every cache fault will tip + * the scan balance towards the file LRU. And as the file LRU + * shrinks, so does the window for rotation from references. + * This means we have a runaway feedback loop where a tiny + * thrashing file LRU becomes infinitely more attractive than + * anon pages. Try to detect this based on file LRU size. + */ + if (!cgroup_reclaim(sc)) { + unsigned long total_high_wmark = 0; + unsigned long free, anon; + int z; + + free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); + file = node_page_state(pgdat, NR_ACTIVE_FILE) + + node_page_state(pgdat, NR_INACTIVE_FILE); + + for (z = 0; z < MAX_NR_ZONES; z++) { + struct zone *zone = &pgdat->node_zones[z]; + + if (!managed_zone(zone)) + continue; + + total_high_wmark += high_wmark_pages(zone); + } + + /* + * Consider anon: if that's low too, this isn't a + * runaway file reclaim problem, but rather just + * extreme pressure. Reclaim as per usual then. + */ + anon = node_page_state(pgdat, NR_INACTIVE_ANON); + + sc->file_is_tiny = + file + free <= total_high_wmark && + !(sc->may_deactivate & DEACTIVATE_ANON) && + anon >> sc->priority; + } +} + /* * Determine how aggressively the anon and file LRU lists should be * scanned. The relative value of each set of LRU lists is determined @@ -2500,158 +2612,2793 @@ out: } } -static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) -{ - unsigned long nr[NR_LRU_LISTS]; - unsigned long targets[NR_LRU_LISTS]; - unsigned long nr_to_scan; - enum lru_list lru; - unsigned long nr_reclaimed = 0; - unsigned long nr_to_reclaim = sc->nr_to_reclaim; - struct blk_plug plug; - bool scan_adjusted; +#ifdef CONFIG_LRU_GEN - get_scan_count(lruvec, sc, nr); +#ifdef CONFIG_LRU_GEN_ENABLED +DEFINE_STATIC_KEY_ARRAY_TRUE(lru_gen_caps, NR_LRU_GEN_CAPS); +#else +DEFINE_STATIC_KEY_ARRAY_FALSE(lru_gen_caps, NR_LRU_GEN_CAPS); +#endif - /* Record the original scan target for proportional adjustments later */ - memcpy(targets, nr, sizeof(nr)); +/****************************************************************************** + * shorthand helpers + ******************************************************************************/ - /* - * Global reclaiming within direct reclaim at DEF_PRIORITY is a normal - * event that can occur when there is little memory pressure e.g. - * multiple streaming readers/writers. Hence, we do not abort scanning - * when the requested number of pages are reclaimed when scanning at - * DEF_PRIORITY on the assumption that the fact we are direct - * reclaiming implies that kswapd is not keeping up and it is best to - * do a batch of work at once. For memcg reclaim one check is made to - * abort proportional reclaim if either the file or anon lru has already - * dropped to zero at the first pass. - */ - scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && - sc->priority == DEF_PRIORITY); +#define DEFINE_MAX_SEQ(lruvec) \ + unsigned long max_seq = READ_ONCE((lruvec)->lrugen.max_seq) - blk_start_plug(&plug); - while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || - nr[LRU_INACTIVE_FILE]) { - unsigned long nr_anon, nr_file, percentage; - unsigned long nr_scanned; +#define DEFINE_MIN_SEQ(lruvec) \ + unsigned long min_seq[ANON_AND_FILE] = { \ + READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_ANON]), \ + READ_ONCE((lruvec)->lrugen.min_seq[LRU_GEN_FILE]), \ + } - for_each_evictable_lru(lru) { - if (nr[lru]) { - nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); - nr[lru] -= nr_to_scan; +#define for_each_gen_type_zone(gen, type, zone) \ + for ((gen) = 0; (gen) < MAX_NR_GENS; (gen)++) \ + for ((type) = 0; (type) < ANON_AND_FILE; (type)++) \ + for ((zone) = 0; (zone) < MAX_NR_ZONES; (zone)++) - nr_reclaimed += shrink_list(lru, nr_to_scan, - lruvec, sc); - } - } +static int page_lru_gen(struct page *page) +{ + unsigned long flags = READ_ONCE(page->flags); - cond_resched(); + return ((flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; +} - if (nr_reclaimed < nr_to_reclaim || scan_adjusted) - continue; +static int page_lru_tier(struct page *page) +{ + int refs; + unsigned long flags = READ_ONCE(page->flags); - /* - * For kswapd and memcg, reclaim at least the number of pages - * requested. Ensure that the anon and file LRUs are scanned - * proportionally what was requested by get_scan_count(). We - * stop reclaiming one LRU and reduce the amount scanning - * proportional to the original scan target. - */ - nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE]; - nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON]; + refs = (flags & LRU_REFS_FLAGS) == LRU_REFS_FLAGS ? + ((flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF) + 1 : 0; - /* - * It's just vindictive to attack the larger once the smaller - * has gone to zero. And given the way we stop scanning the - * smaller below, this makes sure that we only make one nudge - * towards proportionality once we've got nr_to_reclaim. - */ - if (!nr_file || !nr_anon) - break; + return lru_tier_from_refs(refs); +} - if (nr_file > nr_anon) { - unsigned long scan_target = targets[LRU_INACTIVE_ANON] + - targets[LRU_ACTIVE_ANON] + 1; - lru = LRU_BASE; - percentage = nr_anon * 100 / scan_target; - } else { - unsigned long scan_target = targets[LRU_INACTIVE_FILE] + - targets[LRU_ACTIVE_FILE] + 1; - lru = LRU_FILE; - percentage = nr_file * 100 / scan_target; - } +static bool get_cap(int cap) +{ +#ifdef CONFIG_LRU_GEN_ENABLED + return static_branch_likely(&lru_gen_caps[cap]); +#else + return static_branch_unlikely(&lru_gen_caps[cap]); +#endif +} - /* Stop scanning the smaller of the LRU */ - nr[lru] = 0; - nr[lru + LRU_ACTIVE] = 0; +static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid) +{ + struct pglist_data *pgdat = NODE_DATA(nid); - /* - * Recalculate the other LRU scan count based on its original - * scan target and the percentage scanning already complete - */ - lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE; - nr_scanned = targets[lru] - nr[lru]; - nr[lru] = targets[lru] * (100 - percentage) / 100; - nr[lru] -= min(nr[lru], nr_scanned); +#ifdef CONFIG_MEMCG + if (memcg) { + struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec; - lru += LRU_ACTIVE; - nr_scanned = targets[lru] - nr[lru]; - nr[lru] = targets[lru] * (100 - percentage) / 100; - nr[lru] -= min(nr[lru], nr_scanned); + /* for hotadd_new_pgdat() */ + if (!lruvec->pgdat) + lruvec->pgdat = pgdat; - scan_adjusted = true; + return lruvec; } - blk_finish_plug(&plug); - sc->nr_reclaimed += nr_reclaimed; +#endif + VM_BUG_ON(!mem_cgroup_disabled()); - /* - * Even if we did not try to evict anon pages at all, we want to - * rebalance the anon lru active/inactive ratio. - */ - if (total_swap_pages && inactive_is_low(lruvec, LRU_INACTIVE_ANON)) - shrink_active_list(SWAP_CLUSTER_MAX, lruvec, - sc, LRU_ACTIVE_ANON); + return pgdat ? &pgdat->__lruvec : NULL; } -/* Use reclaim/compaction for costly allocs or under memory pressure */ -static bool in_reclaim_compaction(struct scan_control *sc) +static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc) { - if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && - (sc->order > PAGE_ALLOC_COSTLY_ORDER || - sc->priority < DEF_PRIORITY - 2)) - return true; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); - return false; + if (mem_cgroup_get_nr_swap_pages(memcg) < MIN_LRU_BATCH) + return 0; + + return mem_cgroup_swappiness(memcg); } -/* - * Reclaim/compaction is used for high-order allocation requests. It reclaims - * order-0 pages before compacting the zone. should_continue_reclaim() returns - * true if more pages should be reclaimed such that when the page allocator - * calls try_to_compact_pages() that it will have enough free pages to succeed. - * It will give up earlier than that if there is difficulty reclaiming pages. - */ -static inline bool should_continue_reclaim(struct pglist_data *pgdat, - unsigned long nr_reclaimed, - struct scan_control *sc) +static int get_nr_gens(struct lruvec *lruvec, int type) { - unsigned long pages_for_compaction; - unsigned long inactive_lru_pages; - int z; + return lruvec->lrugen.max_seq - lruvec->lrugen.min_seq[type] + 1; +} - /* If not in reclaim/compaction mode, stop */ - if (!in_reclaim_compaction(sc)) - return false; +static bool __maybe_unused seq_is_valid(struct lruvec *lruvec) +{ + /* see the comment on lru_gen_struct */ + return get_nr_gens(lruvec, LRU_GEN_FILE) >= MIN_NR_GENS && + get_nr_gens(lruvec, LRU_GEN_FILE) <= get_nr_gens(lruvec, LRU_GEN_ANON) && + get_nr_gens(lruvec, LRU_GEN_ANON) <= MAX_NR_GENS; +} - /* - * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX - * number of pages that were scanned. This will return to the caller - * with the risk reclaim/compaction and the resulting allocation attempt - * fails. In the past we have tried harder for __GFP_RETRY_MAYFAIL - * allocations through requiring that the full LRU list has been scanned - * first, by assuming that zero delta of sc->nr_scanned means full LRU - * scan, but that approximation was wrong, and there were corner cases +/****************************************************************************** + * mm_struct list + ******************************************************************************/ + +static struct lru_gen_mm_list *get_mm_list(struct mem_cgroup *memcg) +{ + static struct lru_gen_mm_list mm_list = { + .fifo = LIST_HEAD_INIT(mm_list.fifo), + .lock = __SPIN_LOCK_UNLOCKED(mm_list.lock), + }; + +#ifdef CONFIG_MEMCG + if (memcg) + return &memcg->mm_list; +#endif + VM_BUG_ON(!mem_cgroup_disabled()); + + return &mm_list; +} + +void lru_gen_add_mm(struct mm_struct *mm) +{ + int nid; + struct mem_cgroup *memcg = get_mem_cgroup_from_mm(mm); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + + VM_BUG_ON_MM(!list_empty(&mm->lru_gen.list), mm); +#ifdef CONFIG_MEMCG + VM_BUG_ON_MM(mm->lru_gen.memcg, mm); + mm->lru_gen.memcg = memcg; +#endif + spin_lock(&mm_list->lock); + + for_each_node_state(nid, N_MEMORY) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + + if (!lruvec) + continue; + + if (lruvec->mm_state.tail == &mm_list->fifo) + lruvec->mm_state.tail = &mm->lru_gen.list; + } + + list_add_tail(&mm->lru_gen.list, &mm_list->fifo); + + spin_unlock(&mm_list->lock); +} + +void lru_gen_del_mm(struct mm_struct *mm) +{ + int nid; + struct lru_gen_mm_list *mm_list; + struct mem_cgroup *memcg = NULL; + + if (list_empty(&mm->lru_gen.list)) + return; + +#ifdef CONFIG_MEMCG + memcg = mm->lru_gen.memcg; +#endif + mm_list = get_mm_list(memcg); + + spin_lock(&mm_list->lock); + + for_each_node(nid) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + + if (!lruvec) + continue; + + if (lruvec->mm_state.tail == &mm->lru_gen.list) + lruvec->mm_state.tail = lruvec->mm_state.tail->next; + + if (lruvec->mm_state.head != &mm->lru_gen.list) + continue; + + lruvec->mm_state.head = lruvec->mm_state.head->next; + if (lruvec->mm_state.head == &mm_list->fifo) + WRITE_ONCE(lruvec->mm_state.seq, lruvec->mm_state.seq + 1); + } + + list_del_init(&mm->lru_gen.list); + + spin_unlock(&mm_list->lock); + +#ifdef CONFIG_MEMCG + mem_cgroup_put(mm->lru_gen.memcg); + mm->lru_gen.memcg = NULL; +#endif +} + +#ifdef CONFIG_MEMCG +void lru_gen_migrate_mm(struct mm_struct *mm) +{ + struct mem_cgroup *memcg; + + lockdep_assert_held(&mm->owner->alloc_lock); + + /* for mm_update_next_owner() */ + if (mem_cgroup_disabled()) + return; + + rcu_read_lock(); + memcg = mem_cgroup_from_task(mm->owner); + rcu_read_unlock(); + if (memcg == mm->lru_gen.memcg) + return; + + VM_BUG_ON_MM(!mm->lru_gen.memcg, mm); + VM_BUG_ON_MM(list_empty(&mm->lru_gen.list), mm); + + lru_gen_del_mm(mm); + lru_gen_add_mm(mm); +} +#endif + +/* + * Bloom filters with m=1<<15, k=2 and the false positive rates of ~1/5 when + * n=10,000 and ~1/2 when n=20,000, where, conventionally, m is the number of + * bits in a bitmap, k is the number of hash functions and n is the number of + * inserted items. + * + * Page table walkers use one of the two filters to reduce their search space. + * To get rid of non-leaf entries that no longer have enough leaf entries, the + * aging uses the double-buffering technique to flip to the other filter each + * time it produces a new generation. For non-leaf entries that have enough + * leaf entries, the aging carries them over to the next generation in + * walk_pmd_range(); the eviction also report them when walking the rmap + * in lru_gen_look_around(). + * + * For future optimizations: + * 1. It's not necessary to keep both filters all the time. The spare one can be + * freed after the RCU grace period and reallocated if needed again. + * 2. And when reallocating, it's worth scaling its size according to the number + * of inserted entries in the other filter, to reduce the memory overhead on + * small systems and false positives on large systems. + * 3. Jenkins' hash function is an alternative to Knuth's. + */ +#define BLOOM_FILTER_SHIFT 15 + +static inline int filter_gen_from_seq(unsigned long seq) +{ + return seq % NR_BLOOM_FILTERS; +} + +static void get_item_key(void *item, int *key) +{ + u32 hash = hash_ptr(item, BLOOM_FILTER_SHIFT * 2); + + BUILD_BUG_ON(BLOOM_FILTER_SHIFT * 2 > BITS_PER_TYPE(u32)); + + key[0] = hash & (BIT(BLOOM_FILTER_SHIFT) - 1); + key[1] = hash >> BLOOM_FILTER_SHIFT; +} + +static void reset_bloom_filter(struct lruvec *lruvec, unsigned long seq) +{ + unsigned long *filter; + int gen = filter_gen_from_seq(seq); + + lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock); + + filter = lruvec->mm_state.filters[gen]; + if (filter) { + bitmap_clear(filter, 0, BIT(BLOOM_FILTER_SHIFT)); + return; + } + + filter = bitmap_zalloc(BIT(BLOOM_FILTER_SHIFT), GFP_ATOMIC); + WRITE_ONCE(lruvec->mm_state.filters[gen], filter); +} + +static void update_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) +{ + int key[2]; + unsigned long *filter; + int gen = filter_gen_from_seq(seq); + + filter = READ_ONCE(lruvec->mm_state.filters[gen]); + if (!filter) + return; + + get_item_key(item, key); + + if (!test_bit(key[0], filter)) + set_bit(key[0], filter); + if (!test_bit(key[1], filter)) + set_bit(key[1], filter); +} + +static bool test_bloom_filter(struct lruvec *lruvec, unsigned long seq, void *item) +{ + int key[2]; + unsigned long *filter; + int gen = filter_gen_from_seq(seq); + + filter = READ_ONCE(lruvec->mm_state.filters[gen]); + if (!filter) + return true; + + get_item_key(item, key); + + return test_bit(key[0], filter) && test_bit(key[1], filter); +} + +static void reset_mm_stats(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, bool last) +{ + int i; + int hist; + + lockdep_assert_held(&get_mm_list(lruvec_memcg(lruvec))->lock); + + if (walk) { + hist = lru_hist_from_seq(walk->max_seq); + + for (i = 0; i < NR_MM_STATS; i++) { + WRITE_ONCE(lruvec->mm_state.stats[hist][i], + lruvec->mm_state.stats[hist][i] + walk->mm_stats[i]); + walk->mm_stats[i] = 0; + } + } + + if (NR_HIST_GENS > 1 && last) { + hist = lru_hist_from_seq(lruvec->mm_state.seq + 1); + + for (i = 0; i < NR_MM_STATS; i++) + WRITE_ONCE(lruvec->mm_state.stats[hist][i], 0); + } +} + +static bool should_skip_mm(struct mm_struct *mm, struct lru_gen_mm_walk *walk) +{ + int type; + unsigned long size = 0; + struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec); + + if (!walk->full_scan && cpumask_empty(mm_cpumask(mm)) && + !node_isset(pgdat->node_id, mm->lru_gen.nodes)) + return true; + + node_clear(pgdat->node_id, mm->lru_gen.nodes); + + for (type = !walk->can_swap; type < ANON_AND_FILE; type++) { + size += type ? get_mm_counter(mm, MM_FILEPAGES) : + get_mm_counter(mm, MM_ANONPAGES) + + get_mm_counter(mm, MM_SHMEMPAGES); + } + + if (size < MIN_LRU_BATCH) + return true; + + if (mm_is_oom_victim(mm)) + return true; + + return !mmget_not_zero(mm); +} + +static bool iterate_mm_list(struct lruvec *lruvec, struct lru_gen_mm_walk *walk, + struct mm_struct **iter) +{ + bool first = false; + bool last = true; + struct mm_struct *mm = NULL; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + + /* + * There are four interesting cases for this page table walker: + * 1. It tries to start a new iteration of mm_list with a stale max_seq; + * there is nothing to be done. + * 2. It's the first of the current generation, and it needs to reset + * the Bloom filter for the next generation. + * 3. It reaches the end of mm_list, and it needs to increment + * mm_state->seq; the iteration is done. + * 4. It's the last of the current generation, and it needs to reset the + * mm stats counters for the next generation. + */ + if (*iter) + mmput_async(*iter); + else if (walk->max_seq <= READ_ONCE(mm_state->seq)) + return false; + + spin_lock(&mm_list->lock); + + VM_BUG_ON(mm_state->seq + 1 < walk->max_seq); + VM_BUG_ON(*iter && mm_state->seq > walk->max_seq); + VM_BUG_ON(*iter && !mm_state->nr_walkers); + + if (walk->max_seq <= mm_state->seq) { + if (!*iter) + last = false; + goto done; + } + + if (!mm_state->nr_walkers) { + VM_BUG_ON(mm_state->head && mm_state->head != &mm_list->fifo); + + mm_state->head = mm_list->fifo.next; + first = true; + } + + while (!mm && mm_state->head != &mm_list->fifo) { + mm = list_entry(mm_state->head, struct mm_struct, lru_gen.list); + + mm_state->head = mm_state->head->next; + + /* full scan for those added after the last iteration */ + if (!mm_state->tail || mm_state->tail == &mm->lru_gen.list) { + mm_state->tail = mm_state->head; + walk->full_scan = true; + } + + if (should_skip_mm(mm, walk)) + mm = NULL; + } + + if (mm_state->head == &mm_list->fifo) + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); +done: + if (*iter && !mm) + mm_state->nr_walkers--; + if (!*iter && mm) + mm_state->nr_walkers++; + + if (mm_state->nr_walkers) + last = false; + + if (mm && first) + reset_bloom_filter(lruvec, walk->max_seq + 1); + + if (*iter || last) + reset_mm_stats(lruvec, walk, last); + + spin_unlock(&mm_list->lock); + + *iter = mm; + + return last; +} + +static bool iterate_mm_list_nowalk(struct lruvec *lruvec, unsigned long max_seq) +{ + bool success = false; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct lru_gen_mm_list *mm_list = get_mm_list(memcg); + struct lru_gen_mm_state *mm_state = &lruvec->mm_state; + + if (max_seq <= READ_ONCE(mm_state->seq)) + return false; + + spin_lock(&mm_list->lock); + + VM_BUG_ON(mm_state->seq + 1 < max_seq); + + if (max_seq > mm_state->seq && !mm_state->nr_walkers) { + VM_BUG_ON(mm_state->head && mm_state->head != &mm_list->fifo); + + WRITE_ONCE(mm_state->seq, mm_state->seq + 1); + reset_mm_stats(lruvec, NULL, true); + success = true; + } + + spin_unlock(&mm_list->lock); + + return success; +} + +/****************************************************************************** + * refault feedback loop + ******************************************************************************/ + +/* + * A feedback loop based on Proportional-Integral-Derivative (PID) controller. + * + * The P term is refaulted/(evicted+protected) from a tier in the generation + * currently being evicted; the I term is the exponential moving average of the + * P term over the generations previously evicted, using the smoothing factor + * 1/2; the D term isn't supported. + * + * The setpoint (SP) is always the first tier of one type; the process variable + * (PV) is either any tier of the other type or any other tier of the same + * type. + * + * The error is the difference between the SP and the PV; the correction is + * turn off protection when SP>PV or turn on protection when SPlrugen; + int hist = lru_hist_from_seq(lrugen->min_seq[type]); + + pos->refaulted = lrugen->avg_refaulted[type][tier] + + atomic_long_read(&lrugen->refaulted[hist][type][tier]); + pos->total = lrugen->avg_total[type][tier] + + atomic_long_read(&lrugen->evicted[hist][type][tier]); + if (tier) + pos->total += lrugen->protected[hist][type][tier - 1]; + pos->gain = gain; +} + +static void reset_ctrl_pos(struct lruvec *lruvec, int type, bool carryover) +{ + int hist, tier; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + bool clear = carryover ? NR_HIST_GENS == 1 : NR_HIST_GENS > 1; + unsigned long seq = carryover ? lrugen->min_seq[type] : lrugen->max_seq + 1; + + lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock); + + if (!carryover && !clear) + return; + + hist = lru_hist_from_seq(seq); + + for (tier = 0; tier < MAX_NR_TIERS; tier++) { + if (carryover) { + unsigned long sum; + + sum = lrugen->avg_refaulted[type][tier] + + atomic_long_read(&lrugen->refaulted[hist][type][tier]); + WRITE_ONCE(lrugen->avg_refaulted[type][tier], sum / 2); + + sum = lrugen->avg_total[type][tier] + + atomic_long_read(&lrugen->evicted[hist][type][tier]); + if (tier) + sum += lrugen->protected[hist][type][tier - 1]; + WRITE_ONCE(lrugen->avg_total[type][tier], sum / 2); + } + + if (clear) { + atomic_long_set(&lrugen->refaulted[hist][type][tier], 0); + atomic_long_set(&lrugen->evicted[hist][type][tier], 0); + if (tier) + WRITE_ONCE(lrugen->protected[hist][type][tier - 1], 0); + } + } +} + +static bool positive_ctrl_err(struct ctrl_pos *sp, struct ctrl_pos *pv) +{ + /* + * Return true if the PV has a limited number of refaults or a lower + * refaulted/total than the SP. + */ + return pv->refaulted < MIN_LRU_BATCH || + pv->refaulted * (sp->total + MIN_LRU_BATCH) * sp->gain <= + (sp->refaulted + 1) * pv->total * pv->gain; +} + +/****************************************************************************** + * the aging + ******************************************************************************/ + +static int page_update_gen(struct page *page, int gen) +{ + unsigned long old_flags, new_flags; + + VM_BUG_ON(gen >= MAX_NR_GENS); + VM_BUG_ON(!rcu_read_lock_held()); + + do { + new_flags = old_flags = READ_ONCE(page->flags); + + /* for shrink_page_list() */ + if (!(new_flags & LRU_GEN_MASK)) { + new_flags |= BIT(PG_referenced); + continue; + } + + new_flags &= ~LRU_GEN_MASK; + new_flags |= (gen + 1UL) << LRU_GEN_PGOFF; + new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); + } while (new_flags != old_flags && + cmpxchg(&page->flags, old_flags, new_flags) != old_flags); + + return ((old_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; +} + +static int page_inc_gen(struct lruvec *lruvec, struct page *page, bool reclaiming) +{ + unsigned long old_flags, new_flags; + int type = page_is_file_lru(page); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + int new_gen, old_gen = lru_gen_from_seq(lrugen->min_seq[type]); + + do { + new_flags = old_flags = READ_ONCE(page->flags); + VM_BUG_ON_PAGE(!(new_flags & LRU_GEN_MASK), page); + + new_gen = ((new_flags & LRU_GEN_MASK) >> LRU_GEN_PGOFF) - 1; + /* page_update_gen() has promoted this page? */ + if (new_gen >= 0 && new_gen != old_gen) + return new_gen; + + new_gen = (old_gen + 1) % MAX_NR_GENS; + + new_flags &= ~LRU_GEN_MASK; + new_flags |= (new_gen + 1UL) << LRU_GEN_PGOFF; + new_flags &= ~(LRU_REFS_MASK | LRU_REFS_FLAGS); + /* for end_page_writeback() */ + if (reclaiming) + new_flags |= BIT(PG_reclaim); + } while (cmpxchg(&page->flags, old_flags, new_flags) != old_flags); + + lru_gen_update_size(lruvec, page, old_gen, new_gen); + + return new_gen; +} + +static void update_batch_size(struct lru_gen_mm_walk *walk, struct page *page, + int old_gen, int new_gen) +{ + int type = page_is_file_lru(page); + int zone = page_zonenum(page); + int delta = thp_nr_pages(page); + + VM_BUG_ON(old_gen >= MAX_NR_GENS); + VM_BUG_ON(new_gen >= MAX_NR_GENS); + + walk->batched++; + + walk->nr_pages[old_gen][type][zone] -= delta; + walk->nr_pages[new_gen][type][zone] += delta; +} + +static void reset_batch_size(struct lruvec *lruvec, struct lru_gen_mm_walk *walk) +{ + int gen, type, zone; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + walk->batched = 0; + + for_each_gen_type_zone(gen, type, zone) { + enum lru_list lru = type * LRU_INACTIVE_FILE; + int delta = walk->nr_pages[gen][type][zone]; + + if (!delta) + continue; + + walk->nr_pages[gen][type][zone] = 0; + WRITE_ONCE(lrugen->nr_pages[gen][type][zone], + lrugen->nr_pages[gen][type][zone] + delta); + + if (lru_gen_is_active(lruvec, gen)) + lru += LRU_ACTIVE; + __update_lru_size(lruvec, lru, zone, delta); + } +} + +static int should_skip_vma(unsigned long start, unsigned long end, struct mm_walk *walk) +{ + struct address_space *mapping; + struct vm_area_struct *vma = walk->vma; + struct lru_gen_mm_walk *priv = walk->private; + + if (!vma_is_accessible(vma) || is_vm_hugetlb_page(vma) || + (vma->vm_flags & (VM_LOCKED | VM_SPECIAL | VM_SEQ_READ | VM_RAND_READ)) || + vma == get_gate_vma(vma->vm_mm)) + return true; + + if (vma_is_anonymous(vma)) + return !priv->can_swap; + + if (WARN_ON_ONCE(!vma->vm_file || !vma->vm_file->f_mapping)) + return true; + + mapping = vma->vm_file->f_mapping; + if (mapping_unevictable(mapping)) + return true; + + /* check readpage to exclude special mappings like dax, etc. */ + return shmem_mapping(mapping) ? !priv->can_swap : !mapping->a_ops->readpage; +} + +/* + * Some userspace memory allocators map many single-page VMAs. Instead of + * returning back to the PGD table for each of such VMAs, finish an entire PMD + * table to reduce zigzags and improve cache performance. + */ +static bool get_next_vma(struct mm_walk *walk, unsigned long mask, unsigned long size, + unsigned long *start, unsigned long *end) +{ + unsigned long next = round_up(*end, size); + + VM_BUG_ON(mask & size); + VM_BUG_ON(*start >= *end); + VM_BUG_ON((next & mask) != (*start & mask)); + + while (walk->vma) { + if (next >= walk->vma->vm_end) { + walk->vma = walk->vma->vm_next; + continue; + } + + if ((next & mask) != (walk->vma->vm_start & mask)) + return false; + + if (should_skip_vma(walk->vma->vm_start, walk->vma->vm_end, walk)) { + walk->vma = walk->vma->vm_next; + continue; + } + + *start = max(next, walk->vma->vm_start); + next = (next | ~mask) + 1; + /* rounded-up boundaries can wrap to 0 */ + *end = next && next < walk->vma->vm_end ? next : walk->vma->vm_end; + + return true; + } + + return false; +} + +static bool suitable_to_scan(int total, int young) +{ + int n = clamp_t(int, cache_line_size() / sizeof(pte_t), 2, 8); + + /* suitable if the average number of young PTEs per cacheline is >=1 */ + return young * n >= total; +} + +static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + int i; + pte_t *pte; + spinlock_t *ptl; + unsigned long addr; + int total = 0; + int young = 0; + struct lru_gen_mm_walk *priv = walk->private; + struct mem_cgroup *memcg = lruvec_memcg(priv->lruvec); + struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec); + int old_gen, new_gen = lru_gen_from_seq(priv->max_seq); + + VM_BUG_ON(pmd_leaf(*pmd)); + + ptl = pte_lockptr(walk->mm, pmd); + if (!spin_trylock(ptl)) + return false; + + arch_enter_lazy_mmu_mode(); + + pte = pte_offset_map(pmd, start & PMD_MASK); +restart: + for (i = pte_index(start), addr = start; addr != end; i++, addr += PAGE_SIZE) { + struct page *page; + unsigned long pfn = pte_pfn(pte[i]); + + VM_BUG_ON(addr < walk->vma->vm_start || addr >= walk->vma->vm_end); + + total++; + priv->mm_stats[MM_PTE_TOTAL]++; + + if (!pte_present(pte[i]) || is_zero_pfn(pfn)) + continue; + + if (WARN_ON_ONCE(pte_devmap(pte[i]) || pte_special(pte[i]))) + continue; + + if (!pte_young(pte[i])) { + priv->mm_stats[MM_PTE_OLD]++; + continue; + } + + VM_BUG_ON(!pfn_valid(pfn)); + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + continue; + + page = compound_head(pfn_to_page(pfn)); + if (page_to_nid(page) != pgdat->node_id) + continue; + + if (page_memcg_rcu(page) != memcg) + continue; + + if (!ptep_test_and_clear_young(walk->vma, addr, pte + i)) + continue; + + young++; + priv->mm_stats[MM_PTE_YOUNG]++; + + if (pte_dirty(pte[i]) && !PageDirty(page) && + !(PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page))) + set_page_dirty(page); + + old_gen = page_update_gen(page, new_gen); + if (old_gen >= 0 && old_gen != new_gen) + update_batch_size(priv, page, old_gen, new_gen); + } + + if (i < PTRS_PER_PTE && get_next_vma(walk, PMD_MASK, PAGE_SIZE, &start, &end)) + goto restart; + + pte_unmap(pte); + + arch_leave_lazy_mmu_mode(); + spin_unlock(ptl); + + return suitable_to_scan(total, young); +} + +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) +static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma, + struct mm_walk *walk, unsigned long *start) +{ + int i; + pmd_t *pmd; + spinlock_t *ptl; + struct lru_gen_mm_walk *priv = walk->private; + struct mem_cgroup *memcg = lruvec_memcg(priv->lruvec); + struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec); + int old_gen, new_gen = lru_gen_from_seq(priv->max_seq); + + VM_BUG_ON(pud_leaf(*pud)); + + /* try to batch at most 1+MIN_LRU_BATCH+1 entries */ + if (*start == -1) { + *start = next; + return; + } + + i = next == -1 ? 0 : pmd_index(next) - pmd_index(*start); + if (i && i <= MIN_LRU_BATCH) { + __set_bit(i - 1, priv->bitmap); + return; + } + + pmd = pmd_offset(pud, *start); + + ptl = pmd_lockptr(walk->mm, pmd); + if (!spin_trylock(ptl)) + goto done; + + arch_enter_lazy_mmu_mode(); + + do { + struct page *page; + unsigned long pfn = pmd_pfn(pmd[i]); + unsigned long addr = i ? (*start & PMD_MASK) + i * PMD_SIZE : *start; + + VM_BUG_ON(addr < vma->vm_start || addr >= vma->vm_end); + + if (!pmd_present(pmd[i]) || is_huge_zero_pmd(pmd[i])) + goto next; + + if (WARN_ON_ONCE(pmd_devmap(pmd[i]))) + goto next; + + if (!pmd_trans_huge(pmd[i])) { + if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && + get_cap(LRU_GEN_NONLEAF_YOUNG)) + pmdp_test_and_clear_young(vma, addr, pmd + i); + goto next; + } + + VM_BUG_ON(!pfn_valid(pfn)); + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + goto next; + + page = pfn_to_page(pfn); + VM_BUG_ON_PAGE(PageTail(page), page); + if (page_to_nid(page) != pgdat->node_id) + goto next; + + if (page_memcg_rcu(page) != memcg) + goto next; + + if (!pmdp_test_and_clear_young(vma, addr, pmd + i)) + goto next; + + priv->mm_stats[MM_PTE_YOUNG]++; + + if (pmd_dirty(pmd[i]) && !PageDirty(page) && + !(PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page))) + set_page_dirty(page); + + old_gen = page_update_gen(page, new_gen); + if (old_gen >= 0 && old_gen != new_gen) + update_batch_size(priv, page, old_gen, new_gen); +next: + i = i > MIN_LRU_BATCH ? 0 : + find_next_bit(priv->bitmap, MIN_LRU_BATCH, i) + 1; + } while (i <= MIN_LRU_BATCH); + + arch_leave_lazy_mmu_mode(); + spin_unlock(ptl); +done: + *start = -1; + bitmap_zero(priv->bitmap, MIN_LRU_BATCH); +} +#else +static void walk_pmd_range_locked(pud_t *pud, unsigned long next, struct vm_area_struct *vma, + struct mm_walk *walk, unsigned long *start) +{ +} +#endif + +static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + int i; + pmd_t *pmd; + unsigned long next; + unsigned long addr; + struct vm_area_struct *vma; + unsigned long pos = -1; + struct lru_gen_mm_walk *priv = walk->private; + + VM_BUG_ON(pud_leaf(*pud)); + + /* + * Finish an entire PMD in two passes: the first only reaches to PTE + * tables to avoid taking the PMD lock; the second, if necessary, takes + * the PMD lock to clear the accessed bit in PMD entries. + */ + pmd = pmd_offset(pud, start & PUD_MASK); +restart: + /* walk_pte_range() may call get_next_vma() */ + vma = walk->vma; + for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) { + pmd_t val = pmd_read_atomic(pmd + i); + + /* for pmd_read_atomic() */ + barrier(); + + next = pmd_addr_end(addr, end); + + if (!pmd_present(val)) { + priv->mm_stats[MM_PTE_TOTAL]++; + continue; + } + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + if (pmd_trans_huge(val)) { + unsigned long pfn = pmd_pfn(val); + struct pglist_data *pgdat = lruvec_pgdat(priv->lruvec); + + priv->mm_stats[MM_PTE_TOTAL]++; + + if (is_huge_zero_pmd(val)) + continue; + + if (!pmd_young(val)) { + priv->mm_stats[MM_PTE_OLD]++; + continue; + } + + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + continue; + + walk_pmd_range_locked(pud, addr, vma, walk, &pos); + continue; + } +#endif + priv->mm_stats[MM_PMD_TOTAL]++; + +#ifdef CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG + if (get_cap(LRU_GEN_NONLEAF_YOUNG)) { + if (!pmd_young(val)) + continue; + + walk_pmd_range_locked(pud, addr, vma, walk, &pos); + } +#endif + if (!priv->full_scan && !test_bloom_filter(priv->lruvec, priv->max_seq, pmd + i)) + continue; + + priv->mm_stats[MM_PMD_FOUND]++; + + if (!walk_pte_range(&val, addr, next, walk)) + continue; + + priv->mm_stats[MM_PMD_ADDED]++; + + /* carry over to the next generation */ + update_bloom_filter(priv->lruvec, priv->max_seq + 1, pmd + i); + } + + walk_pmd_range_locked(pud, -1, vma, walk, &pos); + + if (i < PTRS_PER_PMD && get_next_vma(walk, PUD_MASK, PMD_SIZE, &start, &end)) + goto restart; +} + +static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, + struct mm_walk *walk) +{ + int i; + pud_t *pud; + unsigned long addr; + unsigned long next; + struct lru_gen_mm_walk *priv = walk->private; + + VM_BUG_ON(p4d_leaf(*p4d)); + + pud = pud_offset(p4d, start & P4D_MASK); +restart: + for (i = pud_index(start), addr = start; addr != end; i++, addr = next) { + pud_t val = READ_ONCE(pud[i]); + + next = pud_addr_end(addr, end); + + if (!pud_present(val) || WARN_ON_ONCE(pud_leaf(val))) + continue; + + walk_pmd_range(&val, addr, next, walk); + + if (priv->batched >= MAX_LRU_BATCH) { + end = (addr | ~PUD_MASK) + 1; + goto done; + } + } + + if (i < PTRS_PER_PUD && get_next_vma(walk, P4D_MASK, PUD_SIZE, &start, &end)) + goto restart; + + end = round_up(end, P4D_SIZE); +done: + /* rounded-up boundaries can wrap to 0 */ + priv->next_addr = end && walk->vma ? max(end, walk->vma->vm_start) : 0; + + return -EAGAIN; +} + +static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_mm_walk *walk) +{ + static const struct mm_walk_ops mm_walk_ops = { + .test_walk = should_skip_vma, + .p4d_entry = walk_pud_range, + }; + + int err; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + + walk->next_addr = FIRST_USER_ADDRESS; + + do { + err = -EBUSY; + + /* page_update_gen() requires stable page_memcg() */ + if (!mem_cgroup_trylock_pages(memcg)) + break; + + /* the caller might be holding the lock for write */ + if (mmap_read_trylock(mm)) { + unsigned long start = walk->next_addr; + unsigned long end = mm->highest_vm_end; + + err = walk_page_range(mm, start, end, &mm_walk_ops, walk); + + mmap_read_unlock(mm); + + if (walk->batched) { + spin_lock_irq(&pgdat->lru_lock); + reset_batch_size(lruvec, walk); + spin_unlock_irq(&pgdat->lru_lock); + } + } + + mem_cgroup_unlock_pages(); + + cond_resched(); + } while (err == -EAGAIN && walk->next_addr && !mm_is_oom_victim(mm)); +} + +static struct lru_gen_mm_walk *alloc_mm_walk(void) +{ + if (current->reclaim_state && current->reclaim_state->mm_walk) + return current->reclaim_state->mm_walk; + + return kzalloc(sizeof(struct lru_gen_mm_walk), + __GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN); +} + +static void free_mm_walk(struct lru_gen_mm_walk *walk) +{ + if (!current->reclaim_state || !current->reclaim_state->mm_walk) + kfree(walk); +} + +static void inc_min_seq(struct lruvec *lruvec) +{ + int type; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON(!seq_is_valid(lruvec)); + + for (type = 0; type < ANON_AND_FILE; type++) { + if (get_nr_gens(lruvec, type) != MAX_NR_GENS) + continue; + + reset_ctrl_pos(lruvec, type, true); + WRITE_ONCE(lrugen->min_seq[type], lrugen->min_seq[type] + 1); + } +} + +static bool try_to_inc_min_seq(struct lruvec *lruvec, bool can_swap) +{ + int gen, type, zone; + bool success = false; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + DEFINE_MIN_SEQ(lruvec); + + VM_BUG_ON(!seq_is_valid(lruvec)); + + for (type = !can_swap; type < ANON_AND_FILE; type++) { + while (min_seq[type] + MIN_NR_GENS <= lrugen->max_seq) { + gen = lru_gen_from_seq(min_seq[type]); + + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + if (!list_empty(&lrugen->lists[gen][type][zone])) + goto next; + } + + min_seq[type]++; + } +next: + ; + } + + /* see the comment on lru_gen_struct */ + if (can_swap) { + min_seq[LRU_GEN_ANON] = min(min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE]); + min_seq[LRU_GEN_FILE] = max(min_seq[LRU_GEN_ANON], lrugen->min_seq[LRU_GEN_FILE]); + } + + for (type = !can_swap; type < ANON_AND_FILE; type++) { + if (min_seq[type] == lrugen->min_seq[type]) + continue; + + reset_ctrl_pos(lruvec, type, true); + WRITE_ONCE(lrugen->min_seq[type], min_seq[type]); + success = true; + } + + return success; +} + +static void inc_max_seq(struct lruvec *lruvec) +{ + int prev, next; + int type, zone; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + + spin_lock_irq(&pgdat->lru_lock); + + VM_BUG_ON(!seq_is_valid(lruvec)); + + inc_min_seq(lruvec); + + /* + * Update the active/inactive LRU sizes for compatibility. Both sides of + * the current max_seq need to be covered, since max_seq+1 can overlap + * with min_seq[LRU_GEN_ANON] if swapping is constrained. And if they do + * overlap, cold/hot inversion happens. This can be solved by moving + * pages from min_seq to min_seq+1 but is omitted for simplicity. + */ + prev = lru_gen_from_seq(lrugen->max_seq - 1); + next = lru_gen_from_seq(lrugen->max_seq + 1); + + for (type = 0; type < ANON_AND_FILE; type++) { + for (zone = 0; zone < MAX_NR_ZONES; zone++) { + enum lru_list lru = type * LRU_INACTIVE_FILE; + long delta = lrugen->nr_pages[prev][type][zone] - + lrugen->nr_pages[next][type][zone]; + + if (!delta) + continue; + + WARN_ON_ONCE(delta != (int)delta); + + __update_lru_size(lruvec, lru, zone, delta); + __update_lru_size(lruvec, lru + LRU_ACTIVE, zone, -delta); + } + } + + for (type = 0; type < ANON_AND_FILE; type++) + reset_ctrl_pos(lruvec, type, false); + + WRITE_ONCE(lrugen->timestamps[next], jiffies); + /* make sure preceding modifications appear */ + smp_store_release(&lrugen->max_seq, lrugen->max_seq + 1); + + spin_unlock_irq(&pgdat->lru_lock); +} + +static bool try_to_inc_max_seq(struct lruvec *lruvec, unsigned long max_seq, + struct scan_control *sc, bool can_swap, bool full_scan) +{ + bool success; + struct lru_gen_mm_walk *walk; + struct mm_struct *mm = NULL; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON(max_seq > READ_ONCE(lrugen->max_seq)); + + /* + * If the hardware doesn't automatically set the accessed bit, fallback + * to lru_gen_look_around(), which only clears the accessed bit in a + * handful of PTEs. Spreading the work out over a period of time usually + * is less efficient, but it avoids bursty page faults. + */ + if (!full_scan && (!arch_has_hw_pte_young() || !get_cap(LRU_GEN_MM_WALK))) { + success = iterate_mm_list_nowalk(lruvec, max_seq); + goto done; + } + + walk = alloc_mm_walk(); + if (!walk) { + success = iterate_mm_list_nowalk(lruvec, max_seq); + goto done; + } + + walk->lruvec = lruvec; + walk->max_seq = max_seq; + walk->can_swap = can_swap; + walk->full_scan = full_scan; + + do { + success = iterate_mm_list(lruvec, walk, &mm); + if (mm) + walk_mm(lruvec, mm, walk); + + cond_resched(); + } while (mm); + + free_mm_walk(walk); +done: + if (!success) { + if (!current_is_kswapd() && !sc->priority) + wait_event_killable(lruvec->mm_state.wait, + max_seq < READ_ONCE(lrugen->max_seq)); + + return max_seq < READ_ONCE(lrugen->max_seq); + } + + VM_BUG_ON(max_seq != READ_ONCE(lrugen->max_seq)); + + inc_max_seq(lruvec); + /* either this sees any waiters or they will see updated max_seq */ + if (wq_has_sleeper(&lruvec->mm_state.wait)) + wake_up_all(&lruvec->mm_state.wait); + + wakeup_flusher_threads(WB_REASON_VMSCAN); + + return true; +} + +static long get_nr_evictable(struct lruvec *lruvec, unsigned long max_seq, + unsigned long *min_seq, bool can_swap, bool *need_aging) +{ + int gen, type, zone; + long old = 0; + long young = 0; + long total = 0; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + for (type = !can_swap; type < ANON_AND_FILE; type++) { + unsigned long seq; + + for (seq = min_seq[type]; seq <= max_seq; seq++) { + long size = 0; + + gen = lru_gen_from_seq(seq); + + for (zone = 0; zone < MAX_NR_ZONES; zone++) + size += READ_ONCE(lrugen->nr_pages[gen][type][zone]); + + total += size; + if (seq == max_seq) + young += size; + if (seq + MIN_NR_GENS == max_seq) + old += size; + } + } + + /* + * The aging and the eviction is a typical producer-consumer model. The + * aging tries to be lazy to reduce the unnecessary overhead. On the + * other hand, the eviction stalls when the number of generations + * reaches MIN_NR_GENS. So ideally, there should be MIN_NR_GENS+1 + * generations, hence the first two if's. + * + * In addition, it's ideal to spread pages out evenly, meaning + * 1/(MIN_NR_GENS+1) of the total number of pages for each generation. A + * reasonable range for this average portion would [1/MIN_NR_GENS, + * 1/(MIN_NR_GENS+2)]. From the consumer's POV, the eviction only cares + * about the lower bound of cold pages, i.e., 1/(MIN_NR_GENS+2), whereas + * from the producer's POV, the aging only cares about the upper bound + * of hot pages, i.e., 1/MIN_NR_GENS. + */ + if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS > max_seq) + *need_aging = true; + else if (min_seq[LRU_GEN_FILE] + MIN_NR_GENS < max_seq) + *need_aging = false; + else if (young * MIN_NR_GENS > total) + *need_aging = true; + else if (old * (MIN_NR_GENS + 2) < total) + *need_aging = true; + else + *need_aging = false; + + return total > 0 ? total : 0; +} + +static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, + unsigned long min_ttl) +{ + bool need_aging; + long nr_to_scan; + int swappiness = get_swappiness(lruvec, sc); + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + if (min_ttl) { + int gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]); + unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); + + if (time_is_after_jiffies(birth + min_ttl)) + return false; + } + + mem_cgroup_calculate_protection(NULL, memcg); + + if (mem_cgroup_below_min(memcg)) + return false; + + nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, swappiness, &need_aging); + if (!nr_to_scan) + return false; + + nr_to_scan >>= sc->priority; + + if (!mem_cgroup_online(memcg)) + nr_to_scan++; + + if (nr_to_scan && need_aging && (!mem_cgroup_below_low(memcg) || sc->memcg_low_reclaim)) + try_to_inc_max_seq(lruvec, max_seq, sc, swappiness, false); + + return true; +} + +/* to protect the working set of the last N jiffies */ +static unsigned long lru_gen_min_ttl __read_mostly; + +static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) +{ + struct mem_cgroup *memcg; + bool success = false; + unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl); + + VM_BUG_ON(!current_is_kswapd()); + + /* + * To reduce the chance of going into the aging path or swapping, which + * can be costly, optimistically skip them unless their corresponding + * flags were cleared in the eviction path. This improves the overall + * performance when multiple memcgs are available. + */ + if (!sc->memcgs_need_aging) { + sc->memcgs_need_aging = true; + sc->memcgs_avoid_swapping = !sc->memcgs_need_swapping; + sc->memcgs_need_swapping = true; + return; + } + + sc->memcgs_need_swapping = true; + sc->memcgs_avoid_swapping = true; + + current->reclaim_state->mm_walk = &pgdat->mm_walk; + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + + if (age_lruvec(lruvec, sc, min_ttl)) + success = true; + + cond_resched(); + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); + + current->reclaim_state->mm_walk = NULL; + + /* + * The main goal is to OOM kill if every generation from all memcgs is + * younger than min_ttl. However, another theoretical possibility is all + * memcgs are either below min or empty. + */ + if (!success && mutex_trylock(&oom_lock)) { + struct oom_control oc = { + .gfp_mask = sc->gfp_mask, + .order = sc->order, + }; + + out_of_memory(&oc); + + mutex_unlock(&oom_lock); + } +} + +/* + * This function exploits spatial locality when shrink_page_list() walks the + * rmap. It scans the adjacent PTEs of a young PTE and promotes hot pages. + * If the scan was done cacheline efficiently, it adds the PMD entry pointing + * to the PTE table to the Bloom filter. This process is a feedback loop from + * the eviction to the aging. + */ +void lru_gen_look_around(struct page_vma_mapped_walk *pvmw) +{ + int i; + pte_t *pte; + unsigned long start; + unsigned long end; + unsigned long addr; + struct page *page; + struct lru_gen_mm_walk *walk; + int young = 0; + unsigned long bitmap[BITS_TO_LONGS(MIN_LRU_BATCH)] = {}; + struct mem_cgroup *memcg = page_memcg(pvmw->page); + struct pglist_data *pgdat = page_pgdat(pvmw->page); + struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat); + DEFINE_MAX_SEQ(lruvec); + int old_gen, new_gen = lru_gen_from_seq(max_seq); + + lockdep_assert_held(pvmw->ptl); + VM_BUG_ON_PAGE(PageLRU(pvmw->page), pvmw->page); + + if (spin_is_contended(pvmw->ptl)) + return; + + start = max(pvmw->address & PMD_MASK, pvmw->vma->vm_start); + end = pmd_addr_end(pvmw->address, pvmw->vma->vm_end); + + if (end - start > MIN_LRU_BATCH * PAGE_SIZE) { + if (pvmw->address - start < MIN_LRU_BATCH * PAGE_SIZE / 2) + end = start + MIN_LRU_BATCH * PAGE_SIZE; + else if (end - pvmw->address < MIN_LRU_BATCH * PAGE_SIZE / 2) + start = end - MIN_LRU_BATCH * PAGE_SIZE; + else { + start = pvmw->address - MIN_LRU_BATCH * PAGE_SIZE / 2; + end = pvmw->address + MIN_LRU_BATCH * PAGE_SIZE / 2; + } + } + + pte = pvmw->pte - (pvmw->address - start) / PAGE_SIZE; + + rcu_read_lock(); + arch_enter_lazy_mmu_mode(); + + for (i = 0, addr = start; addr != end; i++, addr += PAGE_SIZE) { + unsigned long pfn = pte_pfn(pte[i]); + + VM_BUG_ON(addr < pvmw->vma->vm_start || addr >= pvmw->vma->vm_end); + + if (!pte_present(pte[i]) || is_zero_pfn(pfn)) + continue; + + if (WARN_ON_ONCE(pte_devmap(pte[i]) || pte_special(pte[i]))) + continue; + + if (!pte_young(pte[i])) + continue; + + VM_BUG_ON(!pfn_valid(pfn)); + if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat)) + continue; + + page = compound_head(pfn_to_page(pfn)); + if (page_to_nid(page) != pgdat->node_id) + continue; + + if (page_memcg_rcu(page) != memcg) + continue; + + if (!ptep_test_and_clear_young(pvmw->vma, addr, pte + i)) + continue; + + young++; + + if (pte_dirty(pte[i]) && !PageDirty(page) && + !(PageAnon(page) && PageSwapBacked(page) && !PageSwapCache(page))) + set_page_dirty(page); + + old_gen = page_lru_gen(page); + if (old_gen < 0) + SetPageReferenced(page); + else if (old_gen != new_gen) + __set_bit(i, bitmap); + } + + arch_leave_lazy_mmu_mode(); + rcu_read_unlock(); + + /* feedback from rmap walkers to page table walkers */ + if (suitable_to_scan(i, young)) + update_bloom_filter(lruvec, max_seq, pvmw->pmd); + + walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL; + + if (!walk && bitmap_weight(bitmap, MIN_LRU_BATCH) < PAGEVEC_SIZE) { + for_each_set_bit(i, bitmap, MIN_LRU_BATCH) + activate_page(pte_page(pte[i])); + return; + } + + /* page_update_gen() requires stable page_memcg() */ + if (!mem_cgroup_trylock_pages(memcg)) + return; + + if (!walk) { + spin_lock_irq(&pgdat->lru_lock); + new_gen = lru_gen_from_seq(lruvec->lrugen.max_seq); + } + + for_each_set_bit(i, bitmap, MIN_LRU_BATCH) { + page = compound_head(pte_page(pte[i])); + if (page_memcg_rcu(page) != memcg) + continue; + + old_gen = page_update_gen(page, new_gen); + if (old_gen < 0 || old_gen == new_gen) + continue; + + if (walk) + update_batch_size(walk, page, old_gen, new_gen); + else + lru_gen_update_size(lruvec, page, old_gen, new_gen); + } + + if (!walk) + spin_unlock_irq(&pgdat->lru_lock); + + mem_cgroup_unlock_pages(); +} + +/****************************************************************************** + * the eviction + ******************************************************************************/ + +static bool sort_page(struct lruvec *lruvec, struct page *page, int tier_idx) +{ + bool success; + int gen = page_lru_gen(page); + int type = page_is_file_lru(page); + int zone = page_zonenum(page); + int tier = page_lru_tier(page); + int delta = thp_nr_pages(page); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + VM_BUG_ON_PAGE(gen >= MAX_NR_GENS, page); + + if (!page_evictable(page)) { + success = lru_gen_del_page(lruvec, page, true); + VM_BUG_ON_PAGE(!success, page); + SetPageUnevictable(page); + add_page_to_lru_list(page, lruvec); + __count_vm_events(UNEVICTABLE_PGCULLED, delta); + return true; + } + + if (type == LRU_GEN_FILE && PageAnon(page) && PageDirty(page)) { + success = lru_gen_del_page(lruvec, page, true); + VM_BUG_ON_PAGE(!success, page); + SetPageSwapBacked(page); + add_page_to_lru_list_tail(page, lruvec); + return true; + } + + if (gen != lru_gen_from_seq(lrugen->min_seq[type])) { + list_move(&page->lru, &lrugen->lists[gen][type][zone]); + return true; + } + + if (tier > tier_idx) { + int hist = lru_hist_from_seq(lrugen->min_seq[type]); + + gen = page_inc_gen(lruvec, page, false); + list_move_tail(&page->lru, &lrugen->lists[gen][type][zone]); + + WRITE_ONCE(lrugen->protected[hist][type][tier - 1], + lrugen->protected[hist][type][tier - 1] + delta); + __mod_lruvec_state(lruvec, WORKINGSET_ACTIVATE_BASE + type, delta); + return true; + } + + if (PageLocked(page) || PageWriteback(page) || + (type == LRU_GEN_FILE && PageDirty(page))) { + gen = page_inc_gen(lruvec, page, true); + list_move(&page->lru, &lrugen->lists[gen][type][zone]); + return true; + } + + return false; +} + +static bool isolate_page(struct lruvec *lruvec, struct page *page, struct scan_control *sc) +{ + bool success; + + if (!sc->may_unmap && page_mapped(page)) + return false; + + if (!(sc->may_writepage && (sc->gfp_mask & __GFP_IO)) && + (PageDirty(page) || (PageAnon(page) && !PageSwapCache(page)))) + return false; + + if (!get_page_unless_zero(page)) + return false; + + ClearPageLRU(page); + + success = lru_gen_del_page(lruvec, page, true); + VM_BUG_ON_PAGE(!success, page); + + return true; +} + +static int scan_pages(struct lruvec *lruvec, struct scan_control *sc, + int type, int tier, struct list_head *list) +{ + int gen, zone; + enum vm_event_item item; + int sorted = 0; + int scanned = 0; + int isolated = 0; + int remaining = MAX_LRU_BATCH; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + + VM_BUG_ON(!list_empty(list)); + + if (get_nr_gens(lruvec, type) == MIN_NR_GENS) + return 0; + + gen = lru_gen_from_seq(lrugen->min_seq[type]); + + for (zone = sc->reclaim_idx; zone >= 0; zone--) { + LIST_HEAD(moved); + int skipped = 0; + struct list_head *head = &lrugen->lists[gen][type][zone]; + + while (!list_empty(head)) { + struct page *page = lru_to_page(head); + int delta = thp_nr_pages(page); + + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(PageUnevictable(page), page); + VM_BUG_ON_PAGE(PageActive(page), page); + VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page); + VM_BUG_ON_PAGE(page_zonenum(page) != zone, page); + + prefetchw_prev_lru_page(page, head, flags); + + scanned += delta; + + if (sort_page(lruvec, page, tier)) + sorted += delta; + else if (isolate_page(lruvec, page, sc)) { + list_add(&page->lru, list); + isolated += delta; + } else { + list_move(&page->lru, &moved); + skipped += delta; + } + + if (!--remaining || max(isolated, skipped) >= MIN_LRU_BATCH) + break; + } + + if (skipped) { + list_splice(&moved, head); + __count_zid_vm_events(PGSCAN_SKIP, zone, skipped); + } + + if (!remaining || isolated >= MIN_LRU_BATCH) + break; + } + + item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT; + if (!cgroup_reclaim(sc)) { + __count_vm_events(item, isolated); + __count_vm_events(PGREFILL, sorted); + } + __count_memcg_events(memcg, item, isolated); + __count_memcg_events(memcg, PGREFILL, sorted); + __count_vm_events(PGSCAN_ANON + type, isolated); + + /* + * There might not be eligible pages due to reclaim_idx, may_unmap and + * may_writepage. Check the remaining to prevent livelock if there is no + * progress. + */ + return isolated || !remaining ? scanned : 0; +} + +static int get_tier_idx(struct lruvec *lruvec, int type) +{ + int tier; + struct ctrl_pos sp, pv; + + /* + * To leave a margin for fluctuations, use a larger gain factor (1:2). + * This value is chosen because any other tier would have at least twice + * as many refaults as the first tier. + */ + read_ctrl_pos(lruvec, type, 0, 1, &sp); + for (tier = 1; tier < MAX_NR_TIERS; tier++) { + read_ctrl_pos(lruvec, type, tier, 2, &pv); + if (!positive_ctrl_err(&sp, &pv)) + break; + } + + return tier - 1; +} + +static int get_type_to_scan(struct lruvec *lruvec, int swappiness, int *tier_idx) +{ + int type, tier; + struct ctrl_pos sp, pv; + int gain[ANON_AND_FILE] = { swappiness, 200 - swappiness }; + + /* + * Compare the first tier of anon with that of file to determine which + * type to scan. Also need to compare other tiers of the selected type + * with the first tier of the other type to determine the last tier (of + * the selected type) to evict. + */ + read_ctrl_pos(lruvec, LRU_GEN_ANON, 0, gain[LRU_GEN_ANON], &sp); + read_ctrl_pos(lruvec, LRU_GEN_FILE, 0, gain[LRU_GEN_FILE], &pv); + type = positive_ctrl_err(&sp, &pv); + + read_ctrl_pos(lruvec, !type, 0, gain[!type], &sp); + for (tier = 1; tier < MAX_NR_TIERS; tier++) { + read_ctrl_pos(lruvec, type, tier, gain[type], &pv); + if (!positive_ctrl_err(&sp, &pv)) + break; + } + + *tier_idx = tier - 1; + + return type; +} + +static int isolate_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness, + int *type_scanned, struct list_head *list) +{ + int i; + int type; + int scanned; + int tier = -1; + DEFINE_MIN_SEQ(lruvec); + + VM_BUG_ON(!seq_is_valid(lruvec)); + + /* + * Try to make the obvious choice first. When anon and file are both + * available from the same generation, interpret swappiness 1 as file + * first and 200 as anon first. + */ + if (!swappiness) + type = LRU_GEN_FILE; + else if (min_seq[LRU_GEN_ANON] < min_seq[LRU_GEN_FILE]) + type = LRU_GEN_ANON; + else if (swappiness == 1) + type = LRU_GEN_FILE; + else if (swappiness == 200) + type = LRU_GEN_ANON; + else + type = get_type_to_scan(lruvec, swappiness, &tier); + + for (i = !swappiness; i < ANON_AND_FILE; i++) { + if (tier < 0) + tier = get_tier_idx(lruvec, type); + + scanned = scan_pages(lruvec, sc, type, tier, list); + if (scanned) + break; + + type = !type; + tier = -1; + } + + *type_scanned = type; + + return scanned; +} + +static int evict_pages(struct lruvec *lruvec, struct scan_control *sc, int swappiness, + bool *swapped) +{ + int type; + int scanned; + int reclaimed; + LIST_HEAD(list); + struct page *page; + enum vm_event_item item; + struct reclaim_stat stat; + struct lru_gen_mm_walk *walk; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + + spin_lock_irq(&pgdat->lru_lock); + + scanned = isolate_pages(lruvec, sc, swappiness, &type, &list); + + if (try_to_inc_min_seq(lruvec, swappiness)) + scanned++; + + if (get_nr_gens(lruvec, LRU_GEN_FILE) == MIN_NR_GENS) + scanned = 0; + + spin_unlock_irq(&pgdat->lru_lock); + + if (list_empty(&list)) + return scanned; + + reclaimed = shrink_page_list(&list, pgdat, sc, &stat, false); + + /* + * To avoid livelock, don't add rejected pages back to the same lists + * they were isolated from. See lru_gen_add_page(). + */ + list_for_each_entry(page, &list, lru) { + ClearPageReferenced(page); + ClearPageWorkingset(page); + + if (PageReclaim(page) && (PageDirty(page) || PageWriteback(page))) + ClearPageActive(page); + else + SetPageActive(page); + } + + spin_lock_irq(&pgdat->lru_lock); + + move_pages_to_lru(lruvec, &list); + + walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL; + if (walk && walk->batched) + reset_batch_size(lruvec, walk); + + item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT; + if (!cgroup_reclaim(sc)) + __count_vm_events(item, reclaimed); + __count_memcg_events(memcg, item, reclaimed); + __count_vm_events(PGSTEAL_ANON + type, reclaimed); + + spin_unlock_irq(&pgdat->lru_lock); + + mem_cgroup_uncharge_list(&list); + free_unref_page_list(&list); + + sc->nr_reclaimed += reclaimed; + + if (type == LRU_GEN_ANON && swapped) + *swapped = true; + + return scanned; +} + +static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, bool can_swap) +{ + bool need_aging; + long nr_to_scan; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + if (mem_cgroup_below_min(memcg) || + (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim)) + return 0; + + nr_to_scan = get_nr_evictable(lruvec, max_seq, min_seq, can_swap, &need_aging); + if (!nr_to_scan) + return 0; + + /* reset the priority if the target has been met */ + nr_to_scan >>= sc->nr_reclaimed < sc->nr_to_reclaim ? sc->priority : DEF_PRIORITY; + + if (!mem_cgroup_online(memcg)) + nr_to_scan++; + + if (!nr_to_scan) + return 0; + + if (!need_aging) { + sc->memcgs_need_aging = false; + return nr_to_scan; + } + + /* leave the work to lru_gen_age_node() */ + if (current_is_kswapd()) + return 0; + + /* try other memcgs before going to the aging path */ + if (!cgroup_reclaim(sc) && !sc->force_deactivate) { + sc->skipped_deactivate = true; + return 0; + } + + if (try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, false)) + return nr_to_scan; + + return min_seq[LRU_GEN_FILE] + MIN_NR_GENS <= max_seq ? nr_to_scan : 0; +} + +static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +{ + struct blk_plug plug; + long scanned = 0; + bool swapped = false; + unsigned long reclaimed = sc->nr_reclaimed; + struct pglist_data *pgdat = lruvec_pgdat(lruvec); + + lru_add_drain(); + + blk_start_plug(&plug); + + if (current_is_kswapd()) + current->reclaim_state->mm_walk = &pgdat->mm_walk; + + while (true) { + int delta; + int swappiness; + long nr_to_scan; + + if (sc->may_swap) + swappiness = get_swappiness(lruvec, sc); + else if (!cgroup_reclaim(sc) && get_swappiness(lruvec, sc)) + swappiness = 1; + else + swappiness = 0; + + nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness); + if (!nr_to_scan) + break; + + delta = evict_pages(lruvec, sc, swappiness, &swapped); + if (!delta) + break; + + if (sc->memcgs_avoid_swapping && swappiness < 200 && swapped) + break; + + scanned += delta; + if (scanned >= nr_to_scan) { + if (!swapped && sc->nr_reclaimed - reclaimed >= MIN_LRU_BATCH) + sc->memcgs_need_swapping = false; + break; + } + + cond_resched(); + } + + if (current_is_kswapd()) + current->reclaim_state->mm_walk = NULL; + + blk_finish_plug(&plug); +} + +/****************************************************************************** + * state change + ******************************************************************************/ + +static bool __maybe_unused state_is_valid(struct lruvec *lruvec) +{ + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + if (lrugen->enabled) { + enum lru_list lru; + + for_each_evictable_lru(lru) { + if (!list_empty(&lruvec->lists[lru])) + return false; + } + } else { + int gen, type, zone; + + for_each_gen_type_zone(gen, type, zone) { + if (!list_empty(&lrugen->lists[gen][type][zone])) + return false; + + /* unlikely but not a bug when reset_batch_size() is pending */ + VM_WARN_ON(lrugen->nr_pages[gen][type][zone]); + } + } + + return true; +} + +static bool fill_evictable(struct lruvec *lruvec) +{ + enum lru_list lru; + int remaining = MAX_LRU_BATCH; + + for_each_evictable_lru(lru) { + int type = is_file_lru(lru); + bool active = is_active_lru(lru); + struct list_head *head = &lruvec->lists[lru]; + + while (!list_empty(head)) { + bool success; + struct page *page = lru_to_page(head); + + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(PageUnevictable(page), page); + VM_BUG_ON_PAGE(PageActive(page) != active, page); + VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page); + VM_BUG_ON_PAGE(page_lru_gen(page) < MAX_NR_GENS, page); + + prefetchw_prev_lru_page(page, head, flags); + + del_page_from_lru_list(page, lruvec); + success = lru_gen_add_page(lruvec, page, false); + VM_BUG_ON(!success); + + if (!--remaining) + return false; + } + } + + return true; +} + +static bool drain_evictable(struct lruvec *lruvec) +{ + int gen, type, zone; + int remaining = MAX_LRU_BATCH; + + for_each_gen_type_zone(gen, type, zone) { + struct list_head *head = &lruvec->lrugen.lists[gen][type][zone]; + + while (!list_empty(head)) { + bool success; + struct page *page = lru_to_page(head); + + VM_BUG_ON_PAGE(PageTail(page), page); + VM_BUG_ON_PAGE(PageUnevictable(page), page); + VM_BUG_ON_PAGE(PageActive(page), page); + VM_BUG_ON_PAGE(page_is_file_lru(page) != type, page); + VM_BUG_ON_PAGE(page_zonenum(page) != zone, page); + + prefetchw_prev_lru_page(page, head, flags); + + success = lru_gen_del_page(lruvec, page, false); + VM_BUG_ON(!success); + add_page_to_lru_list(page, lruvec); + + if (!--remaining) + return false; + } + } + + return true; +} + +static void lru_gen_change_state(bool enable) +{ + static DEFINE_MUTEX(state_mutex); + + struct mem_cgroup *memcg; + + cgroup_lock(); + cpus_read_lock(); + get_online_mems(); + mutex_lock(&state_mutex); + + if (enable == lru_gen_enabled()) + goto unlock; + + if (enable) + static_branch_enable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]); + else + static_branch_disable_cpuslocked(&lru_gen_caps[LRU_GEN_CORE]); + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + int nid; + + for_each_node(nid) { + struct pglist_data *pgdat = NODE_DATA(nid); + struct lruvec *lruvec = get_lruvec(memcg, nid); + + if (!lruvec) + continue; + + if (!pgdat) { + lruvec->lrugen.enabled = enable; + continue; + } + + spin_lock_irq(&pgdat->lru_lock); + + VM_BUG_ON(!seq_is_valid(lruvec)); + VM_BUG_ON(!state_is_valid(lruvec)); + + lruvec->lrugen.enabled = enable; + + while (!(enable ? fill_evictable(lruvec) : drain_evictable(lruvec))) { + spin_unlock_irq(&pgdat->lru_lock); + cond_resched(); + spin_lock_irq(&pgdat->lru_lock); + } + + spin_unlock_irq(&pgdat->lru_lock); + } + + cond_resched(); + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); +unlock: + mutex_unlock(&state_mutex); + put_online_mems(); + cpus_read_unlock(); + cgroup_unlock(); +} + +/****************************************************************************** + * sysfs interface + ******************************************************************************/ + +static ssize_t show_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", jiffies_to_msecs(READ_ONCE(lru_gen_min_ttl))); +} + +static ssize_t store_min_ttl(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t len) +{ + unsigned int msecs; + + if (kstrtouint(buf, 0, &msecs)) + return -EINVAL; + + WRITE_ONCE(lru_gen_min_ttl, msecs_to_jiffies(msecs)); + + return len; +} + +static struct kobj_attribute lru_gen_min_ttl_attr = __ATTR( + min_ttl_ms, 0644, show_min_ttl, store_min_ttl +); + +static ssize_t show_enable(struct kobject *kobj, struct kobj_attribute *attr, char *buf) +{ + unsigned int caps = 0; + + if (get_cap(LRU_GEN_CORE)) + caps |= BIT(LRU_GEN_CORE); + + if (arch_has_hw_pte_young() && get_cap(LRU_GEN_MM_WALK)) + caps |= BIT(LRU_GEN_MM_WALK); + + if (IS_ENABLED(CONFIG_ARCH_HAS_NONLEAF_PMD_YOUNG) && get_cap(LRU_GEN_NONLEAF_YOUNG)) + caps |= BIT(LRU_GEN_NONLEAF_YOUNG); + + return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps); +} + +static ssize_t store_enable(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t len) +{ + int i; + unsigned int caps; + + if (tolower(*buf) == 'n') + caps = 0; + else if (tolower(*buf) == 'y') + caps = -1; + else if (kstrtouint(buf, 0, &caps)) + return -EINVAL; + + for (i = 0; i < NR_LRU_GEN_CAPS; i++) { + bool enable = caps & BIT(i); + + if (i == LRU_GEN_CORE) + lru_gen_change_state(enable); + else if (enable) + static_branch_enable(&lru_gen_caps[i]); + else + static_branch_disable(&lru_gen_caps[i]); + } + + return len; +} + +static struct kobj_attribute lru_gen_enabled_attr = __ATTR( + enabled, 0644, show_enable, store_enable +); + +static struct attribute *lru_gen_attrs[] = { + &lru_gen_min_ttl_attr.attr, + &lru_gen_enabled_attr.attr, + NULL +}; + +static struct attribute_group lru_gen_attr_group = { + .name = "lru_gen", + .attrs = lru_gen_attrs, +}; + +/****************************************************************************** + * debugfs interface + ******************************************************************************/ + +static void *lru_gen_seq_start(struct seq_file *m, loff_t *pos) +{ + struct mem_cgroup *memcg; + loff_t nr_to_skip = *pos; + + m->private = kvmalloc(PATH_MAX, GFP_KERNEL); + if (!m->private) + return ERR_PTR(-ENOMEM); + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + int nid; + + for_each_node_state(nid, N_MEMORY) { + if (!nr_to_skip--) + return get_lruvec(memcg, nid); + } + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL))); + + return NULL; +} + +static void lru_gen_seq_stop(struct seq_file *m, void *v) +{ + if (!IS_ERR_OR_NULL(v)) + mem_cgroup_iter_break(NULL, lruvec_memcg(v)); + + kvfree(m->private); + m->private = NULL; +} + +static void *lru_gen_seq_next(struct seq_file *m, void *v, loff_t *pos) +{ + int nid = lruvec_pgdat(v)->node_id; + struct mem_cgroup *memcg = lruvec_memcg(v); + + ++*pos; + + nid = next_memory_node(nid); + if (nid == MAX_NUMNODES) { + memcg = mem_cgroup_iter(NULL, memcg, NULL); + if (!memcg) + return NULL; + + nid = first_memory_node; + } + + return get_lruvec(memcg, nid); +} + +static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, + unsigned long max_seq, unsigned long *min_seq, + unsigned long seq) +{ + int i; + int type, tier; + int hist = lru_hist_from_seq(seq); + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + for (tier = 0; tier < MAX_NR_TIERS; tier++) { + seq_printf(m, " %10d", tier); + for (type = 0; type < ANON_AND_FILE; type++) { + unsigned long n[3] = {}; + + if (seq == max_seq) { + n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]); + n[1] = READ_ONCE(lrugen->avg_total[type][tier]); + + seq_printf(m, " %10luR %10luT %10lu ", n[0], n[1], n[2]); + } else if (seq == min_seq[type] || NR_HIST_GENS > 1) { + n[0] = atomic_long_read(&lrugen->refaulted[hist][type][tier]); + n[1] = atomic_long_read(&lrugen->evicted[hist][type][tier]); + if (tier) + n[2] = READ_ONCE(lrugen->protected[hist][type][tier - 1]); + + seq_printf(m, " %10lur %10lue %10lup", n[0], n[1], n[2]); + } else + seq_puts(m, " 0 0 0 "); + } + seq_putc(m, '\n'); + } + + seq_puts(m, " "); + for (i = 0; i < NR_MM_STATS; i++) { + if (seq == max_seq && NR_HIST_GENS == 1) + seq_printf(m, " %10lu%c", READ_ONCE(lruvec->mm_state.stats[hist][i]), + toupper(MM_STAT_CODES[i])); + else if (seq != max_seq && NR_HIST_GENS > 1) + seq_printf(m, " %10lu%c", READ_ONCE(lruvec->mm_state.stats[hist][i]), + MM_STAT_CODES[i]); + else + seq_puts(m, " 0 "); + } + seq_putc(m, '\n'); +} + +static int lru_gen_seq_show(struct seq_file *m, void *v) +{ + unsigned long seq; + bool full = !debugfs_real_fops(m->file)->write; + struct lruvec *lruvec = v; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + int nid = lruvec_pgdat(lruvec)->node_id; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + if (nid == first_memory_node) { + const char *path = memcg ? m->private : ""; + +#ifdef CONFIG_MEMCG + if (memcg) + cgroup_path(memcg->css.cgroup, m->private, PATH_MAX); +#endif + seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path); + } + + seq_printf(m, " node %5d\n", nid); + + if (!full) + seq = min_seq[LRU_GEN_ANON]; + else if (max_seq >= MAX_NR_GENS) + seq = max_seq - MAX_NR_GENS + 1; + else + seq = 0; + + for (; seq <= max_seq; seq++) { + int type, zone; + int gen = lru_gen_from_seq(seq); + unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); + + seq_printf(m, " %10lu %10u", seq, jiffies_to_msecs(jiffies - birth)); + + for (type = 0; type < ANON_AND_FILE; type++) { + long size = 0; + char mark = full && seq < min_seq[type] ? 'x' : ' '; + + for (zone = 0; zone < MAX_NR_ZONES; zone++) + size += READ_ONCE(lrugen->nr_pages[gen][type][zone]); + + seq_printf(m, " %10lu%c", max(size, 0L), mark); + } + + seq_putc(m, '\n'); + + if (full) + lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq); + } + + return 0; +} + +static const struct seq_operations lru_gen_seq_ops = { + .start = lru_gen_seq_start, + .stop = lru_gen_seq_stop, + .next = lru_gen_seq_next, + .show = lru_gen_seq_show, +}; + +static int run_aging(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc, + bool can_swap, bool full_scan) +{ + DEFINE_MAX_SEQ(lruvec); + + if (seq == max_seq) + try_to_inc_max_seq(lruvec, max_seq, sc, can_swap, full_scan); + + return seq > max_seq ? -EINVAL : 0; +} + +static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_control *sc, + int swappiness, unsigned long nr_to_reclaim) +{ + struct blk_plug plug; + int err = -EINTR; + DEFINE_MAX_SEQ(lruvec); + + if (seq + MIN_NR_GENS > max_seq) + return -EINVAL; + + sc->nr_reclaimed = 0; + + blk_start_plug(&plug); + + while (!signal_pending(current)) { + DEFINE_MIN_SEQ(lruvec); + + if (seq < min_seq[!swappiness] || sc->nr_reclaimed >= nr_to_reclaim || + !evict_pages(lruvec, sc, swappiness, NULL)) { + err = 0; + break; + } + + cond_resched(); + } + + blk_finish_plug(&plug); + + return err; +} + +static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, + struct scan_control *sc, int swappiness, unsigned long opt) +{ + struct lruvec *lruvec; + int err = -EINVAL; + struct mem_cgroup *memcg = NULL; + + if (!mem_cgroup_disabled()) { + rcu_read_lock(); + memcg = mem_cgroup_from_id(memcg_id); +#ifdef CONFIG_MEMCG + if (memcg && !css_tryget(&memcg->css)) + memcg = NULL; +#endif + rcu_read_unlock(); + + if (!memcg) + goto done; + } + if (memcg_id != mem_cgroup_id(memcg)) + goto done; + + if (nid < 0 || nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) + goto done; + + lruvec = get_lruvec(memcg, nid); + + if (swappiness < 0) + swappiness = get_swappiness(lruvec, sc); + else if (swappiness > 200) + goto done; + + switch (cmd) { + case '+': + err = run_aging(lruvec, seq, sc, swappiness, opt); + break; + case '-': + err = run_eviction(lruvec, seq, sc, swappiness, opt); + break; + } +done: + mem_cgroup_put(memcg); + + return err; +} + +static ssize_t lru_gen_seq_write(struct file *file, const char __user *src, + size_t len, loff_t *pos) +{ + void *buf; + char *cur, *next; + unsigned int flags; + int err = 0; + struct scan_control sc = { + .may_writepage = true, + .may_unmap = true, + .may_swap = true, + .reclaim_idx = MAX_NR_ZONES - 1, + .gfp_mask = GFP_KERNEL, + }; + + buf = kvmalloc(len + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, src, len)) { + kvfree(buf); + return -EFAULT; + } + + next = buf; + next[len] = '\0'; + + sc.reclaim_state.mm_walk = alloc_mm_walk(); + if (!sc.reclaim_state.mm_walk) { + kvfree(buf); + return -ENOMEM; + } + + set_task_reclaim_state(current, &sc.reclaim_state); + flags = memalloc_noreclaim_save(); + + while ((cur = strsep(&next, ",;\n"))) { + int n; + int end; + char cmd; + unsigned int memcg_id; + unsigned int nid; + unsigned long seq; + unsigned int swappiness = -1; + unsigned long opt = -1; + + cur = skip_spaces(cur); + if (!*cur) + continue; + + n = sscanf(cur, "%c %u %u %lu %n %u %n %lu %n", &cmd, &memcg_id, &nid, + &seq, &end, &swappiness, &end, &opt, &end); + if (n < 4 || cur[end]) { + err = -EINVAL; + break; + } + + err = run_cmd(cmd, memcg_id, nid, seq, &sc, swappiness, opt); + if (err) + break; + } + + memalloc_noreclaim_restore(flags); + set_task_reclaim_state(current, NULL); + + free_mm_walk(sc.reclaim_state.mm_walk); + kvfree(buf); + + return err ? : len; +} + +static int lru_gen_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &lru_gen_seq_ops); +} + +static const struct file_operations lru_gen_rw_fops = { + .open = lru_gen_seq_open, + .read = seq_read, + .write = lru_gen_seq_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +static const struct file_operations lru_gen_ro_fops = { + .open = lru_gen_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +/****************************************************************************** + * initialization + ******************************************************************************/ + +void lru_gen_init_lruvec(struct lruvec *lruvec) +{ + int i; + int gen, type, zone; + struct lru_gen_struct *lrugen = &lruvec->lrugen; + + lrugen->max_seq = MIN_NR_GENS + 1; + lrugen->enabled = lru_gen_enabled(); + + for (i = 0; i <= MIN_NR_GENS + 1; i++) + lrugen->timestamps[i] = jiffies; + + for_each_gen_type_zone(gen, type, zone) + INIT_LIST_HEAD(&lrugen->lists[gen][type][zone]); + + lruvec->mm_state.seq = MIN_NR_GENS; + init_waitqueue_head(&lruvec->mm_state.wait); +} + +#ifdef CONFIG_MEMCG +void lru_gen_init_memcg(struct mem_cgroup *memcg) +{ + INIT_LIST_HEAD(&memcg->mm_list.fifo); + spin_lock_init(&memcg->mm_list.lock); +} + +void lru_gen_exit_memcg(struct mem_cgroup *memcg) +{ + int i; + int nid; + + for_each_node(nid) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + + VM_BUG_ON(memchr_inv(lruvec->lrugen.nr_pages, 0, + sizeof(lruvec->lrugen.nr_pages))); + + for (i = 0; i < NR_BLOOM_FILTERS; i++) { + bitmap_free(lruvec->mm_state.filters[i]); + lruvec->mm_state.filters[i] = NULL; + } + } +} +#endif + +static int __init init_lru_gen(void) +{ + BUILD_BUG_ON(MIN_NR_GENS + 1 >= MAX_NR_GENS); + BUILD_BUG_ON(BIT(LRU_GEN_WIDTH) <= MAX_NR_GENS); + BUILD_BUG_ON(sizeof(MM_STAT_CODES) != NR_MM_STATS + 1); + + if (sysfs_create_group(mm_kobj, &lru_gen_attr_group)) + pr_err("lru_gen: failed to create sysfs group\n"); + + debugfs_create_file("lru_gen", 0644, NULL, NULL, &lru_gen_rw_fops); + debugfs_create_file("lru_gen_full", 0444, NULL, NULL, &lru_gen_ro_fops); + + return 0; +}; +late_initcall(init_lru_gen); + +#else + +static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc) +{ +} + +static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +{ +} + +#endif /* CONFIG_LRU_GEN */ + +static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) +{ + unsigned long nr[NR_LRU_LISTS]; + unsigned long targets[NR_LRU_LISTS]; + unsigned long nr_to_scan; + enum lru_list lru; + unsigned long nr_reclaimed = 0; + unsigned long nr_to_reclaim = sc->nr_to_reclaim; + struct blk_plug plug; + bool scan_adjusted; + + if (lru_gen_enabled()) { + lru_gen_shrink_lruvec(lruvec, sc); + return; + } + + get_scan_count(lruvec, sc, nr); + + /* Record the original scan target for proportional adjustments later */ + memcpy(targets, nr, sizeof(nr)); + + /* + * Global reclaiming within direct reclaim at DEF_PRIORITY is a normal + * event that can occur when there is little memory pressure e.g. + * multiple streaming readers/writers. Hence, we do not abort scanning + * when the requested number of pages are reclaimed when scanning at + * DEF_PRIORITY on the assumption that the fact we are direct + * reclaiming implies that kswapd is not keeping up and it is best to + * do a batch of work at once. For memcg reclaim one check is made to + * abort proportional reclaim if either the file or anon lru has already + * dropped to zero at the first pass. + */ + scan_adjusted = (!cgroup_reclaim(sc) && !current_is_kswapd() && + sc->priority == DEF_PRIORITY); + + blk_start_plug(&plug); + while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || + nr[LRU_INACTIVE_FILE]) { + unsigned long nr_anon, nr_file, percentage; + unsigned long nr_scanned; + + for_each_evictable_lru(lru) { + if (nr[lru]) { + nr_to_scan = min(nr[lru], SWAP_CLUSTER_MAX); + nr[lru] -= nr_to_scan; + + nr_reclaimed += shrink_list(lru, nr_to_scan, + lruvec, sc); + } + } + + cond_resched(); + + if (nr_reclaimed < nr_to_reclaim || scan_adjusted) + continue; + + /* + * For kswapd and memcg, reclaim at least the number of pages + * requested. Ensure that the anon and file LRUs are scanned + * proportionally what was requested by get_scan_count(). We + * stop reclaiming one LRU and reduce the amount scanning + * proportional to the original scan target. + */ + nr_file = nr[LRU_INACTIVE_FILE] + nr[LRU_ACTIVE_FILE]; + nr_anon = nr[LRU_INACTIVE_ANON] + nr[LRU_ACTIVE_ANON]; + + /* + * It's just vindictive to attack the larger once the smaller + * has gone to zero. And given the way we stop scanning the + * smaller below, this makes sure that we only make one nudge + * towards proportionality once we've got nr_to_reclaim. + */ + if (!nr_file || !nr_anon) + break; + + if (nr_file > nr_anon) { + unsigned long scan_target = targets[LRU_INACTIVE_ANON] + + targets[LRU_ACTIVE_ANON] + 1; + lru = LRU_BASE; + percentage = nr_anon * 100 / scan_target; + } else { + unsigned long scan_target = targets[LRU_INACTIVE_FILE] + + targets[LRU_ACTIVE_FILE] + 1; + lru = LRU_FILE; + percentage = nr_file * 100 / scan_target; + } + + /* Stop scanning the smaller of the LRU */ + nr[lru] = 0; + nr[lru + LRU_ACTIVE] = 0; + + /* + * Recalculate the other LRU scan count based on its original + * scan target and the percentage scanning already complete + */ + lru = (lru == LRU_FILE) ? LRU_BASE : LRU_FILE; + nr_scanned = targets[lru] - nr[lru]; + nr[lru] = targets[lru] * (100 - percentage) / 100; + nr[lru] -= min(nr[lru], nr_scanned); + + lru += LRU_ACTIVE; + nr_scanned = targets[lru] - nr[lru]; + nr[lru] = targets[lru] * (100 - percentage) / 100; + nr[lru] -= min(nr[lru], nr_scanned); + + scan_adjusted = true; + } + blk_finish_plug(&plug); + sc->nr_reclaimed += nr_reclaimed; + + /* + * Even if we did not try to evict anon pages at all, we want to + * rebalance the anon lru active/inactive ratio. + */ + if (total_swap_pages && inactive_is_low(lruvec, LRU_INACTIVE_ANON)) + shrink_active_list(SWAP_CLUSTER_MAX, lruvec, + sc, LRU_ACTIVE_ANON); +} + +/* Use reclaim/compaction for costly allocs or under memory pressure */ +static bool in_reclaim_compaction(struct scan_control *sc) +{ + if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && + (sc->order > PAGE_ALLOC_COSTLY_ORDER || + sc->priority < DEF_PRIORITY - 2)) + return true; + + return false; +} + +/* + * Reclaim/compaction is used for high-order allocation requests. It reclaims + * order-0 pages before compacting the zone. should_continue_reclaim() returns + * true if more pages should be reclaimed such that when the page allocator + * calls try_to_compact_pages() that it will have enough free pages to succeed. + * It will give up earlier than that if there is difficulty reclaiming pages. + */ +static inline bool should_continue_reclaim(struct pglist_data *pgdat, + unsigned long nr_reclaimed, + struct scan_control *sc) +{ + unsigned long pages_for_compaction; + unsigned long inactive_lru_pages; + int z; + + /* If not in reclaim/compaction mode, stop */ + if (!in_reclaim_compaction(sc)) + return false; + + /* + * Stop if we failed to reclaim any pages from the last SWAP_CLUSTER_MAX + * number of pages that were scanned. This will return to the caller + * with the risk reclaim/compaction and the resulting allocation attempt + * fails. In the past we have tried harder for __GFP_RETRY_MAYFAIL + * allocations through requiring that the full LRU list has been scanned + * first, by assuming that zero delta of sc->nr_scanned means full LRU + * scan, but that approximation was wrong, and there were corner cases * where always a non-zero amount of pages were scanned. */ if (!nr_reclaimed) @@ -2748,7 +5495,6 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) unsigned long nr_reclaimed, nr_scanned; struct lruvec *target_lruvec; bool reclaimable = false; - unsigned long file; target_lruvec = mem_cgroup_lruvec(sc->target_mem_cgroup, pgdat); @@ -2758,93 +5504,7 @@ again: nr_reclaimed = sc->nr_reclaimed; nr_scanned = sc->nr_scanned; - /* - * Determine the scan balance between anon and file LRUs. - */ - spin_lock_irq(&pgdat->lru_lock); - sc->anon_cost = target_lruvec->anon_cost; - sc->file_cost = target_lruvec->file_cost; - spin_unlock_irq(&pgdat->lru_lock); - - /* - * Target desirable inactive:active list ratios for the anon - * and file LRU lists. - */ - if (!sc->force_deactivate) { - unsigned long refaults; - - refaults = lruvec_page_state(target_lruvec, - WORKINGSET_ACTIVATE_ANON); - if (refaults != target_lruvec->refaults[0] || - inactive_is_low(target_lruvec, LRU_INACTIVE_ANON)) - sc->may_deactivate |= DEACTIVATE_ANON; - else - sc->may_deactivate &= ~DEACTIVATE_ANON; - - /* - * When refaults are being observed, it means a new - * workingset is being established. Deactivate to get - * rid of any stale active pages quickly. - */ - refaults = lruvec_page_state(target_lruvec, - WORKINGSET_ACTIVATE_FILE); - if (refaults != target_lruvec->refaults[1] || - inactive_is_low(target_lruvec, LRU_INACTIVE_FILE)) - sc->may_deactivate |= DEACTIVATE_FILE; - else - sc->may_deactivate &= ~DEACTIVATE_FILE; - } else - sc->may_deactivate = DEACTIVATE_ANON | DEACTIVATE_FILE; - - /* - * If we have plenty of inactive file pages that aren't - * thrashing, try to reclaim those first before touching - * anonymous pages. - */ - file = lruvec_page_state(target_lruvec, NR_INACTIVE_FILE); - if (file >> sc->priority && !(sc->may_deactivate & DEACTIVATE_FILE)) - sc->cache_trim_mode = 1; - else - sc->cache_trim_mode = 0; - - /* - * Prevent the reclaimer from falling into the cache trap: as - * cache pages start out inactive, every cache fault will tip - * the scan balance towards the file LRU. And as the file LRU - * shrinks, so does the window for rotation from references. - * This means we have a runaway feedback loop where a tiny - * thrashing file LRU becomes infinitely more attractive than - * anon pages. Try to detect this based on file LRU size. - */ - if (!cgroup_reclaim(sc)) { - unsigned long total_high_wmark = 0; - unsigned long free, anon; - int z; - - free = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES); - file = node_page_state(pgdat, NR_ACTIVE_FILE) + - node_page_state(pgdat, NR_INACTIVE_FILE); - - for (z = 0; z < MAX_NR_ZONES; z++) { - struct zone *zone = &pgdat->node_zones[z]; - if (!managed_zone(zone)) - continue; - - total_high_wmark += high_wmark_pages(zone); - } - - /* - * Consider anon: if that's low too, this isn't a - * runaway file reclaim problem, but rather just - * extreme pressure. Reclaim as per usual then. - */ - anon = node_page_state(pgdat, NR_INACTIVE_ANON); - - sc->file_is_tiny = - file + free <= total_high_wmark && - !(sc->may_deactivate & DEACTIVATE_ANON) && - anon >> sc->priority; - } + prepare_scan_count(pgdat, sc); shrink_node_memcgs(pgdat, sc); @@ -3064,6 +5724,9 @@ static void snapshot_refaults(struct mem_cgroup *target_memcg, pg_data_t *pgdat) struct lruvec *target_lruvec; unsigned long refaults; + if (lru_gen_enabled()) + return; + target_lruvec = mem_cgroup_lruvec(target_memcg, pgdat); refaults = lruvec_page_state(target_lruvec, WORKINGSET_ACTIVATE_ANON); target_lruvec->refaults[0] = refaults; @@ -3439,6 +6102,11 @@ static void age_active_anon(struct pglist_data *pgdat, struct mem_cgroup *memcg; struct lruvec *lruvec; + if (lru_gen_enabled()) { + lru_gen_age_node(pgdat, sc); + return; + } + if (!total_swap_pages) return; @@ -4423,12 +7091,9 @@ void check_move_unevictable_pages(struct pagevec *pvec) continue; if (page_evictable(page)) { - enum lru_list lru = page_lru_base_type(page); - - VM_BUG_ON_PAGE(PageActive(page), page); + del_page_from_lru_list(page, lruvec); ClearPageUnevictable(page); - del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); - add_page_to_lru_list(page, lruvec, lru); + add_page_to_lru_list(page, lruvec); pgrescued += nr_pages; } } diff --git a/mm/workingset.c b/mm/workingset.c index 975a4d2dd02eeb064a440e13cdce1cf1ee6a6c55..25eaab8811927073b348e2e917fcef66590bc89b 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -185,7 +185,6 @@ static unsigned int bucket_order __read_mostly; static void *pack_shadow(int memcgid, pg_data_t *pgdat, unsigned long eviction, bool workingset) { - eviction >>= bucket_order; eviction &= EVICTION_MASK; eviction = (eviction << MEM_CGROUP_ID_SHIFT) | memcgid; eviction = (eviction << NODES_SHIFT) | pgdat->node_id; @@ -210,10 +209,116 @@ static void unpack_shadow(void *shadow, int *memcgidp, pg_data_t **pgdat, *memcgidp = memcgid; *pgdat = NODE_DATA(nid); - *evictionp = entry << bucket_order; + *evictionp = entry; *workingsetp = workingset; } +#ifdef CONFIG_LRU_GEN + +static int page_lru_refs(struct page *page) +{ + unsigned long flags = READ_ONCE(page->flags); + + BUILD_BUG_ON(LRU_GEN_WIDTH + LRU_REFS_WIDTH > BITS_PER_LONG - EVICTION_SHIFT); + + /* see the comment on MAX_NR_TIERS */ + return flags & BIT(PG_workingset) ? (flags & LRU_REFS_MASK) >> LRU_REFS_PGOFF : 0; +} + +static void *lru_gen_eviction(struct page *page) +{ + int hist, tier; + unsigned long token; + unsigned long min_seq; + struct lruvec *lruvec; + struct lru_gen_struct *lrugen; + int type = page_is_file_lru(page); + int refs = page_lru_refs(page); + int delta = thp_nr_pages(page); + bool workingset = PageWorkingset(page); + struct mem_cgroup *memcg = page_memcg(page); + struct pglist_data *pgdat = page_pgdat(page); + + lruvec = mem_cgroup_lruvec(memcg, pgdat); + lrugen = &lruvec->lrugen; + min_seq = READ_ONCE(lrugen->min_seq[type]); + token = (min_seq << LRU_REFS_WIDTH) | refs; + + hist = lru_hist_from_seq(min_seq); + tier = lru_tier_from_refs(refs + workingset); + atomic_long_add(delta, &lrugen->evicted[hist][type][tier]); + + return pack_shadow(mem_cgroup_id(memcg), pgdat, token, workingset); +} + +static void lru_gen_refault(struct page *page, void *shadow) +{ + int hist, tier, refs; + int memcg_id; + bool workingset; + unsigned long token; + unsigned long min_seq; + struct lruvec *lruvec; + struct lru_gen_struct *lrugen; + struct mem_cgroup *memcg; + struct pglist_data *pgdat; + int type = page_is_file_lru(page); + int delta = thp_nr_pages(page); + + unpack_shadow(shadow, &memcg_id, &pgdat, &token, &workingset); + + refs = token & (BIT(LRU_REFS_WIDTH) - 1); + if (refs && !workingset) + return; + + if (page_pgdat(page) != pgdat) + return; + + rcu_read_lock(); + memcg = page_memcg_rcu(page); + if (mem_cgroup_id(memcg) != memcg_id) + goto unlock; + + token >>= LRU_REFS_WIDTH; + lruvec = mem_cgroup_lruvec(memcg, pgdat); + lrugen = &lruvec->lrugen; + min_seq = READ_ONCE(lrugen->min_seq[type]); + if (token != (min_seq & (EVICTION_MASK >> LRU_REFS_WIDTH))) + goto unlock; + + hist = lru_hist_from_seq(min_seq); + tier = lru_tier_from_refs(refs + workingset); + atomic_long_add(delta, &lrugen->refaulted[hist][type][tier]); + mod_lruvec_state(lruvec, WORKINGSET_REFAULT_BASE + type, delta); + + /* + * Count the following two cases as stalls: + * 1. For pages accessed through page tables, hotter pages pushed out + * hot pages which refaulted immediately. + * 2. For pages accessed through file descriptors, numbers of accesses + * might have been beyond the limit. + */ + if (lru_gen_in_fault() || refs + workingset == BIT(LRU_REFS_WIDTH)) { + SetPageWorkingset(page); + mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + type, delta); + } +unlock: + rcu_read_unlock(); +} + +#else + +static void *lru_gen_eviction(struct page *page) +{ + return NULL; +} + +static void lru_gen_refault(struct page *page, void *shadow) +{ +} + +#endif /* CONFIG_LRU_GEN */ + /** * workingset_age_nonresident - age non-resident entries as LRU ages * @lruvec: the lruvec that was aged @@ -262,11 +367,15 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg) VM_BUG_ON_PAGE(page_count(page), page); VM_BUG_ON_PAGE(!PageLocked(page), page); + if (lru_gen_enabled()) + return lru_gen_eviction(page); + lruvec = mem_cgroup_lruvec(target_memcg, pgdat); workingset_age_nonresident(lruvec, thp_nr_pages(page)); /* XXX: target_memcg can be NULL, go through lruvec */ memcgid = mem_cgroup_id(lruvec_memcg(lruvec)); eviction = atomic_long_read(&lruvec->nonresident_age); + eviction >>= bucket_order; return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page)); } @@ -294,7 +403,13 @@ void workingset_refault(struct page *page, void *shadow) bool workingset; int memcgid; + if (lru_gen_enabled()) { + lru_gen_refault(page, shadow); + return; + } + unpack_shadow(shadow, &memcgid, &pgdat, &eviction, &workingset); + eviction <<= bucket_order; rcu_read_lock(); /* diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 933e596f9af22fd7602abe4a3465c90980c42604..6fce68d224b2fed42ce9f8b0694f500244ff4373 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1835,10 +1835,11 @@ static inline void zs_pool_dec_isolated(struct zs_pool *pool) VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0); atomic_long_dec(&pool->isolated_pages); /* - * There's no possibility of racing, since wait_for_isolated_drain() - * checks the isolated count under &class->lock after enqueuing - * on migration_wait. + * Checking pool->destroying must happen after atomic_long_dec() + * for pool->isolated_pages above. Paired with the smp_mb() in + * zs_unregister_migration(). */ + smp_mb__after_atomic(); if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying) wake_up_all(&pool->migration_wait); } diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 15bbfaf943fd12dea78bf8eda8f024a49005e6ef..d12c9a8a9953ef90ab95ecb5cc4a9f64ffa10095 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -120,9 +120,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head) } vlan_vid_del(real_dev, vlan->vlan_proto, vlan_id); - - /* Get rid of the vlan's reference to real_dev */ - dev_put(real_dev); } int vlan_check_real_dev(struct net_device *real_dev, @@ -184,9 +181,6 @@ int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) if (err) goto out_unregister_netdev; - /* Account for reference in struct vlan_dev_priv */ - dev_hold(real_dev); - vlan_stacked_transfer_operstate(real_dev, dev, vlan); linkwatch_fire_event(dev); /* _MUST_ call rfc2863_policy() */ diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index ec8408d1638fbdf2742a3c07eaeb8ac0511883f5..86a1c99025ea06b51ea0420fcee68236e158a996 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -606,6 +606,9 @@ static int vlan_dev_init(struct net_device *dev) if (!vlan->vlan_pcpu_stats) return -ENOMEM; + /* Get vlan's reference to real_dev */ + dev_hold(real_dev); + return 0; } @@ -813,6 +816,9 @@ static void vlan_dev_free(struct net_device *dev) free_percpu(vlan->vlan_pcpu_stats); vlan->vlan_pcpu_stats = NULL; + + /* Get rid of the vlan's reference to real_dev */ + dev_put(vlan->real_dev); } void vlan_setup(struct net_device *dev) diff --git a/net/9p/client.c b/net/9p/client.c index eb42bbb72f523e2f1931d772206176d58c8c245e..bf6ed00d7c37da5accdb677b455ae2bb4867f167 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -538,6 +538,8 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) kfree(ename); } else { err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode); + if (err) + goto out_err; err = -ecode; p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); diff --git a/net/9p/mod.c b/net/9p/mod.c index 5126566850bd07a21a4dbc55df0c925b2d91f5e6..94cd2d132fd7fa9796b20e4407ec69c6e0348daf 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -189,4 +189,5 @@ MODULE_AUTHOR("Latchesar Ionkov "); MODULE_AUTHOR("Eric Van Hensbergen "); MODULE_AUTHOR("Ron Minnich "); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); MODULE_DESCRIPTION("Plan 9 Resource Sharing Support (9P2000)"); diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 3ec1a51a6944ea0d322b7c0b3cccb8b8cc616f05..432ac5a16f2e04243aa88eac3ab309558ef28313 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -304,9 +304,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) ref = priv->rings[i].intf->ref[j]; gnttab_end_foreign_access(ref, 0, 0); } - free_pages((unsigned long)priv->rings[i].data.in, - priv->rings[i].intf->ring_order - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(priv->rings[i].data.in, + 1UL << (priv->rings[i].intf->ring_order + + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); free_page((unsigned long)priv->rings[i].intf); @@ -345,8 +345,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (ret < 0) goto out; ring->ref = ret; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - order - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + bytes = alloc_pages_exact(1UL << (order + XEN_PAGE_SHIFT), + GFP_KERNEL | __GFP_ZERO); if (!bytes) { ret = -ENOMEM; goto out; @@ -377,9 +377,7 @@ out: if (bytes) { for (i--; i >= 0; i--) gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); - free_pages((unsigned long)bytes, - ring->intf->ring_order - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(bytes, 1UL << (order + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(ring->ref, 0, 0); free_page((unsigned long)ring->intf); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 269ee89d2c2bed67fe1c7f7de7c1265b931daa92..9e0eef7fe9add8a6c97d9fa3923e47d3aa6b7a84 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -77,6 +77,7 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; + struct sock *sk; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; @@ -85,11 +86,22 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { - s->ax25_dev = NULL; + sk = s->sk; + if (!sk) { + spin_unlock_bh(&ax25_list_lock); + s->ax25_dev = NULL; + ax25_disconnect(s, ENETUNREACH); + spin_lock_bh(&ax25_list_lock); + goto again; + } + sock_hold(sk); spin_unlock_bh(&ax25_list_lock); + lock_sock(sk); + s->ax25_dev = NULL; + release_sock(sk); ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); - + sock_put(sk); /* The entry could have been deleted from the * list meanwhile and thus the next pointer is * no longer valid. Play it safe and restart @@ -534,7 +546,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, ax25_cb *ax25; struct net_device *dev; char devname[IFNAMSIZ]; - unsigned long opt; + unsigned int opt; int res = 0; if (level != SOL_AX25) @@ -566,7 +578,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T1: - if (opt < 1 || opt > ULONG_MAX / HZ) { + if (opt < 1 || opt > UINT_MAX / HZ) { res = -EINVAL; break; } @@ -575,7 +587,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T2: - if (opt < 1 || opt > ULONG_MAX / HZ) { + if (opt < 1 || opt > UINT_MAX / HZ) { res = -EINVAL; break; } @@ -591,7 +603,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_T3: - if (opt < 1 || opt > ULONG_MAX / HZ) { + if (opt < 1 || opt > UINT_MAX / HZ) { res = -EINVAL; break; } @@ -599,7 +611,7 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case AX25_IDLE: - if (opt > ULONG_MAX / (60 * HZ)) { + if (opt > UINT_MAX / (60 * HZ)) { res = -EINVAL; break; } diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ba0027d1f2dfd81e4a95df742ba50d414d556d3e..ee9cead7654502cf5f9b3db1de7870c88babd46d 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -1561,10 +1561,14 @@ int batadv_bla_init(struct batadv_priv *bat_priv) return 0; bat_priv->bla.claim_hash = batadv_hash_new(128); - bat_priv->bla.backbone_hash = batadv_hash_new(32); + if (!bat_priv->bla.claim_hash) + return -ENOMEM; - if (!bat_priv->bla.claim_hash || !bat_priv->bla.backbone_hash) + bat_priv->bla.backbone_hash = batadv_hash_new(32); + if (!bat_priv->bla.backbone_hash) { + batadv_hash_destroy(bat_priv->bla.claim_hash); return -ENOMEM; + } batadv_hash_set_lock_class(bat_priv->bla.claim_hash, &batadv_claim_hash_lock_class_key); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 33904595fc56a1e512edd5f217ff602f5a9744fe..fe0898a9b4e82963652da8a32615ea2030e620f9 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -151,22 +151,25 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) struct net *net = dev_net(net_dev); struct net_device *parent_dev; struct net *parent_net; + int iflink; bool ret; /* check if this is a batman-adv mesh interface */ if (batadv_softif_is_valid(net_dev)) return true; - /* no more parents..stop recursion */ - if (dev_get_iflink(net_dev) == 0 || - dev_get_iflink(net_dev) == net_dev->ifindex) + iflink = dev_get_iflink(net_dev); + if (iflink == 0) return false; parent_net = batadv_getlink_net(net_dev, net); + /* iflink to itself, most likely physical device */ + if (net == parent_net && iflink == net_dev->ifindex) + return false; + /* recurse over the parent device */ - parent_dev = __dev_get_by_index((struct net *)parent_net, - dev_get_iflink(net_dev)); + parent_dev = __dev_get_by_index((struct net *)parent_net, iflink); /* if we got a NULL parent_dev there is something broken.. */ if (!parent_dev) { pr_err("Cannot find parent device\n"); @@ -216,14 +219,15 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) struct net_device *real_netdev = NULL; struct net *real_net; struct net *net; - int ifindex; + int iflink; ASSERT_RTNL(); if (!netdev) return NULL; - if (netdev->ifindex == dev_get_iflink(netdev)) { + iflink = dev_get_iflink(netdev); + if (iflink == 0) { dev_hold(netdev); return netdev; } @@ -233,9 +237,16 @@ static struct net_device *batadv_get_real_netdevice(struct net_device *netdev) goto out; net = dev_net(hard_iface->soft_iface); - ifindex = dev_get_iflink(netdev); real_net = batadv_getlink_net(netdev, net); - real_netdev = dev_get_by_index(real_net, ifindex); + + /* iflink to itself, most likely physical device */ + if (net == real_net && netdev->ifindex == iflink) { + real_netdev = netdev; + dev_hold(real_netdev); + goto out; + } + + real_netdev = dev_get_by_index(real_net, iflink); out: if (hard_iface) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 70fee9b42e25f55be746b2a08877c03dd24d1e2d..9f267b190779f3b5804514693c331fded1396ba5 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -196,29 +196,41 @@ int batadv_mesh_init(struct net_device *soft_iface) bat_priv->gw.generation = 0; - ret = batadv_v_mesh_init(bat_priv); - if (ret < 0) - goto err; - ret = batadv_originator_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_orig; + } ret = batadv_tt_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_tt; + } + + ret = batadv_v_mesh_init(bat_priv); + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_v; + } ret = batadv_bla_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_bla; + } ret = batadv_dat_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_dat; + } ret = batadv_nc_mesh_init(bat_priv); - if (ret < 0) - goto err; + if (ret < 0) { + atomic_set(&bat_priv->mesh_state, BATADV_MESH_DEACTIVATING); + goto err_nc; + } batadv_gw_init(bat_priv); batadv_mcast_init(bat_priv); @@ -228,8 +240,20 @@ int batadv_mesh_init(struct net_device *soft_iface) return 0; -err: - batadv_mesh_free(soft_iface); +err_nc: + batadv_dat_free(bat_priv); +err_dat: + batadv_bla_free(bat_priv); +err_bla: + batadv_v_mesh_free(bat_priv); +err_v: + batadv_tt_free(bat_priv); +err_tt: + batadv_originator_free(bat_priv); +err_orig: + batadv_purge_outstanding_packets(bat_priv, NULL); + atomic_set(&bat_priv->mesh_state, BATADV_MESH_INACTIVE); + return ret; } diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index 9af99c39b9fd90c88d2b8efc48da0142fb9ca3db..139894ca788b95f5e5af57123d318625a1cbb7a5 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1373,6 +1373,7 @@ batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: The multicast packet to check * @orig: an originator to be set to forward the skb to + * @is_routable: stores whether the destination is routable * * Return: the forwarding mode as enum batadv_forw_mode and in case of * BATADV_FORW_SINGLE set the orig to the single originator the skb @@ -1380,17 +1381,16 @@ batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv, */ enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **orig) + struct batadv_orig_node **orig, int *is_routable) { int ret, tt_count, ip_count, unsnoop_count, total_count; bool is_unsnoopable = false; unsigned int mcast_fanout; struct ethhdr *ethhdr; - int is_routable = 0; int rtr_count = 0; ret = batadv_mcast_forw_mode_check(bat_priv, skb, &is_unsnoopable, - &is_routable); + is_routable); if (ret == -ENOMEM) return BATADV_FORW_NONE; else if (ret < 0) @@ -1403,7 +1403,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, ip_count = batadv_mcast_forw_want_all_ip_count(bat_priv, ethhdr); unsnoop_count = !is_unsnoopable ? 0 : atomic_read(&bat_priv->mcast.num_want_all_unsnoopables); - rtr_count = batadv_mcast_forw_rtr_count(bat_priv, is_routable); + rtr_count = batadv_mcast_forw_rtr_count(bat_priv, *is_routable); total_count = tt_count + ip_count + unsnoop_count + rtr_count; @@ -1723,6 +1723,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, * @bat_priv: the bat priv with all the soft interface information * @skb: the multicast packet to transmit * @vid: the vlan identifier + * @is_routable: stores whether the destination is routable * * Sends copies of a frame with multicast destination to any node that signaled * interest in it, that is either via the translation table or the according @@ -1735,7 +1736,7 @@ batadv_mcast_forw_want_rtr(struct batadv_priv *bat_priv, * is neither IPv4 nor IPv6. NET_XMIT_SUCCESS otherwise. */ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid) + unsigned short vid, int is_routable) { int ret; @@ -1751,12 +1752,16 @@ int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, return ret; } + if (!is_routable) + goto skip_mc_router; + ret = batadv_mcast_forw_want_rtr(bat_priv, skb, vid); if (ret != NET_XMIT_SUCCESS) { kfree_skb(skb); return ret; } +skip_mc_router: consume_skb(skb); return ret; } diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h index 3e114bc5ca3bb84b09041aa5b4f522d4388c8a3d..1e787b522e69c67b277cb534679b665447c3324b 100644 --- a/net/batman-adv/multicast.h +++ b/net/batman-adv/multicast.h @@ -44,7 +44,8 @@ enum batadv_forw_mode { enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **mcast_single_orig); + struct batadv_orig_node **mcast_single_orig, + int *is_routable); int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, struct sk_buff *skb, @@ -52,7 +53,7 @@ int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid); + unsigned short vid, int is_routable); void batadv_mcast_init(struct batadv_priv *bat_priv); @@ -71,7 +72,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node); static inline enum batadv_forw_mode batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb, - struct batadv_orig_node **mcast_single_orig) + struct batadv_orig_node **mcast_single_orig, + int *is_routable) { return BATADV_FORW_ALL; } @@ -88,7 +90,7 @@ batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv, static inline int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb, - unsigned short vid) + unsigned short vid, int is_routable) { kfree_skb(skb); return NET_XMIT_DROP; diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index c7a55647b520e87d560bdcb29a644eef30f2855c..121459704b0693e7597e90c7389034f66a2d13ea 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -1361,21 +1361,21 @@ static const struct genl_small_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_TP_METER, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_tp_meter_start, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_TP_METER_CANCEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_tp_meter_cancel, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_GET_ROUTING_ALGOS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_algo_dump, }, { @@ -1390,68 +1390,68 @@ static const struct genl_small_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_tt_local_dump, }, { .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_tt_global_dump, }, { .cmd = BATADV_CMD_GET_ORIGINATORS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_orig_dump, }, { .cmd = BATADV_CMD_GET_NEIGHBORS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_hardif_neigh_dump, }, { .cmd = BATADV_CMD_GET_GATEWAYS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_gw_dump, }, { .cmd = BATADV_CMD_GET_BLA_CLAIM, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_bla_claim_dump, }, { .cmd = BATADV_CMD_GET_BLA_BACKBONE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_bla_backbone_dump, }, { .cmd = BATADV_CMD_GET_DAT_CACHE, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_dat_cache_dump, }, { .cmd = BATADV_CMD_GET_MCAST_FLAGS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .dumpit = batadv_mcast_flags_dump, }, { .cmd = BATADV_CMD_SET_MESH, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_mesh, .internal_flags = BATADV_FLAG_NEED_MESH, }, { .cmd = BATADV_CMD_SET_HARDIF, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_hardif, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_HARDIF, @@ -1467,7 +1467,7 @@ static const struct genl_small_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_SET_VLAN, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, .doit = batadv_netlink_set_vlan, .internal_flags = BATADV_FLAG_NEED_MESH | BATADV_FLAG_NEED_VLAN, diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 61ddd6d709a0e4376d409cf4c20e95378ae48164..35b3e03c07774b1095b290938c49a224a0c9f34e 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -155,8 +155,10 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv) &batadv_nc_coding_hash_lock_class_key); bat_priv->nc.decoding_hash = batadv_hash_new(128); - if (!bat_priv->nc.decoding_hash) + if (!bat_priv->nc.decoding_hash) { + batadv_hash_destroy(bat_priv->nc.coding_hash); goto err; + } batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, &batadv_nc_decoding_hash_lock_class_key); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 82e7ca886605a1541274f8d93c52782affeac0f5..7496047b318a412e51d33849bd22f4d59357b5c2 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -200,6 +200,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, int gw_mode; enum batadv_forw_mode forw_mode = BATADV_FORW_SINGLE; struct batadv_orig_node *mcast_single_orig = NULL; + int mcast_is_routable = 0; int network_offset = ETH_HLEN; __be16 proto; @@ -302,7 +303,8 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb, send: if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) { forw_mode = batadv_mcast_forw_mode(bat_priv, skb, - &mcast_single_orig); + &mcast_single_orig, + &mcast_is_routable); if (forw_mode == BATADV_FORW_NONE) goto dropped; @@ -367,7 +369,8 @@ send: ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid, mcast_single_orig); } else if (forw_mode == BATADV_FORW_SOME) { - ret = batadv_mcast_forw_send(bat_priv, skb, vid); + ret = batadv_mcast_forw_send(bat_priv, skb, vid, + mcast_is_routable); } else { if (batadv_dat_snoop_outgoing_arp_request(bat_priv, skb)) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index a510f7f86a7dca68330a12ac76dce237978ff581..de946ea8f13c8c96d5153c62b68e480c2bbcade6 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -4405,8 +4405,10 @@ int batadv_tt_init(struct batadv_priv *bat_priv) return ret; ret = batadv_tt_global_init(bat_priv); - if (ret < 0) + if (ret < 0) { + batadv_tt_local_table_free(bat_priv); return ret; + } batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1, batadv_tt_tvlv_unicast_handler_v1, diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index 0a2d78e811cf5bbea7d735bc345a1ddc9a46cbf8..83eb84e8e688f3bcab98c99758ee99ae7f85dc46 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -501,9 +501,7 @@ static int __init cmtp_init(void) { BT_INFO("CMTP (CAPI Emulation) ver %s", VERSION); - cmtp_init_sockets(); - - return 0; + return cmtp_init_sockets(); } static void __exit cmtp_exit(void) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2ad66f64879f1863e3f441b288deb82e673c5310..2e7998bad133bc45ca5e88c2b5f9d079c5a9ac4e 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3810,6 +3810,7 @@ int hci_register_dev(struct hci_dev *hdev) return id; err_wqueue: + debugfs_remove_recursive(hdev->debugfs); destroy_workqueue(hdev->workqueue); destroy_workqueue(hdev->req_workqueue); err: diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 9f52145bb7b7639e5c221778188126f8143ef870..72b4127360c7f24f8036471c7ff3d3538fbb02c5 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -5661,7 +5661,13 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) struct hci_ev_le_advertising_info *ev = ptr; s8 rssi; - if (ev->length <= HCI_MAX_AD_LENGTH) { + if (ptr > (void *)skb_tail_pointer(skb) - sizeof(*ev)) { + bt_dev_err(hdev, "Malicious advertising data."); + break; + } + + if (ev->length <= HCI_MAX_AD_LENGTH && + ev->data + ev->length <= skb_tail_pointer(skb)) { rssi = ev->data[ev->length]; process_adv_report(hdev, ev->evt_type, &ev->bdaddr, ev->bdaddr_type, NULL, 0, rssi, diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1a94ed2f8a4f8ebe72a706f01f9472b67b3e92e6..d965b7c66bd628b760e28d51ad441deb4a6a8f0b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -2118,7 +2118,7 @@ int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance) /* Set duration per instance since controller is responsible for * scheduling it. */ - if (adv_instance && adv_instance->duration) { + if (adv_instance && adv_instance->timeout) { u16 duration = adv_instance->timeout * MSEC_PER_SEC; /* Time = N * 10 ms */ diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index c99d65ef13b1e208141dd11822a40c235a99f2f0..d2c67852059921bfd188eee1a4a79736f9415323 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -161,7 +161,11 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) break; } - if (chan->psm && bdaddr_type_is_le(chan->src_type)) + /* Use L2CAP_MODE_LE_FLOWCTL (CoC) in case of LE address and + * L2CAP_MODE_EXT_FLOWCTL (ECRED) has not been set. + */ + if (chan->psm && bdaddr_type_is_le(chan->src_type) && + chan->mode != L2CAP_MODE_EXT_FLOWCTL) chan->mode = L2CAP_MODE_LE_FLOWCTL; chan->state = BT_BOUND; @@ -172,6 +176,21 @@ done: return err; } +static void l2cap_sock_init_pid(struct sock *sk) +{ + struct l2cap_chan *chan = l2cap_pi(sk)->chan; + + /* Only L2CAP_MODE_EXT_FLOWCTL ever need to access the PID in order to + * group the channels being requested. + */ + if (chan->mode != L2CAP_MODE_EXT_FLOWCTL) + return; + + spin_lock(&sk->sk_peer_lock); + sk->sk_peer_pid = get_pid(task_tgid(current)); + spin_unlock(&sk->sk_peer_lock); +} + static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { @@ -240,9 +259,15 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; } - if (chan->psm && bdaddr_type_is_le(chan->src_type) && !chan->mode) + /* Use L2CAP_MODE_LE_FLOWCTL (CoC) in case of LE address and + * L2CAP_MODE_EXT_FLOWCTL (ECRED) has not been set. + */ + if (chan->psm && bdaddr_type_is_le(chan->src_type) && + chan->mode != L2CAP_MODE_EXT_FLOWCTL) chan->mode = L2CAP_MODE_LE_FLOWCTL; + l2cap_sock_init_pid(sk); + err = l2cap_chan_connect(chan, la.l2_psm, __le16_to_cpu(la.l2_cid), &la.l2_bdaddr, la.l2_bdaddr_type); if (err) @@ -298,6 +323,8 @@ static int l2cap_sock_listen(struct socket *sock, int backlog) goto done; } + l2cap_sock_init_pid(sk); + sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; @@ -876,6 +903,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, struct l2cap_conn *conn; int len, err = 0; u32 opt; + u16 mtu; + u8 mode; BT_DBG("sk %p", sk); @@ -1058,16 +1087,16 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u16))) { + if (copy_from_sockptr(&mtu, optval, sizeof(u16))) { err = -EFAULT; break; } if (chan->mode == L2CAP_MODE_EXT_FLOWCTL && sk->sk_state == BT_CONNECTED) - err = l2cap_chan_reconfigure(chan, opt); + err = l2cap_chan_reconfigure(chan, mtu); else - chan->imtu = opt; + chan->imtu = mtu; break; @@ -1089,14 +1118,14 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, break; } - if (copy_from_sockptr(&opt, optval, sizeof(u8))) { + if (copy_from_sockptr(&mode, optval, sizeof(u8))) { err = -EFAULT; break; } - BT_DBG("opt %u", opt); + BT_DBG("mode %u", mode); - err = l2cap_set_mode(chan, opt); + err = l2cap_set_mode(chan, mode); if (err) break; @@ -1508,6 +1537,9 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan) { struct sock *sk = chan->data; + if (!sk) + return; + l2cap_sock_kill(sk); } @@ -1516,6 +1548,9 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err) struct sock *sk = chan->data; struct sock *parent; + if (!sk) + return; + BT_DBG("chan %p state %s", chan, state_to_string(chan->state)); /* This callback can be called both for server (BT_LISTEN) @@ -1707,8 +1742,10 @@ static void l2cap_sock_destruct(struct sock *sk) { BT_DBG("sk %p", sk); - if (l2cap_pi(sk)->chan) + if (l2cap_pi(sk)->chan) { + l2cap_pi(sk)->chan->data = NULL; l2cap_chan_put(l2cap_pi(sk)->chan); + } if (l2cap_pi(sk)->rx_busy_skb) { kfree_skb(l2cap_pi(sk)->rx_busy_skb); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 7c24a9acbc4592b47a468e19c024e1c22b8c3ded..2f2b8ddc4dd5de311445e5bc5674f33fdc882bc0 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -134,6 +134,7 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon) return NULL; spin_lock_init(&conn->lock); + INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); hcon->sco_data = conn; conn->hcon = hcon; @@ -197,11 +198,11 @@ static void sco_conn_del(struct hci_conn *hcon, int err) sco_chan_del(sk, err); bh_unlock_sock(sk); sock_put(sk); - - /* Ensure no more work items will run before freeing conn. */ - cancel_delayed_work_sync(&conn->timeout_work); } + /* Ensure no more work items will run before freeing conn. */ + cancel_delayed_work_sync(&conn->timeout_work); + hcon->sco_data = NULL; kfree(conn); } @@ -214,8 +215,6 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, sco_pi(sk)->conn = conn; conn->sk = sk; - INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); - if (parent) bt_accept_enqueue(parent, sk, true); } @@ -281,7 +280,8 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk) return err; } -static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len) +static int sco_send_frame(struct sock *sk, void *buf, int len, + unsigned int msg_flags) { struct sco_conn *conn = sco_pi(sk)->conn; struct sk_buff *skb; @@ -293,15 +293,11 @@ static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len) BT_DBG("sk %p len %d", sk, len); - skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err); + skb = bt_skb_send_alloc(sk, len, msg_flags & MSG_DONTWAIT, &err); if (!skb) return err; - if (memcpy_from_msg(skb_put(skb, len), msg, len)) { - kfree_skb(skb); - return -EFAULT; - } - + memcpy(skb_put(skb, len), buf, len); hci_send_sco(conn->hcon, skb); return len; @@ -726,6 +722,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; + void *buf; int err; BT_DBG("sock %p, sk %p", sock, sk); @@ -737,14 +734,24 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg, if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (memcpy_from_msg(buf, msg, len)) { + kfree(buf); + return -EFAULT; + } + lock_sock(sk); if (sk->sk_state == BT_CONNECTED) - err = sco_send_frame(sk, msg, len); + err = sco_send_frame(sk, buf, len, msg->msg_flags); else err = -ENOTCONN; release_sock(sk); + kfree(buf); return err; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 72d424a5a14291119a37477a05b280c078f9e1bb..eb684f31fd6988b9aaf8863dadef96fb8736aba1 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -481,6 +481,12 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) __skb->gso_segs = skb_shinfo(skb)->gso_segs; } +static struct proto bpf_dummy_proto = { + .name = "bpf_dummy", + .owner = THIS_MODULE, + .obj_size = sizeof(struct sock), +}; + int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { @@ -525,20 +531,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, break; } - sk = kzalloc(sizeof(struct sock), GFP_USER); + sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1); if (!sk) { kfree(data); kfree(ctx); return -ENOMEM; } - sock_net_set(sk, net); sock_init_data(NULL, sk); skb = build_skb(data, 0); if (!skb) { kfree(data); kfree(ctx); - kfree(sk); + sk_free(sk); return -ENOMEM; } skb->sk = sk; @@ -611,8 +616,7 @@ out: if (dev && dev != net->loopback_dev) dev_put(dev); kfree_skb(skb); - bpf_sk_storage_free(sk); - kfree(sk); + sk_free(sk); kfree(ctx); return ret; } diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index 51a941b56ec3aab03d8151b4ce1fa169139f05cb..4dfcd0d7ff3f7920fbcb44f8b8fba960f13c7f22 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -134,3 +134,4 @@ static void __exit fini_umh(void) module_init(load_umh); module_exit(fini_umh); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 8edfb98ae1d583665f5396b5110a3875e0518561..68c0d0f928908e44ade334aa9931279190be8a13 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -743,6 +743,9 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) mtu = nf_bridge->frag_max_size; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); @@ -760,8 +763,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; - nf_bridge_update_protocol(skb); - data = this_cpu_ptr(&brnf_frag_data_storage); if (skb_vlan_tag_present(skb)) { @@ -789,8 +790,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; - nf_bridge_update_protocol(skb); - data = this_cpu_ptr(&brnf_frag_data_storage); data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 73f71c22f4c03c2e2eb6e2c2961eb3f78d4d660a..31b00ba5dcc84d402970d13f6fe5416879c6de69 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1590,7 +1590,8 @@ static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) } return numvls * nla_total_size(sizeof(struct bridge_vlan_xstats)) + - nla_total_size(sizeof(struct br_mcast_stats)) + + nla_total_size_64bit(sizeof(struct br_mcast_stats)) + + (p ? nla_total_size_64bit(sizeof(p->stp_xstats)) : 0) + nla_total_size(0); } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 5e5726048a1af35a4128595dcc29efb98c2b29a8..2b88b17cc8b25bd3929309fd6223429c1650e782 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -931,9 +931,7 @@ static inline unsigned long br_multicast_lmqt(const struct net_bridge *br) static inline unsigned long br_multicast_gmi(const struct net_bridge *br) { - /* use the RFC default of 2 for QRV */ - return 2 * br->multicast_query_interval + - br->multicast_query_response_interval; + return br->multicast_membership_interval; } #else static inline int br_multicast_rcv(struct net_bridge *br, diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 08c77418c687b75e1ca303922314b010397296fc..852f4b54e8811a275ca3df6d261c5a9c481b6db9 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -543,10 +543,10 @@ static bool __allowed_ingress(const struct net_bridge *br, if (!br_opt_get(br, BROPT_VLAN_STATS_ENABLED)) { if (*state == BR_STATE_FORWARDING) { *state = br_vlan_get_pvid_state(vg); - return br_vlan_state_allowed(*state, true); - } else { - return true; + if (!br_vlan_state_allowed(*state, true)) + goto drop; } + return true; } } v = br_vlan_find(vg, *vid); @@ -1873,7 +1873,8 @@ static int br_vlan_rtm_dump(struct sk_buff *skb, struct netlink_callback *cb) goto out_err; } err = br_vlan_dump_dev(dev, skb, cb, dump_flags); - if (err && err != -EMSGSIZE) + /* if the dump completed without an error we return 0 here */ + if (err != -EMSGSIZE) goto out_err; } else { for_each_netdev_rcu(net, dev) { diff --git a/net/can/isotp.c b/net/can/isotp.c index 5fc28f190677bee615a60a6bbb85341b5251b291..d0581dc6a65fd5565969f084e95fe126e17f947b 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -119,9 +120,9 @@ enum { }; struct tpcon { - int idx; - int len; - u8 state; + unsigned int idx; + unsigned int len; + u32 state; u8 bs; u8 sn; u8 ll_dl; @@ -145,6 +146,7 @@ struct isotp_sock { struct tpcon rx, tx; struct list_head notifier; wait_queue_head_t wait; + spinlock_t rx_lock; /* protect single thread state machine */ }; static LIST_HEAD(isotp_notifier_list); @@ -615,11 +617,17 @@ static void isotp_rcv(struct sk_buff *skb, void *data) n_pci_type = cf->data[ae] & 0xF0; + /* Make sure the state changes and data structures stay consistent at + * CAN frame reception time. This locking is not needed in real world + * use cases but the inconsistency can be triggered with syzkaller. + */ + spin_lock(&so->rx_lock); + if (so->opt.flags & CAN_ISOTP_HALF_DUPLEX) { /* check rx/tx path half duplex expectations */ if ((so->tx.state != ISOTP_IDLE && n_pci_type != N_PCI_FC) || (so->rx.state != ISOTP_IDLE && n_pci_type == N_PCI_FC)) - return; + goto out_unlock; } switch (n_pci_type) { @@ -668,6 +676,9 @@ static void isotp_rcv(struct sk_buff *skb, void *data) isotp_rcv_cf(sk, cf, ae, skb); break; } + +out_unlock: + spin_unlock(&so->rx_lock); } static void isotp_fill_dataframe(struct canfd_frame *cf, struct isotp_sock *so, @@ -846,6 +857,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct isotp_sock *so = isotp_sk(sk); + u32 old_state = so->tx.state; struct sk_buff *skb; struct net_device *dev; struct canfd_frame *cf; @@ -858,46 +870,61 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) return -EADDRNOTAVAIL; /* we do not support multiple buffers - for now */ - if (so->tx.state != ISOTP_IDLE || wq_has_sleeper(&so->wait)) { - if (msg->msg_flags & MSG_DONTWAIT) - return -EAGAIN; + if (cmpxchg(&so->tx.state, ISOTP_IDLE, ISOTP_SENDING) != ISOTP_IDLE || + wq_has_sleeper(&so->wait)) { + if (msg->msg_flags & MSG_DONTWAIT) { + err = -EAGAIN; + goto err_out; + } /* wait for complete transmission of current pdu */ - wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + err = wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + if (err) + goto err_out; } - if (!size || size > MAX_MSG_LENGTH) - return -EINVAL; + if (!size || size > MAX_MSG_LENGTH) { + err = -EINVAL; + goto err_out_drop; + } + + /* take care of a potential SF_DL ESC offset for TX_DL > 8 */ + off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0; + + /* does the given data fit into a single frame for SF_BROADCAST? */ + if ((so->opt.flags & CAN_ISOTP_SF_BROADCAST) && + (size > so->tx.ll_dl - SF_PCI_SZ4 - ae - off)) { + err = -EINVAL; + goto err_out_drop; + } err = memcpy_from_msg(so->tx.buf, msg, size); if (err < 0) - return err; + goto err_out_drop; dev = dev_get_by_index(sock_net(sk), so->ifindex); - if (!dev) - return -ENXIO; + if (!dev) { + err = -ENXIO; + goto err_out_drop; + } skb = sock_alloc_send_skb(sk, so->ll.mtu + sizeof(struct can_skb_priv), msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) { dev_put(dev); - return err; + goto err_out_drop; } can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; - so->tx.state = ISOTP_SENDING; so->tx.len = size; so->tx.idx = 0; cf = (struct canfd_frame *)skb->data; skb_put_zero(skb, so->ll.mtu); - /* take care of a potential SF_DL ESC offset for TX_DL > 8 */ - off = (so->tx.ll_dl > CAN_MAX_DLEN) ? 1 : 0; - /* check for single frame transmission depending on TX_DL */ if (size <= so->tx.ll_dl - SF_PCI_SZ4 - ae - off) { /* The message size generally fits into a SingleFrame - good. @@ -947,15 +974,28 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (err) { pr_notice_once("can-isotp: %s: can_send_ret %d\n", __func__, err); - return err; + goto err_out_drop; } if (wait_tx_done) { /* wait for complete transmission of current pdu */ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE); + + if (sk->sk_err) + return -sk->sk_err; } return size; + +err_out_drop: + /* drop this PDU and unlock a potential wait queue */ + old_state = ISOTP_IDLE; +err_out: + so->tx.state = old_state; + if (so->tx.state == ISOTP_IDLE) + wake_up_interruptible(&so->wait); + + return err; } static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, @@ -1024,7 +1064,7 @@ static int isotp_release(struct socket *sock) lock_sock(sk); /* remove current filters & unregister */ - if (so->bound) { + if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) { if (so->ifindex) { struct net_device *dev; @@ -1064,15 +1104,12 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) struct net_device *dev; int err = 0; int notify_enetdown = 0; + int do_rx_reg = 1; if (len < ISOTP_MIN_NAMELEN) return -EINVAL; - if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) - return -EADDRNOTAVAIL; - - if ((addr->can_addr.tp.rx_id | addr->can_addr.tp.tx_id) & - (CAN_ERR_FLAG | CAN_RTR_FLAG)) + if (addr->can_addr.tp.tx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) return -EADDRNOTAVAIL; if (!addr->can_ifindex) @@ -1080,6 +1117,23 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) lock_sock(sk); + /* do not register frame reception for functional addressing */ + if (so->opt.flags & CAN_ISOTP_SF_BROADCAST) + do_rx_reg = 0; + + /* do not validate rx address for functional addressing */ + if (do_rx_reg) { + if (addr->can_addr.tp.rx_id == addr->can_addr.tp.tx_id) { + err = -EADDRNOTAVAIL; + goto out; + } + + if (addr->can_addr.tp.rx_id & (CAN_ERR_FLAG | CAN_RTR_FLAG)) { + err = -EADDRNOTAVAIL; + goto out; + } + } + if (so->bound && addr->can_ifindex == so->ifindex && addr->can_addr.tp.rx_id == so->rxid && addr->can_addr.tp.tx_id == so->txid) @@ -1105,13 +1159,14 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len) ifindex = dev->ifindex; - can_rx_register(net, dev, addr->can_addr.tp.rx_id, - SINGLE_MASK(addr->can_addr.tp.rx_id), isotp_rcv, sk, - "isotp", sk); + if (do_rx_reg) + can_rx_register(net, dev, addr->can_addr.tp.rx_id, + SINGLE_MASK(addr->can_addr.tp.rx_id), + isotp_rcv, sk, "isotp", sk); dev_put(dev); - if (so->bound) { + if (so->bound && do_rx_reg) { /* unregister old filter */ if (so->ifindex) { dev = dev_get_by_index(net, so->ifindex); @@ -1160,16 +1215,13 @@ static int isotp_getname(struct socket *sock, struct sockaddr *uaddr, int peer) return ISOTP_MIN_NAMELEN; } -static int isotp_setsockopt(struct socket *sock, int level, int optname, +static int isotp_setsockopt_locked(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct isotp_sock *so = isotp_sk(sk); int ret = 0; - if (level != SOL_CAN_ISOTP) - return -EINVAL; - if (so->bound) return -EISCONN; @@ -1244,6 +1296,22 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname, return ret; } +static int isotp_setsockopt(struct socket *sock, int level, int optname, + sockptr_t optval, unsigned int optlen) + +{ + struct sock *sk = sock->sk; + int ret; + + if (level != SOL_CAN_ISOTP) + return -EINVAL; + + lock_sock(sk); + ret = isotp_setsockopt_locked(sock, level, optname, optval, optlen); + release_sock(sk); + return ret; +} + static int isotp_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -1311,7 +1379,7 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg, case NETDEV_UNREGISTER: lock_sock(sk); /* remove current filters & unregister */ - if (so->bound) + if (so->bound && (!(so->opt.flags & CAN_ISOTP_SF_BROADCAST))) can_rx_unregister(dev_net(dev), dev, so->rxid, SINGLE_MASK(so->rxid), isotp_rcv, sk); @@ -1388,6 +1456,7 @@ static int isotp_init(struct sock *sk) so->txtimer.function = isotp_tx_timer_handler; init_waitqueue_head(&so->wait); + spin_lock_init(&so->rx_lock); spin_lock(&isotp_notifier_lock); list_add_tail(&so->notifier, &isotp_notifier_list); diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h index 12369b604ce95a5cd014da13a71f376de1280e80..cea712fb2a9e026f8a4a0eaadd4c04a0f74eccaf 100644 --- a/net/can/j1939/j1939-priv.h +++ b/net/can/j1939/j1939-priv.h @@ -326,6 +326,7 @@ int j1939_session_activate(struct j1939_session *session); void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec); void j1939_session_timers_cancel(struct j1939_session *session); +#define J1939_MIN_TP_PACKET_SIZE 9 #define J1939_MAX_TP_PACKET_SIZE (7 * 0xff) #define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff) diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 6884d18f919c74c2df226a1bbcaf2c29fcbeb05c..ca75d1b8f415c070b9789605dcbbdee63cc1ddea 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -75,6 +75,13 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data) skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX; /* set default message type */ skcb->addr.type = J1939_TP; + + if (!j1939_address_is_valid(skcb->addr.sa)) { + netdev_err_once(priv->ndev, "%s: sa is broadcast address, ignoring!\n", + __func__); + goto done; + } + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) { /* Type 1: with destination address */ skcb->addr.da = skcb->addr.pgn; @@ -249,11 +256,14 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) struct j1939_priv *priv, *priv_new; int ret; - priv = j1939_priv_get_by_ndev(ndev); + spin_lock(&j1939_netdev_lock); + priv = j1939_priv_get_by_ndev_locked(ndev); if (priv) { kref_get(&priv->rx_kref); + spin_unlock(&j1939_netdev_lock); return priv; } + spin_unlock(&j1939_netdev_lock); priv = j1939_priv_create(ndev); if (!priv) @@ -269,10 +279,10 @@ struct j1939_priv *j1939_netdev_start(struct net_device *ndev) /* Someone was faster than us, use their priv and roll * back our's. */ + kref_get(&priv_new->rx_kref); spin_unlock(&j1939_netdev_lock); dev_put(ndev); kfree(priv); - kref_get(&priv_new->rx_kref); return priv_new; } j1939_priv_set(ndev, priv); diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c index bdc95bd7a851ff803c643ad4da7b7af8d24c9ff1..9c39b0f5d6e07d3dabc1b649ecffe1b2ddd88340 100644 --- a/net/can/j1939/transport.c +++ b/net/can/j1939/transport.c @@ -1230,12 +1230,11 @@ static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) session->err = -ETIME; j1939_session_deactivate(session); } else { - netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", - __func__, session); - j1939_session_list_lock(session->priv); if (session->state >= J1939_SESSION_ACTIVE && session->state < J1939_SESSION_ACTIVE_MAX) { + netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", + __func__, session); j1939_session_get(session); hrtimer_start(&session->rxtimer, ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS), @@ -1597,6 +1596,8 @@ j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, abort = J1939_XTP_ABORT_FAULT; else if (len > priv->tp_max_packet_size) abort = J1939_XTP_ABORT_RESOURCE; + else if (len < J1939_MIN_TP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; } if (abort != J1939_XTP_NO_ABORT) { @@ -1771,6 +1772,7 @@ static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb, static void j1939_xtp_rx_dat_one(struct j1939_session *session, struct sk_buff *skb) { + enum j1939_xtp_abort abort = J1939_XTP_ABORT_FAULT; struct j1939_priv *priv = session->priv; struct j1939_sk_buff_cb *skcb; struct sk_buff *se_skb = NULL; @@ -1785,9 +1787,11 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, skcb = j1939_skb_to_cb(skb); dat = skb->data; - if (skb->len <= 1) + if (skb->len != 8) { /* makes no sense */ + abort = J1939_XTP_ABORT_UNEXPECTED_DATA; goto out_session_cancel; + } switch (session->last_cmd) { case 0xff: @@ -1885,7 +1889,7 @@ static void j1939_xtp_rx_dat_one(struct j1939_session *session, out_session_cancel: kfree_skb(se_skb); j1939_session_timers_cancel(session); - j1939_session_cancel(session, J1939_XTP_ABORT_FAULT); + j1939_session_cancel(session, abort); j1939_session_put(session); } @@ -2000,6 +2004,12 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) extd = J1939_ETP; fallthrough; case J1939_TP_CMD_BAM: + if (cmd == J1939_TP_CMD_BAM && !j1939_cb_is_broadcast(skcb)) { + netdev_err_once(priv->ndev, "%s: BAM to unicast (%02x), ignoring!\n", + __func__, skcb->addr.sa); + return; + } + fallthrough; case J1939_TP_CMD_RTS: /* fall through */ if (skcb->addr.type != extd) return; @@ -2061,6 +2071,12 @@ static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) break; case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ + if (j1939_cb_is_broadcast(skcb)) { + netdev_err_once(priv->ndev, "%s: abort to broadcast (%02x), ignoring!\n", + __func__, skcb->addr.sa); + return; + } + if (j1939_tp_im_transmitter(skcb)) j1939_xtp_rx_abort(priv, skb, true); diff --git a/net/core/dev.c b/net/core/dev.c index 0e02690201814924b85a217e9805d57b36210384..d09d243697d70ea3f4e256d60734d9373deec13b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2980,6 +2980,8 @@ int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) if (dev->num_tc) netif_setup_tc(dev, txq); + dev_qdisc_change_real_num_tx(dev, txq); + dev->real_num_tx_queues = txq; if (disabling) { @@ -3178,6 +3180,12 @@ static u16 skb_tx_hash(const struct net_device *dev, qoffset = sb_dev->tc_to_txq[tc].offset; qcount = sb_dev->tc_to_txq[tc].count; + if (unlikely(!qcount)) { + net_warn_ratelimited("%s: invalid qcount, qoffset %u for tc %u\n", + sb_dev->name, qoffset, tc); + qoffset = 0; + qcount = dev->real_num_tx_queues; + } } if (skb_rx_queue_recorded(skb)) { @@ -3868,7 +3876,8 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); skb->pkt_type = PACKET_LOOPBACK; - skb->ip_summed = CHECKSUM_UNNECESSARY; + if (skb->ip_summed == CHECKSUM_NONE) + skb->ip_summed = CHECKSUM_UNNECESSARY; WARN_ON(!skb_dst(skb)); skb_dst_force(skb); netif_rx_ni(skb); @@ -4145,7 +4154,10 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (dev->flags & IFF_UP) { int cpu = smp_processor_id(); /* ok because BHs are off */ - if (txq->xmit_lock_owner != cpu) { + /* Other cpus might concurrently change txq->xmit_lock_owner + * to -1 or to their cpu id, but not to our id. + */ + if (READ_ONCE(txq->xmit_lock_owner) != cpu) { if (dev_xmit_recursion()) goto recursion_alert; @@ -9334,6 +9346,12 @@ static int bpf_xdp_link_update(struct bpf_link *link, struct bpf_prog *new_prog, goto out_unlock; } old_prog = link->prog; + if (old_prog->type != new_prog->type || + old_prog->expected_attach_type != new_prog->expected_attach_type) { + err = -EINVAL; + goto out_unlock; + } + if (old_prog == new_prog) { /* no-op, don't disturb drivers */ bpf_prog_put(new_prog); diff --git a/net/core/devlink.c b/net/core/devlink.c index 96cf4bc1f9585b65b7f5ec3878a52bab690c0c4d..646d90f63dafc72c4a5bd6bcfda276677dec98c7 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3265,14 +3265,6 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) return err; } - if (info->attrs[DEVLINK_ATTR_NETNS_PID] || - info->attrs[DEVLINK_ATTR_NETNS_FD] || - info->attrs[DEVLINK_ATTR_NETNS_ID]) { - dest_net = devlink_netns_get(skb, info); - if (IS_ERR(dest_net)) - return PTR_ERR(dest_net); - } - if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION]) action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]); else @@ -3315,6 +3307,14 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } } + if (info->attrs[DEVLINK_ATTR_NETNS_PID] || + info->attrs[DEVLINK_ATTR_NETNS_FD] || + info->attrs[DEVLINK_ATTR_NETNS_ID]) { + dest_net = devlink_netns_get(skb, info); + if (IS_ERR(dest_net)) + return PTR_ERR(dest_net); + } + err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack); if (dest_net) @@ -7852,8 +7852,6 @@ static const struct genl_small_ops devlink_nl_ops[] = { GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_cmd_health_reporter_dump_get_dumpit, .flags = GENL_ADMIN_PERM, - .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT | - DEVLINK_NL_FLAG_NO_LOCK, }, { .cmd = DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR, diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index db65ce62b625ad375cba872ae2f2b31454baca81..ed9dd17f9348c284070fc0cdec5583fcc5533f8d 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -280,13 +280,17 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, rcu_read_lock(); list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { + struct net_device *dev; + /* * only add a note to our monitor buffer if: * 1) this is the dev we received on * 2) its after the last_rx delta * 3) our rx_dropped count has gone up */ - if ((new_stat->dev == napi->dev) && + /* Paired with WRITE_ONCE() in dropmon_net_event() */ + dev = READ_ONCE(new_stat->dev); + if ((dev == napi->dev) && (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) && (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { trace_drop_common(NULL, NULL); @@ -1574,7 +1578,10 @@ static int dropmon_net_event(struct notifier_block *ev_block, mutex_lock(&net_dm_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { - new_stat->dev = NULL; + + /* Paired with READ_ONCE() in trace_napi_poll_hit() */ + WRITE_ONCE(new_stat->dev, NULL); + if (trace_state == TRACE_OFF) { list_del_rcu(&new_stat->list); kfree_rcu(new_stat, rcu); diff --git a/net/core/dst_cache.c b/net/core/dst_cache.c index be74ab4551c204a0ddf2cfc17c886d0f36f5be98..0ccfd5fa5cb9b5f608ab6715a718cd57ee4eecab 100644 --- a/net/core/dst_cache.c +++ b/net/core/dst_cache.c @@ -162,3 +162,22 @@ void dst_cache_destroy(struct dst_cache *dst_cache) free_percpu(dst_cache->cache); } EXPORT_SYMBOL_GPL(dst_cache_destroy); + +void dst_cache_reset_now(struct dst_cache *dst_cache) +{ + int i; + + if (!dst_cache->cache) + return; + + dst_cache->reset_ts = jiffies; + for_each_possible_cpu(i) { + struct dst_cache_pcpu *idst = per_cpu_ptr(dst_cache->cache, i); + struct dst_entry *dst = idst->dst; + + idst->cookie = 0; + idst->dst = NULL; + dst_release(dst); + } +} +EXPORT_SYMBOL_GPL(dst_cache_reset_now); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9258ffc4ebffc7d6f6eaff9ed2725fde13cc9cd4..a6159624eec0465eecec7510727f354b3d558574 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -323,7 +323,7 @@ jumped: if (!err && ops->suppress && INDIRECT_CALL_MT(ops->suppress, fib6_rule_suppress, fib4_rule_suppress, - rule, arg)) + rule, flags, arg)) continue; if (err != -EAGAIN) { diff --git a/net/core/filter.c b/net/core/filter.c index 035d66227ae20874b51bb8605adf9c11ac3bedb7..3b52ddd92659a04073731b71d7366ba6e4d1613e 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2730,6 +2730,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, if (unlikely(flags)) return -EINVAL; + if (unlikely(len == 0)) + return 0; + /* First find the starting scatterlist element */ i = msg->sg.start; do { @@ -4693,12 +4696,14 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case SO_RCVBUF: val = min_t(u32, val, sysctl_rmem_max); + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); break; case SO_SNDBUF: val = min_t(u32, val, sysctl_wmem_max); + val = min_t(int, val, INT_MAX / 2); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; WRITE_ONCE(sk->sk_sndbuf, max_t(int, val * 2, SOCK_MIN_SNDBUF)); @@ -7901,9 +7906,9 @@ void bpf_warn_invalid_xdp_action(u32 act) { const u32 act_max = XDP_REDIRECT; - WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n", - act > act_max ? "Illegal" : "Driver unsupported", - act); + pr_warn_once("%s XDP return value %u, expect packet loss!\n", + act > act_max ? "Illegal" : "Driver unsupported", + act); } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); @@ -9475,6 +9480,27 @@ static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg, off); break; + case offsetof(struct __sk_buff, cb[0]) ... + offsetofend(struct __sk_buff, cb[4]) - 1: + BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20); + BUILD_BUG_ON((offsetof(struct sk_buff, cb) + + offsetof(struct sk_skb_cb, data)) % + sizeof(__u64)); + + prog->cb_access = 1; + off = si->off; + off -= offsetof(struct __sk_buff, cb[0]); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct sk_skb_cb, data); + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg, + si->src_reg, off); + else + *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg, + si->src_reg, off); + break; + + default: return bpf_convert_ctx_access(type, si, insn_buf, prog, target_size); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 8ec7d13d28608b773b54aa6c3a6729f8eaf1fc3f..f590b0e672a9b1760497991e490b6471984af0a8 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -192,6 +192,10 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, nla_entype = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla_entype) { + if (nla_len(nla_entype) < sizeof(u16)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_ENCAP_TYPE"); + return -EINVAL; + } encap_type = nla_get_u16(nla_entype); if (lwtunnel_valid_encap_type(encap_type, diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c452ebf209394a67c5064a1abcb5912d8dac2271..52a1c8725337b7601c51355c8b8e26b381e18ae1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -380,7 +380,7 @@ EXPORT_SYMBOL(neigh_ifdown); static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev, - bool exempt_from_gc) + u8 flags, bool exempt_from_gc) { struct neighbour *n = NULL; unsigned long now = jiffies; @@ -413,6 +413,7 @@ do_alloc: n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; + n->flags = flags; seqlock_init(&n->hh.hh_lock); n->parms = neigh_parms_clone(&tbl->parms); timer_setup(&n->timer, neigh_timer_handler, 0); @@ -576,19 +577,18 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, } EXPORT_SYMBOL(neigh_lookup_nodev); -static struct neighbour *___neigh_create(struct neigh_table *tbl, - const void *pkey, - struct net_device *dev, - bool exempt_from_gc, bool want_ref) +static struct neighbour * +___neigh_create(struct neigh_table *tbl, const void *pkey, + struct net_device *dev, u8 flags, + bool exempt_from_gc, bool want_ref) { - struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev, exempt_from_gc); - u32 hash_val; - unsigned int key_len = tbl->key_len; - int error; + u32 hash_val, key_len = tbl->key_len; + struct neighbour *n1, *rc, *n; struct neigh_hash_table *nht; + int error; + n = neigh_alloc(tbl, dev, flags, exempt_from_gc); trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc); - if (!n) { rc = ERR_PTR(-ENOBUFS); goto out; @@ -675,7 +675,7 @@ out_neigh_release: struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { - return ___neigh_create(tbl, pkey, dev, false, want_ref); + return ___neigh_create(tbl, pkey, dev, 0, false, want_ref); } EXPORT_SYMBOL(__neigh_create); @@ -734,11 +734,10 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, ASSERT_RTNL(); - n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); + n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; - n->protocol = 0; write_pnet(&n->net, net); memcpy(n->key, pkey, key_len); n->dev = dev; @@ -1222,7 +1221,7 @@ static void neigh_update_hhs(struct neighbour *neigh) lladdr instead of overriding it if it is different. NEIGH_UPDATE_F_ADMIN means that the change is administrative. - + NEIGH_UPDATE_F_USE means that the entry is user triggered. NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing NTF_ROUTER flag. NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as @@ -1260,6 +1259,12 @@ static int __neigh_update(struct neighbour *neigh, const u8 *lladdr, goto out; ext_learn_change = neigh_update_ext_learned(neigh, flags, ¬ify); + if (flags & NEIGH_UPDATE_F_USE) { + new = old & ~NUD_PERMANENT; + neigh->nud_state = new; + err = 0; + goto out; + } if (!(new & NUD_VALID)) { neigh_del_timer(neigh); @@ -1950,7 +1955,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, exempt_from_gc = ndm->ndm_state & NUD_PERMANENT || ndm->ndm_flags & NTF_EXT_LEARNED; - neigh = ___neigh_create(tbl, dst, dev, exempt_from_gc, true); + neigh = ___neigh_create(tbl, dst, dev, + ndm->ndm_flags & NTF_EXT_LEARNED, + exempt_from_gc, true); if (IS_ERR(neigh)) { err = PTR_ERR(neigh); goto out; @@ -1969,22 +1976,20 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (protocol) neigh->protocol = protocol; - if (ndm->ndm_flags & NTF_EXT_LEARNED) flags |= NEIGH_UPDATE_F_EXT_LEARNED; - if (ndm->ndm_flags & NTF_ROUTER) flags |= NEIGH_UPDATE_F_ISROUTER; + if (ndm->ndm_flags & NTF_USE) + flags |= NEIGH_UPDATE_F_USE; - if (ndm->ndm_flags & NTF_USE) { + err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, + NETLINK_CB(skb).portid, extack); + if (!err && ndm->ndm_flags & NTF_USE) { neigh_event_send(neigh, NULL); err = 0; - } else - err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, - NETLINK_CB(skb).portid, extack); - + } neigh_release(neigh); - out: return err; } diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index c714e6a9dad4c91150a7dbd380f66ca82a3b81cd..eadb696360b486c0165c0b4906b5ea76c3430d40 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -193,12 +193,23 @@ static const struct seq_operations softnet_seq_ops = { .show = softnet_seq_show, }; -static void *ptype_get_idx(loff_t pos) +static void *ptype_get_idx(struct seq_file *seq, loff_t pos) { + struct list_head *ptype_list = NULL; struct packet_type *pt = NULL; + struct net_device *dev; loff_t i = 0; int t; + for_each_netdev_rcu(seq_file_net(seq), dev) { + ptype_list = &dev->ptype_all; + list_for_each_entry_rcu(pt, ptype_list, list) { + if (i == pos) + return pt; + ++i; + } + } + list_for_each_entry_rcu(pt, &ptype_all, list) { if (i == pos) return pt; @@ -219,22 +230,40 @@ static void *ptype_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { rcu_read_lock(); - return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN; + return *pos ? ptype_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net_device *dev; struct packet_type *pt; struct list_head *nxt; int hash; ++*pos; if (v == SEQ_START_TOKEN) - return ptype_get_idx(0); + return ptype_get_idx(seq, 0); pt = v; nxt = pt->list.next; + if (pt->dev) { + if (nxt != &pt->dev->ptype_all) + goto found; + + dev = pt->dev; + for_each_netdev_continue_rcu(seq_file_net(seq), dev) { + if (!list_empty(&dev->ptype_all)) { + nxt = dev->ptype_all.next; + goto found; + } + } + + nxt = ptype_all.next; + goto ptype_all; + } + if (pt->type == htons(ETH_P_ALL)) { +ptype_all: if (nxt != &ptype_all) goto found; hash = 0; @@ -263,7 +292,8 @@ static int ptype_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "Type Device Function\n"); - else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) { + else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && + (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { if (pt->type == htons(ETH_P_ALL)) seq_puts(seq, "ALL "); else diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b4562f9d074cf24081171261e112438195a57a39..989b3f7ee85f40341a9549d424f7a4b262acbacc 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -175,6 +175,14 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier) static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct net_device *netdev = to_net_dev(dev); + + /* The check is also done in change_carrier; this helps returning early + * without hitting the trylock/restart in netdev_store. + */ + if (!netdev->netdev_ops->ndo_change_carrier) + return -EOPNOTSUPP; + return netdev_store(dev, attr, buf, len, change_carrier); } @@ -196,10 +204,16 @@ static ssize_t speed_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; + /* The check is also done in __ethtool_get_link_ksettings; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->ethtool_ops->get_link_ksettings) + return ret; + if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev)) { + if (netif_running(netdev) && netif_device_present(netdev)) { struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) @@ -216,6 +230,12 @@ static ssize_t duplex_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; + /* The check is also done in __ethtool_get_link_ksettings; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->ethtool_ops->get_link_ksettings) + return ret; + if (!rtnl_trylock()) return restart_syscall(); @@ -468,6 +488,14 @@ static ssize_t proto_down_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct net_device *netdev = to_net_dev(dev); + + /* The check is also done in change_proto_down; this helps returning + * early without hitting the trylock/restart in netdev_store. + */ + if (!netdev->netdev_ops->ndo_change_proto_down) + return -EOPNOTSUPP; + return netdev_store(dev, attr, buf, len, change_proto_down); } NETDEVICE_SHOW_RW(proto_down, fmt_dec); @@ -478,6 +506,12 @@ static ssize_t phys_port_id_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The check is also done in dev_get_phys_port_id; this helps returning + * early without hitting the trylock/restart below. + */ + if (!netdev->netdev_ops->ndo_get_phys_port_id) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -500,6 +534,13 @@ static ssize_t phys_port_name_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The checks are also done in dev_get_phys_port_name; this helps + * returning early without hitting the trylock/restart below. + */ + if (!netdev->netdev_ops->ndo_get_phys_port_name && + !netdev->netdev_ops->ndo_get_devlink_port) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -522,6 +563,14 @@ static ssize_t phys_switch_id_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + /* The checks are also done in dev_get_phys_port_name; this helps + * returning early without hitting the trylock/restart below. This works + * because recurse is false when calling dev_get_port_parent_id. + */ + if (!netdev->netdev_ops->ndo_get_port_parent_id && + !netdev->netdev_ops->ndo_get_devlink_port) + return -EOPNOTSUPP; + if (!rtnl_trylock()) return restart_syscall(); @@ -1179,6 +1228,12 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue, if (!capable(CAP_NET_ADMIN)) return -EPERM; + /* The check is also done later; this helps returning early without + * hitting the trylock/restart below. + */ + if (!dev->netdev_ops->ndo_set_tx_maxrate) + return -EOPNOTSUPP; + err = kstrtou32(buf, 10, &rate); if (err < 0) return err; @@ -1749,6 +1804,9 @@ static void remove_queue_kobjects(struct net_device *dev) net_rx_queue_update_kobjects(dev, real_rx, 0); netdev_queue_update_kobjects(dev, real_tx, 0); + + dev->real_num_rx_queues = 0; + dev->real_num_tx_queues = 0; #ifdef CONFIG_SYSFS kset_unregister(dev->queues_kset); #endif @@ -1957,9 +2015,9 @@ int netdev_register_kobject(struct net_device *ndev) int netdev_change_owner(struct net_device *ndev, const struct net *net_old, const struct net *net_new) { + kuid_t old_uid = GLOBAL_ROOT_UID, new_uid = GLOBAL_ROOT_UID; + kgid_t old_gid = GLOBAL_ROOT_GID, new_gid = GLOBAL_ROOT_GID; struct device *dev = &ndev->dev; - kuid_t old_uid, new_uid; - kgid_t old_gid, new_gid; int error; net_ns_get_ownership(net_old, &old_uid, &old_gid); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 5c9d95f30be6056e465f6d7a2cfcb2edcaffe657..cbff7d94b993ec941197c99af3118350e7c95445 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -183,8 +183,10 @@ static void ops_exit_list(const struct pernet_operations *ops, { struct net *net; if (ops->exit) { - list_for_each_entry(net, net_exit_list, exit_list) + list_for_each_entry(net, net_exit_list, exit_list) { ops->exit(net); + cond_resched(); + } } if (ops->exit_batch) ops->exit_batch(net_exit_list); @@ -486,7 +488,9 @@ struct net *copy_net_ns(unsigned long flags, if (rv < 0) { put_userns: +#ifdef CONFIG_KEYS key_remove_domain(net->key_domain); +#endif put_user_ns(user_ns); net_drop_ns(net); dec_ucounts: @@ -618,7 +622,9 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); dec_net_namespaces(net->ucounts); +#ifdef CONFIG_KEYS key_remove_domain(net->key_domain); +#endif put_user_ns(net->user_ns); net_drop_ns(net); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 7266571d5c7e2ec083642158d6122ac2cbbbfc30..9ff6d4160daba60a77b30248c1d1fb539271c220 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1705,6 +1705,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, { struct ifinfomsg *ifm; struct nlmsghdr *nlh; + struct Qdisc *qdisc; ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -1722,6 +1723,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_TARGET_NETNSID, tgt_netnsid)) goto nla_put_failure; + qdisc = rtnl_dereference(dev->qdisc); if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || nla_put_u8(skb, IFLA_OPERSTATE, @@ -1740,8 +1742,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, #endif put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || - (dev->qdisc && - nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || + (qdisc && + nla_put_string(skb, IFLA_QDISC, qdisc->ops->id)) || nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_up_count) + @@ -3238,8 +3240,8 @@ static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1]; unsigned char name_assign_type = NET_NAME_USER; struct nlattr *linkinfo[IFLA_INFO_MAX + 1]; - const struct rtnl_link_ops *m_ops = NULL; - struct net_device *master_dev = NULL; + const struct rtnl_link_ops *m_ops; + struct net_device *master_dev; struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; struct nlattr *tb[IFLA_MAX + 1]; @@ -3277,6 +3279,8 @@ replay: else dev = NULL; + master_dev = NULL; + m_ops = NULL; if (dev) { master_dev = netdev_master_upper_dev_get(dev); if (master_dev) @@ -5257,7 +5261,7 @@ nla_put_failure: static size_t if_nlmsg_stats_size(const struct net_device *dev, u32 filter_mask) { - size_t size = 0; + size_t size = NLMSG_ALIGN(sizeof(struct if_stats_msg)); if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_64, 0)) size += nla_total_size_64bit(sizeof(struct rtnl_link_stats64)); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 75dfbde8d2e6babb63c14f569be30340223c9bad..0c3c0c9cbe23a1e2a659acce1ee2b31c5c7d1bbe 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -771,7 +771,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) ntohs(skb->protocol), skb->pkt_type, skb->skb_iif); if (dev) - printk("%sdev name=%s feat=0x%pNF\n", + printk("%sdev name=%s feat=%pNF\n", level, dev->name, &dev->features); if (sk) printk("%ssk family=%hu type=%u proto=%u\n", @@ -2141,7 +2141,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Free pulled out fragments. */ while ((list = skb_shinfo(skb)->frag_list) != insp) { skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. */ if (clone) { @@ -3296,19 +3296,7 @@ EXPORT_SYMBOL(skb_split); */ static int skb_prepare_for_shift(struct sk_buff *skb) { - int ret = 0; - - if (skb_cloned(skb)) { - /* Save and restore truesize: pskb_expand_head() may reallocate - * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we - * cannot change truesize at this point. - */ - unsigned int save_truesize = skb->truesize; - - ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - skb->truesize = save_truesize; - } - return ret; + return skb_unclone_keeptruesize(skb, GFP_ATOMIC); } /** @@ -3692,6 +3680,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, list_skb = list_skb->next; err = 0; + delta_truesize += nskb->truesize; if (skb_shared(nskb)) { tmp = skb_clone(nskb, GFP_ATOMIC); if (tmp) { @@ -3716,7 +3705,6 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, tail = nskb; delta_len += nskb->len; - delta_truesize += nskb->truesize; skb_push(nskb, -skb_network_offset(nskb) + offset); @@ -6046,7 +6034,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb, /* Free pulled out fragments. */ while ((list = shinfo->frag_list) != insp) { shinfo->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. */ if (clone) { diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 5dd5569f89bf5aa15bcce26af05bd28037907afe..e4bb89599b44bc77b4712312d300997c32fa916b 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -943,7 +943,7 @@ static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb, struct sk_psock *psock; struct bpf_prog *prog; int ret = __SK_DROP; - int len = skb->len; + int len = orig_len; /* clone here so sk_eat_skb() in tcp_read_sock does not drop our data */ skb = skb_clone(skb, GFP_ATOMIC); diff --git a/net/core/sock.c b/net/core/sock.c index a51ea59ed698ef46dc5d20e726627b983d9bba0f..70396a4067616d55a3a698e857d481fbed9d4d20 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1257,6 +1257,16 @@ set_sndbuf: } EXPORT_SYMBOL(sock_setsockopt); +static const struct cred *sk_get_peer_cred(struct sock *sk) +{ + const struct cred *cred; + + spin_lock(&sk->sk_peer_lock); + cred = get_cred(sk->sk_peer_cred); + spin_unlock(&sk->sk_peer_lock); + + return cred; +} static void cred_to_ucred(struct pid *pid, const struct cred *cred, struct ucred *ucred) @@ -1430,7 +1440,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname, struct ucred peercred; if (len > sizeof(peercred)) len = sizeof(peercred); + + spin_lock(&sk->sk_peer_lock); cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); + spin_unlock(&sk->sk_peer_lock); + if (copy_to_user(optval, &peercred, len)) return -EFAULT; goto lenout; @@ -1438,20 +1452,23 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_PEERGROUPS: { + const struct cred *cred; int ret, n; - if (!sk->sk_peer_cred) + cred = sk_get_peer_cred(sk); + if (!cred) return -ENODATA; - n = sk->sk_peer_cred->group_info->ngroups; + n = cred->group_info->ngroups; if (len < n * sizeof(gid_t)) { len = n * sizeof(gid_t); + put_cred(cred); return put_user(len, optlen) ? -EFAULT : -ERANGE; } len = n * sizeof(gid_t); - ret = groups_to_user((gid_t __user *)optval, - sk->sk_peer_cred->group_info); + ret = groups_to_user((gid_t __user *)optval, cred->group_info); + put_cred(cred); if (ret) return ret; goto lenout; @@ -1794,9 +1811,10 @@ static void __sk_destruct(struct rcu_head *head) sk->sk_frag.page = NULL; } - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + /* We do not need to acquire sk->sk_peer_lock, we are the last user. */ + put_cred(sk->sk_peer_cred); put_pid(sk->sk_peer_pid); + if (likely(sk->sk_net_refcnt)) put_net(sock_net(sk)); sk_prot_free(sk->sk_prot_creator, sk); @@ -1871,123 +1889,120 @@ static void sk_init_common(struct sock *sk) struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) { struct proto *prot = READ_ONCE(sk->sk_prot); - struct sock *newsk; + struct sk_filter *filter; bool is_charged = true; + struct sock *newsk; newsk = sk_prot_alloc(prot, priority, sk->sk_family); - if (newsk != NULL) { - struct sk_filter *filter; + if (!newsk) + goto out; - sock_copy(newsk, sk); + sock_copy(newsk, sk); - newsk->sk_prot_creator = prot; + newsk->sk_prot_creator = prot; - /* SANITY */ - if (likely(newsk->sk_net_refcnt)) - get_net(sock_net(newsk)); - sk_node_init(&newsk->sk_node); - sock_lock_init(newsk); - bh_lock_sock(newsk); - newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; - newsk->sk_backlog.len = 0; + /* SANITY */ + if (likely(newsk->sk_net_refcnt)) { + get_net(sock_net(newsk)); + sock_inuse_add(sock_net(newsk), 1); + } + sk_node_init(&newsk->sk_node); + sock_lock_init(newsk); + bh_lock_sock(newsk); + newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL; + newsk->sk_backlog.len = 0; - atomic_set(&newsk->sk_rmem_alloc, 0); - /* - * sk_wmem_alloc set to one (see sk_free() and sock_wfree()) - */ - refcount_set(&newsk->sk_wmem_alloc, 1); - atomic_set(&newsk->sk_omem_alloc, 0); - sk_init_common(newsk); + atomic_set(&newsk->sk_rmem_alloc, 0); - newsk->sk_dst_cache = NULL; - newsk->sk_dst_pending_confirm = 0; - newsk->sk_wmem_queued = 0; - newsk->sk_forward_alloc = 0; - atomic_set(&newsk->sk_drops, 0); - newsk->sk_send_head = NULL; - newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; - atomic_set(&newsk->sk_zckey, 0); + /* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */ + refcount_set(&newsk->sk_wmem_alloc, 1); - sock_reset_flag(newsk, SOCK_DONE); + atomic_set(&newsk->sk_omem_alloc, 0); + sk_init_common(newsk); - /* sk->sk_memcg will be populated at accept() time */ - newsk->sk_memcg = NULL; + newsk->sk_dst_cache = NULL; + newsk->sk_dst_pending_confirm = 0; + newsk->sk_wmem_queued = 0; + newsk->sk_forward_alloc = 0; + atomic_set(&newsk->sk_drops, 0); + newsk->sk_send_head = NULL; + newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; + atomic_set(&newsk->sk_zckey, 0); - cgroup_sk_clone(&newsk->sk_cgrp_data); + sock_reset_flag(newsk, SOCK_DONE); - rcu_read_lock(); - filter = rcu_dereference(sk->sk_filter); - if (filter != NULL) - /* though it's an empty new sock, the charging may fail - * if sysctl_optmem_max was changed between creation of - * original socket and cloning - */ - is_charged = sk_filter_charge(newsk, filter); - RCU_INIT_POINTER(newsk->sk_filter, filter); - rcu_read_unlock(); + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; - if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { - /* We need to make sure that we don't uncharge the new - * socket if we couldn't charge it in the first place - * as otherwise we uncharge the parent's filter. - */ - if (!is_charged) - RCU_INIT_POINTER(newsk->sk_filter, NULL); - sk_free_unlock_clone(newsk); - newsk = NULL; - goto out; - } - RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); + cgroup_sk_clone(&newsk->sk_cgrp_data); - if (bpf_sk_storage_clone(sk, newsk)) { - sk_free_unlock_clone(newsk); - newsk = NULL; - goto out; - } + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); + if (filter != NULL) + /* though it's an empty new sock, the charging may fail + * if sysctl_optmem_max was changed between creation of + * original socket and cloning + */ + is_charged = sk_filter_charge(newsk, filter); + RCU_INIT_POINTER(newsk->sk_filter, filter); + rcu_read_unlock(); - /* Clear sk_user_data if parent had the pointer tagged - * as not suitable for copying when cloning. + if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { + /* We need to make sure that we don't uncharge the new + * socket if we couldn't charge it in the first place + * as otherwise we uncharge the parent's filter. */ - if (sk_user_data_is_nocopy(newsk)) - newsk->sk_user_data = NULL; + if (!is_charged) + RCU_INIT_POINTER(newsk->sk_filter, NULL); + sk_free_unlock_clone(newsk); + newsk = NULL; + goto out; + } + RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); - newsk->sk_err = 0; - newsk->sk_err_soft = 0; - newsk->sk_priority = 0; - newsk->sk_incoming_cpu = raw_smp_processor_id(); - if (likely(newsk->sk_net_refcnt)) - sock_inuse_add(sock_net(newsk), 1); + if (bpf_sk_storage_clone(sk, newsk)) { + sk_free_unlock_clone(newsk); + newsk = NULL; + goto out; + } - /* - * Before updating sk_refcnt, we must commit prior changes to memory - * (Documentation/RCU/rculist_nulls.rst for details) - */ - smp_wmb(); - refcount_set(&newsk->sk_refcnt, 2); + /* Clear sk_user_data if parent had the pointer tagged + * as not suitable for copying when cloning. + */ + if (sk_user_data_is_nocopy(newsk)) + newsk->sk_user_data = NULL; - /* - * Increment the counter in the same struct proto as the master - * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that - * is the same as sk->sk_prot->socks, as this field was copied - * with memcpy). - * - * This _changes_ the previous behaviour, where - * tcp_create_openreq_child always was incrementing the - * equivalent to tcp_prot->socks (inet_sock_nr), so this have - * to be taken into account in all callers. -acme - */ - sk_refcnt_debug_inc(newsk); - sk_set_socket(newsk, NULL); - sk_tx_queue_clear(newsk); - RCU_INIT_POINTER(newsk->sk_wq, NULL); + newsk->sk_err = 0; + newsk->sk_err_soft = 0; + newsk->sk_priority = 0; + newsk->sk_incoming_cpu = raw_smp_processor_id(); - if (newsk->sk_prot->sockets_allocated) - sk_sockets_allocated_inc(newsk); + /* Before updating sk_refcnt, we must commit prior changes to memory + * (Documentation/RCU/rculist_nulls.rst for details) + */ + smp_wmb(); + refcount_set(&newsk->sk_refcnt, 2); - if (sock_needs_netstamp(sk) && - newsk->sk_flags & SK_FLAGS_TIMESTAMP) - net_enable_timestamp(); - } + /* Increment the counter in the same struct proto as the master + * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that + * is the same as sk->sk_prot->socks, as this field was copied + * with memcpy). + * + * This _changes_ the previous behaviour, where + * tcp_create_openreq_child always was incrementing the + * equivalent to tcp_prot->socks (inet_sock_nr), so this have + * to be taken into account in all callers. -acme + */ + sk_refcnt_debug_inc(newsk); + sk_set_socket(newsk, NULL); + sk_tx_queue_clear(newsk); + RCU_INIT_POINTER(newsk->sk_wq, NULL); + + if (newsk->sk_prot->sockets_allocated) + sk_sockets_allocated_inc(newsk); + + if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP) + net_enable_timestamp(); out: return newsk; } @@ -3016,6 +3031,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_peer_pid = NULL; sk->sk_peer_cred = NULL; + spin_lock_init(&sk->sk_peer_lock); + sk->sk_write_pending = 0; sk->sk_rcvlowat = 1; sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index ddc899e83313afd8937b167cffeed1292c5ec315..4ea5bc65848f29f3b9417e9d75244b264dcb8cda 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -52,7 +52,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) if (err) goto free_stab; - stab->sks = bpf_map_area_alloc(stab->map.max_entries * + stab->sks = bpf_map_area_alloc((u64) stab->map.max_entries * sizeof(struct sock *), stab->map.numa_node); if (stab->sks) diff --git a/net/core/stream.c b/net/core/stream.c index 4f1d4aa5fb38d989a9c81f32dfce3f31bbc1fa47..a166a32b411fa6d9c37f5aed5da9b85b912a8086 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -195,9 +195,6 @@ void sk_stream_kill_queues(struct sock *sk) /* First the read buffer. */ __skb_queue_purge(&sk->sk_receive_queue); - /* Next, the error queue. */ - __skb_queue_purge(&sk->sk_error_queue); - /* Next, the write queue. */ WARN_ON(!skb_queue_empty(&sk->sk_write_queue)); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index d86d8d11cfe4a97062bfb9434f89bbe41a26f740..2e0a4378e778a6eec356de351d26532d9674856d 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -419,7 +419,7 @@ static struct ctl_table net_core_table[] = { .mode = 0600, .proc_handler = proc_dolongvec_minmax_bpf_restricted, .extra1 = &long_one, - .extra2 = &long_max, + .extra2 = &bpf_jit_limit_max, }, #endif { diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index a352ce4f878a374e078f95b38b11631295f4bc35..2535d3dfb92c8a8fc3a53bdc3d8bff37eb875dc1 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -2063,10 +2063,54 @@ u8 dcb_ieee_getapp_default_prio_mask(const struct net_device *dev) } EXPORT_SYMBOL(dcb_ieee_getapp_default_prio_mask); +static void dcbnl_flush_dev(struct net_device *dev) +{ + struct dcb_app_type *itr, *tmp; + + spin_lock_bh(&dcb_lock); + + list_for_each_entry_safe(itr, tmp, &dcb_app_list, list) { + if (itr->ifindex == dev->ifindex) { + list_del(&itr->list); + kfree(itr); + } + } + + spin_unlock_bh(&dcb_lock); +} + +static int dcbnl_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_UNREGISTER: + if (!dev->dcbnl_ops) + return NOTIFY_DONE; + + dcbnl_flush_dev(dev); + + return NOTIFY_OK; + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block dcbnl_nb __read_mostly = { + .notifier_call = dcbnl_netdevice_event, +}; + static int __init dcbnl_init(void) { + int err; + INIT_LIST_HEAD(&dcb_app_list); + err = register_netdevice_notifier(&dcbnl_nb); + if (err) + return err; + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0); rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0); diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index c5c1d2b8045e8efd9bf32a2db1e679c13cbf1852..5183e627468d8901f4f80ed8d74a655aa2a6557f 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -48,7 +48,7 @@ extern bool dccp_debug; extern struct inet_hashinfo dccp_hashinfo; -extern struct percpu_counter dccp_orphan_count; +DECLARE_PER_CPU(unsigned int, dccp_orphan_count); void dccp_time_wait(struct sock *sk, int state, int timeo); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 6d705d90c6149c4d6043645addd9db94b7d25353..548cf0135647da5a87fee919c18adf1395714b93 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -42,8 +42,8 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); -struct percpu_counter dccp_orphan_count; -EXPORT_SYMBOL_GPL(dccp_orphan_count); +DEFINE_PER_CPU(unsigned int, dccp_orphan_count); +EXPORT_PER_CPU_SYMBOL_GPL(dccp_orphan_count); struct inet_hashinfo dccp_hashinfo; EXPORT_SYMBOL_GPL(dccp_hashinfo); @@ -1055,7 +1055,7 @@ adjudge_to_death: bh_lock_sock(sk); WARN_ON(sock_owned_by_user(sk)); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(dccp_orphan_count); /* Have we already been destroyed by a softirq or backlog? */ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) @@ -1115,13 +1115,10 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > sizeof_field(struct sk_buff, cb)); - rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); - if (rc) - goto out_fail; inet_hashinfo_init(&dccp_hashinfo); rc = inet_hashinfo2_init_mod(&dccp_hashinfo); if (rc) - goto out_free_percpu; + goto out_fail; rc = -ENOBUFS; dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", @@ -1226,8 +1223,6 @@ out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); out_free_hashinfo2: inet_hashinfo2_free_mod(&dccp_hashinfo); -out_free_percpu: - percpu_counter_destroy(&dccp_orphan_count); out_fail: dccp_hashinfo.bhash = NULL; dccp_hashinfo.ehash = NULL; @@ -1250,7 +1245,6 @@ static void __exit dccp_fini(void) dccp_ackvec_exit(); dccp_sysctl_exit(); inet_hashinfo2_free_mod(&dccp_hashinfo); - percpu_counter_destroy(&dccp_orphan_count); } module_init(dccp_init); diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index d8efec516d8686645d788642cec3a1446109f56b..979dee6bb88c5767f5749145ad0931c2cd421691 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -249,6 +249,9 @@ struct ethnl_reply_data { static inline int ethnl_ops_begin(struct net_device *dev) { + if (dev && dev->reg_state == NETREG_UNREGISTERING) + return -ENODEV; + if (dev && dev->ethtool_ops->begin) return dev->ethtool_ops->begin(dev); else diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index d4ac02718b72adccc5913b281650e1bcb0c1434c..c7bc704c8862a46f1adfc70f19fd2bcfae5a99a9 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -62,8 +62,7 @@ static int pause_reply_size(const struct ethnl_req_info *req_base, if (req_base->flags & ETHTOOL_FLAG_STATS) n += nla_total_size(0) + /* _PAUSE_STATS */ - nla_total_size_64bit(sizeof(u64)) * - (ETHTOOL_A_PAUSE_STAT_MAX - 2); + nla_total_size_64bit(sizeof(u64)) * ETHTOOL_PAUSE_STAT_CNT; return n; } diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index b34e4f827e75668f0406993cc84daa9f14966237..a493965f157f289c4afd3ac5d8f8c58ef088a66f 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -1441,7 +1441,7 @@ static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1634,7 +1634,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1812,7 +1812,7 @@ static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; @@ -1988,7 +1988,7 @@ static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid, hdr = nl802154hdr_put(msg, portid, seq, flags, cmd); if (!hdr) - return -1; + return -ENOBUFS; if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex)) goto nla_put_failure; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e4dbf5ad921828c935fb18c907fd31c9049e83da..482a4538e315854cec5856b3b7a468a3c80a26eb 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1378,8 +1378,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, } ops = rcu_dereference(inet_offloads[proto]); - if (likely(ops && ops->callbacks.gso_segment)) + if (likely(ops && ops->callbacks.gso_segment)) { segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; + } if (IS_ERR_OR_NULL(segs)) goto out; @@ -2007,6 +2010,10 @@ static int __init inet_init(void) ip_init(); + /* Initialise per-cpu ipv4 mibs */ + if (init_ipv4_mibs()) + panic("%s: Cannot init ipv4 mibs\n", __func__); + /* Setup TCP slab cache for open requests. */ tcp_init(); @@ -2037,12 +2044,6 @@ static int __init inet_init(void) if (init_inet_pernet_ops()) pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); - /* - * Initialise per-cpu ipv4 mibs - */ - - if (init_ipv4_mibs()) - pr_crit("%s: Cannot init ipv4 mibs\n", __func__); ipv4_proc_init(); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 7c185977742974829dfb02b8068b8ddb2bc350e6..148ef484a66ce4b1c794bbdffbca802a1ff1dc11 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2582,7 +2582,7 @@ static int __devinet_sysctl_register(struct net *net, char *dev_name, free: kfree(t); out: - return -ENOBUFS; + return -ENOMEM; } static void __devinet_sysctl_unregister(struct net *net, diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 61dfb64e725673a01b28a384f39eb63c7a08de1c..a3271ec3e1627fb4f6e29da0e0fb1a638fe7e789 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -673,7 +673,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 5aa7344dbec7fd80e8bab542e675d26ea423ad16..3450c9ba2728c7a1f5ac021fa88db37ff0cd977b 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -160,6 +160,9 @@ static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; } + if (proto == IPPROTO_IPV6) + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; + __skb_pull(skb, skb_transport_offset(skb)); ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 647bceab56c2d56470488865912b0e69b43bb9bc..917ea953dfad8f1d8a3d579de906b7109c79e2ae 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1578,7 +1578,7 @@ static int __net_init fib_net_init(struct net *net) int error; #ifdef CONFIG_IP_ROUTE_CLASSID - net->ipv4.fib_num_tclassid_users = 0; + atomic_set(&net->ipv4.fib_num_tclassid_users, 0); #endif error = ip_fib_net_init(net); if (error < 0) diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index ce54a30c2ef1e8e79c8922be5eee35055fa51178..d279cb8ac1584487885f66819634b421c01bf819 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -141,6 +141,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_action(struct fib_rule *rule, } INDIRECT_CALLABLE_SCOPE bool fib4_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg) { struct fib_result *result = (struct fib_result *) arg->result; @@ -263,7 +264,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, if (tb[FRA_FLOW]) { rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); if (rule4->tclassid) - net->ipv4.fib_num_tclassid_users++; + atomic_inc(&net->ipv4.fib_num_tclassid_users); } #endif @@ -295,7 +296,7 @@ static int fib4_rule_delete(struct fib_rule *rule) #ifdef CONFIG_IP_ROUTE_CLASSID if (((struct fib4_rule *)rule)->tclassid) - net->ipv4.fib_num_tclassid_users--; + atomic_dec(&net->ipv4.fib_num_tclassid_users); #endif net->ipv4.fib_has_custom_rules = true; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 1f75dc686b6b64b0ae90e1ea2e71dda12e513efc..838a876c168caf5a39af46ce90afea11691fd035 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -222,7 +223,7 @@ void fib_nh_release(struct net *net, struct fib_nh *fib_nh) { #ifdef CONFIG_IP_ROUTE_CLASSID if (fib_nh->nh_tclassid) - net->ipv4.fib_num_tclassid_users--; + atomic_dec(&net->ipv4.fib_num_tclassid_users); #endif fib_nh_common_release(&fib_nh->nh_common); } @@ -251,7 +252,6 @@ void free_fib_info(struct fib_info *fi) pr_warn("Freeing alive fib_info %p\n", fi); return; } - fib_info_cnt--; call_rcu(&fi->rcu, free_fib_info_rcu); } @@ -262,6 +262,10 @@ void fib_release_info(struct fib_info *fi) spin_lock_bh(&fib_info_lock); if (fi && --fi->fib_treeref == 0) { hlist_del(&fi->fib_hash); + + /* Paired with READ_ONCE() in fib_create_info(). */ + WRITE_ONCE(fib_info_cnt, fib_info_cnt - 1); + if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); if (fi->nh) { @@ -318,11 +322,15 @@ static inline int nh_comp(struct fib_info *fi, struct fib_info *ofi) static inline unsigned int fib_devindex_hashfn(unsigned int val) { - unsigned int mask = DEVINDEX_HASHSIZE - 1; + return hash_32(val, DEVINDEX_HASHBITS); +} + +static struct hlist_head * +fib_info_devhash_bucket(const struct net_device *dev) +{ + u32 val = net_hash_mix(dev_net(dev)) ^ dev->ifindex; - return (val ^ - (val >> DEVINDEX_HASHBITS) ^ - (val >> (DEVINDEX_HASHBITS * 2))) & mask; + return &fib_info_devhash[fib_devindex_hashfn(val)]; } static unsigned int fib_info_hashfn_1(int init_val, u8 protocol, u8 scope, @@ -432,12 +440,11 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct fib_nh *nh; - unsigned int hash; spin_lock(&fib_info_lock); - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); + hlist_for_each_entry(nh, head, nh_hash) { if (nh->fib_nh_dev == dev && nh->fib_nh_gw4 == gw && @@ -633,7 +640,7 @@ int fib_nh_init(struct net *net, struct fib_nh *nh, #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid = cfg->fc_flow; if (nh->nh_tclassid) - net->ipv4.fib_num_tclassid_users++; + atomic_inc(&net->ipv4.fib_num_tclassid_users); #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->fib_nh_weight = nh_weight; @@ -663,6 +670,19 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, return nhs; } +static int fib_gw_from_attr(__be32 *gw, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + if (nla_len(nla) < sizeof(*gw)) { + NL_SET_ERR_MSG(extack, "Invalid IPv4 address in RTA_GATEWAY"); + return -EINVAL; + } + + *gw = nla_get_in_addr(nla); + + return 0; +} + /* only called when fib_nh is integrated into fib_info */ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, @@ -705,7 +725,11 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, return -EINVAL; } if (nla) { - fib_cfg.fc_gw4 = nla_get_in_addr(nla); + ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla, + extack); + if (ret) + goto errout; + if (fib_cfg.fc_gw4) fib_cfg.fc_gw_family = AF_INET; } else if (nlav) { @@ -715,10 +739,18 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, } nla = nla_find(attrs, attrlen, RTA_FLOW); - if (nla) + if (nla) { + if (nla_len(nla) < sizeof(u32)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); + return -EINVAL; + } fib_cfg.fc_flow = nla_get_u32(nla); + } fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + /* RTA_ENCAP_TYPE length checked in + * lwtunnel_valid_encap_type_attr + */ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) fib_cfg.fc_encap_type = nla_get_u16(nla); @@ -903,6 +935,7 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *nlav, *attrs = rtnh_attrs(rtnh); + int err; nla = nla_find(attrs, attrlen, RTA_GATEWAY); nlav = nla_find(attrs, attrlen, RTA_VIA); @@ -913,12 +946,17 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, } if (nla) { + __be32 gw; + + err = fib_gw_from_attr(&gw, nla, extack); + if (err) + return err; + if (nh->fib_nh_gw_family != AF_INET || - nla_get_in_addr(nla) != nh->fib_nh_gw4) + gw != nh->fib_nh_gw4) return 1; } else if (nlav) { struct fib_config cfg2; - int err; err = fib_gw_from_via(&cfg2, nlav, extack); if (err) @@ -941,8 +979,14 @@ int fib_nh_match(struct net *net, struct fib_config *cfg, struct fib_info *fi, #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); - if (nla && nla_get_u32(nla) != nh->nh_tclassid) - return 1; + if (nla) { + if (nla_len(nla) < sizeof(u32)) { + NL_SET_ERR_MSG(extack, "Invalid RTA_FLOW"); + return -EINVAL; + } + if (nla_get_u32(nla) != nh->nh_tclassid) + return 1; + } #endif } @@ -1394,7 +1438,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg, #endif err = -ENOBUFS; - if (fib_info_cnt >= fib_info_hash_size) { + + /* Paired with WRITE_ONCE() in fib_release_info() */ + if (READ_ONCE(fib_info_cnt) >= fib_info_hash_size) { unsigned int new_size = fib_info_hash_size << 1; struct hlist_head *new_info_hash; struct hlist_head *new_laddrhash; @@ -1426,7 +1472,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, return ERR_PTR(err); } - fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; @@ -1553,6 +1598,7 @@ link_it: fi->fib_treeref++; refcount_set(&fi->fib_clntref, 1); spin_lock_bh(&fib_info_lock); + fib_info_cnt++; hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { @@ -1566,12 +1612,10 @@ link_it: } else { change_nexthops(fi) { struct hlist_head *head; - unsigned int hash; if (!nexthop_nh->fib_nh_dev) continue; - hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(nexthop_nh->fib_nh_dev); hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) } @@ -1663,7 +1707,7 @@ EXPORT_SYMBOL_GPL(fib_nexthop_info); #if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, - int nh_weight, u8 rt_family) + int nh_weight, u8 rt_family, u32 nh_tclassid) { const struct net_device *dev = nhc->nhc_dev; struct rtnexthop *rtnh; @@ -1681,6 +1725,9 @@ int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, rtnh->rtnh_flags = flags; + if (nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh_tclassid)) + goto nla_put_failure; + /* length of rtnetlink header + attributes */ rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; @@ -1708,14 +1755,13 @@ static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) } for_nexthops(fi) { - if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, - AF_INET) < 0) - goto nla_put_failure; + u32 nh_tclassid = 0; #ifdef CONFIG_IP_ROUTE_CLASSID - if (nh->nh_tclassid && - nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) - goto nla_put_failure; + nh_tclassid = nh->nh_tclassid; #endif + if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight, + AF_INET, nh_tclassid) < 0) + goto nla_put_failure; } endfor_nexthops(fi); mp_end: @@ -1919,8 +1965,7 @@ void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig) void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) { - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { @@ -1939,12 +1984,11 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) */ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { - int ret = 0; - int scope = RT_SCOPE_NOWHERE; + struct hlist_head *head = fib_info_devhash_bucket(dev); struct fib_info *prev_fi = NULL; - unsigned int hash = fib_devindex_hashfn(dev->ifindex); - struct hlist_head *head = &fib_info_devhash[hash]; + int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; + int ret = 0; if (force) scope = -1; @@ -2089,7 +2133,6 @@ out: int fib_sync_up(struct net_device *dev, unsigned char nh_flags) { struct fib_info *prev_fi; - unsigned int hash; struct hlist_head *head; struct fib_nh *nh; int ret; @@ -2105,8 +2148,7 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags) } prev_fi = NULL; - hash = fib_devindex_hashfn(dev->ifindex); - head = &fib_info_devhash[hash]; + head = fib_info_devhash_bucket(dev); ret = 0; hlist_for_each_entry(nh, head, nh_hash) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 1dfa561e8f98104e713d592a76a70d49e58bd64a..addd595bb3fe67cf4f2784f832ccc7bacf0207f5 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -892,7 +892,7 @@ void inet_csk_destroy_sock(struct sock *sk) sk_refcnt_debug_release(sk); - percpu_counter_dec(sk->sk_prot->orphan_count); + this_cpu_dec(*sk->sk_prot->orphan_count); sock_put(sk); } @@ -951,7 +951,7 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, sock_orphan(child); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(*sk->sk_prot->orphan_count); if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) { BUG_ON(rcu_access_pointer(tcp_sk(child)->fastopen_rsk) != req); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 93474b1bea4e002e4fc2aefc3d311b0445961cf7..fa9f1de58df460f2446856f59840e65bbfcc4205 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -261,6 +261,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_state = sk->sk_state; r->idiag_timer = 0; r->idiag_retrans = 0; + r->idiag_expires = 0; if (inet_diag_msg_attrs_fill(sk, skb, r, ext, sk_user_ns(NETLINK_CB(cb->skb).sk), @@ -314,9 +315,6 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_retrans = icsk->icsk_probes_out; r->idiag_expires = jiffies_delta_to_msecs(sk->sk_timer.expires - jiffies); - } else { - r->idiag_timer = 0; - r->idiag_expires = 0; } if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 10d31733297d7358b55b45d64bc447039365df1d..e0e8a65d561ec6553970af7ac8948db515f774dc 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -204,9 +204,9 @@ void inet_frag_kill(struct inet_frag_queue *fq) /* The RCU read lock provides a memory barrier * guaranteeing that if fqdir->dead is false then * the hash table destruction will not start until - * after we unlock. Paired with inet_frags_exit_net(). + * after we unlock. Paired with fqdir_pre_exit(). */ - if (!fqdir->dead) { + if (!READ_ONCE(fqdir->dead)) { rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, fqdir->f->rhash_params); refcount_dec(&fq->refcnt); @@ -321,9 +321,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { + /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ + long high_thresh = READ_ONCE(fqdir->high_thresh); struct inet_frag_queue *fq = NULL, *prev; - if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh) + if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) return NULL; rcu_read_lock(); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 45fb450b45227ce05cc5a2fd8b8a32089ad5c476..e093847c334da59f789509aba9184a58e20951f8 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -242,8 +242,10 @@ static inline int compute_score(struct sock *sk, struct net *net, if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; + score = sk->sk_bound_dev_if ? 2 : 1; - score = sk->sk_family == PF_INET ? 2 : 1; + if (sk->sk_family == PF_INET) + score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } @@ -596,7 +598,7 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } else { - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(*sk->sk_prot->orphan_count); inet_sk_set_state(sk, TCP_CLOSE); sock_set_flag(sk, SOCK_DEAD); inet_csk_destroy_sock(sk); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cfeb8890f94ee95b2246ba98beb338a33fc45d1d..fad803d2d711ef0d97f7150f0f710a35ac822946 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -144,7 +144,8 @@ static void ip_expire(struct timer_list *t) rcu_read_lock(); - if (qp->q.fqdir->dead) + /* Paired with WRITE_ONCE() in fqdir_pre_exit(). */ + if (READ_ONCE(qp->q.fqdir->dead)) goto out_rcu_unlock; spin_lock(&qp->q.lock); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a9cc05043fa47618e13480fe062aadc10692c646..e4504dd510c6d741ae450990184737cc18a58a52 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -599,8 +599,9 @@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) key = &info->key; ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, - tunnel_id_to_key32(key->tun_id), key->tos, 0, - skb->mark, skb_get_hash(skb)); + tunnel_id_to_key32(key->tun_id), + key->tos & ~INET_ECN_MASK, 0, skb->mark, + skb_get_hash(skb)); rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 10d4cde31c6bf517938f404c7597188ca9cb4c6b..5e48b3d3a00db61f457956607397307da0fe3900 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -162,12 +162,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - if (ip_dont_fragment(sk, &rt->dst)) { + /* Do not bother generating IPID for small packets (eg SYNACK) */ + if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) { iph->frag_off = htons(IP_DF); iph->id = 0; } else { iph->frag_off = 0; - __ip_select_ident(net, iph, 1); + /* TCP packets here are SYNACK with fat IPv4/TCP options. + * Avoid using the hashed IP ident generator. + */ + if (sk->sk_protocol == IPPROTO_TCP) + iph->id = (__force __be16)prandom_u32(); + else + __ip_select_ident(net, iph, 1); } if (opt && opt->opt.optlen) { @@ -614,18 +621,6 @@ void ip_fraglist_init(struct sk_buff *skb, struct iphdr *iph, } EXPORT_SYMBOL(ip_fraglist_init); -static void ip_fraglist_ipcb_prepare(struct sk_buff *skb, - struct ip_fraglist_iter *iter) -{ - struct sk_buff *to = iter->frag; - - /* Copy the flags to each fragment. */ - IPCB(to)->flags = IPCB(skb)->flags; - - if (iter->offset == 0) - ip_options_fragment(to); -} - void ip_fraglist_prepare(struct sk_buff *skb, struct ip_fraglist_iter *iter) { unsigned int hlen = iter->hlen; @@ -671,7 +666,7 @@ void ip_frag_init(struct sk_buff *skb, unsigned int hlen, EXPORT_SYMBOL(ip_frag_init); static void ip_frag_ipcb(struct sk_buff *from, struct sk_buff *to, - bool first_frag, struct ip_frag_state *state) + bool first_frag) { /* Copy the flags to each fragment. */ IPCB(to)->flags = IPCB(from)->flags; @@ -850,8 +845,20 @@ int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, /* Prepare header of the next frame, * before previous one went down. */ if (iter.frag) { - ip_fraglist_ipcb_prepare(skb, &iter); + bool first_frag = (iter.offset == 0); + + IPCB(iter.frag)->flags = IPCB(skb)->flags; ip_fraglist_prepare(skb, &iter); + if (first_frag && IPCB(skb)->opt.optlen) { + /* ipcb->opt is not populated for frags + * coming from __ip_make_skb(), + * ip_options_fragment() needs optlen + */ + IPCB(iter.frag)->opt.optlen = + IPCB(skb)->opt.optlen; + ip_options_fragment(iter.frag); + ip_send_check(iter.iph); + } } skb->tstamp = tstamp; @@ -905,7 +912,7 @@ slow_path: err = PTR_ERR(skb2); goto fail; } - ip_frag_ipcb(skb, skb2, first_frag, &state); + ip_frag_ipcb(skb, skb2, first_frag); /* * Put this fragment into the sending queue. diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 939792a3881461275b9c7fae5b3a5e0881a59584..be1976536f1c00e0b8e525207c79004628b6f8fc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -261,7 +261,9 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; err2: + rtnl_lock(); ipmr_free_table(mrt); + rtnl_unlock(); err1: fib_rules_unregister(ops); return err; diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index a8b980ad11d4e1ca225dea411bb8a4af7c995ca3..1088564d4dbcb2490f25008a7872e9c7a378222e 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -505,8 +505,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (IS_ERR(config)) return PTR_ERR(config); } - } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) + } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) { + clusterip_config_entry_put(config); + clusterip_config_put(config); return -EINVAL; + } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 1075cc2136ac6adf460902854570eb6f41c485ef..8bd3f5e3c0e7ad9c54c04db0d90cdaaf1b372763 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -924,15 +924,36 @@ static void remove_nexthop(struct net *net, struct nexthop *nh, /* if any FIB entries reference this nexthop, any dst entries * need to be regenerated */ -static void nh_rt_cache_flush(struct net *net, struct nexthop *nh) +static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, + struct nexthop *replaced_nh) { struct fib6_info *f6i; + struct nh_group *nhg; + int i; if (!list_empty(&nh->fi_list)) rt_cache_flush(net); list_for_each_entry(f6i, &nh->f6i_list, nh_list) ipv6_stub->fib6_update_sernum(net, f6i); + + /* if an IPv6 group was replaced, we have to release all old + * dsts to make sure all refcounts are released + */ + if (!replaced_nh->is_group) + return; + + /* new dsts must use only the new nexthop group */ + synchronize_net(); + + nhg = rtnl_dereference(replaced_nh->nh_grp); + for (i = 0; i < nhg->num_nh; i++) { + struct nh_grp_entry *nhge = &nhg->nh_entries[i]; + struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info); + + if (nhi->family == AF_INET6) + ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh); + } } static int replace_nexthop_grp(struct net *net, struct nexthop *old, @@ -1111,7 +1132,7 @@ static int replace_nexthop(struct net *net, struct nexthop *old, err = replace_nexthop_single(net, old, new, extack); if (!err) { - nh_rt_cache_flush(net, old); + nh_rt_cache_flush(net, old, new); __remove_nexthop(net, new, NULL); nexthop_put(new); @@ -1355,11 +1376,15 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, /* sets nh_dev if successful */ err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, extack); - if (err) + if (err) { + /* IPv6 is not enabled, don't call fib6_nh_release */ + if (err == -EAFNOSUPPORT) + goto out; ipv6_stub->fib6_nh_release(fib6_nh); - else + } else { nh->nh_flags = fib6_nh->fib_nh_flags; - + } +out: return err; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 8ce8b7300b9d3eaa47a56096a43498518176598a..e60ca03543a536b2b9a32522d87b93f94ea5b02b 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -172,16 +172,22 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; - int dif = skb->dev->ifindex; + int dif, sdif; if (skb->protocol == htons(ETH_P_IP)) { + dif = inet_iif(skb); + sdif = inet_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", (int)ident, &ip_hdr(skb)->daddr, dif); #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6)) { + dif = inet6_iif(skb); + sdif = inet6_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", (int)ident, &ipv6_hdr(skb)->daddr, dif); #endif + } else { + return NULL; } read_lock_bh(&ping_table.lock); @@ -220,7 +226,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) continue; } - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != sdif) continue; sock_hold(sk); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8d5e1695b9aa878aecf007b0ef0e334105b568ae..80d13d8f982dcfe5aef45c414e59a0d66a5c7487 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -53,7 +53,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) struct net *net = seq->private; int orphans, sockets; - orphans = percpu_counter_sum_positive(&tcp_orphan_count); + orphans = tcp_orphan_count_sum(); sockets = proto_sockets_allocated_sum_positive(&tcp_prot); socket_seq_show(seq); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 7d26e0f8bdaeb5f334e77deb5966a976db3d3ff4..5d95f80314f95521eb645f55d7cda33fe894f848 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -721,6 +721,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret = -EINVAL; int chk_addr_ret; + lock_sock(sk); if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; @@ -740,7 +741,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; -out: return ret; +out: + release_sock(sk); + return ret; } /* diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 54230852e5f95d8f3e6f651c3efd7a69dd2a1096..a3ec2a08027b82ba9966cc7a67d927eaadcc9f57 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -280,8 +280,8 @@ #include #include -struct percpu_counter tcp_orphan_count; -EXPORT_SYMBOL_GPL(tcp_orphan_count); +DEFINE_PER_CPU(unsigned int, tcp_orphan_count); +EXPORT_PER_CPU_SYMBOL_GPL(tcp_orphan_count); long sysctl_tcp_mem[3] __read_mostly; EXPORT_SYMBOL(sysctl_tcp_mem); @@ -956,7 +956,7 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) */ static void tcp_remove_empty_skb(struct sock *sk, struct sk_buff *skb) { - if (skb && !skb->len) { + if (skb && TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { tcp_unlink_write_queue(skb, sk); if (tcp_write_queue_empty(sk)) tcp_chrono_stop(sk, TCP_CHRONO_BUSY); @@ -1652,11 +1652,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if (!copied) copied = used; break; - } else if (used <= len) { - seq += used; - copied += used; - offset += used; } + if (WARN_ON_ONCE(used > len)) + used = len; + seq += used; + copied += used; + offset += used; + /* If recv_actor drops the lock (e.g. TCP splice * receive) the skb pointer might be invalid when * getting here: tcp_collapse might have deleted it @@ -1746,6 +1748,77 @@ int tcp_mmap(struct file *file, struct socket *sock, } EXPORT_SYMBOL(tcp_mmap); +static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, + u32 *offset_frag) +{ + skb_frag_t *frag; + + if (unlikely(offset_skb >= skb->len)) + return NULL; + + offset_skb -= skb_headlen(skb); + if ((int)offset_skb < 0 || skb_has_frag_list(skb)) + return NULL; + + frag = skb_shinfo(skb)->frags; + while (offset_skb) { + if (skb_frag_size(frag) > offset_skb) { + *offset_frag = offset_skb; + return frag; + } + offset_skb -= skb_frag_size(frag); + ++frag; + } + *offset_frag = 0; + return frag; +} + +static int tcp_copy_straggler_data(struct tcp_zerocopy_receive *zc, + struct sk_buff *skb, u32 copylen, + u32 *offset, u32 *seq) +{ + unsigned long copy_address = (unsigned long)zc->copybuf_address; + struct msghdr msg = {}; + struct iovec iov; + int err; + + if (copy_address != zc->copybuf_address) + return -EINVAL; + + err = import_single_range(READ, (void __user *)copy_address, + copylen, &iov, &msg.msg_iter); + if (err) + return err; + err = skb_copy_datagram_msg(skb, *offset, &msg, copylen); + if (err) + return err; + zc->recv_skip_hint -= copylen; + *offset += copylen; + *seq += copylen; + return (__s32)copylen; +} + +static int tcp_zerocopy_handle_leftover_data(struct tcp_zerocopy_receive *zc, + struct sock *sk, + struct sk_buff *skb, + u32 *seq, + s32 copybuf_len) +{ + u32 offset, copylen = min_t(u32, copybuf_len, zc->recv_skip_hint); + + if (!copylen) + return 0; + /* skb is null if inq < PAGE_SIZE. */ + if (skb) + offset = *seq - TCP_SKB_CB(skb)->seq; + else + skb = tcp_recv_skb(sk, *seq, &offset); + + zc->copybuf_len = tcp_copy_straggler_data(zc, skb, copylen, &offset, + seq); + return zc->copybuf_len < 0 ? 0 : copylen; +} + static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, struct page **pages, unsigned long pages_to_map, @@ -1779,8 +1852,10 @@ static int tcp_zerocopy_vm_insert_batch(struct vm_area_struct *vma, static int tcp_zerocopy_receive(struct sock *sk, struct tcp_zerocopy_receive *zc) { + u32 length = 0, offset, vma_len, avail_len, aligned_len, copylen = 0; unsigned long address = (unsigned long)zc->address; - u32 length = 0, seq, offset, zap_len; + s32 copybuf_len = zc->copybuf_len; + struct tcp_sock *tp = tcp_sk(sk); #define PAGE_BATCH_SIZE 8 struct page *pages[PAGE_BATCH_SIZE]; const skb_frag_t *frags = NULL; @@ -1788,10 +1863,12 @@ static int tcp_zerocopy_receive(struct sock *sk, struct sk_buff *skb = NULL; unsigned long pg_idx = 0; unsigned long curr_addr; - struct tcp_sock *tp; - int inq; + u32 seq = tp->copied_seq; + int inq = tcp_inq(sk); int ret; + zc->copybuf_len = 0; + if (address & (PAGE_SIZE - 1) || address != zc->address) return -EINVAL; @@ -1800,8 +1877,6 @@ static int tcp_zerocopy_receive(struct sock *sk, sock_rps_record_flow(sk); - tp = tcp_sk(sk); - mmap_read_lock(current->mm); vma = find_vma(current->mm, address); @@ -1809,22 +1884,23 @@ static int tcp_zerocopy_receive(struct sock *sk, mmap_read_unlock(current->mm); return -EINVAL; } - zc->length = min_t(unsigned long, zc->length, vma->vm_end - address); - - seq = tp->copied_seq; - inq = tcp_inq(sk); - zc->length = min_t(u32, zc->length, inq); - zap_len = zc->length & ~(PAGE_SIZE - 1); - if (zap_len) { - zap_page_range(vma, address, zap_len); + vma_len = min_t(unsigned long, zc->length, vma->vm_end - address); + avail_len = min_t(u32, vma_len, inq); + aligned_len = avail_len & ~(PAGE_SIZE - 1); + if (aligned_len) { + zap_page_range(vma, address, aligned_len); + zc->length = aligned_len; zc->recv_skip_hint = 0; } else { - zc->recv_skip_hint = zc->length; + zc->length = avail_len; + zc->recv_skip_hint = avail_len; } ret = 0; curr_addr = address; while (length + PAGE_SIZE <= zc->length) { if (zc->recv_skip_hint < PAGE_SIZE) { + u32 offset_frag; + /* If we're here, finish the current batch. */ if (pg_idx) { ret = tcp_zerocopy_vm_insert_batch(vma, pages, @@ -1845,16 +1921,9 @@ static int tcp_zerocopy_receive(struct sock *sk, skb = tcp_recv_skb(sk, seq, &offset); } zc->recv_skip_hint = skb->len - offset; - offset -= skb_headlen(skb); - if ((int)offset < 0 || skb_has_frag_list(skb)) + frags = skb_advance_to_frag(skb, offset, &offset_frag); + if (!frags || offset_frag) break; - frags = skb_shinfo(skb)->frags; - while (offset) { - if (skb_frag_size(frags) > offset) - goto out; - offset -= skb_frag_size(frags); - frags++; - } } if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) { int remaining = zc->recv_skip_hint; @@ -1888,13 +1957,18 @@ static int tcp_zerocopy_receive(struct sock *sk, } out: mmap_read_unlock(current->mm); - if (length) { + /* Try to copy straggler data. */ + if (!ret) + copylen = tcp_zerocopy_handle_leftover_data(zc, sk, skb, &seq, + copybuf_len); + + if (length + copylen) { WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); /* Clean up data we have read: This will do ACK frames. */ tcp_recv_skb(sk, seq, &offset); - tcp_cleanup_rbuf(sk, length); + tcp_cleanup_rbuf(sk, length + copylen); ret = 0; if (length == zc->length) zc->recv_skip_hint = 0; @@ -2394,11 +2468,36 @@ void tcp_shutdown(struct sock *sk, int how) } EXPORT_SYMBOL(tcp_shutdown); +int tcp_orphan_count_sum(void) +{ + int i, total = 0; + + for_each_possible_cpu(i) + total += per_cpu(tcp_orphan_count, i); + + return max(total, 0); +} + +static int tcp_orphan_cache; +static struct timer_list tcp_orphan_timer; +#define TCP_ORPHAN_TIMER_PERIOD msecs_to_jiffies(100) + +static void tcp_orphan_update(struct timer_list *unused) +{ + WRITE_ONCE(tcp_orphan_cache, tcp_orphan_count_sum()); + mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD); +} + +static bool tcp_too_many_orphans(int shift) +{ + return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans; +} + bool tcp_check_oom(struct sock *sk, int shift) { bool too_many_orphans, out_of_socket_memory; - too_many_orphans = tcp_too_many_orphans(sk, shift); + too_many_orphans = tcp_too_many_orphans(shift); out_of_socket_memory = tcp_out_of_memory(sk); if (too_many_orphans) @@ -2508,7 +2607,7 @@ adjudge_to_death: /* remove backlog if any, without releasing ownership. */ __release_sock(sk); - percpu_counter_inc(sk->sk_prot->orphan_count); + this_cpu_inc(tcp_orphan_count); /* Have we already been destroyed by a softirq or backlog? */ if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE) @@ -3834,7 +3933,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, } #ifdef CONFIG_MMU case TCP_ZEROCOPY_RECEIVE: { - struct tcp_zerocopy_receive zc; + struct tcp_zerocopy_receive zc = {}; int err; if (get_user(len, optlen)) @@ -3852,7 +3951,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level, lock_sock(sk); err = tcp_zerocopy_receive(sk, &zc); release_sock(sk); - if (len == sizeof(zc)) + if (len >= offsetofend(struct tcp_zerocopy_receive, err)) goto zerocopy_rcv_sk_err; switch (len) { case offsetofend(struct tcp_zerocopy_receive, err): @@ -4145,7 +4244,10 @@ void __init tcp_init(void) sizeof_field(struct sk_buff, cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); - percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); + + timer_setup(&tcp_orphan_timer, tcp_orphan_update, TIMER_DEFERRABLE); + mod_timer(&tcp_orphan_timer, jiffies + TCP_ORPHAN_TIMER_PERIOD); + inet_hashinfo_init(&tcp_hashinfo); inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash", thash_entries, 21, /* one slot per 2 MB*/ diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index f91ae827d47f5e0b7c18b114ebd446c6b7f9c967..6b745ce4108c8786631bc57e704e7753d0d70356 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -317,6 +317,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, bool cork = false, enospc = sk_msg_full(msg); struct sock *sk_redir; u32 tosend, delta = 0; + u32 eval = __SK_NONE; int ret; more_data: @@ -360,13 +361,24 @@ more_data: case __SK_REDIRECT: sk_redir = psock->sk_redir; sk_msg_apply_bytes(psock, tosend); + if (!psock->apply_bytes) { + /* Clean up before releasing the sock lock. */ + eval = psock->eval; + psock->eval = __SK_NONE; + psock->sk_redir = NULL; + } if (psock->cork) { cork = true; psock->cork = NULL; } sk_msg_return(sk, msg, tosend); release_sock(sk); + ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags); + + if (eval == __SK_REDIRECT) + sock_put(sk_redir); + lock_sock(sk); if (unlikely(ret < 0)) { int free = sk_msg_free_nocharge(sk, msg); @@ -561,7 +573,6 @@ static void tcp_bpf_rebuild_protos(struct proto prot[TCP_BPF_NUM_CFGS], struct proto *base) { prot[TCP_BPF_BASE] = *base; - prot[TCP_BPF_BASE].unhash = sock_map_unhash; prot[TCP_BPF_BASE].close = sock_map_close; prot[TCP_BPF_BASE].recvmsg = tcp_bpf_recvmsg; prot[TCP_BPF_BASE].stream_memory_read = tcp_bpf_stream_read; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index c7bf5b26bf0c24837034eaee0189847d49ded6e9..fffa011a007d4de7d17d473b22d9a9970e732fd9 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -337,8 +337,6 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) return; if (tcp_in_slow_start(tp)) { - if (hystart && after(ack, ca->end_seq)) - bictcp_hystart_reset(sk); acked = tcp_slow_start(tp, acked); if (!acked) return; @@ -398,6 +396,9 @@ static void hystart_update(struct sock *sk, u32 delay) struct bictcp *ca = inet_csk_ca(sk); u32 threshold; + if (after(tp->snd_una, ca->end_seq)) + bictcp_hystart_reset(sk); + if (hystart_detect & HYSTART_ACK_TRAIN) { u32 now = bictcp_clock_us(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d7600bc58ab9c8daa61724e82bf96fb54c22006b..06ec2887a4665a0e076c515cad5eaa908b1ee870 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1621,6 +1621,8 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, (mss != tcp_skb_seglen(skb))) goto out; + if (!tcp_skb_can_collapse(prev, skb)) + goto out; len = skb->len; pcount = tcp_skb_pcount(skb); if (tcp_skb_shift(prev, skb, pcount, len)) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 71395e745bc5e77e12d00514e56b62e1b02407a1..017cd666387f3269648dff82d241788f676e7085 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1022,6 +1022,20 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) DEFINE_STATIC_KEY_FALSE(tcp_md5_needed); EXPORT_SYMBOL(tcp_md5_needed); +static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new) +{ + if (!old) + return true; + + /* l3index always overrides non-l3index */ + if (old->l3index && new->l3index == 0) + return false; + if (old->l3index == 0 && new->l3index) + return true; + + return old->prefixlen < new->prefixlen; +} + /* Find the Key structure for an address. */ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, @@ -1059,8 +1073,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, match = false; } - if (match && (!best_match || - key->prefixlen > best_match->prefixlen)) + if (match && better_md5_match(best_match, key)) best_match = key; } return best_match; @@ -1090,7 +1103,7 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, lockdep_sock_is_held(sk)) { if (key->family != family) continue; - if (key->l3index && key->l3index != l3index) + if (key->l3index != l3index) continue; if (!memcmp(&key->addr, addr, size) && key->prefixlen == prefixlen) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 19ef4577b70d6f205245ecfe80c53a8ff768cc78..37e70cd46feafc9b497a77ca042d9858e242deb1 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1561,7 +1561,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, return -ENOMEM; } - if (skb_unclone(skb, gfp)) + if (skb_unclone_keeptruesize(skb, gfp)) return -ENOMEM; /* Get a new skb... force flag on. */ @@ -1670,7 +1670,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) { u32 delta_truesize; - if (skb_unclone(skb, GFP_ATOMIC)) + if (skb_unclone_keeptruesize(skb, GFP_ATOMIC)) return -ENOMEM; delta_truesize = __pskb_trim_head(skb, len); @@ -3184,7 +3184,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) cur_mss, GFP_ATOMIC)) return -ENOMEM; /* We'll try again later. */ } else { - if (skb_unclone(skb, GFP_ATOMIC)) + if (skb_unclone_keeptruesize(skb, GFP_ATOMIC)) return -ENOMEM; diff = tcp_skb_pcount(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index e73312546c5a15c39dfdd083d730bba15e7c8e5a..ef2068a60d4ad49db4dd781178b060dd78541b58 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -390,7 +390,8 @@ static int compute_score(struct sock *sk, struct net *net, dif, sdif); if (!dev_match) return -1; - score += 4; + if (sk->sk_bound_dev_if) + score += 4; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; @@ -898,7 +899,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } @@ -1035,7 +1036,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) __be16 dport; u8 tos; int err, is_udplite = IS_UDPLITE(sk); - int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; struct ip_options_data opt_copy; @@ -1343,7 +1344,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, } up->len += size; - if (!(up->corkflag || (flags&MSG_MORE))) + if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE))) ret = udp_push_pending_frames(sk); if (!ret) ret = size; @@ -2609,9 +2610,9 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: if (val != 0) { - up->corkflag = 1; + WRITE_ONCE(up->corkflag, 1); } else { - up->corkflag = 0; + WRITE_ONCE(up->corkflag, 0); lock_sock(sk); push_pending_frames(sk); release_sock(sk); @@ -2734,7 +2735,7 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case UDP_CORK: - val = up->corkflag; + val = READ_ONCE(up->corkflag); break; case UDP_ENCAP: @@ -3005,7 +3006,7 @@ int udp4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) - seq_puts(seq, " sl local_address rem_address st tx_queue " + seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index b91003538d87a03855df9bd35b751017d37fe031..bc3a043a5d5c7635b7acabe76bb19f7ad59d8db9 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -846,7 +846,7 @@ udp_tunnel_nic_unregister(struct net_device *dev, struct udp_tunnel_nic *utn) list_for_each_entry(node, &info->shared->devices, list) if (node->dev == dev) break; - if (node->dev != dev) + if (list_entry_is_head(node, &info->shared->devices, list)) return; list_del(&node->list); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index df0d19a7e5579d315d549bdce39e2b8330dc9db3..ebf4866e10bc2d1518e507822ea001160d1b7b0a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2601,7 +2601,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, __u32 valid_lft, u32 prefered_lft) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1); - int create = 0; + int create = 0, update_lft = 0; if (!ifp && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; @@ -2645,19 +2645,32 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, unsigned long now; u32 stored_lft; - /* Update lifetime (RFC4862 5.5.3 e) - * We deviate from RFC4862 by honoring all Valid Lifetimes to - * improve the reaction of SLAAC to renumbering events - * (draft-gont-6man-slaac-renum-06, Section 4.2) - */ + /* update lifetime (RFC2462 5.5.3 e) */ spin_lock_bh(&ifp->lock); now = jiffies; if (ifp->valid_lft > (now - ifp->tstamp) / HZ) stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; else stored_lft = 0; - if (!create && stored_lft) { + const u32 minimum_lft = min_t(u32, + stored_lft, MIN_VALID_LIFETIME); + valid_lft = max(valid_lft, minimum_lft); + + /* RFC4862 Section 5.5.3e: + * "Note that the preferred lifetime of the + * corresponding address is always reset to + * the Preferred Lifetime in the received + * Prefix Information option, regardless of + * whether the valid lifetime is also reset or + * ignored." + * + * So we should always update prefered_lft here. + */ + update_lft = 1; + } + + if (update_lft) { ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; ifp->tstamp = now; @@ -3121,6 +3134,9 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); if (idev->dev->flags&IFF_POINTOPOINT) { + if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE) + return; + addr.s6_addr32[0] = htonl(0xfe800000); scope = IFA_LINK; plen = 64; @@ -3725,6 +3741,7 @@ static int addrconf_ifdown(struct net_device *dev, bool unregister) struct inet6_dev *idev; struct inet6_ifaddr *ifa, *tmp; bool keep_addr = false; + bool was_ready; int state, i; ASSERT_RTNL(); @@ -3790,7 +3807,10 @@ restart: addrconf_del_rs_timer(idev); - /* Step 2: clear flags for stateless addrconf */ + /* Step 2: clear flags for stateless addrconf, repeated down + * detection + */ + was_ready = idev->if_flags & IF_READY; if (!unregister) idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); @@ -3864,7 +3884,7 @@ restart: if (unregister) { ipv6_ac_destroy_dev(idev); ipv6_mc_destroy_dev(idev); - } else { + } else if (was_ready) { ipv6_mc_down(idev); } @@ -4988,6 +5008,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, nla_put_s32(skb, IFA_TARGET_NETNSID, args->netnsid)) goto error; + spin_lock_bh(&ifa->lock); if (!((ifa->flags&IFA_F_PERMANENT) && (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; @@ -5009,6 +5030,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, preferred = INFINITY_LIFE_TIME; valid = INFINITY_LIFE_TIME; } + spin_unlock_bh(&ifa->lock); if (!ipv6_addr_any(&ifa->peer_addr)) { if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c6db41b5063f7360f7ef7764ce5e04e138c5aabb..1fbc6c989a7c5b479b7890f3970e112600183636 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1018,6 +1018,7 @@ static const struct ipv6_stub ipv6_stub_impl = { .ip6_mtu_from_fib6 = ip6_mtu_from_fib6, .fib6_nh_init = fib6_nh_init, .fib6_nh_release = fib6_nh_release, + .fib6_nh_release_dsts = fib6_nh_release_dsts, .fib6_update_sernum = fib6_update_sernum_stub, .fib6_rt_update = fib6_rt_update, .ip6_del_rt = ip6_del_rt, diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index cb708fbb1c1729c13281c360e07e695578b5efb8..0158b0163ca8ee847a7d100ee9d6a6a8482b13a7 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -708,7 +708,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct xfrm_dst *dst = (struct xfrm_dst *)skb_dst(skb); u32 padto; - padto = min(x->tfcpad, __xfrm_state_mtu(x, dst->child_mtu_cached)); + padto = min(x->tfcpad, xfrm_state_mtu(x, dst->child_mtu_cached)); if (skb->len < padto) esp.tfclen = padto - skb->len; } @@ -808,6 +808,12 @@ int esp6_input_done2(struct sk_buff *skb, int err) struct tcphdr *th; offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); + + if (offset < 0) { + err = -EINVAL; + goto out; + } + uh = (void *)(skb->data + offset); th = (void *)(skb->data + offset); hdr_len += offset; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 4af56affaafd436fbd35ade87ffd2b7c8e6d4d91..1c3f02d05d2bfdaad4e915a1dca29c9d71b310bf 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -198,6 +198,9 @@ static struct sk_buff *xfrm6_beet_gso_segment(struct xfrm_state *x, ipv6_skip_exthdr(skb, 0, &proto, &frag); } + if (proto == IPPROTO_IPIP) + skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP6; + __skb_pull(skb, skb_transport_offset(skb)); ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 8f9a83314de7d5cd8f66e2bb404d3be132c7c64f..3e4c87b29b11522f090608a2c30446f6c8657260 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -267,6 +267,7 @@ INDIRECT_CALLABLE_SCOPE int fib6_rule_action(struct fib_rule *rule, } INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule, + int flags, struct fib_lookup_arg *arg) { struct fib6_result *res = arg->result; @@ -294,8 +295,7 @@ INDIRECT_CALLABLE_SCOPE bool fib6_rule_suppress(struct fib_rule *rule, return false; suppress_route: - if (!(arg->flags & FIB_LOOKUP_NOREF)) - ip6_rt_put(rt); + ip6_rt_put_flags(rt, flags); return true; } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 55c290d556059a989cebc6d26cb77e8b01edb5a5..67c9114835c84864a3353c6f1c16853ea62a21c5 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -106,7 +106,7 @@ static inline int compute_score(struct sock *sk, struct net *net, if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; - score = 1; + score = sk->sk_bound_dev_if ? 2 : 1; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e43f1fbac28b67ed3fe186adbde26d2653167864..c783b912313217bf3a8fc5838142c03f03c22017 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -110,7 +110,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i) fn = rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) - fn->fn_sernum = fib6_new_sernum(net); + WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net)); } /* @@ -587,12 +587,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; - cb->args[5] = w->root->fn_sernum; + cb->args[5] = READ_ONCE(w->root->fn_sernum); } } else { - if (cb->args[5] != w->root->fn_sernum) { + int sernum = READ_ONCE(w->root->fn_sernum); + if (cb->args[5] != sernum) { /* Begin at the root if the tree changed */ - cb->args[5] = w->root->fn_sernum; + cb->args[5] = sernum; w->state = FWS_INIT; w->node = w->root; w->skip = w->count; @@ -1342,7 +1343,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, /* paired with smp_rmb() in rt6_get_cookie_safe() */ smp_wmb(); while (fn) { - fn->fn_sernum = sernum; + WRITE_ONCE(fn->fn_sernum, sernum); fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&rt->fib6_table->tb6_lock)); } @@ -2171,8 +2172,8 @@ static int fib6_clean_node(struct fib6_walker *w) }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && - w->node->fn_sernum != c->sernum) - w->node->fn_sernum = c->sernum; + READ_ONCE(w->node->fn_sernum) != c->sernum) + WRITE_ONCE(w->node->fn_sernum, c->sernum); if (!c->func) { WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); @@ -2536,7 +2537,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, iter->w.state = FWS_INIT; iter->w.node = iter->w.root; iter->w.args = iter; - iter->sernum = iter->w.root->fn_sernum; + iter->sernum = READ_ONCE(iter->w.root->fn_sernum); INIT_LIST_HEAD(&iter->w.lh); fib6_walker_link(net, &iter->w); } @@ -2564,8 +2565,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) { - if (iter->sernum != iter->w.root->fn_sernum) { - iter->sernum = iter->w.root->fn_sernum; + int sernum = READ_ONCE(iter->w.root->fn_sernum); + + if (iter->sernum != sernum) { + iter->sernum = sernum; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; WARN_ON(iter->w.skip); diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index aa673a6a7e4328d03e89527ed3daaccd69c39084..ceb85c67ce3952b7142eeec29bff46a7eaf5217b 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -450,8 +450,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, err = -EINVAL; goto done; } - if (fl_shared_exclusive(fl) || fl->opt) + if (fl_shared_exclusive(fl) || fl->opt) { + WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1); static_branch_deferred_inc(&ipv6_flowlabel_exclusive); + } return fl; done: diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 09fa49bbf617db53eb04fd9d739276fc3a14c53d..9a0263f2523237058ec316d4cf007a9917460681 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -755,6 +755,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, fl6->daddr = key->u.ipv6.dst; fl6->flowlabel = key->label; fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6->fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; flags = key->tun_flags & @@ -990,6 +991,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, fl6.daddr = key->u.ipv6.dst; fl6.flowlabel = key->label; fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL); + fl6.fl6_gre_key = tunnel_id_to_key32(key->tun_id); dsfield = key->tos; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) @@ -1098,6 +1100,7 @@ static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) fl6->flowi6_oif = p->link; fl6->flowlabel = 0; fl6->flowi6_proto = IPPROTO_GRE; + fl6->fl6_gre_key = t->parms.o_key; if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; @@ -1543,7 +1546,7 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev) static struct inet6_protocol ip6gre_protocol __read_mostly = { .handler = gre_rcv, .err_handler = ip6gre_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, + .flags = INET6_PROTO_FINAL, }; static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index a80f90bf3ae7dc1aec904fd93b3d8e8c87a926e4..15c8eef1ef443854e207bfaa238cb7aa321fedb9 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -113,6 +113,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; } if (IS_ERR_OR_NULL(segs)) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 72a673a43a75419afe0a0d60035af922fc60ab6f..d6f2126f461848a9e10f49863e8f059f77025a31 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -193,7 +193,7 @@ static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { - IPCB(skb)->flags |= IPSKB_REROUTED; + IP6CB(skb)->flags |= IP6SKB_REROUTED; return dst_output(net, sk, skb); } #endif @@ -487,13 +487,14 @@ static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) int ip6_forward(struct sk_buff *skb) { - struct inet6_dev *idev = __in6_dev_get_safely(skb->dev); struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); + struct inet6_dev *idev; u32 mtu; + idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); if (net->ipv6.devconf_all->forwarding == 0) goto error; @@ -1431,8 +1432,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (np->frag_size) mtu = np->frag_size; } - if (mtu < IPV6_MIN_MTU) - return -EINVAL; cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; @@ -1494,8 +1493,6 @@ static int __ip6_append_data(struct sock *sk, fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - - sizeof(struct frag_hdr); headersize = sizeof(struct ipv6hdr) + (opt ? opt->opt_flen + opt->opt_nflen : 0) + @@ -1503,6 +1500,13 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; + if (mtu < fragheaderlen || + ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr)) + goto emsgsize; + + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); + /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit * the first fragment */ diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 08441f06afd482066fa1381f8f0d1fb436a1d73d..3a2741569b84764797021a975345db6a687e1fc7 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1066,14 +1066,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Local address not yet configured!\n", - p->name); + pr_warn_ratelimited("%s xmit: Local address not yet configured!\n", + p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", - p->name); + pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n", + p->name); else ret = 1; rcu_read_unlock(); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 23aeeb46f99fc96f248b27694cc2438265545908..99f2dc802e366f8591172456ccfa506c6bb74ba0 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -804,6 +804,8 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 06b0d2c329b94b25f2945b220347c5d9defdc72e..41cb348a7c3c4904ba4111d1c0e48044c7963ee2 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -248,7 +248,9 @@ static int __net_init ip6mr_rules_init(struct net *net) return 0; err2: + rtnl_lock(); ip6mr_free_table(mrt); + rtnl_unlock(); err1: fib_rules_unregister(ops); return err; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index eb2b5404806c649c2fe4cb0c51b4481dace1652b..d36168baf6776381792fe41b256d4580e29f759d 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -273,6 +273,7 @@ ip6t_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ + acpar.fragoff = 0; acpar.hotdrop = false; acpar.state = state; diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 733c83d38b30889813b92ce1bcbfcc002868e918..4ad8b2032f1f92210fc041cd96a0c8eb31d90c9e 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -25,12 +25,7 @@ MODULE_AUTHOR("Andras Kis-Szabo "); static inline bool segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) { - bool r; - pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n", - invert ? '!' : ' ', min, id, max); - r = (id >= min && id <= max) ^ invert; - pr_debug(" result %s\n", r ? "PASS" : "FAILED"); - return r; + return (id >= min && id <= max) ^ invert; } static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) @@ -65,30 +60,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) return false; } - pr_debug("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); - pr_debug("TYPE %04X ", rh->type); - pr_debug("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); - - pr_debug("IPv6 RT segsleft %02X ", - segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], - rh->segments_left, - !!(rtinfo->invflags & IP6T_RT_INV_SGS))); - pr_debug("type %02X %02X %02X ", - rtinfo->rt_type, rh->type, - (!(rtinfo->flags & IP6T_RT_TYP) || - ((rtinfo->rt_type == rh->type) ^ - !!(rtinfo->invflags & IP6T_RT_INV_TYP)))); - pr_debug("len %02X %04X %02X ", - rtinfo->hdrlen, hdrlen, - !(rtinfo->flags & IP6T_RT_LEN) || - ((rtinfo->hdrlen == hdrlen) ^ - !!(rtinfo->invflags & IP6T_RT_INV_LEN))); - pr_debug("res %02X %02X %02X ", - rtinfo->flags & IP6T_RT_RES, - ((const struct rt0_hdr *)rh)->reserved, - !((rtinfo->flags & IP6T_RT_RES) && - (((const struct rt0_hdr *)rh)->reserved))); - ret = (segsleft_match(rtinfo->segsleft[0], rtinfo->segsleft[1], rh->segments_left, !!(rtinfo->invflags & IP6T_RT_INV_SGS))) && @@ -107,22 +78,22 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) reserved), sizeof(_reserved), &_reserved); + if (!rp) { + par->hotdrop = true; + return false; + } ret = (*rp == 0); } - pr_debug("#%d ", rtinfo->addrnr); if (!(rtinfo->flags & IP6T_RT_FST)) { return ret; } else if (rtinfo->flags & IP6T_RT_FST_NSTRICT) { - pr_debug("Not strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - pr_debug("There isn't enough space\n"); return false; } else { unsigned int i = 0; - pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < (unsigned int)((hdrlen - 8) / 16); temp++) { @@ -138,26 +109,20 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) return false; } - if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) { - pr_debug("i=%d temp=%d;\n", i, temp); + if (ipv6_addr_equal(ap, &rtinfo->addrs[i])) i++; - } if (i == rtinfo->addrnr) break; } - pr_debug("i=%d #%d\n", i, rtinfo->addrnr); if (i == rtinfo->addrnr) return ret; else return false; } } else { - pr_debug("Strict "); if (rtinfo->addrnr > (unsigned int)((hdrlen - 8) / 16)) { - pr_debug("There isn't enough space\n"); return false; } else { - pr_debug("#%d ", rtinfo->addrnr); for (temp = 0; temp < rtinfo->addrnr; temp++) { ap = skb_header_pointer(skb, ptr @@ -173,7 +138,6 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par) if (!ipv6_addr_equal(ap, &rtinfo->addrs[temp])) break; } - pr_debug("temp=%d #%d\n", temp, rtinfo->addrnr); if (temp == rtinfo->addrnr && temp == (unsigned int)((hdrlen - 8) / 16)) return ret; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 00f133a55ef7ca203e58a5afe05c7a6dcc3e3c86..38349054e361ef919eb886d0f2ae25713b5f20c2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1020,6 +1020,9 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, struct raw6_sock *rp = raw6_sk(sk); int val; + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_sockptr(&val, optval, sizeof(val))) return -EFAULT; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 159002e3181228f9461abae0fa972e0d490db57c..1a81088c80d2b6f0b28d9aea09d07e869cb30044 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2674,7 +2674,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (from) { fn = rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; + WRITE_ONCE(fn->fn_sernum, -1); } } rcu_read_unlock(); @@ -3570,6 +3570,25 @@ void fib6_nh_release(struct fib6_nh *fib6_nh) fib_nh_common_release(&fib6_nh->nh_common); } +void fib6_nh_release_dsts(struct fib6_nh *fib6_nh) +{ + int cpu; + + if (!fib6_nh->rt6i_pcpu) + return; + + for_each_possible_cpu(cpu) { + struct rt6_info *pcpu_rt, **ppcpu_rt; + + ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); + pcpu_rt = xchg(ppcpu_rt, NULL); + if (pcpu_rt) { + dst_dev_put(&pcpu_rt->dst); + dst_release(&pcpu_rt->dst); + } + } +} + static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) @@ -5071,6 +5090,19 @@ out: return should_notify; } +static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + if (nla_len(nla) < sizeof(*gw)) { + NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY"); + return -EINVAL; + } + + *gw = nla_get_in6_addr(nla); + + return 0; +} + static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -5111,10 +5143,18 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - r_cfg.fc_gateway = nla_get_in6_addr(nla); + err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + extack); + if (err) + goto cleanup; + r_cfg.fc_flags |= RTF_GATEWAY; } r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + + /* RTA_ENCAP_TYPE length checked in + * lwtunnel_valid_encap_type_attr + */ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); if (nla) r_cfg.fc_encap_type = nla_get_u16(nla); @@ -5281,7 +5321,13 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - nla_memcpy(&r_cfg.fc_gateway, nla, 16); + err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + extack); + if (err) { + last_err = err; + goto next_rtnh; + } + r_cfg.fc_flags |= RTF_GATEWAY; } } @@ -5289,6 +5335,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, if (err) last_err = err; +next_rtnh: rtnh = rtnh_next(rtnh, &remaining); } @@ -5543,14 +5590,15 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, goto nla_put_failure; if (fib_add_nexthop(skb, &rt->fib6_nh->nh_common, - rt->fib6_nh->fib_nh_weight, AF_INET6) < 0) + rt->fib6_nh->fib_nh_weight, AF_INET6, + 0) < 0) goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { if (fib_add_nexthop(skb, &sibling->fib6_nh->nh_common, sibling->fib6_nh->fib_nh_weight, - AF_INET6) < 0) + AF_INET6, 0) < 0) goto nla_put_failure; } diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 897fa59c47de7e1056aafadbbc06a29b6ab905fa..4d4399c5c5ea94911bce37e457d3c9cb323059a1 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -160,6 +160,14 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + + /* the control block has been erased, so we have to set the + * iif once again. + * We read the receiving interface index directly from the + * skb->skb_iif as it is done in the IPv4 receiving path (i.e.: + * ip_rcv_core(...)). + */ + IP6CB(skb)->iif = skb->skb_iif; } hdr->nexthdr = NEXTHDR_ROUTING; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a6a3d759246ecb7f8bb02fafb8638590f052d1d7..bab0e99f6e356fa65ea81bf741c841be0e7fcdd3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1924,7 +1924,6 @@ static int __net_init sit_init_net(struct net *net) return 0; err_reg_dev: - ipip6_dev_free(sitn->fb_tunnel_dev); free_netdev(sitn->fb_tunnel_dev); err_alloc_dev: return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a448b6cd472738f4477a06d19c1dc9e5c68b6b45..069551a04369e16425d8f0ae51e83add92fda2ae 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -133,7 +133,8 @@ static int compute_score(struct sock *sk, struct net *net, dev_match = udp_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif); if (!dev_match) return -1; - score++; + if (sk->sk_bound_dev_if) + score++; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; @@ -1188,7 +1189,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } @@ -1288,7 +1289,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int addr_len = msg->msg_namelen; bool connected = false; int ulen = len; - int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE; int err; int is_udplite = IS_UDPLITE(sk); int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); @@ -1419,7 +1420,6 @@ do_udp_sendmsg: if (!fl6.flowi6_oif) fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { @@ -1455,6 +1455,7 @@ do_udp_sendmsg: ipc6.opt = opt; fl6.flowi6_proto = sk->sk_protocol; + fl6.flowi6_mark = ipc6.sockc.mark; fl6.daddr = *daddr; if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr)) fl6.saddr = np->saddr; diff --git a/net/key/af_key.c b/net/key/af_key.c index ef9b4ac03e7b74e5ddad0ca2d3d337583fc25fee..d1364b858fdf0bd546d0b90617ba2406a9bed03a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2627,7 +2627,7 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, } return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, - kma ? &k : NULL, net, NULL); + kma ? &k : NULL, net, NULL, 0); out: return err; diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index cd4cf84a7f99fd53ca5f58fc3650a465f9d635ae..6ef8ded4ec764bc35c1ab1a07f0ae54ddac46f94 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2021 Intel Corporation */ /** @@ -191,7 +191,8 @@ static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata, sband = ieee80211_get_sband(sdata); if (!sband) return; - he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type); + he_cap = ieee80211_get_he_iftype_cap(sband, + ieee80211_vif_type_p2p(&sdata->vif)); if (!he_cap) return; diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index b37c8a983d88d3580cae9434bf086fb9340652fb..4b4ab1961068fcbcc170d5eb960eb03a344a9058 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2020 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation */ #include @@ -106,7 +106,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); - ieee80211_tx_skb(sdata, skb); + ieee80211_tx_skb_tid(sdata, skb, tid); } void ieee80211_send_bar(struct ieee80211_vif *vif, u8 *ra, u16 tid, u16 ssn) @@ -213,6 +213,8 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) struct ieee80211_txq *txq = sta->sta.txq[tid]; struct txq_info *txqi; + lockdep_assert_held(&sta->ampdu_mlme.mtx); + if (!txq) return; @@ -290,7 +292,6 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) ieee80211_assign_tid_tx(sta, tid, NULL); ieee80211_agg_splice_finish(sta->sdata, tid); - ieee80211_agg_start_txq(sta, tid, false); kfree_rcu(tid_tx, rcu_head); } @@ -480,8 +481,7 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, /* send AddBA request */ ieee80211_send_addba_request(sdata, sta->sta.addr, tid, - tid_tx->dialog_token, - sta->tid_seq[tid] >> 4, + tid_tx->dialog_token, tid_tx->ssn, buf_size, tid_tx->timeout); WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)); @@ -523,6 +523,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, ¶ms); + tid_tx->ssn = params.ssn; if (ret == IEEE80211_AMPDU_TX_START_DELAY_ADDBA) { return; } else if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) { @@ -625,6 +626,14 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, return -EINVAL; } + if (test_sta_flag(sta, WLAN_STA_MFP) && + !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { + ht_dbg(sdata, + "MFP STA not authorized - deny BA session request %pM tid %d\n", + sta->sta.addr, tid); + return -EINVAL; + } + /* * 802.11n-2009 11.5.1.1: If the initiating STA is an HT STA, is a * member of an IBSS, and has no other existing Block Ack agreement @@ -889,6 +898,7 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, { struct ieee80211_sub_if_data *sdata = sta->sdata; bool send_delba = false; + bool start_txq = false; ht_dbg(sdata, "Stopping Tx BA session for %pM tid %d\n", sta->sta.addr, tid); @@ -906,10 +916,14 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, send_delba = true; ieee80211_remove_tid_tx(sta, tid); + start_txq = true; unlock_sta: spin_unlock_bh(&sta->lock); + if (start_txq) + ieee80211_agg_start_txq(sta, tid, false); + if (send_delba) ieee80211_send_delba(sdata, sta->sta.addr, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index e429dbb10df71d986448ad2d1361f94c5defc252..d46ed4cbe771757b48b15090d33a17a8d4b83096 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1217,7 +1217,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, return 0; error: + mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); + mutex_unlock(&local->mtx); + return err; } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index bcdfd19a596be1d352b48b3fa0ebad1277a0cc46..a172f69c71123b9f2ab01798150c3caf4c21e28c 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1201,8 +1201,11 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); - if (local->in_reconfig) + /* In reconfig don't transmit now, but mark for waking later */ + if (local->in_reconfig) { + set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags); return; + } if (!check_sdata_in_driver(sdata)) return; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7f2be08b72a56a80be12d822a75c66420af38591..fe8f586886b414e22d4cebce25d13d20c69b3d6d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -374,7 +374,7 @@ struct ieee80211_mgd_auth_data { u8 key[WLAN_KEY_LEN_WEP104]; u8 key_len, key_idx; - bool done; + bool done, waiting; bool peer_confirmed; bool timeout_started; diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 620ecf922408b1f870d4da8ab7eef02e4d33ab15..870c8eafef92910d9ebc8e5e555c322250ba5b6a 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -60,7 +60,10 @@ static struct mesh_table *mesh_table_alloc(void) atomic_set(&newtbl->entries, 0); spin_lock_init(&newtbl->gates_lock); spin_lock_init(&newtbl->walk_lock); - rhashtable_init(&newtbl->rhead, &mesh_rht_params); + if (rhashtable_init(&newtbl->rhead, &mesh_rht_params)) { + kfree(newtbl); + return NULL; + } return newtbl; } diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c index 204830a55240b4829991f4d815a6f1a10538c0f5..3fbd0b9ff9135474cdc8d900c52daa1958b1cadd 100644 --- a/net/mac80211/mesh_ps.c +++ b/net/mac80211/mesh_ps.c @@ -2,6 +2,7 @@ /* * Copyright 2012-2013, Marco Porsch * Copyright 2012-2013, cozybit Inc. + * Copyright (C) 2021 Intel Corporation */ #include "mesh.h" @@ -588,7 +589,7 @@ void ieee80211_mps_frame_release(struct sta_info *sta, /* only transmit to PS STA with announced, non-zero awake window */ if (test_sta_flag(sta, WLAN_STA_PS_STA) && - (!elems->awake_window || !le16_to_cpu(*elems->awake_window))) + (!elems->awake_window || !get_unaligned_le16(elems->awake_window))) return; if (!test_sta_flag(sta, WLAN_STA_MPSP_OWNER)) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 32bc30ec50ec96380cee30f7107565d597b8e794..0dba353d3f8fe16c786f7cc383778e6a0b8a60f7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -37,6 +37,7 @@ #define IEEE80211_AUTH_TIMEOUT_SAE (HZ * 2) #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) +#define IEEE80211_AUTH_WAIT_SAE_RETRY (HZ * 2) #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) #define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2) #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) @@ -2493,11 +2494,18 @@ static void ieee80211_sta_tx_wmm_ac_notify(struct ieee80211_sub_if_data *sdata, u16 tx_time) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u16 tid = ieee80211_get_tid(hdr); - int ac = ieee80211_ac_from_tid(tid); - struct ieee80211_sta_tx_tspec *tx_tspec = &ifmgd->tx_tspec[ac]; + u16 tid; + int ac; + struct ieee80211_sta_tx_tspec *tx_tspec; unsigned long now = jiffies; + if (!ieee80211_is_data_qos(hdr->frame_control)) + return; + + tid = ieee80211_get_tid(hdr); + ac = ieee80211_ac_from_tid(tid); + tx_tspec = &ifmgd->tx_tspec[ac]; + if (likely(!tx_tspec->admitted_time)) return; @@ -2992,8 +3000,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, (status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED || (auth_transaction == 1 && (status_code == WLAN_STATUS_SAE_HASH_TO_ELEMENT || - status_code == WLAN_STATUS_SAE_PK)))) + status_code == WLAN_STATUS_SAE_PK)))) { + /* waiting for userspace now */ + ifmgd->auth_data->waiting = true; + ifmgd->auth_data->timeout = + jiffies + IEEE80211_AUTH_WAIT_SAE_RETRY; + ifmgd->auth_data->timeout_started = true; + run_again(sdata, ifmgd->auth_data->timeout); return; + } sdata_info(sdata, "%pM denied authentication (status %d)\n", mgmt->sa, status_code); @@ -4519,10 +4534,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) if (ifmgd->auth_data && ifmgd->auth_data->timeout_started && time_after(jiffies, ifmgd->auth_data->timeout)) { - if (ifmgd->auth_data->done) { + if (ifmgd->auth_data->done || ifmgd->auth_data->waiting) { /* - * ok ... we waited for assoc but userspace didn't, - * so let's just kill the auth data + * ok ... we waited for assoc or continuation but + * userspace didn't do it, so kill the auth data */ ieee80211_destroy_auth_data(sdata, false); } else if (ieee80211_auth(sdata)) { @@ -5187,7 +5202,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, */ if (new_sta) { u32 rates = 0, basic_rates = 0; - bool have_higher_than_11mbit; + bool have_higher_than_11mbit = false; int min_rate = INT_MAX, min_rate_index = -1; const struct cfg80211_bss_ies *ies; int shift = ieee80211_vif_get_shift(&sdata->vif); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 38b5695c2a0c83bcbb57a1ca9cd1052c98d6aa27..1e7614abd947de8fab8b3bfc1c11cabd8bbd10e3 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1945,7 +1945,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) int keyid = rx->sta->ptk_idx; sta_ptk = rcu_dereference(rx->sta->ptk[keyid]); - if (ieee80211_has_protected(fc)) { + if (ieee80211_has_protected(fc) && + !(status->flag & RX_FLAG_IV_STRIPPED)) { cs = rx->sta->cipher_scheme; keyid = ieee80211_get_keyid(rx->skb, cs); @@ -2909,13 +2910,13 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) ether_addr_equal(sdata->vif.addr, hdr->addr3)) return RX_CONTINUE; - ac = ieee80211_select_queue_80211(sdata, skb, hdr); + ac = ieee802_1d_to_ac[skb->priority]; q = sdata->vif.hw_queue[ac]; if (ieee80211_queue_stopped(&local->hw, q)) { IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion); return RX_DROP_MONITOR; } - skb_set_queue_mapping(skb, q); + skb_set_queue_mapping(skb, ac); if (!--mesh_hdr->ttl) { if (!is_multicast_ether_addr(hdr->addr1)) @@ -4064,7 +4065,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) if (!bssid) return false; if (ether_addr_equal(sdata->vif.addr, hdr->addr2) || - ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2)) + ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2) || + !is_valid_ether_addr(hdr->addr2)) return false; if (ieee80211_is_beacon(hdr->frame_control)) return true; @@ -4798,7 +4800,7 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta, goto drop; break; case RX_ENC_VHT: - if (WARN_ONCE(status->rate_idx > 9 || + if (WARN_ONCE(status->rate_idx > 11 || !status->nss || status->nss > 8, "Rate marked as a VHT rate but data is invalid: MCS: %d, NSS: %d\n", diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 355e006432ccc305b45a398a287eafc755b3fc75..b9e5f8e8f29cce0fe845cd82a549115968e2b2ac 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -190,6 +190,7 @@ struct tid_ampdu_tx { u8 stop_initiator; bool tx_stop; u16 buf_size; + u16 ssn; u16 failed_bar_ssn; bool bar_pending; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 673ad3cf2c3ab90fdce97dffbcbfef9117a85f1e..bbbcc678c655c44e32b15c4da666928a7e896c88 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2177,7 +2177,11 @@ bool ieee80211_parse_tx_radiotap(struct sk_buff *skb, } vht_mcs = iterator.this_arg[4] >> 4; + if (vht_mcs > 11) + vht_mcs = 0; vht_nss = iterator.this_arg[4] & 0xF; + if (!vht_nss || vht_nss > 8) + vht_nss = 1; break; /* @@ -3365,6 +3369,14 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head)) goto out; + /* If n == 2, the "while (*frag_tail)" loop above didn't execute + * and frag_tail should be &skb_shinfo(head)->frag_list. + * However, ieee80211_amsdu_prepare_head() can reallocate it. + * Reload frag_tail to have it pointing to the correct place. + */ + if (n == 2) + frag_tail = &skb_shinfo(head)->frag_list; + /* * Pad out the previous subframe to a multiple of 4 by adding the * padding to the next one, that's being added. Note that head->len diff --git a/net/mac80211/util.c b/net/mac80211/util.c index fbf56a203c0e89daa0acbf5d8fefc9826dad997a..a1f129292ad884d2531e4eb9855126f95bd6a2e1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -950,7 +950,12 @@ static void ieee80211_parse_extension_element(u32 *crc, struct ieee802_11_elems *elems) { const void *data = elem->data + 1; - u8 len = elem->datalen - 1; + u8 len; + + if (!elem->datalen) + return; + + len = elem->datalen - 1; switch (elem->data[0]) { case WLAN_EID_EXT_HE_MU_EDCA: diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index bca47fad5a16280b808bef4c8832d6f0e0626b37..4eed23e27610439b316e8d846bd22af83399b8bf 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -520,6 +520,9 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, return RX_DROP_UNUSABLE; } + /* reload hdr - skb might have been reallocated */ + hdr = (void *)rx->skb->data; + data_len = skb->len - hdrlen - IEEE80211_CCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; @@ -749,6 +752,9 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; } + /* reload hdr - skb might have been reallocated */ + hdr = (void *)rx->skb->data; + data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - mic_len; if (!rx->sta || data_len < 0) return RX_DROP_UNUSABLE; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index f2868a8a50c302d98bf6fbf3b1b13130e13fa05b..9c047c148a112aa9ea7839dd7867fba8db99a227 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1490,22 +1490,52 @@ static void mpls_dev_destroy_rcu(struct rcu_head *head) kfree(mdev); } -static void mpls_ifdown(struct net_device *dev, int event) +static int mpls_ifdown(struct net_device *dev, int event) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); - u8 alive, deleted; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); for (index = 0; index < net->mpls.platform_labels; index++) { struct mpls_route *rt = rtnl_dereference(platform_label[index]); + bool nh_del = false; + u8 alive = 0; if (!rt) continue; - alive = 0; - deleted = 0; + if (event == NETDEV_UNREGISTER) { + u8 deleted = 0; + + for_nexthops(rt) { + struct net_device *nh_dev = + rtnl_dereference(nh->nh_dev); + + if (!nh_dev || nh_dev == dev) + deleted++; + if (nh_dev == dev) + nh_del = true; + } endfor_nexthops(rt); + + /* if there are no more nexthops, delete the route */ + if (deleted == rt->rt_nhn) { + mpls_route_update(net, index, NULL, NULL); + continue; + } + + if (nh_del) { + size_t size = sizeof(*rt) + rt->rt_nhn * + rt->rt_nh_size; + struct mpls_route *orig = rt; + + rt = kmalloc(size, GFP_KERNEL); + if (!rt) + return -ENOMEM; + memcpy(rt, orig, size); + } + } + change_nexthops(rt) { unsigned int nh_flags = nh->nh_flags; @@ -1529,16 +1559,15 @@ static void mpls_ifdown(struct net_device *dev, int event) next: if (!(nh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN))) alive++; - if (!rtnl_dereference(nh->nh_dev)) - deleted++; } endfor_nexthops(rt); WRITE_ONCE(rt->rt_nhn_alive, alive); - /* if there are no more nexthops, delete the route */ - if (event == NETDEV_UNREGISTER && deleted == rt->rt_nhn) - mpls_route_update(net, index, NULL, NULL); + if (nh_del) + mpls_route_update(net, index, rt, NULL); } + + return 0; } static void mpls_ifup(struct net_device *dev, unsigned int flags) @@ -1596,8 +1625,12 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, return NOTIFY_OK; switch (event) { + int err; + case NETDEV_DOWN: - mpls_ifdown(dev, event); + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); break; case NETDEV_UP: flags = dev_get_flags(dev); @@ -1608,13 +1641,18 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, break; case NETDEV_CHANGE: flags = dev_get_flags(dev); - if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) { mpls_ifup(dev, RTNH_F_DEAD | RTNH_F_LINKDOWN); - else - mpls_ifdown(dev, event); + } else { + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); + } break; case NETDEV_UNREGISTER: - mpls_ifdown(dev, event); + err = mpls_ifdown(dev, event); + if (err) + return notifier_from_errno(err); mdev = mpls_dev_get(dev); if (mdev) { mpls_dev_sysctl_unregister(dev, mdev); @@ -1625,8 +1663,6 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event, case NETDEV_CHANGENAME: mdev = mpls_dev_get(dev); if (mdev) { - int err; - mpls_dev_sysctl_unregister(dev, mdev); err = mpls_dev_sysctl_register(dev, mdev); if (err) diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index 5f390a97f556de8a0dcfaf985ac14e09bc66d086..f1af3f44875edef913b9ea78bcc93eb25f87e18a 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -36,7 +36,7 @@ static int mptcp_diag_dump_one(struct netlink_callback *cb, struct sock *sk; net = sock_net(in_skb->sk); - msk = mptcp_token_get_sock(req->id.idiag_cookie[0]); + msk = mptcp_token_get_sock(net, req->id.idiag_cookie[0]); if (!msk) goto out_nosk; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index ac0233c9cd349ff7425bdba4c9854c1f1048cff0..64afe71e2129adf79c6505f1ac6b7a644210c873 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -368,9 +368,10 @@ static void schedule_3rdack_retransmission(struct sock *sk) /* reschedule with a timeout above RTT, as we must look only for drop */ if (tp->srtt_us) - timeout = tp->srtt_us << 1; + timeout = usecs_to_jiffies(tp->srtt_us >> (3 - 1)); else timeout = TCP_TIMEOUT_INIT; + timeout += jiffies; WARN_ON_ONCE(icsk->icsk_ack.pending & ICSK_ACK_TIMER); icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3ca8b359e399a5c6c52a03d7435b7af52bebf420..8123c79e279130fae0d6e595e64c5fb3e5059b0e 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2149,7 +2149,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, */ if (WARN_ON_ONCE(!new_mptcp_sock)) { tcp_sk(newsk)->is_mptcp = 0; - return newsk; + goto out; } /* acquire the 2nd reference for the owning socket */ @@ -2174,6 +2174,8 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); } +out: + newsk->sk_kern_sock = kern; return newsk; } diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 13ab89dc19141de0fb8416c87424c9d87adaceed..3e5af8397434a97f9a06633d633af4dbb7be1fc1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -424,7 +424,7 @@ int mptcp_token_new_connect(struct sock *sk); void mptcp_token_accept(struct mptcp_subflow_request_sock *r, struct mptcp_sock *msk); bool mptcp_token_exists(u32 token); -struct mptcp_sock *mptcp_token_get_sock(u32 token); +struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token); struct mptcp_sock *mptcp_token_iter_next(const struct net *net, long *s_slot, long *s_num); void mptcp_token_destroy(struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bba5696fee36d5c9873ee9719bec224dacbaec3b..2e923849092412e3d5bb425cd3fe3acba4549310 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -69,7 +69,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req, struct mptcp_sock *msk; int local_id; - msk = mptcp_token_get_sock(subflow_req->token); + msk = mptcp_token_get_sock(sock_net(req_to_sk(req)), subflow_req->token); if (!msk) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINNOTOKEN); return NULL; diff --git a/net/mptcp/syncookies.c b/net/mptcp/syncookies.c index 37127781aee987055acf380f8696a2326e50971f..7f22526346a7e6667b73599ce27771fc18824cfd 100644 --- a/net/mptcp/syncookies.c +++ b/net/mptcp/syncookies.c @@ -108,18 +108,12 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl e->valid = 0; - msk = mptcp_token_get_sock(e->token); + msk = mptcp_token_get_sock(net, e->token); if (!msk) { spin_unlock_bh(&join_entry_locks[i]); return false; } - /* If this fails, the token got re-used in the mean time by another - * mptcp socket in a different netns, i.e. entry is outdated. - */ - if (!net_eq(sock_net((struct sock *)msk), net)) - goto err_put; - subflow_req->remote_nonce = e->remote_nonce; subflow_req->local_nonce = e->local_nonce; subflow_req->backup = e->backup; @@ -128,11 +122,6 @@ bool mptcp_token_join_cookie_init_state(struct mptcp_subflow_request_sock *subfl subflow_req->msk = msk; spin_unlock_bh(&join_entry_locks[i]); return true; - -err_put: - spin_unlock_bh(&join_entry_locks[i]); - sock_put((struct sock *)msk); - return false; } void __init mptcp_join_cookie_init(void) diff --git a/net/mptcp/token.c b/net/mptcp/token.c index 0691a4883f3ab13d24992602b9645cd1ab0f3616..f0d656bf27ada007a6f7306d88f0870fc337e7a0 100644 --- a/net/mptcp/token.c +++ b/net/mptcp/token.c @@ -232,6 +232,7 @@ found: /** * mptcp_token_get_sock - retrieve mptcp connection sock using its token + * @net: restrict to this namespace * @token: token of the mptcp connection to retrieve * * This function returns the mptcp connection structure with the given token. @@ -239,7 +240,7 @@ found: * * returns NULL if no connection with the given token value exists. */ -struct mptcp_sock *mptcp_token_get_sock(u32 token) +struct mptcp_sock *mptcp_token_get_sock(struct net *net, u32 token) { struct hlist_nulls_node *pos; struct token_bucket *bucket; @@ -252,11 +253,15 @@ struct mptcp_sock *mptcp_token_get_sock(u32 token) again: sk_nulls_for_each_rcu(sk, pos, &bucket->msk_chain) { msk = mptcp_sk(sk); - if (READ_ONCE(msk->token) != token) + if (READ_ONCE(msk->token) != token || + !net_eq(sock_net(sk), net)) continue; + if (!refcount_inc_not_zero(&sk->sk_refcnt)) goto not_found; - if (READ_ONCE(msk->token) != token) { + + if (READ_ONCE(msk->token) != token || + !net_eq(sock_net(sk), net)) { sock_put(sk); goto again; } diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c index e1bd6f0a0676fa1f6ecec42dc9e24f97247b33fe..5d984bec1cd865b78ee19adfe248bac57dc1d24e 100644 --- a/net/mptcp/token_test.c +++ b/net/mptcp/token_test.c @@ -11,6 +11,7 @@ static struct mptcp_subflow_request_sock *build_req_sock(struct kunit *test) GFP_USER); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, req); mptcp_token_init_request((struct request_sock *)req); + sock_net_set((struct sock *)req, &init_net); return req; } @@ -22,7 +23,7 @@ static void mptcp_token_test_req_basic(struct kunit *test) KUNIT_ASSERT_EQ(test, 0, mptcp_token_new_request((struct request_sock *)req)); KUNIT_EXPECT_NE(test, 0, (int)req->token); - KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(req->token)); + KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, req->token)); /* cleanup */ mptcp_token_destroy_request((struct request_sock *)req); @@ -55,6 +56,7 @@ static struct mptcp_sock *build_msk(struct kunit *test) msk = kunit_kzalloc(test, sizeof(struct mptcp_sock), GFP_USER); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk); refcount_set(&((struct sock *)msk)->sk_refcnt, 1); + sock_net_set((struct sock *)msk, &init_net); return msk; } @@ -74,11 +76,11 @@ static void mptcp_token_test_msk_basic(struct kunit *test) mptcp_token_new_connect((struct sock *)icsk)); KUNIT_EXPECT_NE(test, 0, (int)ctx->token); KUNIT_EXPECT_EQ(test, ctx->token, msk->token); - KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(ctx->token)); + KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, ctx->token)); KUNIT_EXPECT_EQ(test, 2, (int)refcount_read(&sk->sk_refcnt)); mptcp_token_destroy(msk); - KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(ctx->token)); + KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, ctx->token)); } static void mptcp_token_test_accept(struct kunit *test) @@ -90,11 +92,11 @@ static void mptcp_token_test_accept(struct kunit *test) mptcp_token_new_request((struct request_sock *)req)); msk->token = req->token; mptcp_token_accept(req, msk); - KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token)); + KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token)); /* this is now a no-op */ mptcp_token_destroy_request((struct request_sock *)req); - KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(msk->token)); + KUNIT_EXPECT_PTR_EQ(test, msk, mptcp_token_get_sock(&init_net, msk->token)); /* cleanup */ mptcp_token_destroy(msk); @@ -116,7 +118,7 @@ static void mptcp_token_test_destroyed(struct kunit *test) /* simulate race on removal */ refcount_set(&sk->sk_refcnt, 0); - KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(msk->token)); + KUNIT_EXPECT_PTR_EQ(test, null_msk, mptcp_token_get_sock(&init_net, msk->token)); /* cleanup */ mptcp_token_destroy(msk); diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index ba9ae482141b0f4be035085a6f9427cf581cf169..dda8b76b77988ab422afce64470e924a90124809 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -18,6 +18,8 @@ #include "internal.h" #include "ncsi-pkt.h" +static const int padding_bytes = 26; + u32 ncsi_calculate_checksum(unsigned char *data, int len) { u32 checksum = 0; @@ -213,12 +215,17 @@ static int ncsi_cmd_handler_oem(struct sk_buff *skb, { struct ncsi_cmd_oem_pkt *cmd; unsigned int len; + int payload; + /* NC-SI spec DSP_0222_1.2.0, section 8.2.2.2 + * requires payload to be padded with 0 to + * 32-bit boundary before the checksum field. + * Ensure the padding bytes are accounted for in + * skb allocation + */ + payload = ALIGN(nca->payload, 4); len = sizeof(struct ncsi_cmd_pkt_hdr) + 4; - if (nca->payload < 26) - len += 26; - else - len += nca->payload; + len += max(payload, padding_bytes); cmd = skb_put_zero(skb, len); memcpy(&cmd->mfr_id, nca->data, nca->payload); @@ -272,6 +279,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) struct net_device *dev = nd->dev; int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; + int payload; int len = hlen + tlen; struct sk_buff *skb; struct ncsi_request *nr; @@ -281,14 +289,14 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) return NULL; /* NCSI command packet has 16-bytes header, payload, 4 bytes checksum. + * Payload needs padding so that the checksum field following payload is + * aligned to 32-bit boundary. * The packet needs padding if its payload is less than 26 bytes to * meet 64 bytes minimal ethernet frame length. */ len += sizeof(struct ncsi_cmd_pkt_hdr) + 4; - if (nca->payload < 26) - len += 26; - else - len += nca->payload; + payload = ALIGN(nca->payload, 4); + len += max(payload, padding_bytes); /* Allocate skb */ skb = alloc_skb(len, GFP_ATOMIC); diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index bb5f1650f11cb3a5e1c2c714603ab3468a759992..c189b4c8a1823e328367c2c98ff9caf5be69286b 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -112,7 +112,11 @@ static int ncsi_write_package_info(struct sk_buff *skb, pnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR); if (!pnest) return -ENOMEM; - nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + rc = nla_put_u32(skb, NCSI_PKG_ATTR_ID, np->id); + if (rc) { + nla_nest_cancel(skb, pnest); + return rc; + } if ((0x1 << np->id) == ndp->package_whitelist) nla_put_flag(skb, NCSI_PKG_ATTR_FORCED); cnest = nla_nest_start_noflag(skb, NCSI_PKG_ATTR_CHANNEL_LIST); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 15ccd8462b8df303fc18e784712dc79813c8355e..56c53318a10750783c3c4af883af830c39d5cb87 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -94,7 +94,7 @@ config NF_CONNTRACK_MARK config NF_CONNTRACK_SECMARK bool 'Connection tracking security mark support' depends on NETWORK_SECMARK - default m if NETFILTER_ADVANCED=n + default y if NETFILTER_ADVANCED=n help This option enables security markings to be applied to connections. Typically they are copied to connections from diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 63d032191e6269acff9e48b51b6c0e09a411ceb1..60332fdb6dd4429fb1e22a645476b3954d9f04e3 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -406,14 +406,15 @@ static int __nf_register_net_hook(struct net *net, int pf, p = nf_entry_dereference(*pp); new_hooks = nf_hook_entries_grow(p, reg); - if (!IS_ERR(new_hooks)) + if (!IS_ERR(new_hooks)) { + hooks_validate(new_hooks); rcu_assign_pointer(*pp, new_hooks); + } mutex_unlock(&nf_hook_mutex); if (IS_ERR(new_hooks)) return PTR_ERR(new_hooks); - hooks_validate(new_hooks); #ifdef CONFIG_NETFILTER_INGRESS if (nf_ingress_hook(reg, pf)) net_inc_ingress_queue(); diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 7cd1d31fb2b88492201b5cffec58b4ec7a19ec4a..b0670388da49a1f4d25106450466e76fbc874d20 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -132,11 +132,11 @@ htable_size(u8 hbits) { size_t hsize; - /* We must fit both into u32 in jhash and size_t */ + /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */ if (hbits > 31) return 0; hsize = jhash_size(hbits); - if ((((size_t)-1) - sizeof(struct htable)) / sizeof(struct hbucket *) + if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *) < hsize) return 0; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index c100c6b112c81ee5cb0eccda5421372756c305ac..2c467c422dc6343a296ccfff9097069440fd66cd 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1468,6 +1468,10 @@ int __init ip_vs_conn_init(void) int idx; /* Compute size and mask */ + if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) { + pr_info("conn_tab_bits not in [8, 20]. Using default value\n"); + ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; + } ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index c0b8215ab3d47a6719cbb0d57cffb7b73326ecad..3a76da58d88bb68d39ea0f61a314310832a23cab 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1976,7 +1976,6 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int struct ip_vs_proto_data *pd; struct ip_vs_conn *cp; int ret, pkts; - int conn_reuse_mode; struct sock *sk; /* Already marked as IPVS request or reply? */ @@ -2053,15 +2052,16 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto, ipvs, af, skb, &iph); - conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); - if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) { + if (!iph.fragoffs && is_new_conn(skb, &iph) && cp) { + int conn_reuse_mode = sysctl_conn_reuse_mode(ipvs); bool old_ct = false, resched = false; if (unlikely(sysctl_expire_nodest_conn(ipvs)) && cp->dest && unlikely(!atomic_read(&cp->dest->weight))) { resched = true; old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); - } else if (is_new_conn_expected(cp, conn_reuse_mode)) { + } else if (conn_reuse_mode && + is_new_conn_expected(cp, conn_reuse_mode)) { old_ct = ip_vs_conn_uses_old_conntrack(cp, skb); if (!atomic_read(&cp->n_control)) { resched = true; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index c25097092a060bf579edec4ac9638edb4f6fd569..29ec3ef63edc7a6b59b37bd0eb5e95b25e32bf57 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -4090,6 +4090,11 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; tbl[idx++].data = &ipvs->sysctl_schedule_icmp; tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; +#ifdef CONFIG_IP_VS_DEBUG + /* Global sysctls must be ro in non-init netns */ + if (!net_eq(net, &init_net)) + tbl[idx++].mode = 0444; +#endif ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); if (ipvs->sysctl_hdr == NULL) { diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index deaf24904fa270e9e393f3ce9cf82b913283c6d9..9b88e4c04af1e904ae3fcb4fee61db71ae88778d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -76,6 +76,9 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep; static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; +/* serialize hash resizes and nf_ct_iterate_cleanup */ +static DEFINE_MUTEX(nf_conntrack_mutex); + #define GC_SCAN_INTERVAL (120u * HZ) #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) @@ -658,7 +661,7 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) tstamp = nf_conn_tstamp_find(ct); if (tstamp) { - s32 timeout = ct->timeout - nfct_time_stamp; + s32 timeout = READ_ONCE(ct->timeout) - nfct_time_stamp; tstamp->stop = ktime_get_real_ns(); if (timeout < 0) @@ -978,7 +981,7 @@ static int nf_ct_resolve_clash_harder(struct sk_buff *skb, u32 repl_idx) } /* We want the clashing entry to go away real soon: 1 second timeout. */ - loser_ct->timeout = nfct_time_stamp + HZ; + WRITE_ONCE(loser_ct->timeout, nfct_time_stamp + HZ); /* IPS_NAT_CLASH removes the entry automatically on the first * reply. Also prevents UDP tracker from moving the entry to @@ -1485,7 +1488,7 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; - ct->timeout = 0; + WRITE_ONCE(ct->timeout, 0); write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset, 0, offsetof(struct nf_conn, proto) - @@ -1833,15 +1836,17 @@ repeat: pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put(&ct->ct_general); skb->_nfct = 0; - NF_CT_STAT_INC_ATOMIC(state->net, invalid); - if (ret == -NF_DROP) - NF_CT_STAT_INC_ATOMIC(state->net, drop); /* Special case: TCP tracker reports an attempt to reopen a * closed/aborted connection. We have to go back and create a * fresh conntrack. */ if (ret == -NF_REPEAT) goto repeat; + + NF_CT_STAT_INC_ATOMIC(state->net, invalid); + if (ret == -NF_DROP) + NF_CT_STAT_INC_ATOMIC(state->net, drop); + ret = -ret; goto out; } @@ -2177,28 +2182,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), spinlock_t *lockp; for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket]; + + if (hlist_nulls_empty(hslot)) + continue; + lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS]; local_bh_disable(); nf_conntrack_lock(lockp); - if (*bucket < nf_conntrack_htable_size) { - hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) { - if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY) - continue; - /* All nf_conn objects are added to hash table twice, one - * for original direction tuple, once for the reply tuple. - * - * Exception: In the IPS_NAT_CLASH case, only the reply - * tuple is added (the original tuple already existed for - * a different object). - * - * We only need to call the iterator once for each - * conntrack, so we just use the 'reply' direction - * tuple while iterating. - */ - ct = nf_ct_tuplehash_to_ctrack(h); - if (iter(ct, data)) - goto found; - } + hlist_nulls_for_each_entry(h, n, hslot, hnnode) { + if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY) + continue; + /* All nf_conn objects are added to hash table twice, one + * for original direction tuple, once for the reply tuple. + * + * Exception: In the IPS_NAT_CLASH case, only the reply + * tuple is added (the original tuple already existed for + * a different object). + * + * We only need to call the iterator once for each + * conntrack, so we just use the 'reply' direction + * tuple while iterating. + */ + ct = nf_ct_tuplehash_to_ctrack(h); + if (iter(ct, data)) + goto found; } spin_unlock(lockp); local_bh_enable(); @@ -2216,26 +2224,20 @@ found: static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data, u32 portid, int report) { - unsigned int bucket = 0, sequence; + unsigned int bucket = 0; struct nf_conn *ct; might_sleep(); - for (;;) { - sequence = read_seqcount_begin(&nf_conntrack_generation); - - while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { - /* Time to push up daises... */ + mutex_lock(&nf_conntrack_mutex); + while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + /* Time to push up daises... */ - nf_ct_delete(ct, portid, report); - nf_ct_put(ct); - cond_resched(); - } - - if (!read_seqcount_retry(&nf_conntrack_generation, sequence)) - break; - bucket = 0; + nf_ct_delete(ct, portid, report); + nf_ct_put(ct); + cond_resched(); } + mutex_unlock(&nf_conntrack_mutex); } struct iter_data { @@ -2465,8 +2467,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize) if (!hash) return -ENOMEM; + mutex_lock(&nf_conntrack_mutex); old_size = nf_conntrack_htable_size; if (old_size == hashsize) { + mutex_unlock(&nf_conntrack_mutex); kvfree(hash); return 0; } @@ -2502,6 +2506,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize) nf_conntrack_all_unlock(); local_bh_enable(); + mutex_unlock(&nf_conntrack_mutex); + synchronize_net(); kvfree(old_hash); return 0; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index cb4cfa4f61a8d372674173ad6fe09a09a3670a02..eeeaa34b3e7b5d5cecaf24554fa8e51641a89abe 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -973,11 +973,9 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family) CTA_TUPLE_REPLY, filter->family, &filter->zone, - filter->orig_flags); - if (err < 0) { - err = -EINVAL; + filter->reply_flags); + if (err < 0) goto err_filter; - } } return filter; @@ -1973,7 +1971,7 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, if (timeout > INT_MAX) timeout = INT_MAX; - ct->timeout = nfct_time_stamp + (u32)timeout; + WRITE_ONCE(ct->timeout, nfct_time_stamp + (u32)timeout); if (test_bit(IPS_DYING_BIT, &ct->status)) return -ETIME; @@ -2285,7 +2283,8 @@ ctnetlink_create_conntrack(struct net *net, if (helper->from_nlattr) helper->from_nlattr(helpinfo, ct); - /* not in hash table yet so not strictly necessary */ + /* disable helper auto-assignment for this entry */ + ct->status |= IPS_HELPER; RCU_INIT_POINTER(help->helper, helper); } } else { diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 810cca24b399019862f185a65d064aa56aa8a21c..7626f3e1c70a774968676d1111ee6376284afc17 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -489,6 +489,15 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, pr_debug("Setting vtag %x for dir %d\n", ih->init_tag, !dir); ct->proto.sctp.vtag[!dir] = ih->init_tag; + + /* don't renew timeout on init retransmit so + * port reuse by client or NAT middlebox cannot + * keep entry alive indefinitely (incl. nat info). + */ + if (new_state == SCTP_CONNTRACK_CLOSED && + old_state == SCTP_CONNTRACK_CLOSED && + nf_ct_is_confirmed(ct)) + ignore = true; } ct->proto.sctp.state = new_state; diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index af402f458ee020da5946327a23ac6b5f68d7b5a0..0528e9c26cebd56acc53c9989ff03cd8f4b71425 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -105,10 +105,13 @@ int nf_conntrack_udp_packet(struct nf_conn *ct, */ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { unsigned long extra = timeouts[UDP_CT_UNREPLIED]; + bool stream = false; /* Still active after two seconds? Extend timeout. */ - if (time_after(jiffies, ct->proto.udp.stream_ts)) + if (time_after(jiffies, ct->proto.udp.stream_ts)) { extra = timeouts[UDP_CT_REPLIED]; + stream = true; + } nf_ct_refresh_acct(ct, ctinfo, skb, extra); @@ -117,7 +120,7 @@ int nf_conntrack_udp_packet(struct nf_conn *ct, return NF_ACCEPT; /* Also, more likely to be important, and not a probe */ - if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) + if (stream && !test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) nf_conntrack_event_cache(IPCT_ASSURED, ct); } else { nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index f4029fc2c88469136271b45181a0fb42427c42f9..d091d51b5e19fef212a08d22d7aaeb04ce5edcad 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -151,8 +151,8 @@ static void flow_offload_fixup_ct_timeout(struct nf_conn *ct) else return; - if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout) - ct->timeout = nfct_time_stamp + timeout; + if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) + WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout); } static void flow_offload_fixup_ct_state(struct nf_conn *ct) diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index a6b654b028dd43906c17325f7cb814d527d6bd18..d1862782be450307f5a392e7d22d688393031a0d 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -63,11 +63,11 @@ static void nf_flow_rule_lwt_match(struct nf_flow_match *match, sizeof(struct in6_addr)); if (memcmp(&key->enc_ipv6.src, &in6addr_any, sizeof(struct in6_addr))) - memset(&key->enc_ipv6.src, 0xff, + memset(&mask->enc_ipv6.src, 0xff, sizeof(struct in6_addr)); if (memcmp(&key->enc_ipv6.dst, &in6addr_any, sizeof(struct in6_addr))) - memset(&key->enc_ipv6.dst, 0xff, + memset(&mask->enc_ipv6.dst, 0xff, sizeof(struct in6_addr)); enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS); key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index 8e8a65d46345b292c8fcac2fc7fcdfd1be1c5557..acd73f717a0883d791fc351851a98bac4144705f 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -9,8 +9,19 @@ #include +struct masq_dev_work { + struct work_struct work; + struct net *net; + union nf_inet_addr addr; + int ifindex; + int (*iter)(struct nf_conn *i, void *data); +}; + +#define MAX_MASQ_WORKER_COUNT 16 + static DEFINE_MUTEX(masq_mutex); static unsigned int masq_refcnt __read_mostly; +static atomic_t masq_worker_count __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, @@ -63,13 +74,71 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4); -static int device_cmp(struct nf_conn *i, void *ifindex) +static void iterate_cleanup_work(struct work_struct *work) +{ + struct masq_dev_work *w; + + w = container_of(work, struct masq_dev_work, work); + + nf_ct_iterate_cleanup_net(w->net, w->iter, (void *)w, 0, 0); + + put_net(w->net); + kfree(w); + atomic_dec(&masq_worker_count); + module_put(THIS_MODULE); +} + +/* Iterate conntrack table in the background and remove conntrack entries + * that use the device/address being removed. + * + * In case too many work items have been queued already or memory allocation + * fails iteration is skipped, conntrack entries will time out eventually. + */ +static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr, + int ifindex, + int (*iter)(struct nf_conn *i, void *data), + gfp_t gfp_flags) +{ + struct masq_dev_work *w; + + if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT) + return; + + net = maybe_get_net(net); + if (!net) + return; + + if (!try_module_get(THIS_MODULE)) + goto err_module; + + w = kzalloc(sizeof(*w), gfp_flags); + if (w) { + /* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */ + atomic_inc(&masq_worker_count); + + INIT_WORK(&w->work, iterate_cleanup_work); + w->ifindex = ifindex; + w->net = net; + w->iter = iter; + if (addr) + w->addr = *addr; + schedule_work(&w->work); + return; + } + + module_put(THIS_MODULE); + err_module: + put_net(net); +} + +static int device_cmp(struct nf_conn *i, void *arg) { const struct nf_conn_nat *nat = nfct_nat(i); + const struct masq_dev_work *w = arg; if (!nat) return 0; - return nat->masq_index == (int)(long)ifindex; + return nat->masq_index == w->ifindex; } static int masq_device_event(struct notifier_block *this, @@ -85,8 +154,8 @@ static int masq_device_event(struct notifier_block *this, * and forget them. */ - nf_ct_iterate_cleanup_net(net, device_cmp, - (void *)(long)dev->ifindex, 0, 0); + nf_nat_masq_schedule(net, NULL, dev->ifindex, + device_cmp, GFP_KERNEL); } return NOTIFY_DONE; @@ -94,35 +163,45 @@ static int masq_device_event(struct notifier_block *this, static int inet_cmp(struct nf_conn *ct, void *ptr) { - struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; - struct net_device *dev = ifa->ifa_dev->dev; struct nf_conntrack_tuple *tuple; + struct masq_dev_work *w = ptr; - if (!device_cmp(ct, (void *)(long)dev->ifindex)) + if (!device_cmp(ct, ptr)) return 0; tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - return ifa->ifa_address == tuple->dst.u3.ip; + return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3); } static int masq_inet_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev; - struct net *net = dev_net(idev->dev); + const struct in_ifaddr *ifa = ptr; + const struct in_device *idev; + const struct net_device *dev; + union nf_inet_addr addr; + + if (event != NETDEV_DOWN) + return NOTIFY_DONE; /* The masq_dev_notifier will catch the case of the device going * down. So if the inetdev is dead and being destroyed we have * no work to do. Otherwise this is an individual address removal * and we have to perform the flush. */ + idev = ifa->ifa_dev; if (idev->dead) return NOTIFY_DONE; - if (event == NETDEV_DOWN) - nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0); + memset(&addr, 0, sizeof(addr)); + + addr.ip = ifa->ifa_address; + + dev = idev->dev; + nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex, + inet_cmp, GFP_KERNEL); return NOTIFY_DONE; } @@ -136,8 +215,6 @@ static struct notifier_block masq_inet_notifier = { }; #if IS_ENABLED(CONFIG_IPV6) -static atomic_t v6_worker_count __read_mostly; - static int nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, @@ -187,40 +264,6 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6); -struct masq_dev_work { - struct work_struct work; - struct net *net; - struct in6_addr addr; - int ifindex; -}; - -static int inet6_cmp(struct nf_conn *ct, void *work) -{ - struct masq_dev_work *w = (struct masq_dev_work *)work; - struct nf_conntrack_tuple *tuple; - - if (!device_cmp(ct, (void *)(long)w->ifindex)) - return 0; - - tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - - return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6); -} - -static void iterate_cleanup_work(struct work_struct *work) -{ - struct masq_dev_work *w; - - w = container_of(work, struct masq_dev_work, work); - - nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0); - - put_net(w->net); - kfree(w); - atomic_dec(&v6_worker_count); - module_put(THIS_MODULE); -} - /* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep). * * Defer it to the system workqueue. @@ -233,36 +276,19 @@ static int masq_inet6_event(struct notifier_block *this, { struct inet6_ifaddr *ifa = ptr; const struct net_device *dev; - struct masq_dev_work *w; - struct net *net; + union nf_inet_addr addr; - if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16) + if (event != NETDEV_DOWN) return NOTIFY_DONE; dev = ifa->idev->dev; - net = maybe_get_net(dev_net(dev)); - if (!net) - return NOTIFY_DONE; - if (!try_module_get(THIS_MODULE)) - goto err_module; + memset(&addr, 0, sizeof(addr)); - w = kmalloc(sizeof(*w), GFP_ATOMIC); - if (w) { - atomic_inc(&v6_worker_count); - - INIT_WORK(&w->work, iterate_cleanup_work); - w->ifindex = dev->ifindex; - w->net = net; - w->addr = ifa->addr; - schedule_work(&w->work); + addr.in6 = ifa->addr; - return NOTIFY_DONE; - } - - module_put(THIS_MODULE); - err_module: - put_net(net); + nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp, + GFP_ATOMIC); return NOTIFY_DONE; } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index bbd1209694b89bb1164c39ce581f54b24f61e8d4..bb8607ff94bc7a46751724d530efc58d325cdde0 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -46,6 +46,15 @@ void nf_unregister_queue_handler(struct net *net) } EXPORT_SYMBOL(nf_unregister_queue_handler); +static void nf_queue_sock_put(struct sock *sk) +{ +#ifdef CONFIG_INET + sock_gen_put(sk); +#else + sock_put(sk); +#endif +} + static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; @@ -56,7 +65,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) if (state->out) dev_put(state->out); if (state->sk) - sock_put(state->sk); + nf_queue_sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->physin) @@ -91,16 +100,17 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry) } /* Bump dev refs so they don't vanish while packet is out */ -void nf_queue_entry_get_refs(struct nf_queue_entry *entry) +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; + if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) + return false; + if (state->in) dev_hold(state->in); if (state->out) dev_hold(state->out); - if (state->sk) - sock_hold(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->physin) @@ -108,6 +118,7 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) if (entry->physout) dev_hold(entry->physout); #endif + return true; } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); @@ -178,6 +189,18 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, break; } + if (skb_sk_is_prefetched(skb)) { + struct sock *sk = skb->sk; + + if (!sk_is_refcounted(sk)) { + if (!refcount_inc_not_zero(&sk->sk_refcnt)) + return -ENOTCONN; + + /* drop refcount on skb_orphan */ + skb->destructor = sock_edemux; + } + } + entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC); if (!entry) return -ENOMEM; @@ -196,7 +219,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, __nf_queue_entry_init_physdevs(entry); - nf_queue_entry_get_refs(entry); + if (!nf_queue_entry_get_refs(entry)) { + kfree(entry); + return -ENOTCONN; + } switch (entry->state.pf) { case AF_INET: diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c605a3e713e7612c63a30dd0defb1604146cc145..fdd1da9ecea9e1e4757d52619ab55c27848df8a6 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -4265,7 +4265,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, if (ops->privsize != NULL) size = ops->privsize(nla, &desc); alloc_size = sizeof(*set) + size + udlen; - if (alloc_size < size) + if (alloc_size < size || alloc_size > INT_MAX) return -ENOMEM; set = kvzalloc(alloc_size, GFP_KERNEL); if (!set) @@ -5924,12 +5924,15 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, { struct nft_object *newobj; struct nft_trans *trans; - int err; + int err = -ENOMEM; + + if (!try_module_get(type->owner)) + return -ENOENT; trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, sizeof(struct nft_trans_obj)); if (!trans) - return -ENOMEM; + goto err_trans; newobj = nft_obj_init(ctx, type, attr); if (IS_ERR(newobj)) { @@ -5946,6 +5949,8 @@ static int nf_tables_updobj(const struct nft_ctx *ctx, err_free_trans: kfree(trans); +err_trans: + module_put(type->owner); return err; } @@ -7555,7 +7560,7 @@ static void nft_obj_commit_update(struct nft_trans *trans) if (obj->ops->update) obj->ops->update(obj, newobj); - kfree(newobj); + nft_obj_destroy(&trans->ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) @@ -8202,7 +8207,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - kfree(nft_trans_obj_newobj(trans)); + nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { trans->ctx.table->use--; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index e5fcbb0e4b8e5041bfbf0cc08efbabdcb4c81b63..839fd09f1bb4a7e43eb5539b952032be6eaccc84 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -94,7 +94,8 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net, expr = nft_expr_first(rule); while (nft_expr_more(rule, expr)) { - if (expr->ops->offload_flags & NFT_OFFLOAD_F_ACTION) + if (expr->ops->offload_action && + expr->ops->offload_action(expr)) num_actions++; expr = nft_expr_next(expr); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index b35e8d9a5b37ec14cc6eb39e55d0c1ade1791da8..33c13edbca4bbf9e9406bf3bc86b80d0bdf8ed9e 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -557,7 +557,8 @@ __build_packet_message(struct nfnl_log_net *log, goto nla_put_failure; if (indev && skb->dev && - skb->mac_header != skb->network_header) { + skb_mac_header_was_set(skb) && + skb_mac_header_len(skb) != 0) { struct nfulnl_msg_packet_hw phw; int len; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index d1d8bca03b4f0b9048c7f01a4c16d59247021d09..1640da5c50776189b8cfc354eb5e6d09ccc50363 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -562,7 +562,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, goto nla_put_failure; if (indev && entskb->dev && - entskb->mac_header != entskb->network_header) { + skb_mac_header_was_set(entskb) && + skb_mac_header_len(entskb) != 0) { struct nfqnl_msg_packet_hw phw; int len; @@ -711,9 +712,15 @@ static struct nf_queue_entry * nf_queue_entry_dup(struct nf_queue_entry *e) { struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); - if (entry) - nf_queue_entry_get_refs(entry); - return entry; + + if (!entry) + return NULL; + + if (nf_queue_entry_get_refs(entry)) + return entry; + + kfree(entry); + return NULL; } #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 40788b3f1071a7e82f1dddff270d808e8edf40cb..70c457476b87407cb50fadeff7f3e45835c68daa 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -67,6 +67,11 @@ static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx, return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif); } +static bool nft_dup_netdev_offload_action(const struct nft_expr *expr) +{ + return true; +} + static struct nft_expr_type nft_dup_netdev_type; static const struct nft_expr_ops nft_dup_netdev_ops = { .type = &nft_dup_netdev_type, @@ -75,6 +80,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = { .init = nft_dup_netdev_init, .dump = nft_dup_netdev_dump, .offload = nft_dup_netdev_offload, + .offload_action = nft_dup_netdev_offload_action, }; static struct nft_expr_type nft_dup_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 5c84a968dae2922ec21d625d179e0fe196e34a85..58904bee1a0df4ff621bd41f1b02ba0d7d81c907 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -141,17 +141,8 @@ static int nft_dynset_init(const struct nft_ctx *ctx, return -EBUSY; priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); - switch (priv->op) { - case NFT_DYNSET_OP_ADD: - case NFT_DYNSET_OP_DELETE: - break; - case NFT_DYNSET_OP_UPDATE: - if (!(set->flags & NFT_SET_TIMEOUT)) - return -EOPNOTSUPP; - break; - default: + if (priv->op > NFT_DYNSET_OP_DELETE) return -EOPNOTSUPP; - } timeout = 0; if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index b77985986b24ea6fd2f530bcc3e4befde45e3b99..3b0dcd170551be91d4e45a412d13de6de585d19e 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -77,6 +77,11 @@ static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx, return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif); } +static bool nft_fwd_netdev_offload_action(const struct nft_expr *expr) +{ + return true; +} + struct nft_fwd_neigh { enum nft_registers sreg_dev:8; enum nft_registers sreg_addr:8; @@ -219,6 +224,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .dump = nft_fwd_netdev_dump, .validate = nft_fwd_validate, .offload = nft_fwd_netdev_offload, + .offload_action = nft_fwd_netdev_offload_action, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index c63eb3b171784bcf82ea32ed676a657bd9177472..5c9d88560a474de749c392542755520ad2f002f2 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -213,6 +213,16 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx, return 0; } +static bool nft_immediate_offload_action(const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + return true; + + return false; +} + static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), @@ -224,7 +234,7 @@ static const struct nft_expr_ops nft_imm_ops = { .dump = nft_immediate_dump, .validate = nft_immediate_validate, .offload = nft_immediate_offload, - .offload_flags = NFT_OFFLOAD_F_ACTION, + .offload_action = nft_immediate_offload_action, }; struct nft_expr_type nft_imm_type __read_mostly = { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 1ebee25de67721934531095848d358c9895f0620..6a8495bd08bb2585af8cf0ff7defbe3cfd6cceae 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -502,6 +502,9 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, struct sk_buff *skb, unsigned int *l4csum_offset) { + if (pkt->xt.fragoff) + return -1; + switch (pkt->tprot) { case IPPROTO_TCP: *l4csum_offset = offsetof(struct tcphdr, check); diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 2d73f265b12c9bbbe2b2b12b117333729ae073a2..f67c4436c5d31f38fd59310a744437db7ff981f3 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -1290,6 +1290,11 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) if (!new->scratch_aligned) goto out_scratch; #endif + for_each_possible_cpu(i) + *per_cpu_ptr(new->scratch, i) = NULL; + + if (pipapo_realloc_scratch(new, old->bsize_max)) + goto out_scratch_realloc; rcu_head_init(&new->rcu); @@ -1334,6 +1339,9 @@ out_lt: kvfree(dst->lt); dst--; } +out_scratch_realloc: + for_each_possible_cpu(i) + kfree(*per_cpu_ptr(new->scratch, i)); #ifdef NFT_PIPAPO_ALIGN free_percpu(new->scratch_aligned); #endif diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index eabdb8d552eef95329fbcc792d20f42ca01449ba..10332178da8c5048f8b925b2a8ea1f04660f673e 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -887,7 +887,7 @@ static int nft_pipapo_avx2_lookup_8b_6(unsigned long *map, unsigned long *fill, NFT_PIPAPO_AVX2_BUCKET_LOAD8(4, lt, 4, pkt[4], bsize); NFT_PIPAPO_AVX2_AND(5, 0, 1); - NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 6, pkt[5], bsize); + NFT_PIPAPO_AVX2_BUCKET_LOAD8(6, lt, 5, pkt[5], bsize); NFT_PIPAPO_AVX2_AND(7, 2, 3); /* Stall */ diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 4fda8b3f176265b445e075fa51ddd0eb14c5ebe6..59c4dfaf2ea1fc97cade542fae9901fa89742217 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -191,8 +191,10 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx, if (err) goto nf_ct_failure; err = nf_synproxy_ipv6_init(snet, ctx->net); - if (err) + if (err) { + nf_synproxy_ipv4_fini(snet, ctx->net); goto nf_ct_failure; + } break; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 8434da3c0487a4ef1159c5c3f453f3d135c72051..e55af5c078ac096985c842484a8a81e42bffadda 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -586,7 +586,10 @@ static int netlink_insert(struct sock *sk, u32 portid) /* We need to ensure that the socket is hashed and visible. */ smp_wmb(); - nlk_sk(sk)->bound = portid; + /* Paired with lockless reads from netlink_bind(), + * netlink_connect() and netlink_sendmsg(). + */ + WRITE_ONCE(nlk_sk(sk)->bound, portid); err: release_sock(sk); @@ -1004,7 +1007,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (nlk->ngroups < BITS_PER_LONG) groups &= (1UL << nlk->ngroups) - 1; - bound = nlk->bound; + /* Paired with WRITE_ONCE() in netlink_insert() */ + bound = READ_ONCE(nlk->bound); if (bound) { /* Ensure nlk->portid is up-to-date. */ smp_rmb(); @@ -1090,8 +1094,9 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, /* No need for barriers here as we return to user-space without * using any of the bound attributes. + * Paired with WRITE_ONCE() in netlink_insert(). */ - if (!nlk->bound) + if (!READ_ONCE(nlk->bound)) err = netlink_autobind(sock); if (err == 0) { @@ -1858,6 +1863,11 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; + if (len == 0) { + pr_warn_once("Zero length message leads to an empty skb\n"); + return -ENODATA; + } + err = scm_send(sock, msg, &scm, true); if (err < 0) return err; @@ -1880,7 +1890,8 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) dst_group = nlk->dst_group; } - if (!nlk->bound) { + /* Paired with WRITE_ONCE() in netlink_insert() */ + if (!READ_ONCE(nlk->bound)) { err = netlink_autobind(sock); if (err) goto out; diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 6d16e1ab1a8aba336f375ea0512e0ba86eff2876..e5c8a295e64066971869b83ece1daee60a7f2ddc 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -298,7 +298,7 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct nr_sock *nr = nr_sk(sk); - unsigned long opt; + unsigned int opt; if (level != SOL_NETROM) return -ENOPROTOOPT; @@ -306,18 +306,18 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, if (optlen < sizeof(unsigned int)) return -EINVAL; - if (copy_from_sockptr(&opt, optval, sizeof(unsigned int))) + if (copy_from_sockptr(&opt, optval, sizeof(opt))) return -EFAULT; switch (optname) { case NETROM_T1: - if (opt < 1 || opt > ULONG_MAX / HZ) + if (opt < 1 || opt > UINT_MAX / HZ) return -EINVAL; nr->t1 = opt * HZ; return 0; case NETROM_T2: - if (opt < 1 || opt > ULONG_MAX / HZ) + if (opt < 1 || opt > UINT_MAX / HZ) return -EINVAL; nr->t2 = opt * HZ; return 0; @@ -329,13 +329,13 @@ static int nr_setsockopt(struct socket *sock, int level, int optname, return 0; case NETROM_T4: - if (opt < 1 || opt > ULONG_MAX / HZ) + if (opt < 1 || opt > UINT_MAX / HZ) return -EINVAL; nr->t4 = opt * HZ; return 0; case NETROM_IDLE: - if (opt > ULONG_MAX / (60 * HZ)) + if (opt > UINT_MAX / (60 * HZ)) return -EINVAL; nr->idle = opt * 60 * HZ; return 0; diff --git a/net/nfc/af_nfc.c b/net/nfc/af_nfc.c index 4a9e72073564a0a1c2e432d05e282c8aa61d0003..581358dcbdf8d1d3eabef893ef8beb667a3ee39b 100644 --- a/net/nfc/af_nfc.c +++ b/net/nfc/af_nfc.c @@ -60,6 +60,9 @@ int nfc_proto_register(const struct nfc_protocol *nfc_proto) proto_tab[nfc_proto->id] = nfc_proto; write_unlock(&proto_tab_lock); + if (rc) + proto_unregister(nfc_proto->proto); + return rc; } EXPORT_SYMBOL(nfc_proto_register); diff --git a/net/nfc/core.c b/net/nfc/core.c index eb377f87bcae8207dcd97d61456c09f1b4b801d6..6800470dd6df7a0eb216fa3dc078294946366e30 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -94,13 +94,13 @@ int nfc_dev_up(struct nfc_dev *dev) device_lock(&dev->dev); - if (dev->rfkill && rfkill_blocked(dev->rfkill)) { - rc = -ERFKILL; + if (!device_is_registered(&dev->dev)) { + rc = -ENODEV; goto error; } - if (!device_is_registered(&dev->dev)) { - rc = -ENODEV; + if (dev->rfkill && rfkill_blocked(dev->rfkill)) { + rc = -ERFKILL; goto error; } @@ -1117,11 +1117,7 @@ int nfc_register_device(struct nfc_dev *dev) if (rc) pr_err("Could not register llcp device\n"); - rc = nfc_genl_device_added(dev); - if (rc) - pr_debug("The userspace won't be notified that the device %s was added\n", - dev_name(&dev->dev)); - + device_lock(&dev->dev); dev->rfkill = rfkill_alloc(dev_name(&dev->dev), &dev->dev, RFKILL_TYPE_NFC, &nfc_rfkill_ops, dev); if (dev->rfkill) { @@ -1130,6 +1126,12 @@ int nfc_register_device(struct nfc_dev *dev) dev->rfkill = NULL; } } + device_unlock(&dev->dev); + + rc = nfc_genl_device_added(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s was added\n", + dev_name(&dev->dev)); return 0; } @@ -1146,10 +1148,17 @@ void nfc_unregister_device(struct nfc_dev *dev) pr_debug("dev_name=%s\n", dev_name(&dev->dev)); + rc = nfc_genl_device_removed(dev); + if (rc) + pr_debug("The userspace won't be notified that the device %s " + "was removed\n", dev_name(&dev->dev)); + + device_lock(&dev->dev); if (dev->rfkill) { rfkill_unregister(dev->rfkill); rfkill_destroy(dev->rfkill); } + device_unlock(&dev->dev); if (dev->ops->check_presence) { device_lock(&dev->dev); @@ -1159,11 +1168,6 @@ void nfc_unregister_device(struct nfc_dev *dev) cancel_work_sync(&dev->check_pres_work); } - rc = nfc_genl_device_removed(dev); - if (rc) - pr_debug("The userspace won't be notified that the device %s " - "was removed\n", dev_name(&dev->dev)); - nfc_llcp_unregister_device(dev); mutex_lock(&nfc_devlist_mutex); diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index e3599ed4a7a8765513a3f4c40c7da987cfae0398..9c9caa307cf1689dc786ac53b0aa27f115990b9c 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -277,6 +277,7 @@ int digital_tg_configure_hw(struct nfc_digital_dev *ddev, int type, int param) static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) { struct digital_tg_mdaa_params *params; + int rc; params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) @@ -291,8 +292,12 @@ static int digital_tg_listen_mdaa(struct nfc_digital_dev *ddev, u8 rf_tech) get_random_bytes(params->nfcid2 + 2, NFC_NFCID2_MAXSIZE - 2); params->sc = DIGITAL_SENSF_FELICA_SC; - return digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params, - 500, digital_tg_recv_atr_req, NULL); + rc = digital_send_cmd(ddev, DIGITAL_CMD_TG_LISTEN_MDAA, NULL, params, + 500, digital_tg_recv_atr_req, NULL); + if (rc) + kfree(params); + + return rc; } static int digital_tg_listen_md(struct nfc_digital_dev *ddev, u8 rf_tech) diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c index 84d2345c75a3f2a9e9b89dade25421b1d4a8acab..3adf4589852affc6cd3665857ac0eba0ff21b641 100644 --- a/net/nfc/digital_technology.c +++ b/net/nfc/digital_technology.c @@ -465,8 +465,12 @@ static int digital_in_send_sdd_req(struct nfc_digital_dev *ddev, skb_put_u8(skb, sel_cmd); skb_put_u8(skb, DIGITAL_SDD_REQ_SEL_PAR); - return digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res, - target); + rc = digital_in_send_cmd(ddev, skb, 30, digital_in_recv_sdd_res, + target); + if (rc) + kfree_skb(skb); + + return rc; } static void digital_in_recv_sens_res(struct nfc_digital_dev *ddev, void *arg, diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 6cfd30fc07985e69fb68e84b71f3c9fa64aa62b2..0b93a17b9f11f53395c1a2e6f1ebddb3720bb180 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -789,6 +789,11 @@ static int llcp_sock_sendmsg(struct socket *sock, struct msghdr *msg, lock_sock(sk); + if (!llcp_sock->local) { + release_sock(sk); + return -ENODEV; + } + if (sk->sk_type == SOCK_DGRAM) { DECLARE_SOCKADDR(struct sockaddr_nfc_llcp *, addr, msg->msg_name); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 32e8154363cabe37d8ccc66c43dff47c67d4ba24..e38719e2ee582e538ef6672ff1a9cb315bb2b2ac 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -144,12 +144,15 @@ inline int nci_request(struct nci_dev *ndev, { int rc; - if (!test_bit(NCI_UP, &ndev->flags)) - return -ENETDOWN; - /* Serialize all requests */ mutex_lock(&ndev->req_lock); - rc = __nci_request(ndev, req, opt, timeout); + /* check the state after obtaing the lock against any races + * from nci_close_device when the device gets removed. + */ + if (test_bit(NCI_UP, &ndev->flags)) + rc = __nci_request(ndev, req, opt, timeout); + else + rc = -ENETDOWN; mutex_unlock(&ndev->req_lock); return rc; @@ -470,6 +473,11 @@ static int nci_open_device(struct nci_dev *ndev) mutex_lock(&ndev->req_lock); + if (test_bit(NCI_UNREG, &ndev->flags)) { + rc = -ENODEV; + goto done; + } + if (test_bit(NCI_UP, &ndev->flags)) { rc = -EALREADY; goto done; @@ -533,6 +541,10 @@ done: static int nci_close_device(struct nci_dev *ndev) { nci_req_cancel(ndev, ENODEV); + + /* This mutex needs to be held as a barrier for + * caller nci_unregister_device + */ mutex_lock(&ndev->req_lock); if (!test_and_clear_bit(NCI_UP, &ndev->flags)) { @@ -565,13 +577,13 @@ static int nci_close_device(struct nci_dev *ndev) clear_bit(NCI_INIT, &ndev->flags); - del_timer_sync(&ndev->cmd_timer); - /* Flush cmd wq */ flush_workqueue(ndev->cmd_wq); - /* Clear flags */ - ndev->flags = 0; + del_timer_sync(&ndev->cmd_timer); + + /* Clear flags except NCI_UNREG */ + ndev->flags &= BIT(NCI_UNREG); mutex_unlock(&ndev->req_lock); @@ -1256,6 +1268,12 @@ void nci_unregister_device(struct nci_dev *ndev) { struct nci_conn_info *conn_info, *n; + /* This set_bit is not protected with specialized barrier, + * However, it is fine because the mutex_lock(&ndev->req_lock); + * in nci_close_device() will help to emit one. + */ + set_bit(NCI_UNREG, &ndev->flags); + nci_close_device(ndev); destroy_workqueue(ndev->cmd_wq); diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index a48297b79f34fb05cedf8919215e78312a4e2a34..b0ed2b47ac43700893e05c169f5ca9e10bb406ad 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -277,6 +277,8 @@ static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, ndev->cur_conn_id); if (conn_info) { list_del(&conn_info->list); + if (conn_info == ndev->rf_conn_info) + ndev->rf_conn_info = NULL; devm_kfree(&ndev->nfc_dev->dev, conn_info); } } diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index bec7847f8eaac6d6bbfc541ce215f3ccd5753d0e..78acc4e9ac932b74908f5e879fa67e39a131aeb6 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -636,8 +636,10 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_device_iter_exit(iter); - kfree(iter); + if (iter) { + nfc_device_iter_exit(iter); + kfree(iter); + } return 0; } @@ -1392,8 +1394,10 @@ static int nfc_genl_dump_ses_done(struct netlink_callback *cb) { struct class_dev_iter *iter = (struct class_dev_iter *) cb->args[0]; - nfc_device_iter_exit(iter); - kfree(iter); + if (iter) { + nfc_device_iter_exit(iter); + kfree(iter); + } return 0; } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index fc487f9812fc554003b8d6c8c3b36a7ea3d760df..525c1540f10e6484344135d1707ea7100c4fc91d 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -422,12 +422,43 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask) +static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 ipv6_tclass, u8 mask) { + u8 old_ipv6_tclass = ipv6_get_dsfield(nh); + + ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)(old_ipv6_tclass << 12), + (__force __wsum)(ipv6_tclass << 12)); + + ipv6_change_dsfield(nh, ~mask, ipv6_tclass); +} + +static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 mask) +{ + u32 ofl; + + ofl = nh->flow_lbl[0] << 16 | nh->flow_lbl[1] << 8 | nh->flow_lbl[2]; + fl = OVS_MASKED(ofl, fl, mask); + /* Bits 21-24 are always unmasked, so this retains their values. */ - OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); - OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); - OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); + nh->flow_lbl[0] = (u8)(fl >> 16); + nh->flow_lbl[1] = (u8)(fl >> 8); + nh->flow_lbl[2] = (u8)fl; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)htonl(ofl), (__force __wsum)htonl(fl)); +} + +static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, u8 mask) +{ + new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask); + + if (skb->ip_summed == CHECKSUM_COMPLETE) + csum_replace(&skb->csum, (__force __wsum)(nh->hop_limit << 8), + (__force __wsum)(new_ttl << 8)); + nh->hop_limit = new_ttl; } static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, @@ -545,18 +576,17 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, } } if (mask->ipv6_tclass) { - ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); + set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass); flow_key->ip.tos = ipv6_get_dsfield(nh); } if (mask->ipv6_label) { - set_ipv6_fl(nh, ntohl(key->ipv6_label), + set_ipv6_fl(skb, nh, ntohl(key->ipv6_label), ntohl(mask->ipv6_label)); flow_key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); } if (mask->ipv6_hlimit) { - OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit, - mask->ipv6_hlimit); + set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit); flow_key->ip.ttl = nh->hop_limit; } return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 08144559eed56ea19a5ceb1c2cc349caf1040aea..d0c95d7dd292d89f0de3b36816ff927e0ffdacfb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1735,6 +1735,7 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) match->prot_hook.dev = po->prot_hook.dev; match->prot_hook.func = packet_rcv_fanout; match->prot_hook.af_packet_priv = match; + match->prot_hook.af_packet_net = read_pnet(&match->net); match->prot_hook.id_match = match_fanout_group; match->max_num_members = args->max_num_members; list_add(&match->list, &fanout_list); @@ -1749,7 +1750,10 @@ static int fanout_add(struct sock *sk, struct fanout_args *args) err = -ENOSPC; if (refcount_read(&match->sk_ref) < match->max_num_members) { __dev_remove_pack(&po->prot_hook); - po->fanout = match; + + /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ + WRITE_ONCE(po->fanout, match); + po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); @@ -2274,8 +2278,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, copy_skb = skb_get(skb); skb_head = skb->data; } - if (copy_skb) + if (copy_skb) { + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); + } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { @@ -3323,6 +3330,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->prot_hook.func = packet_rcv_spkt; po->prot_hook.af_packet_priv = sk; + po->prot_hook.af_packet_net = sock_net(sk); if (proto) { po->prot_hook.type = proto; @@ -3429,6 +3437,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + const size_t max_len = min(sizeof(skb->cb), + sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled @@ -3451,6 +3461,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(struct sockaddr_ll); } } + if (WARN_ON_ONCE(copy_len > max_len)) { + copy_len = max_len; + msg->msg_namelen = copy_len; + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } @@ -3904,7 +3918,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } case PACKET_FANOUT_DATA: { - if (!po->fanout) + /* Paired with the WRITE_ONCE() in fanout_add() */ + if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); @@ -4461,9 +4476,10 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, } out_free_pg_vec: - bitmap_free(rx_owner_map); - if (pg_vec) + if (pg_vec) { + bitmap_free(rx_owner_map); free_pg_vec(pg_vec, order, req->tp_block_nr); + } out: return err; } diff --git a/net/phonet/pep.c b/net/phonet/pep.c index a1525916885ae9741972e32b16363319edc1aa31..65d463ad87707782a44152014fa760798554145f 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -868,6 +868,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, err = pep_accept_conn(newsk, skb); if (err) { + __sock_put(sk); sock_put(newsk); newsk = NULL; goto drop; @@ -946,6 +947,8 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) ret = -EBUSY; else if (sk->sk_state == TCP_ESTABLISHED) ret = -EISCONN; + else if (!pn->pn_sk.sobject) + ret = -EADDRNOTAVAIL; else ret = pep_sock_enable(sk, NULL, 0); release_sock(sk); diff --git a/net/rds/connection.c b/net/rds/connection.c index a3bc4b54d4910567c915f41cd6bc44f279eaadf3..b4cc699c5fad37736a31619db2b0ac41babd164e 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -253,6 +253,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, * should end up here, but if it * does, reset/destroy the connection. */ + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = ERR_PTR(-EOPNOTSUPP); goto out; diff --git a/net/rds/ib.c b/net/rds/ib.c index deecbdcdae84efc933ca8413e1490030ce4dc019..24c9a9005a6fbac0ba408649dc090042f5c8eca5 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -30,7 +30,6 @@ * SOFTWARE. * */ -#include #include #include #include @@ -108,7 +107,6 @@ static void rds_ib_dev_free(struct work_struct *work) rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool); if (rds_ibdev->pd) ib_dealloc_pd(rds_ibdev->pd); - dma_pool_destroy(rds_ibdev->rid_hdrs_pool); list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { list_del(&i_ipaddr->list); @@ -191,14 +189,6 @@ static int rds_ib_add_one(struct ib_device *device) rds_ibdev->pd = NULL; goto put_dev; } - rds_ibdev->rid_hdrs_pool = dma_pool_create(device->name, - device->dma_device, - sizeof(struct rds_header), - L1_CACHE_BYTES, 0); - if (!rds_ibdev->rid_hdrs_pool) { - ret = -ENOMEM; - goto put_dev; - } rds_ibdev->mr_1m_pool = rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL); diff --git a/net/rds/ib.h b/net/rds/ib.h index c23a11d9ad3628b415ad4576488fd2635c9e81d8..2ba71102b1f1f28025d6b31007efab81b9d36b6f 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -246,7 +246,6 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - struct dma_pool *rid_hdrs_pool; /* RDS headers DMA pool */ u8 odp_capable:1; unsigned int max_mrs; @@ -380,11 +379,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id, bool isv6); void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event); -struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, - struct dma_pool *pool, - dma_addr_t **dma_addrs, u32 num_hdrs); -void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, - dma_addr_t *dma_addrs, u32 num_hdrs); #define rds_ib_conn_error(conn, fmt...) \ __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index b36b60668b1da9d6714f29e8e64e8facfeae434a..f5cbe963cd8f781cdfbb99a59fb51cfae2bd0baf 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -30,7 +30,6 @@ * SOFTWARE. * */ -#include #include #include #include @@ -441,42 +440,87 @@ static inline void ibdev_put_vector(struct rds_ib_device *rds_ibdev, int index) rds_ibdev->vector_load[index]--; } +static void rds_dma_hdr_free(struct ib_device *dev, struct rds_header *hdr, + dma_addr_t dma_addr, enum dma_data_direction dir) +{ + ib_dma_unmap_single(dev, dma_addr, sizeof(*hdr), dir); + kfree(hdr); +} + +static struct rds_header *rds_dma_hdr_alloc(struct ib_device *dev, + dma_addr_t *dma_addr, enum dma_data_direction dir) +{ + struct rds_header *hdr; + + hdr = kzalloc_node(sizeof(*hdr), GFP_KERNEL, ibdev_to_node(dev)); + if (!hdr) + return NULL; + + *dma_addr = ib_dma_map_single(dev, hdr, sizeof(*hdr), + DMA_BIDIRECTIONAL); + if (ib_dma_mapping_error(dev, *dma_addr)) { + kfree(hdr); + return NULL; + } + + return hdr; +} + +/* Free the DMA memory used to store struct rds_header. + * + * @dev: the RDS IB device + * @hdrs: pointer to the array storing DMA memory pointers + * @dma_addrs: pointer to the array storing DMA addresses + * @num_hdars: number of headers to free. + */ +static void rds_dma_hdrs_free(struct rds_ib_device *dev, + struct rds_header **hdrs, dma_addr_t *dma_addrs, u32 num_hdrs, + enum dma_data_direction dir) +{ + u32 i; + + for (i = 0; i < num_hdrs; i++) + rds_dma_hdr_free(dev->dev, hdrs[i], dma_addrs[i], dir); + kvfree(hdrs); + kvfree(dma_addrs); +} + + /* Allocate DMA coherent memory to be used to store struct rds_header for * sending/receiving packets. The pointers to the DMA memory and the * associated DMA addresses are stored in two arrays. * - * @ibdev: the IB device - * @pool: the DMA memory pool + * @dev: the RDS IB device * @dma_addrs: pointer to the array for storing DMA addresses * @num_hdrs: number of headers to allocate * * It returns the pointer to the array storing the DMA memory pointers. On * error, NULL pointer is returned. */ -struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, - struct dma_pool *pool, - dma_addr_t **dma_addrs, u32 num_hdrs) +static struct rds_header **rds_dma_hdrs_alloc(struct rds_ib_device *dev, + dma_addr_t **dma_addrs, u32 num_hdrs, + enum dma_data_direction dir) { struct rds_header **hdrs; dma_addr_t *hdr_daddrs; u32 i; hdrs = kvmalloc_node(sizeof(*hdrs) * num_hdrs, GFP_KERNEL, - ibdev_to_node(ibdev)); + ibdev_to_node(dev->dev)); if (!hdrs) return NULL; hdr_daddrs = kvmalloc_node(sizeof(*hdr_daddrs) * num_hdrs, GFP_KERNEL, - ibdev_to_node(ibdev)); + ibdev_to_node(dev->dev)); if (!hdr_daddrs) { kvfree(hdrs); return NULL; } for (i = 0; i < num_hdrs; i++) { - hdrs[i] = dma_pool_zalloc(pool, GFP_KERNEL, &hdr_daddrs[i]); + hdrs[i] = rds_dma_hdr_alloc(dev->dev, &hdr_daddrs[i], dir); if (!hdrs[i]) { - rds_dma_hdrs_free(pool, hdrs, hdr_daddrs, i); + rds_dma_hdrs_free(dev, hdrs, hdr_daddrs, i, dir); return NULL; } } @@ -485,24 +529,6 @@ struct rds_header **rds_dma_hdrs_alloc(struct ib_device *ibdev, return hdrs; } -/* Free the DMA memory used to store struct rds_header. - * - * @pool: the DMA memory pool - * @hdrs: pointer to the array storing DMA memory pointers - * @dma_addrs: pointer to the array storing DMA addresses - * @num_hdars: number of headers to free. - */ -void rds_dma_hdrs_free(struct dma_pool *pool, struct rds_header **hdrs, - dma_addr_t *dma_addrs, u32 num_hdrs) -{ - u32 i; - - for (i = 0; i < num_hdrs; i++) - dma_pool_free(pool, hdrs[i], dma_addrs[i]); - kvfree(hdrs); - kvfree(dma_addrs); -} - /* * This needs to be very careful to not leave IS_ERR pointers around for * cleanup to trip over. @@ -516,7 +542,6 @@ static int rds_ib_setup_qp(struct rds_connection *conn) struct rds_ib_device *rds_ibdev; unsigned long max_wrs; int ret, fr_queue_space; - struct dma_pool *pool; /* * It's normal to see a null device if an incoming connection races @@ -612,25 +637,26 @@ static int rds_ib_setup_qp(struct rds_connection *conn) goto recv_cq_out; } - pool = rds_ibdev->rid_hdrs_pool; - ic->i_send_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + ic->i_send_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr, + DMA_TO_DEVICE); if (!ic->i_send_hdrs) { ret = -ENOMEM; rdsdebug("DMA send hdrs alloc failed\n"); goto qp_out; } - ic->i_recv_hdrs = rds_dma_hdrs_alloc(dev, pool, &ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + ic->i_recv_hdrs = rds_dma_hdrs_alloc(rds_ibdev, &ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr, + DMA_FROM_DEVICE); if (!ic->i_recv_hdrs) { ret = -ENOMEM; rdsdebug("DMA recv hdrs alloc failed\n"); goto send_hdrs_dma_out; } - ic->i_ack = dma_pool_zalloc(pool, GFP_KERNEL, - &ic->i_ack_dma); + ic->i_ack = rds_dma_hdr_alloc(rds_ibdev->dev, &ic->i_ack_dma, + DMA_TO_DEVICE); if (!ic->i_ack) { ret = -ENOMEM; rdsdebug("DMA ack header alloc failed\n"); @@ -666,18 +692,19 @@ sends_out: vfree(ic->i_sends); ack_dma_out: - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + rds_dma_hdr_free(rds_ibdev->dev, ic->i_ack, ic->i_ack_dma, + DMA_TO_DEVICE); ic->i_ack = NULL; recv_hdrs_dma_out: - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + rds_dma_hdrs_free(rds_ibdev, ic->i_recv_hdrs, ic->i_recv_hdrs_dma, + ic->i_recv_ring.w_nr, DMA_FROM_DEVICE); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; send_hdrs_dma_out: - rds_dma_hdrs_free(pool, ic->i_send_hdrs, ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + rds_dma_hdrs_free(rds_ibdev, ic->i_send_hdrs, ic->i_send_hdrs_dma, + ic->i_send_ring.w_nr, DMA_TO_DEVICE); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; @@ -1110,29 +1137,30 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp) } if (ic->rds_ibdev) { - struct dma_pool *pool; - - pool = ic->rds_ibdev->rid_hdrs_pool; - /* then free the resources that ib callbacks use */ if (ic->i_send_hdrs) { - rds_dma_hdrs_free(pool, ic->i_send_hdrs, + rds_dma_hdrs_free(ic->rds_ibdev, + ic->i_send_hdrs, ic->i_send_hdrs_dma, - ic->i_send_ring.w_nr); + ic->i_send_ring.w_nr, + DMA_TO_DEVICE); ic->i_send_hdrs = NULL; ic->i_send_hdrs_dma = NULL; } if (ic->i_recv_hdrs) { - rds_dma_hdrs_free(pool, ic->i_recv_hdrs, + rds_dma_hdrs_free(ic->rds_ibdev, + ic->i_recv_hdrs, ic->i_recv_hdrs_dma, - ic->i_recv_ring.w_nr); + ic->i_recv_ring.w_nr, + DMA_FROM_DEVICE); ic->i_recv_hdrs = NULL; ic->i_recv_hdrs_dma = NULL; } if (ic->i_ack) { - dma_pool_free(pool, ic->i_ack, ic->i_ack_dma); + rds_dma_hdr_free(ic->rds_ibdev->dev, ic->i_ack, + ic->i_ack_dma, DMA_TO_DEVICE); ic->i_ack = NULL; } } else { diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 3cffcec5fb371b8878cfdc72bfb5efa416255c8d..6fdedd9dbbc28ff060010b966472d6d6a2decea8 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -662,10 +662,16 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi seq = rds_ib_get_ack(ic); rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); + + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma, + sizeof(*hdr), DMA_TO_DEVICE); rds_message_populate_header(hdr, 0, 0, 0); hdr->h_ack = cpu_to_be64(seq); hdr->h_credit = adv_credits; rds_message_make_checksum(hdr); + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma, + sizeof(*hdr), DMA_TO_DEVICE); + ic->i_ack_queued = jiffies; ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL); @@ -845,6 +851,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, struct rds_ib_connection *ic = conn->c_transport_data; struct rds_ib_incoming *ibinc = ic->i_ibinc; struct rds_header *ihdr, *hdr; + dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; /* XXX shut down the connection if port 0,0 are seen? */ @@ -863,6 +870,8 @@ static void rds_ib_process_recv(struct rds_connection *conn, ihdr = ic->i_recv_hdrs[recv - ic->i_recvs]; + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr, + sizeof(*ihdr), DMA_FROM_DEVICE); /* Validate the checksum. */ if (!rds_message_verify_checksum(ihdr)) { rds_ib_conn_error(conn, "incoming message " @@ -870,7 +879,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, "forcing a reconnect\n", &conn->c_faddr); rds_stats_inc(s_recv_drop_bad_checksum); - return; + goto done; } /* Process the ACK sequence which comes with every packet */ @@ -899,7 +908,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, */ rds_ib_frag_free(ic, recv->r_frag); recv->r_frag = NULL; - return; + goto done; } /* @@ -933,7 +942,7 @@ static void rds_ib_process_recv(struct rds_connection *conn, hdr->h_dport != ihdr->h_dport) { rds_ib_conn_error(conn, "fragment header mismatch; forcing reconnect\n"); - return; + goto done; } } @@ -965,6 +974,9 @@ static void rds_ib_process_recv(struct rds_connection *conn, rds_inc_put(&ibinc->ii_inc); } +done: + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr, + sizeof(*ihdr), DMA_FROM_DEVICE); } void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index dfe778220657afe41902dea5248e8829337904b2..92b4a8689aae7af21cd3cf7f1e5306614c42fad4 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -638,6 +638,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, send->s_sge[0].length = sizeof(struct rds_header); send->s_sge[0].lkey = ic->i_pd->local_dma_lkey; + ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, + ic->i_send_hdrs_dma[pos], + sizeof(struct rds_header), + DMA_TO_DEVICE); memcpy(ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); @@ -688,6 +692,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, adv_credits = 0; rds_ib_stats_inc(s_ib_tx_credit_updates); } + ib_dma_sync_single_for_device(ic->rds_ibdev->dev, + ic->i_send_hdrs_dma[pos], + sizeof(struct rds_header), + DMA_TO_DEVICE); if (prev) prev->s_wr.next = &send->s_wr; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index abf19c0e3ba0bfcf0396df2d78b937e288b84ab0..5327d130c4b5691e788bbbcb990a349d714ad8d4 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -500,7 +500,7 @@ void rds_tcp_tune(struct socket *sock) sk->sk_userlocks |= SOCK_SNDBUF_LOCK; } if (rtn->rcvbuf_size > 0) { - sk->sk_sndbuf = rtn->rcvbuf_size; + sk->sk_rcvbuf = rtn->rcvbuf_size; sk->sk_userlocks |= SOCK_RCVBUF_LOCK; } release_sock(sk); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 6be2672a65eabebd5b8fc28d82ed9b3c58542005..df864e6922679140d5b7c8c3aadd65a92906e9e3 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -157,7 +157,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct sk_buff *skb; - unsigned long resend_at, rto_j; + unsigned long resend_at; rxrpc_seq_t cursor, seq, top; ktime_t now, max_age, oldest, ack_ts; int ix; @@ -165,10 +165,8 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - rto_j = call->peer->rto_j; - now = ktime_get_real(); - max_age = ktime_sub(now, jiffies_to_usecs(rto_j)); + max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j)); spin_lock_bh(&call->lock); @@ -213,7 +211,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rto_j; + resend_at += jiffies + rxrpc_get_rto_backoff(call->peer, retrans); WRITE_ONCE(call->resend_at, resend_at); if (unacked) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 7e574c75be8e14f8c8e7142e32edbea9a302d5e3..f5fb223aba82ab3684cc2880bd51c1b8a1c04481 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -135,16 +135,20 @@ struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *bundle) return bundle; } +static void rxrpc_free_bundle(struct rxrpc_bundle *bundle) +{ + rxrpc_put_peer(bundle->params.peer); + kfree(bundle); +} + void rxrpc_put_bundle(struct rxrpc_bundle *bundle) { unsigned int d = bundle->debug_id; unsigned int u = atomic_dec_return(&bundle->usage); _debug("PUT B=%x %u", d, u); - if (u == 0) { - rxrpc_put_peer(bundle->params.peer); - kfree(bundle); - } + if (u == 0) + rxrpc_free_bundle(bundle); } /* @@ -334,7 +338,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c return candidate; found_bundle_free: - kfree(candidate); + rxrpc_free_bundle(candidate); found_bundle: rxrpc_get_bundle(bundle); spin_unlock(&local->client_bundles_lock); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 10f2bf2e9068abc15ef4ece90e7d6652194b4f77..a45c83f22236e2648c2fa4f97ebebb088361a071 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -468,7 +468,7 @@ done: if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); + ack_lost_at = rxrpc_get_rto_backoff(call->peer, false); ack_lost_at += nowj; WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 68396d05205252177ea9525b43972f1598480e12..0298fe2ad6d323b377b46d5dc70c8d472d899966 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -299,6 +299,12 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx, return peer; } +static void rxrpc_free_peer(struct rxrpc_peer *peer) +{ + rxrpc_put_local(peer->local); + kfree_rcu(peer, rcu); +} + /* * Set up a new incoming peer. There shouldn't be any other matching peers * since we've already done a search in the list from the non-reentrant context @@ -365,7 +371,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx, spin_unlock_bh(&rxnet->peer_hash_lock); if (peer) - kfree(candidate); + rxrpc_free_peer(candidate); else peer = candidate; } @@ -420,8 +426,7 @@ static void __rxrpc_put_peer(struct rxrpc_peer *peer) list_del_init(&peer->keepalive_link); spin_unlock_bh(&rxnet->peer_hash_lock); - rxrpc_put_local(peer->local); - kfree_rcu(peer, rcu); + rxrpc_free_peer(peer); } /* @@ -457,8 +462,7 @@ void rxrpc_put_peer_locked(struct rxrpc_peer *peer) if (n == 0) { hash_del_rcu(&peer->hash_link); list_del_init(&peer->keepalive_link); - rxrpc_put_local(peer->local); - kfree_rcu(peer, rcu); + rxrpc_free_peer(peer); } } diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c index 4e565eeab426078b8a8266493a0affecb5385020..be61d6f5be8d150554df8fef1d9c02513b0460a6 100644 --- a/net/rxrpc/rtt.c +++ b/net/rxrpc/rtt.c @@ -22,7 +22,7 @@ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) { - return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); + return usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); } static u32 rxrpc_bound_rto(u32 rto) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f613299ca7f0a668cc2c7579a4f0515e16af1d78..7b29aa1a3ce9a20ba165fb1030628d83c064a3bb 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -680,15 +680,24 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, restart_act_graph: for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; + int repeat_ttl; if (jmp_prgcnt > 0) { jmp_prgcnt -= 1; continue; } + + repeat_ttl = 32; repeat: ret = a->ops->act(skb, a, res); - if (ret == TC_ACT_REPEAT) - goto repeat; /* we need a ttl - JHS */ + + if (unlikely(ret == TC_ACT_REPEAT)) { + if (--repeat_ttl != 0) + goto repeat; + /* suspicious opcode, stop pipeline */ + net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n"); + return TC_ACT_OK; + } if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 812c3c70a53a04029ccf11b78d0c59d877fb2180..825b3e9b55f7e23900e55af586b45e826e1f5fbb 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -514,11 +514,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p, struct nf_conn *ct; u8 dir; - /* Previously seen or loopback */ - ct = nf_ct_get(skb, &ctinfo); - if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED) - return false; - switch (family) { case NFPROTO_IPV4: if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, &tuple, &tcph)) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 0b0eb18919c09d08f96dc2d4fcaffef524cf2be5..24d561d8d9c97efecf668c376a873342f8d6d74c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -218,6 +219,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, bool want_ingress; bool is_redirect; bool expects_nh; + bool at_ingress; int m_eaction; int mac_len; bool at_nh; @@ -253,7 +255,8 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, * ingress - that covers the TC S/W datapath. */ is_redirect = tcf_mirred_is_act_redirect(m_eaction); - use_reinsert = skb_at_tc_ingress(skb) && is_redirect && + at_ingress = skb_at_tc_ingress(skb); + use_reinsert = at_ingress && is_redirect && tcf_mirred_can_reinsert(retval); if (!use_reinsert) { skb2 = skb_clone(skb, GFP_ATOMIC); @@ -261,10 +264,12 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, goto out; } + want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + /* All mirred/redirected skbs should clear previous ct info */ nf_reset_ct(skb2); - - want_ingress = tcf_mirred_act_wants_ingress(m_eaction); + if (want_ingress && !at_ingress) /* drop dst for egress -> ingress */ + skb_dst_drop(skb2); expects_nh = want_ingress || !m_mac_header_xmit; at_nh = skb->data == skb_network_header(skb); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8073657a0fd2519bdbc4891fbf7d63713cd5907e..9a789a057a741ba9ffe58dbad5104284406347de 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1045,7 +1045,7 @@ static int __tcf_qdisc_find(struct net *net, struct Qdisc **q, /* Find qdisc */ if (!*parent) { - *q = dev->qdisc; + *q = rcu_dereference(dev->qdisc); *parent = (*q)->handle; } else { *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); @@ -1954,9 +1954,9 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, bool prio_allocate; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; + struct Qdisc *q; struct tcf_chain_info chain_info; - struct tcf_chain *chain = NULL; + struct tcf_chain *chain; struct tcf_block *block; struct tcf_proto *tp; unsigned long cl; @@ -1984,6 +1984,8 @@ replay: tp = NULL; cl = 0; block = NULL; + q = NULL; + chain = NULL; if (prio == 0) { /* If no priority is provided by the user, @@ -2589,7 +2591,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) parent = tcm->tcm_parent; if (!parent) - q = dev->qdisc; + q = rtnl_dereference(dev->qdisc); else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) @@ -2804,8 +2806,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, struct tcmsg *t; u32 parent; u32 chain_index; - struct Qdisc *q = NULL; - struct tcf_chain *chain = NULL; + struct Qdisc *q; + struct tcf_chain *chain; struct tcf_block *block; unsigned long cl; int err; @@ -2815,6 +2817,7 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, return -EPERM; replay: + q = NULL; err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack); if (err < 0) @@ -2974,7 +2977,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb) parent = tcm->tcm_parent; if (!parent) { - q = dev->qdisc; + q = rtnl_dereference(dev->qdisc); parent = q->handle; } else { q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); @@ -3703,6 +3706,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mpls_mangle.ttl = tcf_mpls_ttl(act); break; default: + err = -EOPNOTSUPP; goto err_out_locked; } } else if (is_tcf_skbedit_ptype(act)) { diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index a5212a3f86e2f2e4110c217e92770768e55f0af7..8ff6945b9f8f4db3dc0c87689b700d35a0e6d11c 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2169,18 +2169,24 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg, arg->count = arg->skip; + rcu_read_lock(); idr_for_each_entry_continue_ul(&head->handle_idr, f, tmp, id) { /* don't return filters that are being deleted */ if (!refcount_inc_not_zero(&f->refcnt)) continue; + rcu_read_unlock(); + if (arg->fn(tp, f, arg) < 0) { __fl_put(f); arg->stop = 1; + rcu_read_lock(); break; } __fl_put(f); arg->count++; + rcu_read_lock(); } + rcu_read_unlock(); arg->cookie = id; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 54a8c363bcddaa6f20388ccf46e30cfb490bb0df..6e18aa41778285e7fa97e606f46cfc67e0f03236 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -301,7 +301,7 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) if (!handle) return NULL; - q = qdisc_match_from_root(dev->qdisc, handle); + q = qdisc_match_from_root(rtnl_dereference(dev->qdisc), handle); if (q) goto out; @@ -320,7 +320,7 @@ struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) if (!handle) return NULL; - q = qdisc_match_from_root(dev->qdisc, handle); + q = qdisc_match_from_root(rcu_dereference(dev->qdisc), handle); if (q) goto out; @@ -513,6 +513,12 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt, return stab; } + if (s->size_log > STAB_SIZE_LOG_MAX || + s->cell_log > STAB_SIZE_LOG_MAX) { + NL_SET_ERR_MSG(extack, "Invalid logarithmic size of size table"); + return ERR_PTR(-EINVAL); + } + stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL); if (!stab) return ERR_PTR(-ENOMEM); @@ -1076,10 +1082,10 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, skip: if (!ingress) { notify_and_destroy(net, skb, n, classid, - dev->qdisc, new); + rtnl_dereference(dev->qdisc), new); if (new && !new->ops->attach) qdisc_refcount_inc(new); - dev->qdisc = new ? : &noop_qdisc; + rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc); if (new && new->ops->attach) new->ops->attach(new); @@ -1198,7 +1204,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev, err = -ENOENT; if (!ops) { - NL_SET_ERR_MSG(extack, "Specified qdisc not found"); + NL_SET_ERR_MSG(extack, "Specified qdisc kind is unknown"); goto err_out; } @@ -1454,7 +1460,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, q = dev_ingress_queue(dev)->qdisc_sleeping; } } else { - q = dev->qdisc; + q = rtnl_dereference(dev->qdisc); } if (!q) { NL_SET_ERR_MSG(extack, "Cannot find specified qdisc on specified device"); @@ -1543,7 +1549,7 @@ replay: q = dev_ingress_queue(dev)->qdisc_sleeping; } } else { - q = dev->qdisc; + q = rtnl_dereference(dev->qdisc); } /* It may be default qdisc, ignore it */ @@ -1765,7 +1771,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, + if (tc_dump_qdisc_root(rtnl_dereference(dev->qdisc), + skb, cb, &q_idx, s_q_idx, true, tca[TCA_DUMP_INVISIBLE]) < 0) goto done; @@ -2041,7 +2048,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, } else if (qid1) { qid = qid1; } else if (qid == 0) - qid = dev->qdisc->handle; + qid = rtnl_dereference(dev->qdisc)->handle; /* Now qid is genuine qdisc handle consistent * both with parent and child. @@ -2052,7 +2059,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, portid = TC_H_MAKE(qid, portid); } else { if (qid == 0) - qid = dev->qdisc->handle; + qid = rtnl_dereference(dev->qdisc)->handle; } /* OK. Locate qdisc */ @@ -2213,7 +2220,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0) + if (tc_dump_tclass_root(rtnl_dereference(dev->qdisc), + skb, tcm, cb, &t, s_t, true) < 0) goto done; dev_queue = dev_ingress_queue(dev); diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index c2c37ffd94f220fcfb35cbcf066a7ae25b657f38..c580139fcedecc0958b23abe32d6aede34effdf1 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -2736,7 +2736,7 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data), GFP_KERNEL); if (!q->tins) - goto nomem; + return -ENOMEM; for (i = 0; i < CAKE_MAX_TINS; i++) { struct cake_tin_data *b = q->tins + i; @@ -2766,10 +2766,6 @@ static int cake_init(struct Qdisc *sch, struct nlattr *opt, q->min_netlen = ~0; q->min_adjlen = ~0; return 0; - -nomem: - cake_destroy(sch); - return -ENOMEM; } static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index c76701ac35abf58e8aa1aa0aa10f2406801e4d00..9c224872ef0355e2b4d128c1924cb242106e31c0 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -667,12 +667,14 @@ static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt, q->classes[i].deficit = quanta[i]; } } + for (i = q->nbands; i < oldbands; i++) { + if (i >= q->nstrict && q->classes[i].qdisc->q.qlen) + list_del(&q->classes[i].alist); + qdisc_tree_flush_backlog(q->classes[i].qdisc); + } q->nstrict = nstrict; memcpy(q->prio2band, priomap, sizeof(priomap)); - for (i = q->nbands; i < oldbands; i++) - qdisc_tree_flush_backlog(q->classes[i].qdisc); - for (i = 0; i < q->nbands; i++) q->classes[i].quantum = quanta[i]; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index a579a4131d22d771c9766f5ad6cdb16ece3034c0..e1040421b79797fefaa26b8d7d3f44b91896e1de 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -233,6 +233,9 @@ int fifo_set_limit(struct Qdisc *q, unsigned int limit) if (strncmp(q->ops->id + 1, "fifo", 4) != 0) return 0; + if (!q->ops->change) + return 0; + nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); if (nla) { nla->nla_type = RTM_NEWQDISC; diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index cac684952edc55712197b307536fdc9ad3c7e064..c70802785518f4b33cbee68bbde563af60226f8c 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -531,6 +531,7 @@ static void fq_pie_destroy(struct Qdisc *sch) struct fq_pie_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); + q->p_params.tupdate = 0; del_timer_sync(&q->adapt_timer); kvfree(q->flows); } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 05aa2571a4095520eab57a7c75da0ab57a21ed78..5d5391adb667cb508aaf3d7eec0a5efb990c52e9 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1088,30 +1088,33 @@ static void attach_default_qdiscs(struct net_device *dev) if (!netif_is_multiqueue(dev) || dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - dev->qdisc = txq->qdisc_sleeping; - qdisc_refcount_inc(dev->qdisc); + qdisc = txq->qdisc_sleeping; + rcu_assign_pointer(dev->qdisc, qdisc); + qdisc_refcount_inc(qdisc); } else { qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL); if (qdisc) { - dev->qdisc = qdisc; + rcu_assign_pointer(dev->qdisc, qdisc); qdisc->ops->attach(qdisc); } } + qdisc = rtnl_dereference(dev->qdisc); /* Detect default qdisc setup/init failed and fallback to "noqueue" */ - if (dev->qdisc == &noop_qdisc) { + if (qdisc == &noop_qdisc) { netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n", default_qdisc_ops->id, noqueue_qdisc_ops.id); dev->priv_flags |= IFF_NO_QUEUE; netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - dev->qdisc = txq->qdisc_sleeping; - qdisc_refcount_inc(dev->qdisc); + qdisc = txq->qdisc_sleeping; + rcu_assign_pointer(dev->qdisc, qdisc); + qdisc_refcount_inc(qdisc); dev->priv_flags ^= IFF_NO_QUEUE; } #ifdef CONFIG_NET_SCHED - if (dev->qdisc != &noop_qdisc) - qdisc_hash_add(dev->qdisc, false); + if (qdisc != &noop_qdisc) + qdisc_hash_add(qdisc, false); #endif } @@ -1141,7 +1144,7 @@ void dev_activate(struct net_device *dev) * and noqueue_qdisc for virtual interfaces */ - if (dev->qdisc == &noop_qdisc) + if (rtnl_dereference(dev->qdisc) == &noop_qdisc) attach_default_qdiscs(dev); if (!netif_carrier_ok(dev)) @@ -1303,6 +1306,15 @@ static int qdisc_change_tx_queue_len(struct net_device *dev, return 0; } +void dev_qdisc_change_real_num_tx(struct net_device *dev, + unsigned int new_real_tx) +{ + struct Qdisc *qdisc = rtnl_dereference(dev->qdisc); + + if (qdisc->ops->change_real_num_tx) + qdisc->ops->change_real_num_tx(qdisc, new_real_tx); +} + int dev_qdisc_change_tx_queue_len(struct net_device *dev) { bool up = dev->flags & IFF_UP; @@ -1337,7 +1349,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, void dev_init_scheduler(struct net_device *dev) { - dev->qdisc = &noop_qdisc; + rcu_assign_pointer(dev->qdisc, &noop_qdisc); netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); if (dev_ingress_queue(dev)) dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); @@ -1365,8 +1377,8 @@ void dev_shutdown(struct net_device *dev) netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); if (dev_ingress_queue(dev)) shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); - qdisc_put(dev->qdisc); - dev->qdisc = &noop_qdisc; + qdisc_put(rtnl_dereference(dev->qdisc)); + rcu_assign_pointer(dev->qdisc, &noop_qdisc); WARN_ON(timer_pending(&dev->watchdog_timer)); } @@ -1377,6 +1389,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, { memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; + r->mpu = conf->mpu; r->rate_bytes_ps = max_t(u64, conf->rate, rate64); r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index e79f1afe0cfd6f55c2f0fdecf9c1e70bad0961e9..db18d8a860f9cc50608afe47d1b0fa9996295505 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -125,6 +125,29 @@ static void mq_attach(struct Qdisc *sch) priv->qdiscs = NULL; } +static void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx) +{ +#ifdef CONFIG_NET_SCHED + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + unsigned int i; + + for (i = new_real_tx; i < dev->real_num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + /* Only update the default qdiscs we created, + * qdiscs with handles are always hashed. + */ + if (qdisc != &noop_qdisc && !qdisc->handle) + qdisc_hash_del(qdisc); + } + for (i = dev->real_num_tx_queues; i < new_real_tx; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + if (qdisc != &noop_qdisc && !qdisc->handle) + qdisc_hash_add(qdisc, false); + } +#endif +} + static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) { struct net_device *dev = qdisc_dev(sch); @@ -288,6 +311,7 @@ struct Qdisc_ops mq_qdisc_ops __read_mostly = { .init = mq_init, .destroy = mq_destroy, .attach = mq_attach, + .change_real_num_tx = mq_change_real_num_tx, .dump = mq_dump, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 8766ab5b8788042df03b0b8cdb9e35734ef1a8a7..50e15add6068f366cd788abbf7f9184d6f141a82 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -306,6 +306,28 @@ static void mqprio_attach(struct Qdisc *sch) priv->qdiscs = NULL; } +static void mqprio_change_real_num_tx(struct Qdisc *sch, + unsigned int new_real_tx) +{ + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + unsigned int i; + + for (i = new_real_tx; i < dev->real_num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + /* Only update the default qdiscs we created, + * qdiscs with handles are always hashed. + */ + if (qdisc != &noop_qdisc && !qdisc->handle) + qdisc_hash_del(qdisc); + } + for (i = dev->real_num_tx_queues; i < new_real_tx; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping; + if (qdisc != &noop_qdisc && !qdisc->handle) + qdisc_hash_add(qdisc, false); + } +} + static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, unsigned long cl) { @@ -529,22 +551,28 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, for (i = tc.offset; i < tc.offset + tc.count; i++) { struct netdev_queue *q = netdev_get_tx_queue(dev, i); struct Qdisc *qdisc = rtnl_dereference(q->qdisc); - struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; - struct gnet_stats_queue __percpu *cpu_qstats = NULL; spin_lock_bh(qdisc_lock(qdisc)); + if (qdisc_is_percpu_stats(qdisc)) { - cpu_bstats = qdisc->cpu_bstats; - cpu_qstats = qdisc->cpu_qstats; + qlen = qdisc_qlen_sum(qdisc); + + __gnet_stats_copy_basic(NULL, &bstats, + qdisc->cpu_bstats, + &qdisc->bstats); + __gnet_stats_copy_queue(&qstats, + qdisc->cpu_qstats, + &qdisc->qstats, + qlen); + } else { + qlen += qdisc->q.qlen; + bstats.bytes += qdisc->bstats.bytes; + bstats.packets += qdisc->bstats.packets; + qstats.backlog += qdisc->qstats.backlog; + qstats.drops += qdisc->qstats.drops; + qstats.requeues += qdisc->qstats.requeues; + qstats.overlimits += qdisc->qstats.overlimits; } - - qlen = qdisc_qlen_sum(qdisc); - __gnet_stats_copy_basic(NULL, &sch->bstats, - cpu_bstats, &qdisc->bstats); - __gnet_stats_copy_queue(&sch->qstats, - cpu_qstats, - &qdisc->qstats, - qlen); spin_unlock_bh(qdisc_lock(qdisc)); } @@ -623,6 +651,7 @@ static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { .init = mqprio_init, .destroy = mqprio_destroy, .attach = mqprio_attach, + .change_real_num_tx = mqprio_change_real_num_tx, .dump = mqprio_dump, .owner = THIS_MODULE, }; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index ade2d6ddc91483a51e3e3057f51b1e1e75707dd1..af8c63a9ec18c53cdca0ac0bee6ff97af58996ec 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1421,10 +1421,8 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; - if (qdisc_dev(sch)->tx_queue_len + 1 > QFQ_MAX_AGG_CLASSES) - max_classes = QFQ_MAX_AGG_CLASSES; - else - max_classes = qdisc_dev(sch)->tx_queue_len + 1; + max_classes = min_t(u64, (u64)qdisc_dev(sch)->tx_queue_len + 1, + QFQ_MAX_AGG_CLASSES); /* max_cl_shift = floor(log_2(max_classes)) */ max_cl_shift = __fls(max_classes); q->max_agg_classes = 1<base_time); } -static ktime_t taprio_get_time(struct taprio_sched *q) +static ktime_t taprio_mono_to_any(const struct taprio_sched *q, ktime_t mono) { - ktime_t mono = ktime_get(); + /* This pairs with WRITE_ONCE() in taprio_parse_clockid() */ + enum tk_offsets tk_offset = READ_ONCE(q->tk_offset); - switch (q->tk_offset) { + switch (tk_offset) { case TK_OFFS_MAX: return mono; default: - return ktime_mono_to_any(mono, q->tk_offset); + return ktime_mono_to_any(mono, tk_offset); } +} - return KTIME_MAX; +static ktime_t taprio_get_time(const struct taprio_sched *q) +{ + return taprio_mono_to_any(q, ktime_get()); } static void taprio_free_sched_cb(struct rcu_head *head) @@ -321,7 +325,7 @@ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) return 0; } - return ktime_mono_to_any(skb->skb_mstamp_ns, q->tk_offset); + return taprio_mono_to_any(q, skb->skb_mstamp_ns); } /* There are a few scenarios where we will have to modify the txtime from @@ -1341,6 +1345,7 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, } } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + enum tk_offsets tk_offset; /* We only support static clockids and we don't allow * for it to be modified after the first init. @@ -1355,22 +1360,24 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, switch (clockid) { case CLOCK_REALTIME: - q->tk_offset = TK_OFFS_REAL; + tk_offset = TK_OFFS_REAL; break; case CLOCK_MONOTONIC: - q->tk_offset = TK_OFFS_MAX; + tk_offset = TK_OFFS_MAX; break; case CLOCK_BOOTTIME: - q->tk_offset = TK_OFFS_BOOT; + tk_offset = TK_OFFS_BOOT; break; case CLOCK_TAI: - q->tk_offset = TK_OFFS_TAI; + tk_offset = TK_OFFS_TAI; break; default: NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); err = -EINVAL; goto out; } + /* This pairs with READ_ONCE() in taprio_mono_to_any */ + WRITE_ONCE(q->tk_offset, tk_offset); q->clockid = clockid; } else { @@ -1630,6 +1637,10 @@ static void taprio_destroy(struct Qdisc *sch) list_del(&q->taprio_list); spin_unlock(&taprio_list_lock); + /* Note that taprio_reset() might not be called if an error + * happens in qdisc_create(), after taprio_init() has been called. + */ + hrtimer_cancel(&q->advance_timer); taprio_disable_offload(dev, q, NULL); diff --git a/net/sctp/diag.c b/net/sctp/diag.c index 493fc01e5d2b7c6c5a00918bd8bd4baf80dde563..68ff82ff49a3dc0373b1992618f606cf694a93d0 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -61,10 +61,6 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; r->idiag_retrans = asoc->rtx_data_chunks; r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies); - } else { - r->idiag_timer = 0; - r->idiag_retrans = 0; - r->idiag_expires = 0; } } @@ -144,13 +140,14 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, r = nlmsg_data(nlh); BUG_ON(!sk_fullsock(sk)); + r->idiag_timer = 0; + r->idiag_retrans = 0; + r->idiag_expires = 0; if (asoc) { inet_diag_msg_sctpasoc_fill(r, sk, asoc); } else { inet_diag_msg_common_fill(r, sk); r->idiag_state = sk->sk_state; - r->idiag_timer = 0; - r->idiag_retrans = 0; } if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) @@ -292,9 +289,8 @@ out: return err; } -static int sctp_sock_dump(struct sctp_transport *tsp, void *p) +static int sctp_sock_dump(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; @@ -304,6 +300,8 @@ static int sctp_sock_dump(struct sctp_transport *tsp, void *p) int err = 0; lock_sock(sk); + if (ep != tsp->asoc->ep) + goto release; list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -346,9 +344,8 @@ release: return err; } -static int sctp_sock_filter(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp, void *p) { - struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; const struct inet_diag_req_v2 *r = commp->r; @@ -507,8 +504,8 @@ skip: if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; - sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, - net, &pos, &commp); + sctp_transport_traverse_process(sctp_sock_filter, sctp_sock_dump, + net, &pos, &commp); cb->args[2] = pos; done: diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 48c9c2c7602f78a099be61c93fe170ed3fc13574..efffde7f2328e4e954bbcf8c1942fb650a590166 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -184,6 +184,18 @@ void sctp_endpoint_free(struct sctp_endpoint *ep) } /* Final destructor for endpoint. */ +static void sctp_endpoint_destroy_rcu(struct rcu_head *head) +{ + struct sctp_endpoint *ep = container_of(head, struct sctp_endpoint, rcu); + struct sock *sk = ep->base.sk; + + sctp_sk(sk)->ep = NULL; + sock_put(sk); + + kfree(ep); + SCTP_DBG_OBJCNT_DEC(ep); +} + static void sctp_endpoint_destroy(struct sctp_endpoint *ep) { struct sock *sk; @@ -213,18 +225,13 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) if (sctp_sk(sk)->bind_hash) sctp_put_port(sk); - sctp_sk(sk)->ep = NULL; - /* Give up our hold on the sock */ - sock_put(sk); - - kfree(ep); - SCTP_DBG_OBJCNT_DEC(ep); + call_rcu(&ep->rcu, sctp_endpoint_destroy_rcu); } /* Hold a reference to an endpoint. */ -void sctp_endpoint_hold(struct sctp_endpoint *ep) +int sctp_endpoint_hold(struct sctp_endpoint *ep) { - refcount_inc(&ep->base.refcnt); + return refcount_inc_not_zero(&ep->base.refcnt); } /* Release a reference to an endpoint and clean up if there are diff --git a/net/sctp/input.c b/net/sctp/input.c index 49c49a4d203f0b68f55eb2864c15b70abbf661b3..34494a0b28bd0c911884e70e324156b73ba11c77 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -677,7 +677,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb) ch = skb_header_pointer(skb, offset, sizeof(*ch), &_ch); /* Break out if chunk length is less then minimal. */ - if (ntohs(ch->length) < sizeof(_ch)) + if (!ch || ntohs(ch->length) < sizeof(_ch)) break; ch_end = offset + SCTP_PAD4(ntohs(ch->length)); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fa0d96320baae70f876776dc91b0cccf2f706409..64e0f4864b73373a9395b570652a745b2f03b106 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -3651,7 +3651,7 @@ struct sctp_chunk *sctp_make_strreset_req( outlen = (sizeof(outreq) + stream_len) * out; inlen = (sizeof(inreq) + stream_len) * in; - retval = sctp_make_reconf(asoc, outlen + inlen); + retval = sctp_make_reconf(asoc, SCTP_PAD4(outlen) + SCTP_PAD4(inlen)); if (!retval) return NULL; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b65bdaa84228f6dad3a27817d895267abe21d10d..ee0b2b03657ca5b8e9fa637c2992c4af1592d3de 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -149,6 +149,12 @@ static enum sctp_disposition __sctp_sf_do_9_1_abort( void *arg, struct sctp_cmd_seq *commands); +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands); + /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument * is set to be the size of a specific chunk we are testing. @@ -330,6 +336,14 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. + * Normally, this would cause an ABORT with a Protocol Violation + * error, but since we don't have an association, we'll + * just discard the packet. + */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -344,14 +358,6 @@ enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * Normally, this would cause an ABORT with a Protocol Violation - * error, but since we don't have an association, we'll - * just discard the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* If the INIT is coming toward a closing socket, we'll send back * and ABORT. Essentially, this catches the race of INIT being * backloged to the socket at the same time as the user isses close(). @@ -697,6 +703,9 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, struct sock *sk; int error = 0; + if (asoc && !sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ @@ -711,7 +720,8 @@ enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, * in sctp_unpack_cookie(). */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); /* If the endpoint is not listening or if the number of associations * on the TCP-style socket exceed the max backlog, respond with an @@ -1480,19 +1490,16 @@ static enum sctp_disposition sctp_sf_do_unexpected_init( if (!chunk->singleton) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the INIT chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. */ if (chunk->sctp_hdr->vtag != 0) return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); - /* Make sure that the INIT chunk has a valid length. - * In this case, we generate a protocol violation since we have - * an association established. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); /* Grab the INIT header. */ chunk->subh.init_hdr = (struct sctp_inithdr *)chunk->skb->data; @@ -1810,9 +1817,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( * its peer. */ if (sctp_state(asoc, SHUTDOWN_ACK_SENT)) { - disposition = sctp_sf_do_9_2_reshutack(net, ep, asoc, - SCTP_ST_CHUNK(chunk->chunk_hdr->type), - chunk, commands); + disposition = __sctp_sf_do_9_2_reshutack(net, ep, asoc, + SCTP_ST_CHUNK(chunk->chunk_hdr->type), + chunk, commands); if (SCTP_DISPOSITION_NOMEM == disposition) goto nomem; @@ -2141,9 +2148,11 @@ enum sctp_disposition sctp_sf_do_5_2_4_dupcook( * enough for the chunk header. Cookie length verification is * done later. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) { + if (!sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); + } /* "Decode" the chunk. We have no optional parameters so we * are in good shape. @@ -2280,7 +2289,7 @@ enum sctp_disposition sctp_sf_shutdown_pending_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2326,7 +2335,7 @@ enum sctp_disposition sctp_sf_shutdown_sent_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2596,7 +2605,7 @@ enum sctp_disposition sctp_sf_do_9_1_abort( */ if (SCTP_ADDR_DEL == sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); if (!sctp_err_chunk_valid(chunk)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2909,13 +2918,11 @@ enum sctp_disposition sctp_sf_do_9_2_shut_ctsn( * that belong to this association, it should discard the INIT chunk and * retransmit the SHUTDOWN ACK chunk. */ -enum sctp_disposition sctp_sf_do_9_2_reshutack( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const union sctp_subtype type, - void *arg, - struct sctp_cmd_seq *commands) +static enum sctp_disposition +__sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; @@ -2949,6 +2956,26 @@ nomem: return SCTP_DISPOSITION_NOMEM; } +enum sctp_disposition +sctp_sf_do_9_2_reshutack(struct net *net, const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, void *arg, + struct sctp_cmd_seq *commands) +{ + struct sctp_chunk *chunk = arg; + + if (!chunk->singleton) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_init_chunk))) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + + if (chunk->sctp_hdr->vtag != 0) + return sctp_sf_tabort_8_4_8(net, ep, asoc, type, arg, commands); + + return __sctp_sf_do_9_2_reshutack(net, ep, asoc, type, arg, commands); +} + /* * sctp_sf_do_ecn_cwr * @@ -3562,6 +3589,9 @@ enum sctp_disposition sctp_sf_ootb(struct net *net, SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); + if (asoc && !sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + ch = (struct sctp_chunkhdr *)chunk->chunk_hdr; do { /* Report violation if the chunk is less then minimal */ @@ -3677,12 +3707,6 @@ static enum sctp_disposition sctp_sf_shut_8_4_5( SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); - /* If the chunk length is invalid, we don't want to process - * the reset of the packet. - */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - /* We need to discard the rest of the packet to prevent * potential bomming attacks from additional bundled chunks. * This is documented in SCTP Threats ID. @@ -3710,6 +3734,9 @@ enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net, { struct sctp_chunk *chunk = arg; + if (!sctp_vtag_verify(chunk, asoc)) + asoc = NULL; + /* Make sure that the SHUTDOWN_ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, @@ -3745,6 +3772,11 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + /* Make sure that the ASCONF ADDIP chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* ADD-IP: Section 4.1.1 * This chunk MUST be sent in an authenticated way by using * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk @@ -3753,13 +3785,7 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, */ if (!asoc->peer.asconf_capable || (!net->sctp.addip_noauth && !chunk->auth)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, - commands); - - /* Make sure that the ASCONF ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); hdr = (struct sctp_addiphdr *)chunk->skb->data; serial = ntohl(hdr->serial); @@ -3888,6 +3914,12 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); } + /* Make sure that the ADDIP chunk has a valid length. */ + if (!sctp_chunk_length_valid(asconf_ack, + sizeof(struct sctp_addip_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, + commands); + /* ADD-IP, Section 4.1.2: * This chunk MUST be sent in an authenticated way by using * the mechanism defined in [I-D.ietf-tsvwg-sctp-auth]. If this chunk @@ -3896,14 +3928,7 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, */ if (!asoc->peer.asconf_capable || (!net->sctp.addip_noauth && !asconf_ack->auth)) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, - commands); - - /* Make sure that the ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(asconf_ack, - sizeof(struct sctp_addip_chunk))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data; rcvd_serial = ntohl(addip_hdr->serial); @@ -4475,6 +4500,9 @@ enum sctp_disposition sctp_sf_discard_chunk(struct net *net, { struct sctp_chunk *chunk = arg; + if (asoc && !sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the chunk has a valid length. * Since we don't know the chunk type, we use a general * chunkhdr structure to make a comparison. @@ -4542,6 +4570,9 @@ enum sctp_disposition sctp_sf_violation(struct net *net, { struct sctp_chunk *chunk = arg; + if (!sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + /* Make sure that the chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, @@ -6248,6 +6279,7 @@ static struct sctp_packet *sctp_ootb_pkt_new( * yet. */ switch (chunk->chunk_hdr->type) { + case SCTP_CID_INIT: case SCTP_CID_INIT_ACK: { struct sctp_initack_chunk *initack; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index f87c82378872a13949d492d3b605c33d8391af85..89ce8764422be7e80bf9acde9322b31c52f5345c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5228,11 +5228,12 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), } EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); -int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - int (*cb_done)(struct sctp_transport *, void *), - struct net *net, int *pos, void *p) { +int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, + struct net *net, int *pos, void *p) +{ struct rhashtable_iter hti; struct sctp_transport *tsp; + struct sctp_endpoint *ep; int ret; again: @@ -5241,26 +5242,32 @@ again: tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { - ret = cb(tsp, p); - if (ret) - break; + ep = tsp->asoc->ep; + if (sctp_endpoint_hold(ep)) { /* asoc can be peeled off */ + ret = cb(ep, tsp, p); + if (ret) + break; + sctp_endpoint_put(ep); + } (*pos)++; sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); if (ret) { - if (cb_done && !cb_done(tsp, p)) { + if (cb_done && !cb_done(ep, tsp, p)) { (*pos)++; + sctp_endpoint_put(ep); sctp_transport_put(tsp); goto again; } + sctp_endpoint_put(ep); sctp_transport_put(tsp); } return ret; } -EXPORT_SYMBOL_GPL(sctp_for_each_transport); +EXPORT_SYMBOL_GPL(sctp_transport_traverse_process); /* 7.2.1 Association Status (SCTP_STATUS) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 030d7f30b13fe0bcfed8c87ecb260abad5c9e15e..4f16d406ad8ea9e1716ee26ca826c13c2cf876df 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -146,14 +146,18 @@ static int __smc_release(struct smc_sock *smc) sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; } else { - if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) - sock_put(sk); /* passive closing */ - if (sk->sk_state == SMC_LISTEN) { - /* wake up clcsock accept */ - rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); + if (sk->sk_state != SMC_CLOSED) { + if (sk->sk_state != SMC_LISTEN && + sk->sk_state != SMC_INIT) + sock_put(sk); /* passive closing */ + if (sk->sk_state == SMC_LISTEN) { + /* wake up clcsock accept */ + rc = kernel_sock_shutdown(smc->clcsock, + SHUT_RDWR); + } + sk->sk_state = SMC_CLOSED; + sk->sk_state_change(sk); } - sk->sk_state = SMC_CLOSED; - sk->sk_state_change(sk); smc_restore_fallback_changes(smc); } @@ -176,7 +180,7 @@ static int smc_release(struct socket *sock) { struct sock *sk = sock->sk; struct smc_sock *smc; - int rc = 0; + int old_state, rc = 0; if (!sk) goto out; @@ -184,10 +188,14 @@ static int smc_release(struct socket *sock) sock_hold(sk); /* sock_put below */ smc = smc_sk(sk); + old_state = sk->sk_state; + /* cleanup for a dangling non-blocking connect */ - if (smc->connect_nonblock && sk->sk_state == SMC_INIT) + if (smc->connect_nonblock && old_state == SMC_INIT) tcp_abort(smc->clcsock->sk, ECONNABORTED); - flush_work(&smc->connect_work); + + if (cancel_work_sync(&smc->connect_work)) + sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */ if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires @@ -197,6 +205,10 @@ static int smc_release(struct socket *sock) else lock_sock(sk); + if (old_state == SMC_INIT && sk->sk_state == SMC_ACTIVE && + !smc->use_fallback) + smc_close_active_abort(smc); + rc = __smc_release(smc); /* detach socket */ @@ -509,12 +521,26 @@ static void smc_link_save_peer_info(struct smc_link *link, static void smc_switch_to_fallback(struct smc_sock *smc) { + wait_queue_head_t *smc_wait = sk_sleep(&smc->sk); + wait_queue_head_t *clc_wait = sk_sleep(smc->clcsock->sk); + unsigned long flags; + smc->use_fallback = true; if (smc->sk.sk_socket && smc->sk.sk_socket->file) { smc->clcsock->file = smc->sk.sk_socket->file; smc->clcsock->file->private_data = smc->clcsock; smc->clcsock->wq.fasync_list = smc->sk.sk_socket->wq.fasync_list; + + /* There may be some entries remaining in + * smc socket->wq, which should be removed + * to clcsocket->wq during the fallback. + */ + spin_lock_irqsave(&smc_wait->lock, flags); + spin_lock_nested(&clc_wait->lock, SINGLE_DEPTH_NESTING); + list_splice_init(&smc_wait->head, &clc_wait->head); + spin_unlock(&clc_wait->lock); + spin_unlock_irqrestore(&smc_wait->lock, flags); } } @@ -1018,7 +1044,7 @@ static void smc_connect_work(struct work_struct *work) if (smc->clcsock->sk->sk_err) { smc->sk.sk_err = smc->clcsock->sk->sk_err; } else if ((1 << smc->clcsock->sk->sk_state) & - (TCPF_SYN_SENT | TCP_SYN_RECV)) { + (TCPF_SYN_SENT | TCPF_SYN_RECV)) { rc = sk_stream_wait_connect(smc->clcsock->sk, &timeo); if ((rc == -EPIPE) && ((1 << smc->clcsock->sk->sk_state) & @@ -1860,8 +1886,10 @@ static int smc_listen(struct socket *sock, int backlog) smc->clcsock->sk->sk_user_data = (void *)((uintptr_t)smc | SK_USER_DATA_NOCOPY); rc = kernel_listen(smc->clcsock, backlog); - if (rc) + if (rc) { + smc->clcsock->sk->sk_data_ready = smc->clcsk_data_ready; goto out; + } sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; sk->sk_state = SMC_LISTEN; @@ -2092,8 +2120,10 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, static int smc_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; + bool do_shutdown = true; struct smc_sock *smc; int rc = -EINVAL; + int old_state; int rc1 = 0; smc = smc_sk(sk); @@ -2120,7 +2150,11 @@ static int smc_shutdown(struct socket *sock, int how) } switch (how) { case SHUT_RDWR: /* shutdown in both directions */ + old_state = sk->sk_state; rc = smc_close_active(smc); + if (old_state == SMC_ACTIVE && + sk->sk_state == SMC_PEERCLOSEWAIT1) + do_shutdown = false; break; case SHUT_WR: rc = smc_close_shutdown_write(smc); @@ -2130,7 +2164,7 @@ static int smc_shutdown(struct socket *sock, int how) /* nothing more to do because peer is not involved */ break; } - if (smc->clcsock) + if (do_shutdown && smc->clcsock) rc1 = kernel_sock_shutdown(smc->clcsock, how); /* map sock_shutdown_cmd constants to sk_shutdown value range */ sk->sk_shutdown |= how + 1; diff --git a/net/smc/smc.h b/net/smc/smc.h index d65e15f0c944c1d599492261ac240445dfd64356..e6919fe31617b5514657a6e8719da1fe414fb501 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -170,6 +170,11 @@ struct smc_connection { u16 tx_cdc_seq; /* sequence # for CDC send */ u16 tx_cdc_seq_fin; /* sequence # - tx completed */ spinlock_t send_lock; /* protect wr_sends */ + atomic_t cdc_pend_tx_wr; /* number of pending tx CDC wqe + * - inc when post wqe, + * - dec on polled tx cqe + */ + wait_queue_head_t cdc_pend_tx_wq; /* wakeup on no cdc_pend_tx_wr*/ struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ u32 tx_off; /* base offset in peer rmb */ diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index b1ce6ccbfaec8b640261e388445090d796255ab3..0c490cdde6a49f20256f09895a67e272df1f2a86 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -31,10 +31,6 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, struct smc_sock *smc; int diff; - if (!conn) - /* already dismissed */ - return; - smc = container_of(conn, struct smc_sock, conn); bh_lock_sock(&smc->sk); if (!wc_status) { @@ -51,6 +47,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, conn); conn->tx_cdc_seq_fin = cdcpend->ctrl_seq; } + + if (atomic_dec_and_test(&conn->cdc_pend_tx_wr) && + unlikely(wq_has_sleeper(&conn->cdc_pend_tx_wq))) + wake_up(&conn->cdc_pend_tx_wq); + WARN_ON(atomic_read(&conn->cdc_pend_tx_wr) < 0); + smc_tx_sndbuf_nonfull(smc); bh_unlock_sock(&smc->sk); } @@ -107,6 +109,10 @@ int smc_cdc_msg_send(struct smc_connection *conn, conn->tx_cdc_seq++; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; smc_host_msg_to_cdc((struct smc_cdc_msg *)wr_buf, conn, &cfed); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); if (!rc) { smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn); @@ -114,6 +120,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, } else { conn->tx_cdc_seq--; conn->local_tx_ctrl.seqno = conn->tx_cdc_seq; + atomic_dec(&conn->cdc_pend_tx_wr); } return rc; @@ -136,7 +143,18 @@ int smcr_cdc_msg_send_validation(struct smc_connection *conn, peer->token = htonl(local->token); peer->prod_flags.failover_validation = 1; + /* We need to set pend->conn here to make sure smc_cdc_tx_handler() + * can handle properly + */ + smc_cdc_add_pending_send(conn, pend); + + atomic_inc(&conn->cdc_pend_tx_wr); + smp_mb__after_atomic(); /* Make sure cdc_pend_tx_wr added before post */ + rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend); + if (unlikely(rc)) + atomic_dec(&conn->cdc_pend_tx_wr); + return rc; } @@ -150,9 +168,11 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn) again: link = conn->lnk; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend); if (rc) - return rc; + goto put_out; spin_lock_bh(&conn->send_lock); if (link != conn->lnk) { @@ -160,6 +180,7 @@ again: spin_unlock_bh(&conn->send_lock); smc_wr_tx_put_slot(link, (struct smc_wr_tx_pend_priv *)pend); + smc_wr_tx_link_put(link); if (again) return -ENOLINK; again = true; @@ -167,6 +188,8 @@ again: } rc = smc_cdc_msg_send(conn, wr_buf, pend); spin_unlock_bh(&conn->send_lock); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -188,31 +211,9 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) return rc; } -static bool smc_cdc_tx_filter(struct smc_wr_tx_pend_priv *tx_pend, - unsigned long data) +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn) { - struct smc_connection *conn = (struct smc_connection *)data; - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - return cdc_pend->conn == conn; -} - -static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend) -{ - struct smc_cdc_tx_pend *cdc_pend = - (struct smc_cdc_tx_pend *)tx_pend; - - cdc_pend->conn = NULL; -} - -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn) -{ - struct smc_link *link = conn->lnk; - - smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE, - smc_cdc_tx_filter, smc_cdc_tx_dismisser, - (unsigned long)conn); + wait_event(conn->cdc_pend_tx_wq, !atomic_read(&conn->cdc_pend_tx_wr)); } /* Send a SMC-D CDC header. diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 0a0a89abd38b29f3c290959a60a9f8832da0a9fd..696cc11f2303b95318f6750479bb8abffde3ca24 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -291,7 +291,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); -void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); +void smc_cdc_wait_pend_tx_wr(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, struct smc_cdc_tx_pend *pend); int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn); diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 0f9ffba07d26858cfd07c53567e14700884553d4..84102db5bb31452ed1e8aec94514418e4741d464 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -195,6 +195,7 @@ int smc_close_active(struct smc_sock *smc) int old_state; long timeout; int rc = 0; + int rc1 = 0; timeout = current->flags & PF_EXITING ? 0 : sock_flag(sk, SOCK_LINGER) ? @@ -228,6 +229,15 @@ again: /* send close request */ rc = smc_close_final(conn); sk->sk_state = SMC_PEERCLOSEWAIT1; + + /* actively shutdown clcsock before peer close it, + * prevent peer from entering TIME_WAIT state. + */ + if (smc->clcsock && smc->clcsock->sk) { + rc1 = kernel_sock_shutdown(smc->clcsock, + SHUT_RDWR); + rc = rc ? rc : rc1; + } } else { /* peer event has changed the state */ goto again; diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index c491dd8e67cda3012a52d15f915e75930b67ef8a..d69aac6c1fcea555bdaecb434655f1093da8265e 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -204,18 +204,17 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) void smc_lgr_cleanup_early(struct smc_connection *conn) { struct smc_link_group *lgr = conn->lgr; - struct list_head *lgr_list; spinlock_t *lgr_lock; if (!lgr) return; smc_conn_free(conn); - lgr_list = smc_lgr_list_head(lgr, &lgr_lock); + smc_lgr_list_head(lgr, &lgr_lock); spin_lock_bh(lgr_lock); /* do not use this link group for new connections */ - if (!list_empty(lgr_list)) - list_del_init(lgr_list); + if (!list_empty(&lgr->list)) + list_del_init(&lgr->list); spin_unlock_bh(lgr_lock); __smc_lgr_terminate(lgr, true); } @@ -227,7 +226,7 @@ static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *lnk = &lgr->lnk[i]; - if (smc_link_usable(lnk)) + if (smc_link_sendable(lnk)) lnk->state = SMC_LNK_INACTIVE; } wake_up_all(&lgr->llc_msg_waiter); @@ -287,13 +286,14 @@ static u8 smcr_next_link_id(struct smc_link_group *lgr) int i; while (1) { +again: link_id = ++lgr->next_link_id; if (!link_id) /* skip zero as link_id */ link_id = ++lgr->next_link_id; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { if (smc_link_usable(&lgr->lnk[i]) && lgr->lnk[i].link_id == link_id) - continue; + goto again; } break; } @@ -550,7 +550,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, to_lnk = &lgr->lnk[i]; break; } - if (!to_lnk) { + if (!to_lnk || !smc_wr_tx_link_hold(to_lnk)) { smc_lgr_terminate_sched(lgr); return NULL; } @@ -582,24 +582,26 @@ again: read_unlock_bh(&lgr->conns_lock); /* pre-fetch buffer outside of send_lock, might sleep */ rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend); - if (rc) { - smcr_link_down_cond_sched(to_lnk); - return NULL; - } + if (rc) + goto err_out; /* avoid race with smcr_tx_sndbuf_nonempty() */ spin_lock_bh(&conn->send_lock); conn->lnk = to_lnk; rc = smc_switch_cursor(smc, pend, wr_buf); spin_unlock_bh(&conn->send_lock); sock_put(&smc->sk); - if (rc) { - smcr_link_down_cond_sched(to_lnk); - return NULL; - } + if (rc) + goto err_out; goto again; } read_unlock_bh(&lgr->conns_lock); + smc_wr_tx_link_put(to_lnk); return to_lnk; + +err_out: + smcr_link_down_cond_sched(to_lnk); + smc_wr_tx_link_put(to_lnk); + return NULL; } static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc, @@ -655,13 +657,13 @@ void smc_conn_free(struct smc_connection *conn) smc_ism_unset_conn(conn); tasklet_kill(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); if (current_work() != &conn->abort_work) cancel_work_sync(&conn->abort_work); } if (!list_empty(&lgr->list)) { - smc_lgr_unregister_conn(conn); smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + smc_lgr_unregister_conn(conn); } if (!lgr->conns_num) @@ -732,7 +734,7 @@ void smcr_link_clear(struct smc_link *lnk, bool log) smc_llc_link_clear(lnk, log); smcr_buf_unmap_lgr(lnk); smcr_rtoken_clear_link(lnk); - smc_ib_modify_qp_reset(lnk); + smc_ib_modify_qp_error(lnk); smc_wr_free_link(lnk); smc_ib_destroy_queue_pair(lnk); smc_ib_dealloc_protection_domain(lnk); @@ -876,7 +878,7 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft) else tasklet_unlock_wait(&conn->rx_tsklet); } else { - smc_cdc_tx_dismiss_slots(conn); + smc_cdc_wait_pend_tx_wr(conn); } smc_lgr_unregister_conn(conn); smc_close_active_abort(smc); @@ -1127,7 +1129,6 @@ static void smcr_link_down(struct smc_link *lnk) if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list)) return; - smc_ib_modify_qp_reset(lnk); to_lnk = smc_switch_conns(lgr, lnk, true); if (!to_lnk) { /* no backup link available */ smcr_link_clear(lnk, true); @@ -1208,14 +1209,26 @@ static void smc_link_down_work(struct work_struct *work) mutex_unlock(&lgr->llc_conf_mutex); } -/* Determine vlan of internal TCP socket. - * @vlan_id: address to store the determined vlan id into - */ +static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev, + struct netdev_nested_priv *priv) +{ + unsigned short *vlan_id = (unsigned short *)priv->data; + + if (is_vlan_dev(lower_dev)) { + *vlan_id = vlan_dev_vlan_id(lower_dev); + return 1; + } + + return 0; +} + +/* Determine vlan of internal TCP socket. */ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) { struct dst_entry *dst = sk_dst_get(clcsock->sk); + struct netdev_nested_priv priv; struct net_device *ndev; - int i, nest_lvl, rc = 0; + int rc = 0; ini->vlan_id = 0; if (!dst) { @@ -1233,20 +1246,9 @@ int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) goto out_rel; } + priv.data = (void *)&ini->vlan_id; rtnl_lock(); - nest_lvl = ndev->lower_level; - for (i = 0; i < nest_lvl; i++) { - struct list_head *lower = &ndev->adj_list.lower; - - if (list_empty(lower)) - break; - lower = lower->next; - ndev = (struct net_device *)netdev_lower_get_next(ndev, &lower); - if (is_vlan_dev(ndev)) { - ini->vlan_id = vlan_dev_vlan_id(ndev); - break; - } - } + netdev_walk_all_lower_dev(ndev, smc_vlan_by_tcpsk_walk, &priv); rtnl_unlock(); out_rel: @@ -1314,7 +1316,8 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) (ini->smcd_version == SMC_V2 || lgr->vlan_id == ini->vlan_id) && (role == SMC_CLNT || ini->is_smcd || - lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { + (lgr->conns_num < SMC_RMBS_PER_LGR_MAX && + !bitmap_full(lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) { /* link group found */ ini->first_contact_local = 0; conn->lgr = lgr; @@ -1354,6 +1357,7 @@ create: conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; conn->urg_state = SMC_URG_READ; + init_waitqueue_head(&conn->cdc_pend_tx_wq); INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); if (ini->is_smcd) { conn->rx_off = sizeof(struct smcd_cdc_msg); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 4745a9a5a28f5d921d816bdfee0b74f33575a2d7..9364d0f35ccecf7352ce59a9ae26ef2b7d7df686 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -359,6 +359,12 @@ static inline bool smc_link_usable(struct smc_link *lnk) return true; } +static inline bool smc_link_sendable(struct smc_link *lnk) +{ + return smc_link_usable(lnk) && + lnk->qp_attr.cur_qp_state == IB_QPS_RTS; +} + static inline bool smc_link_active(struct smc_link *lnk) { return lnk->state == SMC_LNK_ACTIVE; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index fc766b537ac7a9c2b36272c7f9b03e567b7bc6d0..f1ffbd414602ee7f9dfc42f4cfdc73151031ed80 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -100,12 +100,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk) IB_QP_MAX_QP_RD_ATOMIC); } -int smc_ib_modify_qp_reset(struct smc_link *lnk) +int smc_ib_modify_qp_error(struct smc_link *lnk) { struct ib_qp_attr qp_attr; memset(&qp_attr, 0, sizeof(qp_attr)); - qp_attr.qp_state = IB_QPS_RESET; + qp_attr.qp_state = IB_QPS_ERR; return ib_modify_qp(lnk->roce_qp, &qp_attr, IB_QP_STATE); } diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index 2ce481187dd0bec5bfb5a339bcac11c5eadbb553..f90d15eae2aabc73fddf57261df86b245df5ea2b 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -74,6 +74,7 @@ int smc_ib_create_queue_pair(struct smc_link *lnk); int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); +int smc_ib_modify_qp_error(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, struct smc_buf_desc *buf_slot, u8 link_idx); diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 2e7560eba9812635c22f18f41aafc38b063dd02a..ee1f0fdba08558690b5f20e8756e9ab83f2138d9 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -383,9 +383,11 @@ int smc_llc_send_confirm_link(struct smc_link *link, struct smc_wr_buf *wr_buf; int rc; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; confllc = (struct smc_llc_msg_confirm_link *)wr_buf; memset(confllc, 0, sizeof(*confllc)); confllc->hd.common.type = SMC_LLC_CONFIRM_LINK; @@ -402,6 +404,8 @@ int smc_llc_send_confirm_link(struct smc_link *link, confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* send llc message */ rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -415,9 +419,11 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, struct smc_link *link; int i, rc, rtok_ix; + if (!smc_wr_tx_link_hold(send_link)) + return -ENOLINK; rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf; memset(rkeyllc, 0, sizeof(*rkeyllc)); rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY; @@ -444,6 +450,8 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, (u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl)); /* send llc message */ rc = smc_wr_tx_send(send_link, pend); +put_out: + smc_wr_tx_link_put(send_link); return rc; } @@ -456,9 +464,11 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, struct smc_wr_buf *wr_buf; int rc; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf; memset(rkeyllc, 0, sizeof(*rkeyllc)); rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY; @@ -467,6 +477,8 @@ static int smc_llc_send_delete_rkey(struct smc_link *link, rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); /* send llc message */ rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -480,9 +492,11 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], struct smc_wr_buf *wr_buf; int rc; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; addllc = (struct smc_llc_msg_add_link *)wr_buf; memset(addllc, 0, sizeof(*addllc)); @@ -504,6 +518,8 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[], } /* send llc message */ rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -517,9 +533,11 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, struct smc_wr_buf *wr_buf; int rc; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; delllc = (struct smc_llc_msg_del_link *)wr_buf; memset(delllc, 0, sizeof(*delllc)); @@ -536,6 +554,8 @@ int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id, delllc->reason = htonl(reason); /* send llc message */ rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -547,9 +567,11 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16]) struct smc_wr_buf *wr_buf; int rc; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; testllc = (struct smc_llc_msg_test_link *)wr_buf; memset(testllc, 0, sizeof(*testllc)); testllc->hd.common.type = SMC_LLC_TEST_LINK; @@ -557,6 +579,8 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16]) memcpy(testllc->user_data, user_data, sizeof(testllc->user_data)); /* send llc message */ rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); return rc; } @@ -567,13 +591,16 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf) struct smc_wr_buf *wr_buf; int rc; - if (!smc_link_usable(link)) + if (!smc_wr_tx_link_hold(link)) return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg)); - return smc_wr_tx_send(link, pend); + rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); + return rc; } /* schedule an llc send on link, may wait for buffers, @@ -586,13 +613,16 @@ static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf) struct smc_wr_buf *wr_buf; int rc; - if (!smc_link_usable(link)) + if (!smc_wr_tx_link_hold(link)) return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg)); - return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME); + rc = smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME); +put_out: + smc_wr_tx_link_put(link); + return rc; } /********************************* receive ***********************************/ @@ -672,9 +702,11 @@ static int smc_llc_add_link_cont(struct smc_link *link, struct smc_buf_desc *rmb; u8 n; + if (!smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_llc_add_pending_send(link, &wr_buf, &pend); if (rc) - return rc; + goto put_out; addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf; memset(addc_llc, 0, sizeof(*addc_llc)); @@ -706,7 +738,10 @@ static int smc_llc_add_link_cont(struct smc_link *link, addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont); if (lgr->role == SMC_CLNT) addc_llc->hd.flags |= SMC_LLC_FLAG_RESP; - return smc_wr_tx_send(link, pend); + rc = smc_wr_tx_send(link, pend); +put_out: + smc_wr_tx_link_put(link); + return rc; } static int smc_llc_cli_rkey_exchange(struct smc_link *link, @@ -1323,7 +1358,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn) delllc.reason = htonl(rsn); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&lgr->lnk[i])) + if (!smc_link_sendable(&lgr->lnk[i])) continue; if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc)) break; @@ -1787,7 +1822,7 @@ void smc_llc_link_active(struct smc_link *link) link->smcibdev->ibdev->name, link->ibport); link->state = SMC_LNK_ACTIVE; if (link->lgr->llc_testlink_time) { - link->llc_testlink_time = link->lgr->llc_testlink_time * HZ; + link->llc_testlink_time = link->lgr->llc_testlink_time; schedule_delayed_work(&link->llc_testlink_wrk, link->llc_testlink_time); } diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index f3c18b991d35c42442b332fbe280e976a8fd6517..9007c7e3bae4e89470ea7fd72061a5f777910f60 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -112,7 +112,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) pnettable = &sn->pnettable; /* remove table entry */ - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (!pnet_name || @@ -130,7 +130,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name) rc = 0; } } - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); /* if this is not the initial namespace, stop here */ if (net != &init_net) @@ -191,7 +191,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev && !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) { @@ -205,7 +205,7 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev) break; } } - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -223,7 +223,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) { dev_put(pnetelem->ndev); @@ -236,7 +236,7 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev) break; } } - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -371,7 +371,7 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, rc = -EEXIST; new_netdev = true; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_ETH && !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) { @@ -381,9 +381,9 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net, } if (new_netdev) { list_add_tail(&new_pe->list, &pnettable->pnetlist); - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); } else { - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); kfree(new_pe); goto out_put; } @@ -445,7 +445,7 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, new_pe->ib_port = ib_port; new_ibdev = true; - write_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { @@ -455,9 +455,9 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name, } if (new_ibdev) { list_add_tail(&new_pe->list, &pnettable->pnetlist); - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); } else { - write_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); kfree(new_pe); } return (new_ibdev) ? 0 : -EEXIST; @@ -602,7 +602,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, pnettable = &sn->pnettable; /* dump pnettable entries */ - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid)) continue; @@ -617,7 +617,7 @@ static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid, break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return idx; } @@ -859,7 +859,7 @@ int smc_pnet_net_init(struct net *net) struct smc_pnetids_ndev *pnetids_ndev = &sn->pnetids_ndev; INIT_LIST_HEAD(&pnettable->pnetlist); - rwlock_init(&pnettable->lock); + mutex_init(&pnettable->lock); INIT_LIST_HEAD(&pnetids_ndev->list); rwlock_init(&pnetids_ndev->lock); @@ -939,7 +939,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, sn = net_generic(net, smc_net_id); pnettable = &sn->pnettable; - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(pnetelem, &pnettable->pnetlist, list) { if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) { /* get pnetid of netdev device */ @@ -948,7 +948,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev, break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -1129,7 +1129,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) sn = net_generic(&init_net, smc_net_id); pnettable = &sn->pnettable; - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) && @@ -1139,7 +1139,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port) break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } @@ -1158,7 +1158,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) sn = net_generic(&init_net, smc_net_id); pnettable = &sn->pnettable; - read_lock(&pnettable->lock); + mutex_lock(&pnettable->lock); list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_IB && !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) { @@ -1167,7 +1167,7 @@ int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev) break; } } - read_unlock(&pnettable->lock); + mutex_unlock(&pnettable->lock); return rc; } diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index 14039272f7e4263a3d86f29d0c4f048731af387f..80a88eea494918663f0263b9577c0ca49dcf1542 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -29,7 +29,7 @@ struct smc_link_group; * @pnetlist: List of PNETIDs */ struct smc_pnettable { - rwlock_t lock; + struct mutex lock; struct list_head pnetlist; }; diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index ff02952b3d03eff40ef13b91a042f38ea7a3afc2..52ef1fca0b604ce49d7d93da88c765748b14ae08 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -479,7 +479,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn, /* Wakeup sndbuf consumers from any context (IRQ or process) * since there is more data to transmit; usable snd_wnd as max transmit */ -static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn) +static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) { struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags; struct smc_link *link = conn->lnk; @@ -488,8 +488,11 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; + if (!link || !smc_wr_tx_link_hold(link)) + return -ENOLINK; rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend); if (rc < 0) { + smc_wr_tx_link_put(link); if (rc == -EBUSY) { struct smc_sock *smc = container_of(conn, struct smc_sock, conn); @@ -530,22 +533,7 @@ static int _smcr_tx_sndbuf_nonempty(struct smc_connection *conn) out_unlock: spin_unlock_bh(&conn->send_lock); - return rc; -} - -static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn) -{ - struct smc_link *link = conn->lnk; - int rc = -ENOLINK; - - if (!link) - return rc; - - atomic_inc(&link->wr_tx_refcnt); - if (smc_link_usable(link)) - rc = _smcr_tx_sndbuf_nonempty(conn); - if (atomic_dec_and_test(&link->wr_tx_refcnt)) - wake_up_all(&link->wr_tx_wait); + smc_wr_tx_link_put(link); return rc; } diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 9dbe4804853e051b62af3d9ba379a1e4b893a328..5a81f8c9ebf9050297163abe3e8093c1de3aabc0 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -62,13 +62,9 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link) } /* wait till all pending tx work requests on the given link are completed */ -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link) +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link) { - if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link), - SMC_WR_TX_WAIT_PENDING_TIME)) - return 0; - else /* timeout */ - return -EPIPE; + wait_event(link->wr_tx_wait, !smc_wr_is_tx_pend(link)); } static inline int smc_wr_tx_find_pending_index(struct smc_link *link, u64 wr_id) @@ -87,7 +83,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) struct smc_wr_tx_pend pnd_snd; struct smc_link *link; u32 pnd_snd_idx; - int i; link = wc->qp->qp_context; @@ -115,14 +110,6 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) if (!test_and_clear_bit(pnd_snd_idx, link->wr_tx_mask)) return; if (wc->status) { - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - /* clear full struct smc_wr_tx_pend including .priv */ - memset(&link->wr_tx_pends[i], 0, - sizeof(link->wr_tx_pends[i])); - memset(&link->wr_tx_bufs[i], 0, - sizeof(link->wr_tx_bufs[i])); - clear_bit(i, link->wr_tx_mask); - } /* terminate link */ smcr_link_down_cond_sched(link); } @@ -169,7 +156,7 @@ void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context) static inline int smc_wr_tx_get_free_slot_index(struct smc_link *link, u32 *idx) { *idx = link->wr_tx_cnt; - if (!smc_link_usable(link)) + if (!smc_link_sendable(link)) return -ENOLINK; for_each_clear_bit(*idx, link->wr_tx_mask, link->wr_tx_cnt) { if (!test_and_set_bit(*idx, link->wr_tx_mask)) @@ -212,7 +199,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link, } else { rc = wait_event_interruptible_timeout( link->wr_tx_wait, - !smc_link_usable(link) || + !smc_link_sendable(link) || lgr->terminating || (smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY), SMC_WR_TX_WAIT_FREE_SLOT_TIME); @@ -288,18 +275,20 @@ int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv, unsigned long timeout) { struct smc_wr_tx_pend *pend; + u32 pnd_idx; int rc; pend = container_of(priv, struct smc_wr_tx_pend, priv); pend->compl_requested = 1; - init_completion(&link->wr_tx_compl[pend->idx]); + pnd_idx = pend->idx; + init_completion(&link->wr_tx_compl[pnd_idx]); rc = smc_wr_tx_send(link, priv); if (rc) return rc; /* wait for completion by smc_wr_tx_process_cqe() */ rc = wait_for_completion_interruptible_timeout( - &link->wr_tx_compl[pend->idx], timeout); + &link->wr_tx_compl[pnd_idx], timeout); if (rc <= 0) rc = -ENODATA; if (rc > 0) @@ -349,25 +338,6 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) return rc; } -void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_tx_hdr_type, - smc_wr_tx_filter filter, - smc_wr_tx_dismisser dismisser, - unsigned long data) -{ - struct smc_wr_tx_pend_priv *tx_pend; - struct smc_wr_rx_hdr *wr_tx; - int i; - - for_each_set_bit(i, link->wr_tx_mask, link->wr_tx_cnt) { - wr_tx = (struct smc_wr_rx_hdr *)&link->wr_tx_bufs[i]; - if (wr_tx->type != wr_tx_hdr_type) - continue; - tx_pend = &link->wr_tx_pends[i].priv; - if (filter(tx_pend, data)) - dismisser(tx_pend); - } -} - /****************************** receive queue ********************************/ int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler) @@ -572,10 +542,7 @@ void smc_wr_free_link(struct smc_link *lnk) smc_wr_wakeup_reg_wait(lnk); smc_wr_wakeup_tx_wait(lnk); - if (smc_wr_tx_wait_no_pending_sends(lnk)) - memset(lnk->wr_tx_mask, 0, - BITS_TO_LONGS(SMC_WR_BUF_CNT) * - sizeof(*lnk->wr_tx_mask)); + smc_wr_tx_wait_no_pending_sends(lnk); wait_event(lnk->wr_reg_wait, (!atomic_read(&lnk->wr_reg_refcnt))); wait_event(lnk->wr_tx_wait, (!atomic_read(&lnk->wr_tx_refcnt))); diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 423b8709f1c9e4fd2462dc2bb929ebc24e01f454..cb58e60078f57aabef19d4a8ad29a184246bffe0 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -22,7 +22,6 @@ #define SMC_WR_BUF_CNT 16 /* # of ctrl buffers per link */ #define SMC_WR_TX_WAIT_FREE_SLOT_TIME (10 * HZ) -#define SMC_WR_TX_WAIT_PENDING_TIME (5 * HZ) #define SMC_WR_TX_SIZE 44 /* actual size of wr_send data (<=SMC_WR_BUF_SIZE) */ @@ -60,6 +59,20 @@ static inline void smc_wr_tx_set_wr_id(atomic_long_t *wr_tx_id, long val) atomic_long_set(wr_tx_id, val); } +static inline bool smc_wr_tx_link_hold(struct smc_link *link) +{ + if (!smc_link_sendable(link)) + return false; + atomic_inc(&link->wr_tx_refcnt); + return true; +} + +static inline void smc_wr_tx_link_put(struct smc_link *link) +{ + if (atomic_dec_and_test(&link->wr_tx_refcnt)) + wake_up_all(&link->wr_tx_wait); +} + static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk) { wake_up_all(&lnk->wr_tx_wait); @@ -108,7 +121,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, unsigned long data); -int smc_wr_tx_wait_no_pending_sends(struct smc_link *link); +void smc_wr_tx_wait_no_pending_sends(struct smc_link *link); int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); int smc_wr_rx_post_init(struct smc_link *link); diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index b3815c1e8f2ea874ae7c4a5eef3f3f9282c254a0..cd9954c4ad808a09f8a8ce23bfa30d635d9a7027 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -27,18 +27,10 @@ static struct workqueue_struct *strp_wq; -struct _strp_msg { - /* Internal cb structure. struct strp_msg must be first for passing - * to upper layer. - */ - struct strp_msg strp; - int accum_len; -}; - static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) { return (struct _strp_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); + offsetof(struct sk_skb_cb, strp)); } /* Lower lock held */ diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c index 6e4dbd577a39fa701460ade9cd1fde4563b63057..d435bffc6199978500dc9106ee63cad8a158f87e 100644 --- a/net/sunrpc/addr.c +++ b/net/sunrpc/addr.c @@ -162,8 +162,10 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, const size_t buflen, const char *delim, struct sockaddr_in6 *sin6) { - char *p; + char p[IPV6_SCOPE_ID_LEN + 1]; size_t len; + u32 scope_id = 0; + struct net_device *dev; if ((buf + buflen) == delim) return 1; @@ -175,29 +177,23 @@ static int rpc_parse_scope_id(struct net *net, const char *buf, return 0; len = (buf + buflen) - delim - 1; - p = kmemdup_nul(delim + 1, len, GFP_KERNEL); - if (p) { - u32 scope_id = 0; - struct net_device *dev; - - dev = dev_get_by_name(net, p); - if (dev != NULL) { - scope_id = dev->ifindex; - dev_put(dev); - } else { - if (kstrtou32(p, 10, &scope_id) != 0) { - kfree(p); - return 0; - } - } - - kfree(p); - - sin6->sin6_scope_id = scope_id; - return 1; + if (len > IPV6_SCOPE_ID_LEN) + return 0; + + memcpy(p, delim + 1, len); + p[len] = 0; + + dev = dev_get_by_name(net, p); + if (dev != NULL) { + scope_id = dev->ifindex; + dev_put(dev); + } else { + if (kstrtou32(p, 10, &scope_id) != 0) + return 0; } - return 0; + sin6->sin6_scope_id = scope_id; + return 1; } static size_t rpc_pton6(struct net *net, const char *buf, const size_t buflen, diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index e22f2d65457da45f3f0a14ec6fe8517ee0cdf1c3..f5111d62972d3d17f74022812ad8c0ed8aba6802 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -643,7 +643,7 @@ static bool gss_check_seq_num(const struct svc_rqst *rqstp, struct rsc *rsci, } __set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win); goto ok; - } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) { + } else if (seq_num + GSS_SEQ_WIN <= sd->sd_max) { goto toolow; } if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win)) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index eadc0ede928c320a7afae33e1821d7e1ce63f246..5f854ffbab925b20b49516a35740168c4a3701e0 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -599,9 +599,9 @@ static int __rpc_rmdir(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_rmdir(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_rmdir(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } @@ -612,9 +612,9 @@ static int __rpc_unlink(struct inode *dir, struct dentry *dentry) dget(dentry); ret = simple_unlink(dir, dentry); + d_drop(dentry); if (!ret) fsnotify_unlink(dir, dentry); - d_delete(dentry); dput(dentry); return ret; } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 236fadc4a4399a89cf6bfba15cf4ebdcb81976bb..6c320623cdcd7c715dbc74a63bb6779212bf97aa 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -139,5 +139,6 @@ cleanup_sunrpc(void) rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); fs_initcall(init_sunrpc); /* Ensure we're initialised before nfs */ module_exit(cleanup_sunrpc); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8201531ce5d9771a6af1c032938f8718c5aa0cd8..04aaca4b8bf93fa0cd2fc337153f692de6f86cea 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1552,15 +1552,14 @@ xprt_transmit(struct rpc_task *task) { struct rpc_rqst *next, *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; - int counter, status; + int status; spin_lock(&xprt->queue_lock); - counter = 0; - while (!list_empty(&xprt->xmit_queue)) { - if (++counter == 20) + for (;;) { + next = list_first_entry_or_null(&xprt->xmit_queue, + struct rpc_rqst, rq_xmit); + if (!next) break; - next = list_first_entry(&xprt->xmit_queue, - struct rpc_rqst, rq_xmit); xprt_pin_rqst(next); spin_unlock(&xprt->queue_lock); status = xprt_request_transmit(next, task); @@ -1568,13 +1567,16 @@ xprt_transmit(struct rpc_task *task) status = 0; spin_lock(&xprt->queue_lock); xprt_unpin_rqst(next); - if (status == 0) { - if (!xprt_request_data_received(task) || - test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - continue; - } else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) - task->tk_status = status; - break; + if (status < 0) { + if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) + task->tk_status = status; + break; + } + /* Was @task transmitted, and has it received a reply? */ + if (xprt_request_data_received(task) && + !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) + break; + cond_resched_lock(&xprt->queue_lock); } spin_unlock(&xprt->queue_lock); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 25554260a5931541288c2abb72184fa44e847462..dcc1992b14d76a9ecdfc5affa12ccb3a8258eec9 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -449,6 +449,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) IB_POLL_WORKQUEUE); if (IS_ERR(ep->re_attr.send_cq)) { rc = PTR_ERR(ep->re_attr.send_cq); + ep->re_attr.send_cq = NULL; goto out_destroy; } @@ -457,6 +458,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) IB_POLL_WORKQUEUE); if (IS_ERR(ep->re_attr.recv_cq)) { rc = PTR_ERR(ep->re_attr.recv_cq); + ep->re_attr.recv_cq = NULL; goto out_destroy; } ep->re_receive_count = 0; @@ -495,6 +497,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_pd = ib_alloc_pd(device, 0); if (IS_ERR(ep->re_pd)) { rc = PTR_ERR(ep->re_pd); + ep->re_pd = NULL; goto out_destroy; } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 12e535b43d887bac125cd925db0c3cbf376f8cce..6911f1cab2063fb5dd3842301c04b91d66c4ba1c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -342,16 +342,18 @@ static int tipc_enable_bearer(struct net *net, const char *name, goto rejected; } - test_and_set_bit_lock(0, &b->up); - rcu_assign_pointer(tn->bearer_list[bearer_id], b); - if (skb) - tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); - + /* Create monitoring data before accepting activate messages */ if (tipc_mon_create(net, bearer_id)) { bearer_disable(net, b); + kfree_skb(skb); return -ENOMEM; } + test_and_set_bit_lock(0, &b->up); + rcu_assign_pointer(tn->bearer_list[bearer_id], b); + if (skb) + tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); + pr_info("Enabled bearer <%s>, priority %u\n", name, prio); return res; diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c index f8e73c4a00933335939c7c629498bdba392dacdf..6f91b9a306dc39e8ce91977c5485948b2ff7cde7 100644 --- a/net/tipc/crypto.c +++ b/net/tipc/crypto.c @@ -590,6 +590,10 @@ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, tmp->cloned = NULL; tmp->authsize = TIPC_AES_GCM_TAG_SIZE; tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL); + if (!tmp->key) { + tipc_aead_free(&tmp->rcu); + return -ENOMEM; + } memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE); atomic_set(&tmp->users, 0); atomic64_set(&tmp->seqno, 0); @@ -2276,45 +2280,55 @@ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr) struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx; struct tipc_aead_key *skey = NULL; u16 key_gen = msg_key_gen(hdr); - u16 size = msg_data_sz(hdr); + u32 size = msg_data_sz(hdr); u8 *data = msg_data(hdr); + unsigned int keylen; + + /* Verify whether the size can exist in the packet */ + if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) { + pr_debug("%s: message data size is too small\n", rx->name); + goto exit; + } + + keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); + + /* Verify the supplied size values */ + if (unlikely(size != keylen + sizeof(struct tipc_aead_key) || + keylen > TIPC_AEAD_KEY_SIZE_MAX)) { + pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name); + goto exit; + } spin_lock(&rx->lock); if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) { pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name, rx->skey, key_gen, rx->key_gen); - goto exit; + goto exit_unlock; } /* Allocate memory for the key */ skey = kmalloc(size, GFP_ATOMIC); if (unlikely(!skey)) { pr_err("%s: unable to allocate memory for skey\n", rx->name); - goto exit; + goto exit_unlock; } /* Copy key from msg data */ - skey->keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); + skey->keylen = keylen; memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME); memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->keylen); - /* Sanity check */ - if (unlikely(size != tipc_aead_key_size(skey))) { - kfree(skey); - skey = NULL; - goto exit; - } - rx->key_gen = key_gen; rx->skey_mode = msg_key_mode(hdr); rx->skey = skey; rx->nokey = 0; mb(); /* for nokey flag */ -exit: +exit_unlock: spin_unlock(&rx->lock); +exit: /* Schedule the key attaching on this crypto */ if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0))) return true; diff --git a/net/tipc/link.c b/net/tipc/link.c index c92e6984933cb7b48b4c4fc1f779470939d80765..7a353ff6284486de8d752bc3a476272b3e83668b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1258,8 +1258,11 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, return false; #ifdef CONFIG_TIPC_CRYPTO case MSG_CRYPTO: - tipc_crypto_msg_rcv(l->net, skb); - return true; + if (TIPC_SKB_CB(skb)->decrypted) { + tipc_crypto_msg_rcv(l->net, skb); + return true; + } + fallthrough; #endif default: pr_warn("Dropping received illegal msg type\n"); @@ -2156,7 +2159,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct tipc_msg *hdr = buf_msg(skb); struct tipc_gap_ack_blks *ga = NULL; bool reply = msg_probe(hdr), retransmitted = false; - u16 dlen = msg_data_sz(hdr), glen = 0; + u32 dlen = msg_data_sz(hdr), glen = 0; u16 peers_snd_nxt = msg_next_sent(hdr); u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); @@ -2170,6 +2173,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, void *data; trace_tipc_proto_rcv(skb, false, l->name); + + if (dlen > U16_MAX) + goto exit; + if (tipc_link_is_blocked(l) || !xmitq) goto exit; @@ -2238,6 +2245,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, break; case STATE_MSG: + /* Validate Gap ACK blocks, drop if invalid */ + glen = tipc_get_gap_ack_blks(&ga, l, hdr, true); + if (glen > dlen) + break; + l->rcv_nxt_state = msg_seqno(hdr) + 1; /* Update own tolerance if peer indicates a non-zero value */ @@ -2263,9 +2275,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, break; } - /* Receive Gap ACK blocks from peer if any */ - glen = tipc_get_gap_ack_blks(&ga, l, hdr, true); - tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr, &l->mon_state, l->bearer_id); diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 6dce2abf436eeddd4983e1fe09417338af87a97f..a37190da5a504878d2fbda8f6b11bbe0e84760a8 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -465,6 +465,8 @@ void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, state->probing = false; /* Sanity check received domain record */ + if (new_member_cnt > MAX_MON_DOMAIN) + return; if (dlen < dom_rec_len(arrv_dom, 0)) return; if (dlen != dom_rec_len(arrv_dom, new_member_cnt)) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index fe4edce459ad4a2d657bc854f5668e5defcd9b8d..a757fe28bcb5f064858c82cee9171cf4bd78e7ae 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -315,7 +315,7 @@ static bool tipc_update_nametbl(struct net *net, struct distr_item *i, pr_warn_ratelimited("Failed to remove binding %u,%u from %x\n", type, lower, node); } else { - pr_warn("Unrecognized name table message received\n"); + pr_warn_ratelimited("Unknown name table message received\n"); } return false; } diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index f6a6acef42235e170dfd1806e657022ce75bb093..54c5328f492d2e7bc3870a624cc0236e159168eb 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -931,7 +931,7 @@ static int __tipc_nl_add_nametable_publ(struct tipc_nl_msg *msg, list_for_each_entry(p, &sr->all_publ, all_publ) if (p->key == *last_key) break; - if (p->key != *last_key) + if (list_entry_is_head(p, &sr->all_publ, all_publ)) return -EPIPE; } else { p = list_first_entry(&sr->all_publ, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ce957ee5383c4c8907645a3c5012935fd6c9640d..8d2c98531af456389f0087660c4a83a2a0e0b4f7 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3743,7 +3743,7 @@ static int __tipc_nl_list_sk_publ(struct sk_buff *skb, if (p->key == *last_publ) break; } - if (p->key != *last_publ) { + if (list_entry_is_head(p, &tsk->publications, binding_sock)) { /* We never set seq or call nl_dump_check_consistent() * this means that setting prev_seq here will cause the * consistence check to fail in the netlink callback diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 32a51b20509c92335f8de759cadfa80cbf03cafc..58d22d6b86ae6f04d4903032fda3b837c3a2157c 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -61,7 +61,7 @@ static DEFINE_MUTEX(tcpv6_prot_mutex); static const struct proto *saved_tcpv4_prot; static DEFINE_MUTEX(tcpv4_prot_mutex); static struct proto tls_prots[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; -static struct proto_ops tls_sw_proto_ops; +static struct proto_ops tls_proto_ops[TLS_NUM_PROTS][TLS_NUM_CONFIG][TLS_NUM_CONFIG]; static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG], const struct proto *base); @@ -71,6 +71,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx) WRITE_ONCE(sk->sk_prot, &tls_prots[ip_ver][ctx->tx_conf][ctx->rx_conf]); + WRITE_ONCE(sk->sk_socket->ops, + &tls_proto_ops[ip_ver][ctx->tx_conf][ctx->rx_conf]); } int wait_on_pending_writer(struct sock *sk, long *timeo) @@ -578,8 +580,6 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, if (tx) { ctx->sk_write_space = sk->sk_write_space; sk->sk_write_space = tls_write_space; - } else { - sk->sk_socket->ops = &tls_sw_proto_ops; } goto out; @@ -637,6 +637,39 @@ struct tls_context *tls_ctx_create(struct sock *sk) return ctx; } +static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG], + const struct proto_ops *base) +{ + ops[TLS_BASE][TLS_BASE] = *base; + + ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; + ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked; + + ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE]; + ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read; + + ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE]; + ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read; + +#ifdef CONFIG_TLS_DEVICE + ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE]; + ops[TLS_HW ][TLS_BASE].sendpage_locked = NULL; + + ops[TLS_HW ][TLS_SW ] = ops[TLS_BASE][TLS_SW ]; + ops[TLS_HW ][TLS_SW ].sendpage_locked = NULL; + + ops[TLS_BASE][TLS_HW ] = ops[TLS_BASE][TLS_SW ]; + + ops[TLS_SW ][TLS_HW ] = ops[TLS_SW ][TLS_SW ]; + + ops[TLS_HW ][TLS_HW ] = ops[TLS_HW ][TLS_SW ]; + ops[TLS_HW ][TLS_HW ].sendpage_locked = NULL; +#endif +#ifdef CONFIG_TLS_TOE + ops[TLS_HW_RECORD][TLS_HW_RECORD] = *base; +#endif +} + static void tls_build_proto(struct sock *sk) { int ip_ver = sk->sk_family == AF_INET6 ? TLSV6 : TLSV4; @@ -648,6 +681,8 @@ static void tls_build_proto(struct sock *sk) mutex_lock(&tcpv6_prot_mutex); if (likely(prot != saved_tcpv6_prot)) { build_protos(tls_prots[TLSV6], prot); + build_proto_ops(tls_proto_ops[TLSV6], + sk->sk_socket->ops); smp_store_release(&saved_tcpv6_prot, prot); } mutex_unlock(&tcpv6_prot_mutex); @@ -658,6 +693,8 @@ static void tls_build_proto(struct sock *sk) mutex_lock(&tcpv4_prot_mutex); if (likely(prot != saved_tcpv4_prot)) { build_protos(tls_prots[TLSV4], prot); + build_proto_ops(tls_proto_ops[TLSV4], + sk->sk_socket->ops); smp_store_release(&saved_tcpv4_prot, prot); } mutex_unlock(&tcpv4_prot_mutex); @@ -868,10 +905,6 @@ static int __init tls_register(void) if (err) return err; - tls_sw_proto_ops = inet_stream_ops; - tls_sw_proto_ops.splice_read = tls_sw_splice_read; - tls_sw_proto_ops.sendpage_locked = tls_sw_sendpage_locked; - tls_device_init(); tcp_register_ulp(&tcp_tls_ulp_ops); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 15395683b8e2a360dd451e08e0297180528ac2d0..8cd011ea9fbb851769bf1db4196a99ffe9e11585 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -35,6 +35,7 @@ * SOFTWARE. */ +#include #include #include #include @@ -43,6 +44,14 @@ #include #include +noinline void tls_err_abort(struct sock *sk, int err) +{ + WARN_ON_ONCE(err >= 0); + /* sk->sk_err should contain a positive error code. */ + sk->sk_err = -err; + sk->sk_error_report(sk); +} + static int __skb_nsg(struct sk_buff *skb, int offset, int len, unsigned int recursion_level) { @@ -419,7 +428,7 @@ int tls_tx_records(struct sock *sk, int flags) tx_err: if (rc < 0 && rc != -EAGAIN) - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); return rc; } @@ -450,7 +459,7 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err) /* If err is already set on socket, return the same code */ if (sk->sk_err) { - ctx->async_wait.err = sk->sk_err; + ctx->async_wait.err = -sk->sk_err; } else { ctx->async_wait.err = err; tls_err_abort(sk, err); @@ -506,7 +515,7 @@ static int tls_do_encryption(struct sock *sk, memcpy(&rec->iv_data[iv_offset], tls_ctx->tx.iv, prot->iv_size + prot->salt_size); - xor_iv_with_seq(prot->version, rec->iv_data, tls_ctx->tx.rec_seq); + xor_iv_with_seq(prot->version, rec->iv_data + iv_offset, tls_ctx->tx.rec_seq); sge->offset += prot->prepend_size; sge->length -= prot->prepend_size; @@ -764,7 +773,7 @@ static int tls_push_record(struct sock *sk, int flags, msg_pl->sg.size + prot->tail_size, i); if (rc < 0) { if (rc != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); if (split) { tls_ctx->pending_open_record_frags = true; tls_merge_open_record(sk, rec, tmp, orig_end); @@ -1478,7 +1487,7 @@ static int decrypt_internal(struct sock *sk, struct sk_buff *skb, else memcpy(iv + iv_offset, tls_ctx->rx.iv, prot->salt_size); - xor_iv_with_seq(prot->version, iv, tls_ctx->rx.rec_seq); + xor_iv_with_seq(prot->version, iv + iv_offset, tls_ctx->rx.rec_seq); /* Prepare AAD */ tls_make_aad(aad, rxm->full_len - prot->overhead_size + @@ -1828,7 +1837,7 @@ int tls_sw_recvmsg(struct sock *sk, err = decrypt_skb_update(sk, skb, &msg->msg_iter, &chunk, &zc, async_capable); if (err < 0 && err != -EINPROGRESS) { - tls_err_abort(sk, EBADMSG); + tls_err_abort(sk, -EBADMSG); goto recv_end; } @@ -1998,21 +2007,18 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, if (!skb) goto splice_read_end; - if (!ctx->decrypted) { - err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false); - - /* splice does not support reading control messages */ - if (ctx->control != TLS_RECORD_TYPE_DATA) { - err = -EINVAL; - goto splice_read_end; - } + err = decrypt_skb_update(sk, skb, NULL, &chunk, &zc, false); + if (err < 0) { + tls_err_abort(sk, -EBADMSG); + goto splice_read_end; + } - if (err < 0) { - tls_err_abort(sk, EBADMSG); - goto splice_read_end; - } - ctx->decrypted = 1; + /* splice does not support reading control messages */ + if (ctx->control != TLS_RECORD_TYPE_DATA) { + err = -EINVAL; + goto splice_read_end; } + rxm = strp_msg(skb); chunk = min_t(unsigned int, rxm->full_len, len); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d5c0ae34b1e457f17115dbc4b2a5468feacac667..fa99fe5bcf6ce3ec2e86e50904027824a9dc75b7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -593,20 +593,42 @@ static void unix_release_sock(struct sock *sk, int embrion) static void init_peercred(struct sock *sk) { - put_pid(sk->sk_peer_pid); - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + const struct cred *old_cred; + struct pid *old_pid; + + spin_lock(&sk->sk_peer_lock); + old_pid = sk->sk_peer_pid; + old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(task_tgid(current)); sk->sk_peer_cred = get_current_cred(); + spin_unlock(&sk->sk_peer_lock); + + put_pid(old_pid); + put_cred(old_cred); } static void copy_peercred(struct sock *sk, struct sock *peersk) { - put_pid(sk->sk_peer_pid); - if (sk->sk_peer_cred) - put_cred(sk->sk_peer_cred); + const struct cred *old_cred; + struct pid *old_pid; + + if (sk < peersk) { + spin_lock(&sk->sk_peer_lock); + spin_lock_nested(&peersk->sk_peer_lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock(&peersk->sk_peer_lock); + spin_lock_nested(&sk->sk_peer_lock, SINGLE_DEPTH_NESTING); + } + old_pid = sk->sk_peer_pid; + old_cred = sk->sk_peer_cred; sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); + + spin_unlock(&sk->sk_peer_lock); + spin_unlock(&peersk->sk_peer_lock); + + put_pid(old_pid); + put_cred(old_cred); } static int unix_listen(struct socket *sock, int backlog) @@ -2986,4 +3008,5 @@ fs_initcall(af_unix_init); module_exit(af_unix_exit); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver); MODULE_ALIAS_NETPROTO(PF_UNIX); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 12e2ddaf887f204a091f157905f270046fc384a6..d45d5366115a769b21bfc1db5a67f7d53c3fa9b8 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -192,8 +192,11 @@ void wait_for_unix_gc(void) { /* If number of inflight sockets is insane, * force a garbage collect right now. + * Paired with the WRITE_ONCE() in unix_inflight(), + * unix_notinflight() and gc_in_progress(). */ - if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && + !READ_ONCE(gc_in_progress)) unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } @@ -213,7 +216,9 @@ void unix_gc(void) if (gc_in_progress) goto out; - gc_in_progress = true; + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, true); + /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. @@ -299,7 +304,10 @@ void unix_gc(void) /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); - gc_in_progress = false; + + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, false); + wake_up(&unix_gc_wait); out: diff --git a/net/unix/scm.c b/net/unix/scm.c index 052ae709ce2899e74ebb005d8886e42ccbf8b849..aa27a02478dc1a7e4022f77e6ea7ac55f40b95c7 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -60,7 +60,8 @@ void unix_inflight(struct user_struct *user, struct file *fp) } else { BUG_ON(list_empty(&u->link)); } - unix_tot_inflight++; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } user->unix_inflight++; spin_unlock(&unix_gc_lock); @@ -80,7 +81,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); - unix_tot_inflight--; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); } user->unix_inflight--; spin_unlock(&unix_gc_lock); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 326250513570eeba572b14281fd586e7a99de3c5..005aa701f4d522023c713149e360914a8e44e375 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1279,6 +1279,8 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, * non-blocking call. */ err = -EALREADY; + if (flags & O_NONBLOCK) + goto out; break; default: if ((sk->sk_state == TCP_LISTEN) || @@ -1355,6 +1357,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, sk->sk_state = sk->sk_state == TCP_ESTABLISHED ? TCP_CLOSING : TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); + vsock_remove_connected(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index ce404104a2128254866cf5a7b93f9158fe04c1b3..5c0439c3bcfa4f767e96eb43dc1e817183252ed6 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -1157,7 +1157,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, space_available = virtio_transport_space_update(sk, pkt); /* Update CID in case it has changed after a transport reset event */ - vsk->local_addr.svm_cid = dst.svm_cid; + if (vsk->local_addr.svm_cid != VMADDR_CID_ANY) + vsk->local_addr.svm_cid = dst.svm_cid; if (space_available) sk->sk_write_space(sk); diff --git a/net/wireless/core.c b/net/wireless/core.c index 240282c083aa746a55f5998dbfd6ec356a404985..3f4554723761d5ceada93ac0de8a48451d178815 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -501,6 +501,7 @@ use_default_name: INIT_WORK(&rdev->propagate_cac_done_wk, cfg80211_propagate_cac_done_wk); INIT_WORK(&rdev->mgmt_registrations_update_wk, cfg80211_mgmt_registrations_update_wk); + spin_lock_init(&rdev->mgmt_registrations_lock); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -1256,7 +1257,6 @@ void cfg80211_init_wdev(struct wireless_dev *wdev) INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); INIT_LIST_HEAD(&wdev->mgmt_registrations); - spin_lock_init(&wdev->mgmt_registrations_lock); INIT_LIST_HEAD(&wdev->pmsr_list); spin_lock_init(&wdev->pmsr_lock); INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk); diff --git a/net/wireless/core.h b/net/wireless/core.h index 7df91f940212418fdb57fbab6ff12c5e74071559..a3362a32acb3220c462ab5c617e8e41ff5abfda8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -101,6 +101,8 @@ struct cfg80211_registered_device { struct work_struct propagate_cac_done_wk; struct work_struct mgmt_registrations_update_wk; + /* lock for all wdev lists */ + spinlock_t mgmt_registrations_lock; /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 0ac820780437d54baa4da315eb7e5397cf65b112..6dcfc5a348742501fbdca02fcee98640f2e7c0f5 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -448,9 +448,9 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) ASSERT_RTNL(); - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); if (!wdev->mgmt_registrations_need_update) { - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); return; } @@ -475,7 +475,7 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev) rcu_read_unlock(); wdev->mgmt_registrations_need_update = 0; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); rdev_update_mgmt_frame_registrations(rdev, wdev, &upd); } @@ -499,6 +499,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, int match_len, bool multicast_rx, struct netlink_ext_ack *extack) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; u16 mgmt_type; @@ -544,7 +545,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, if (!nreg) return -ENOMEM; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry(reg, &wdev->mgmt_registrations, list) { int mlen = min(match_len, reg->match_len); @@ -579,7 +580,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, list_add(&nreg->list, &wdev->mgmt_registrations); } wdev->mgmt_registrations_need_update = 1; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); @@ -587,7 +588,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, out: kfree(nreg); - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); return err; } @@ -598,7 +599,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { if (reg->nlportid != nlportid) @@ -611,7 +612,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) schedule_work(&rdev->mgmt_registrations_update_wk); } - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); if (nlportid && rdev->crit_proto_nlportid == nlportid) { rdev->crit_proto_nlportid = 0; @@ -624,15 +625,16 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) { + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { list_del(®->list); kfree(reg); } wdev->mgmt_registrations_need_update = 1; - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); cfg80211_mgmt_registrations_update(wdev); } @@ -780,7 +782,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, data = buf + ieee80211_hdrlen(mgmt->frame_control); data_len = len - ieee80211_hdrlen(mgmt->frame_control); - spin_lock_bh(&wdev->mgmt_registrations_lock); + spin_lock_bh(&rdev->mgmt_registrations_lock); list_for_each_entry(reg, &wdev->mgmt_registrations, list) { if (reg->frame_type != ftype) @@ -804,7 +806,7 @@ bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm, break; } - spin_unlock_bh(&wdev->mgmt_registrations_lock); + spin_unlock_bh(&rdev->mgmt_registrations_lock); trace_cfg80211_return_bool(result); return result; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0890b42153f536bf52395311d033d12f5443b989..5d8959a5e7608f35f0b787966ee455dee70620da 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -475,7 +475,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = NLA_POLICY_ETH_ADDR_COMPAT, - [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, + /* allow 3 for NUL-termination, we used to declare this NLA_STRING */ + [NL80211_ATTR_REG_ALPHA2] = NLA_POLICY_RANGE(NLA_BINARY, 2, 3), [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, @@ -12939,6 +12940,9 @@ static int handle_nan_filter(struct nlattr *attr_filter, i = 0; nla_for_each_nested(attr, attr_filter, rem) { filter[i].filter = nla_memdup(attr, GFP_KERNEL); + if (!filter[i].filter) + goto err; + filter[i].len = nla_len(attr); i++; } @@ -12951,6 +12955,15 @@ static int handle_nan_filter(struct nlattr *attr_filter, } return 0; + +err: + i = 0; + nla_for_each_nested(attr, attr_filter, rem) { + kfree(filter[i].filter); + i++; + } + kfree(filter); + return -ENOMEM; } static int nl80211_nan_add_func(struct sk_buff *skb, @@ -17124,7 +17137,8 @@ void cfg80211_ch_switch_notify(struct net_device *dev, wdev->chandef = *chandef; wdev->preset_chandef = *chandef; - if (wdev->iftype == NL80211_IFTYPE_STATION && + if ((wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && !WARN_ON(!wdev->current_bss)) cfg80211_update_assoc_bss_entry(wdev, chandef->chan); diff --git a/net/wireless/scan.c b/net/wireless/scan.c index fab1f0d504036a7518e222c92a277f5caeb04b4e..fd614a5a00b42093ae00def95f548384ba3aa3cc 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -418,14 +418,17 @@ cfg80211_add_nontrans_list(struct cfg80211_bss *trans_bss, } ssid_len = ssid[1]; ssid = ssid + 2; - rcu_read_unlock(); /* check if nontrans_bss is in the list */ list_for_each_entry(bss, &trans_bss->nontrans_list, nontrans_list) { - if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) + if (is_bss(bss, nontrans_bss->bssid, ssid, ssid_len)) { + rcu_read_unlock(); return 0; + } } + rcu_read_unlock(); + /* add to the list */ list_add_tail(&nontrans_bss->nontrans_list, &trans_bss->nontrans_list); return 0; diff --git a/net/wireless/util.c b/net/wireless/util.c index 0e40e71c18011ddef5c1c01d351e96db7330d3bd..423fc9e941077273aa0938c5e9c5e5757f6d84cd 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1028,14 +1028,14 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, !(rdev->wiphy.interface_modes & (1 << ntype))) return -EOPNOTSUPP; - /* if it's part of a bridge, reject changing type to station/ibss */ - if (netif_is_bridge_port(dev) && - (ntype == NL80211_IFTYPE_ADHOC || - ntype == NL80211_IFTYPE_STATION || - ntype == NL80211_IFTYPE_P2P_CLIENT)) - return -EBUSY; - if (ntype != otype) { + /* if it's part of a bridge, reject changing type to station/ibss */ + if (netif_is_bridge_port(dev) && + (ntype == NL80211_IFTYPE_ADHOC || + ntype == NL80211_IFTYPE_STATION || + ntype == NL80211_IFTYPE_P2P_CLIENT)) + return -EBUSY; + dev->ieee80211_ptr->use_4addr = false; dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); @@ -1044,6 +1044,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, switch (otype) { case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: cfg80211_stop_ap(rdev, dev, true); break; case NL80211_IFTYPE_ADHOC: diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index 2bf26939016316a70eb8dcc35adf7f582cb2e84e..a0f62fa02e06e0aa97901aaf226dc84895f6a8ec 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -127,6 +127,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = { [XFRMA_SET_MARK] = { .type = NLA_U32 }, [XFRMA_SET_MARK_MASK] = { .type = NLA_U32 }, [XFRMA_IF_ID] = { .type = NLA_U32 }, + [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 }, }; static struct nlmsghdr *xfrm_nlmsg_put_compat(struct sk_buff *skb, @@ -274,9 +275,10 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_SET_MARK: case XFRMA_SET_MARK_MASK: case XFRMA_IF_ID: + case XFRMA_MTIMER_THRESH: return xfrm_nla_cpy(dst, src, nla_len(src)); default: - BUILD_BUG_ON(XFRMA_MAX != XFRMA_IF_ID); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH); pr_warn_once("unsupported nla_type %d\n", src->nla_type); return -EOPNOTSUPP; } @@ -431,7 +433,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla, int err; if (type > XFRMA_MAX) { - BUILD_BUG_ON(XFRMA_MAX != XFRMA_IF_ID); + BUILD_BUG_ON(XFRMA_MAX != XFRMA_MTIMER_THRESH); NL_SET_ERR_MSG(extack, "Bad attribute"); return -EOPNOTSUPP; } diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index e843b0d9e2a61c16551be51f69bc441ccad4f921..c255aac6b816b4911b435ae76cd3377b88f2d9dd 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -223,6 +223,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (x->encap || x->tfcpad) return -EINVAL; + if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) + return -EINVAL; + dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { @@ -261,7 +264,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = dev; xso->real_dev = dev; xso->num_exthdrs = 1; - xso->flags = xuo->flags; + /* Don't forward bit that is not implemented */ + xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6; err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index e9ce23343f5cac44703f5394fc0f8c2baee82e49..4420c8fd318a67b3f2004cc3fae59b3cdc1657b9 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -643,11 +643,16 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; struct xfrm_if *xi; int err; xfrmi_netlink_parms(data, &p); + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (xi) return -EEXIST; @@ -672,9 +677,14 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], { struct xfrm_if *xi = netdev_priv(dev); struct net *net = xi->net; - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; xfrmi_netlink_parms(data, &p); + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3a9831c05ec71c7733be2b1742a82115a1a34cdf..3d0ffd9270041d2840e96670575cbb407dcf7a46 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -31,8 +31,10 @@ #include #include #include +#include #include #include +#include #if IS_ENABLED(CONFIG_IPV6_MIP6) #include #endif @@ -3293,7 +3295,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) fl4->flowi4_proto = iph->protocol; fl4->daddr = reverse ? iph->saddr : iph->daddr; fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; + fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK; if (!ip_is_fragment(iph)) { switch (iph->protocol) { @@ -3455,6 +3457,26 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) } fl6->flowi6_proto = nexthdr; return; + case IPPROTO_GRE: + if (!onlyproto && + (nh + offset + 12 < skb->data || + pskb_may_pull(skb, nh + offset + 12 - skb->data))) { + struct gre_base_hdr *gre_hdr; + __be32 *gre_key; + + nh = skb_network_header(skb); + gre_hdr = (struct gre_base_hdr *)(nh + offset); + gre_key = (__be32 *)(gre_hdr + 1); + + if (gre_hdr->flags & GRE_KEY) { + if (gre_hdr->flags & GRE_CSUM) + gre_key++; + fl6->fl6_gre_key = *gre_key; + } + } + fl6->flowi6_proto = nexthdr; + return; + #if IS_ENABLED(CONFIG_IPV6_MIP6) case IPPROTO_MH: offset += ipv6_optlen(exthdr); @@ -4265,7 +4287,7 @@ static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, } static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, - u8 dir, u8 type, struct net *net) + u8 dir, u8 type, struct net *net, u32 if_id) { struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; @@ -4274,7 +4296,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { - if (xfrm_migrate_selector_match(sel, &pol->selector) && + if ((if_id == 0 || pol->if_id == if_id) && + xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; priority = ret->priority; @@ -4286,7 +4309,8 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * if ((pol->priority >= priority) && ret) break; - if (xfrm_migrate_selector_match(sel, &pol->selector) && + if ((if_id == 0 || pol->if_id == if_id) && + xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; break; @@ -4402,7 +4426,7 @@ static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate) int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap) + struct xfrm_encap_tmpl *encap, u32 if_id) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -4421,14 +4445,14 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 1 - find policy */ - if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { + if ((pol = xfrm_migrate_policy_find(sel, dir, type, net, if_id)) == NULL) { err = -ENOENT; goto out; } /* Stage 2 - find and update state(s) */ for (i = 0, mp = m; i < num_migrate; i++, mp++) { - if ((x = xfrm_migrate_state_find(mp, net))) { + if ((x = xfrm_migrate_state_find(mp, net, if_id))) { x_cur[nx_cur] = x; nx_cur++; xc = xfrm_state_migrate(x, mp, encap); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 6cbbe81a8b4cdc3d6079edbc3c71be5256b37e99..3ca60391c0a2de0a9259279f2f4ab9ae0db83e90 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1542,9 +1542,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, memcpy(&x->mark, &orig->mark, sizeof(x->mark)); memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark)); - if (xfrm_init_state(x) < 0) - goto error; - x->props.flags = orig->props.flags; x->props.extra_flags = orig->props.extra_flags; @@ -1557,6 +1554,9 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, x->km.seq = orig->km.seq; x->replay = orig->replay; x->preplay = orig->preplay; + x->mapping_maxage = orig->mapping_maxage; + x->new_mapping = 0; + x->new_mapping_sport = 0; return x; @@ -1566,7 +1566,8 @@ out: return NULL; } -struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net) +struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net, + u32 if_id) { unsigned int h; struct xfrm_state *x = NULL; @@ -1582,6 +1583,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n continue; if (m->reqid && x->props.reqid != m->reqid) continue; + if (if_id != 0 && x->if_id != if_id) + continue; if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, m->old_family) || !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, @@ -1597,6 +1600,8 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n if (x->props.mode != m->mode || x->id.proto != m->proto) continue; + if (if_id != 0 && x->if_id != if_id) + continue; if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr, m->old_family) || !xfrm_addr_equal(&x->props.saddr, &m->old_saddr, @@ -1623,6 +1628,11 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, if (!xc) return NULL; + xc->props.family = m->new_family; + + if (xfrm_init_state(xc) < 0) + goto error; + memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr)); memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); @@ -2208,7 +2218,7 @@ int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol) } EXPORT_SYMBOL(km_query); -int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) +static int __km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) { int err = -EINVAL; struct xfrm_mgr *km; @@ -2223,6 +2233,24 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) rcu_read_unlock(); return err; } + +int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) +{ + int ret = 0; + + if (x->mapping_maxage) { + if ((jiffies / HZ - x->new_mapping) > x->mapping_maxage || + x->new_mapping_sport != sport) { + x->new_mapping_sport = sport; + x->new_mapping = jiffies / HZ; + ret = __km_new_mapping(x, ipaddr, sport); + } + } else { + ret = __km_new_mapping(x, ipaddr, sport); + } + + return ret; +} EXPORT_SYMBOL(km_new_mapping); void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) @@ -2519,7 +2547,7 @@ void xfrm_state_delete_tunnel(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_delete_tunnel); -u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) +u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) { const struct xfrm_type *type = READ_ONCE(x->type); struct crypto_aead *aead; @@ -2550,17 +2578,7 @@ u32 __xfrm_state_mtu(struct xfrm_state *x, int mtu) return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - net_adj) & ~(blksize - 1)) + net_adj - 2; } -EXPORT_SYMBOL_GPL(__xfrm_state_mtu); - -u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) -{ - mtu = __xfrm_state_mtu(x, mtu); - - if (x->props.family == AF_INET6 && mtu < IPV6_MIN_MTU) - return IPV6_MIN_MTU; - - return mtu; -} +EXPORT_SYMBOL_GPL(xfrm_state_mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 40ea3d553a33c3feca779bd235e222c8cf0e0a2f..53d0958b02113ade9a1a95e409a4c40392a8a7cc 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -282,6 +282,10 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = 0; + if (attrs[XFRMA_MTIMER_THRESH]) + if (!attrs[XFRMA_ENCAP]) + err = -EINVAL; + out: return err; } @@ -521,6 +525,7 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; struct nlattr *et = attrs[XFRMA_ETIMER_THRESH]; struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH]; + struct nlattr *mt = attrs[XFRMA_MTIMER_THRESH]; if (re) { struct xfrm_replay_state_esn *replay_esn; @@ -552,6 +557,9 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs, if (rt) x->replay_maxdiff = nla_get_u32(rt); + + if (mt) + x->mapping_maxage = nla_get_u32(mt); } static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m) @@ -964,8 +972,13 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (ret) goto out; } - if (x->security) + if (x->security) { ret = copy_sec_ctx(x->security, skb); + if (ret) + goto out; + } + if (x->mapping_maxage) + ret = nla_put_u32(skb, XFRMA_MTIMER_THRESH, x->mapping_maxage); out: return ret; } @@ -2423,6 +2436,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, int n = 0; struct net *net = sock_net(skb->sk); struct xfrm_encap_tmpl *encap = NULL; + u32 if_id = 0; if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; @@ -2447,7 +2461,10 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } - err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap); + if (attrs[XFRMA_IF_ID]) + if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + + err = xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp, net, encap, if_id); kfree(encap); @@ -2901,7 +2918,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); if (x->xso.dev) - l += nla_total_size(sizeof(x->xso)); + l += nla_total_size(sizeof(struct xfrm_user_offload)); if (x->props.smark.v | x->props.smark.m) { l += nla_total_size(sizeof(x->props.smark.v)); l += nla_total_size(sizeof(x->props.smark.m)); @@ -2912,6 +2929,9 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size_64bit(sizeof(u64)); + if (x->mapping_maxage) + l += nla_total_size(sizeof(x->mapping_maxage)); + return l; } diff --git a/samples/crypto/fips140_lab_util.c b/samples/crypto/fips140_lab_util.c new file mode 100644 index 0000000000000000000000000000000000000000..996839dbd2e39087830b0596f4c0c7f951b3cf23 --- /dev/null +++ b/samples/crypto/fips140_lab_util.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2021 Google LLC + * + * This program provides commands that dump certain types of output from the + * fips140 kernel module, as required by the FIPS lab for evaluation purposes. + * + * While the fips140 kernel module can only be accessed directly by other kernel + * code, an easy-to-use userspace utility program was desired for lab testing. + * When possible, this program uses AF_ALG to access the crypto algorithms; this + * requires that the kernel has AF_ALG enabled. Where AF_ALG isn't sufficient, + * a custom device node /dev/fips140 is used instead; this requires that the + * fips140 module is loaded and has evaluation testing support compiled in. + * + * This program can be compiled and run on an Android device as follows: + * + * NDK_DIR=$HOME/android-ndk-r23b # adjust directory path as needed + * $NDK_DIR/toolchains/llvm/prebuilt/linux-x86_64/bin/aarch64-linux-android31-clang \ + * fips140_lab_util.c -O2 -Wall -o fips140_lab_util + * adb push fips140_lab_util /data/local/tmp/ + * adb root + * adb shell /data/local/tmp/fips140_lab_util + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../../crypto/fips140-eval-testing-uapi.h" + +/* --------------------------------------------------------------------------- + * Utility functions + * ---------------------------------------------------------------------------*/ + +#define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0])) + +static void __attribute__((noreturn)) +do_die(const char *format, va_list va, int err) +{ + fputs("ERROR: ", stderr); + vfprintf(stderr, format, va); + if (err) + fprintf(stderr, ": %s", strerror(err)); + putc('\n', stderr); + exit(1); +} + +static void __attribute__((noreturn, format(printf, 1, 2))) +die_errno(const char *format, ...) +{ + va_list va; + + va_start(va, format); + do_die(format, va, errno); + va_end(va); +} + +static void __attribute__((noreturn, format(printf, 1, 2))) +die(const char *format, ...) +{ + va_list va; + + va_start(va, format); + do_die(format, va, 0); + va_end(va); +} + +static void __attribute__((noreturn)) +assertion_failed(const char *expr, const char *file, int line) +{ + die("Assertion failed: %s at %s:%d", expr, file, line); +} + +#define ASSERT(e) ({ if (!(e)) assertion_failed(#e, __FILE__, __LINE__); }) + +static void rand_bytes(uint8_t *bytes, size_t count) +{ + size_t i; + + for (i = 0; i < count; i++) + bytes[i] = rand(); +} + +static const char *booltostr(bool b) +{ + return b ? "true" : "false"; +} + +static const char *bytes_to_hex(const uint8_t *bytes, size_t count) +{ + static char hex[1025]; + size_t i; + + ASSERT(count <= 512); + for (i = 0; i < count; i++) + sprintf(&hex[2*i], "%02x", bytes[i]); + return hex; +} + +static void usage(void); + +/* --------------------------------------------------------------------------- + * /dev/fips140 ioctls + * ---------------------------------------------------------------------------*/ + +static int get_fips140_device_number(void) +{ + FILE *f; + char line[128]; + int number; + char name[32]; + + f = fopen("/proc/devices", "r"); + if (!f) + die_errno("Failed to open /proc/devices"); + while (fgets(line, sizeof(line), f)) { + if (sscanf(line, "%d %31s", &number, name) == 2 && + strcmp(name, "fips140") == 0) + return number; + } + fclose(f); + die("fips140 device node is unavailable.\n" +"The fips140 device node is only available when the fips140 module is loaded\n" +"and has been built with evaluation testing support."); +} + +static void create_fips140_node_if_needed(void) +{ + struct stat stbuf; + int major; + + if (stat("/dev/fips140", &stbuf) == 0) + return; + + major = get_fips140_device_number(); + if (mknod("/dev/fips140", S_IFCHR | 0600, makedev(major, 1)) != 0) + die_errno("Failed to create fips140 device node"); +} + +static int fips140_dev_fd = -1; + +static int fips140_ioctl(int cmd, const void *arg) +{ + if (fips140_dev_fd < 0) { + create_fips140_node_if_needed(); + fips140_dev_fd = open("/dev/fips140", O_RDONLY); + if (fips140_dev_fd < 0) + die_errno("Failed to open /dev/fips140"); + } + return ioctl(fips140_dev_fd, cmd, arg); +} + +static bool fips140_is_approved_service(const char *name) +{ + int ret = fips140_ioctl(FIPS140_IOCTL_IS_APPROVED_SERVICE, name); + + if (ret < 0) + die_errno("FIPS140_IOCTL_IS_APPROVED_SERVICE unexpectedly failed"); + if (ret == 1) + return true; + if (ret == 0) + return false; + die("FIPS140_IOCTL_IS_APPROVED_SERVICE returned unexpected value %d", + ret); +} + +static const char *fips140_module_version(void) +{ + static char buf[256]; + int ret; + + memset(buf, 0, sizeof(buf)); + ret = fips140_ioctl(FIPS140_IOCTL_MODULE_VERSION, buf); + if (ret < 0) + die_errno("FIPS140_IOCTL_MODULE_VERSION unexpectedly failed"); + if (ret != 0) + die("FIPS140_IOCTL_MODULE_VERSION returned unexpected value %d", + ret); + return buf; +} + +/* --------------------------------------------------------------------------- + * AF_ALG utilities + * ---------------------------------------------------------------------------*/ + +#define AF_ALG_MAX_RNG_REQUEST_SIZE 128 + +static int get_alg_fd(const char *alg_type, const char *alg_name) +{ + struct sockaddr_alg addr = {}; + int alg_fd; + + alg_fd = socket(AF_ALG, SOCK_SEQPACKET, 0); + if (alg_fd < 0) + die("Failed to create AF_ALG socket.\n" +"AF_ALG is only available when it has been enabled in the kernel.\n"); + + strncpy((char *)addr.salg_type, alg_type, sizeof(addr.salg_type) - 1); + strncpy((char *)addr.salg_name, alg_name, sizeof(addr.salg_name) - 1); + + if (bind(alg_fd, (void *)&addr, sizeof(addr)) != 0) + die_errno("Failed to bind AF_ALG socket to %s %s", + alg_type, alg_name); + return alg_fd; +} + +static int get_req_fd(int alg_fd, const char *alg_name) +{ + int req_fd = accept(alg_fd, NULL, NULL); + + if (req_fd < 0) + die_errno("Failed to get request file descriptor for %s", + alg_name); + return req_fd; +} + +/* --------------------------------------------------------------------------- + * show_invalid_inputs command + * ---------------------------------------------------------------------------*/ + +enum direction { + UNSPECIFIED, + DECRYPT, + ENCRYPT, +}; + +static const struct invalid_input_test { + const char *alg_type; + const char *alg_name; + const char *key; + size_t key_size; + const char *msg; + size_t msg_size; + const char *iv; + size_t iv_size; + enum direction direction; + int setkey_error; + int crypt_error; +} invalid_input_tests[] = { + { + .alg_type = "skcipher", + .alg_name = "cbc(aes)", + .key_size = 16, + }, { + .alg_type = "skcipher", + .alg_name = "cbc(aes)", + .key_size = 17, + .setkey_error = EINVAL, + }, { + .alg_type = "skcipher", + .alg_name = "cbc(aes)", + .key_size = 24, + }, { + .alg_type = "skcipher", + .alg_name = "cbc(aes)", + .key_size = 32, + }, { + .alg_type = "skcipher", + .alg_name = "cbc(aes)", + .key_size = 33, + .setkey_error = EINVAL, + }, { + .alg_type = "skcipher", + .alg_name = "cbc(aes)", + .key_size = 16, + .msg_size = 1, + .direction = DECRYPT, + .crypt_error = EINVAL, + }, { + .alg_type = "skcipher", + .alg_name = "cbc(aes)", + .key_size = 16, + .msg_size = 16, + .direction = ENCRYPT, + }, { + .alg_type = "skcipher", + .alg_name = "cbc(aes)", + .key_size = 16, + .msg_size = 17, + .direction = ENCRYPT, + .crypt_error = EINVAL, + }, { + .alg_type = "hash", + .alg_name = "cmac(aes)", + .key_size = 29, + .setkey_error = EINVAL, + }, { + .alg_type = "skcipher", + .alg_name = "xts(aes)", + .key_size = 32, + }, { + .alg_type = "skcipher", + .alg_name = "xts(aes)", + .key = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0", + .key_size = 32, + .setkey_error = EINVAL, + } +}; + +static const char *describe_crypt_op(const struct invalid_input_test *t) +{ + if (t->direction == ENCRYPT) + return "encryption"; + if (t->direction == DECRYPT) + return "decryption"; + if (strcmp(t->alg_type, "hash") == 0) + return "hashing"; + ASSERT(0); +} + +static bool af_alg_setkey(const struct invalid_input_test *t, int alg_fd) +{ + const uint8_t *key = (const uint8_t *)t->key; + uint8_t _key[t->key_size]; + + if (t->key_size == 0) + return true; + + if (t->key == NULL) { + rand_bytes(_key, t->key_size); + key = _key; + } + if (setsockopt(alg_fd, SOL_ALG, ALG_SET_KEY, key, t->key_size) != 0) { + printf("%s: setting %zu-byte key failed with error '%s'\n", + t->alg_name, t->key_size, strerror(errno)); + printf("\tkey was %s\n\n", bytes_to_hex(key, t->key_size)); + ASSERT(t->setkey_error == errno); + return false; + } + printf("%s: setting %zu-byte key succeeded\n", + t->alg_name, t->key_size); + printf("\tkey was %s\n\n", bytes_to_hex(key, t->key_size)); + ASSERT(t->setkey_error == 0); + return true; +} + +static void af_alg_process_msg(const struct invalid_input_test *t, int alg_fd) +{ + struct iovec iov; + struct msghdr hdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + const uint8_t *msg = (const uint8_t *)t->msg; + uint8_t *_msg = NULL; + uint8_t *output = NULL; + uint8_t *control = NULL; + size_t controllen = 0; + struct cmsghdr *cmsg; + int req_fd; + + if (t->msg_size == 0) + return; + + req_fd = get_req_fd(alg_fd, t->alg_name); + + if (t->msg == NULL) { + _msg = malloc(t->msg_size); + rand_bytes(_msg, t->msg_size); + msg = _msg; + } + output = malloc(t->msg_size); + iov.iov_base = (void *)msg; + iov.iov_len = t->msg_size; + + if (t->direction != UNSPECIFIED) + controllen += CMSG_SPACE(sizeof(uint32_t)); + if (t->iv_size) + controllen += CMSG_SPACE(sizeof(struct af_alg_iv) + t->iv_size); + control = calloc(1, controllen); + hdr.msg_control = control; + hdr.msg_controllen = controllen; + cmsg = CMSG_FIRSTHDR(&hdr); + if (t->direction != UNSPECIFIED) { + cmsg->cmsg_level = SOL_ALG; + cmsg->cmsg_type = ALG_SET_OP; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + *(uint32_t *)CMSG_DATA(cmsg) = t->direction == DECRYPT ? + ALG_OP_DECRYPT : ALG_OP_ENCRYPT; + cmsg = CMSG_NXTHDR(&hdr, cmsg); + } + if (t->iv_size) { + struct af_alg_iv *alg_iv; + + cmsg->cmsg_level = SOL_ALG; + cmsg->cmsg_type = ALG_SET_IV; + cmsg->cmsg_len = CMSG_LEN(sizeof(*alg_iv) + t->iv_size); + alg_iv = (struct af_alg_iv *)CMSG_DATA(cmsg); + alg_iv->ivlen = t->iv_size; + memcpy(alg_iv->iv, t->iv, t->iv_size); + } + + if (sendmsg(req_fd, &hdr, 0) != t->msg_size) + die_errno("sendmsg failed"); + + if (read(req_fd, output, t->msg_size) != t->msg_size) { + printf("%s: %s of %zu-byte message failed with error '%s'\n", + t->alg_name, describe_crypt_op(t), t->msg_size, + strerror(errno)); + printf("\tmessage was %s\n\n", bytes_to_hex(msg, t->msg_size)); + ASSERT(t->crypt_error == errno); + } else { + printf("%s: %s of %zu-byte message succeeded\n", + t->alg_name, describe_crypt_op(t), t->msg_size); + printf("\tmessage was %s\n\n", bytes_to_hex(msg, t->msg_size)); + ASSERT(t->crypt_error == 0); + } + free(_msg); + free(output); + free(control); + close(req_fd); +} + +static void test_invalid_input(const struct invalid_input_test *t) +{ + int alg_fd = get_alg_fd(t->alg_type, t->alg_name); + + if (af_alg_setkey(t, alg_fd)) + af_alg_process_msg(t, alg_fd); + + close(alg_fd); +} + +static int cmd_show_invalid_inputs(int argc, char *argv[]) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(invalid_input_tests); i++) + test_invalid_input(&invalid_input_tests[i]); + return 0; +} + +/* --------------------------------------------------------------------------- + * show_module_version command + * ---------------------------------------------------------------------------*/ + +static int cmd_show_module_version(int argc, char *argv[]) +{ + printf("fips140_module_version() => \"%s\"\n", + fips140_module_version()); + return 0; +} + +/* --------------------------------------------------------------------------- + * show_service_indicators command + * ---------------------------------------------------------------------------*/ + +static const char * const default_services_to_show[] = { + "aes", + "cbc(aes)", + "cbcmac(aes)", + "cmac(aes)", + "ctr(aes)", + "cts(cbc(aes))", + "ecb(aes)", + "essiv(cbc(aes),sha256)", + "gcm(aes)", + "hmac(sha1)", + "hmac(sha224)", + "hmac(sha256)", + "hmac(sha384)", + "hmac(sha512)", + "jitterentropy_rng", + "sha1", + "sha224", + "sha256", + "sha384", + "sha512", + "stdrng", + "xcbc(aes)", + "xts(aes)", +}; + +static int cmd_show_service_indicators(int argc, char *argv[]) +{ + const char * const *services = default_services_to_show; + int count = ARRAY_SIZE(default_services_to_show); + int i; + + if (argc > 1) { + services = (const char **)(argv + 1); + count = argc - 1; + } + for (i = 0; i < count; i++) { + printf("fips140_is_approved_service(\"%s\") => %s\n", + services[i], + booltostr(fips140_is_approved_service(services[i]))); + } + return 0; +} + +/* --------------------------------------------------------------------------- + * main() + * ---------------------------------------------------------------------------*/ + +static const struct command { + const char *name; + int (*func)(int argc, char *argv[]); +} commands[] = { + { "show_invalid_inputs", cmd_show_invalid_inputs }, + { "show_module_version", cmd_show_module_version }, + { "show_service_indicators", cmd_show_service_indicators }, +}; + +static void usage(void) +{ + fprintf(stderr, +"Usage:\n" +" fips140_lab_util show_invalid_inputs\n" +" fips140_lab_util show_module_version\n" +" fips140_lab_util show_service_indicators [SERVICE]...\n" + ); +} + +int main(int argc, char *argv[]) +{ + int i; + + if (argc < 2) { + usage(); + return 2; + } + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "--help") == 0) { + usage(); + return 2; + } + } + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + if (strcmp(commands[i].name, argv[1]) == 0) + return commands[i].func(argc - 1, argv + 1); + } + fprintf(stderr, "Unknown command: %s\n\n", argv[1]); + usage(); + return 2; +} diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 5b9a09957c6e0e653590affbf1b214b7328262f5..89e6bf27cd9f6c5f7a95a534fca7bff5d7fb21d8 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -3,6 +3,9 @@ #include #include +extern void my_direct_func1(void); +extern void my_direct_func2(void); + void my_direct_func1(void) { trace_printk("my direct func1\n"); diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 3f0079c9bd6fa2bb99e303731d34911b4f8f035f..11b99325f3dbf910d3fce35bacb4684449ffdf83 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -4,6 +4,9 @@ #include /* for handle_mm_fault() */ #include +extern void my_direct_func(struct vm_area_struct *vma, + unsigned long address, unsigned int flags); + void my_direct_func(struct vm_area_struct *vma, unsigned long address, unsigned int flags) { diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index a2729d1ef17f538e66aa2e443b286c0560dee99f..642c50b5f7166e0f8bef86987e0ac15f311cc98c 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -4,6 +4,8 @@ #include /* for wake_up_process() */ #include +extern void my_direct_func(struct task_struct *p); + void my_direct_func(struct task_struct *p) { trace_printk("waking up %s-%d\n", p->comm, p->pid); diff --git a/samples/kprobes/kretprobe_example.c b/samples/kprobes/kretprobe_example.c index 5dc1bf3baa98b3a1598c8b957af0fef27b74017f..228321ecb16168e1a61311dda0e7deb783d5dd9c 100644 --- a/samples/kprobes/kretprobe_example.c +++ b/samples/kprobes/kretprobe_example.c @@ -86,7 +86,7 @@ static int __init kretprobe_init(void) ret = register_kretprobe(&my_kretprobe); if (ret < 0) { pr_err("register_kretprobe failed, returned %d\n", ret); - return -1; + return ret; } pr_info("Planted return probe at %s: %p\n", my_kretprobe.kp.symbol_name, my_kretprobe.kp.addr); diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index 0d6e118207913d035e9a1549876d34d8eff851b6..f247e691562d438816eed3e1f3eb52e75408b568 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -67,90 +67,6 @@ define filechk fi endef -###### -# gcc support functions -# See documentation in Documentation/kbuild/makefiles.rst - -# cc-cross-prefix -# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-) -# Return first where a gcc is found in PATH. -# If no gcc found in PATH with listed prefixes return nothing -# -# Note: '2>/dev/null' is here to force Make to invoke a shell. Otherwise, it -# would try to directly execute the shell builtin 'command'. This workaround -# should be kept for a long time since this issue was fixed only after the -# GNU Make 4.2.1 release. -cc-cross-prefix = $(firstword $(foreach c, $(1), \ - $(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c)))) - -# output directory for tests below -TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$ - -# try-run -# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) -# Exit code chooses option. "$$TMP" serves as a temporary file and is -# automatically cleaned up. -try-run = $(shell set -e; \ - TMP=$(TMPOUT)/tmp; \ - TMPO=$(TMPOUT)/tmp.o; \ - mkdir -p $(TMPOUT); \ - trap "rm -rf $(TMPOUT)" EXIT; \ - if ($(1)) >/dev/null 2>&1; \ - then echo "$(2)"; \ - else echo "$(3)"; \ - fi) - -# as-option -# Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,) - -as-option = $(call try-run,\ - $(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2)) - -# as-instr -# Usage: cflags-y += $(call as-instr,instr,option1,option2) - -as-instr = $(call try-run,\ - printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) - -# __cc-option -# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586) -__cc-option = $(call try-run,\ - $(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4)) - -# cc-option -# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) - -cc-option = $(call __cc-option, $(CC),\ - $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS),$(1),$(2)) - -# cc-option-yn -# Usage: flag := $(call cc-option-yn,-march=winchip-c6) -cc-option-yn = $(call try-run,\ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) - -# cc-disable-warning -# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable) -cc-disable-warning = $(call try-run,\ - $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) - -# cc-ifversion -# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) -cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || echo $(4)) - -# ld-option -# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) -ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) - -# ld-version -# Note this is mainly for HJ Lu's 3 number binutil versions -ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh) - -# ld-ifversion -# Usage: $(call ld-ifversion, -ge, 22252, y) -ld-ifversion = $(shell [ $(ld-version) $(1) $(2) ] && echo $(3) || echo $(4)) - -###### - ### # Shorthand for $(Q)$(MAKE) -f scripts/Makefile.build obj= # Usage: diff --git a/scripts/Kconfig.include b/scripts/Kconfig.include index a5fe72c504ffb86fc31368927b29dd4bfc434b22..0496efd6e11794223b4870ebc2e340e4098a006e 100644 --- a/scripts/Kconfig.include +++ b/scripts/Kconfig.include @@ -39,8 +39,23 @@ as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -c -x assembler $(error-if,$(failure,command -v $(CC)),compiler '$(CC)' not found) $(error-if,$(failure,command -v $(LD)),linker '$(LD)' not found) -# Fail if the linker is gold as it's not capable of linking the kernel proper -$(error-if,$(success, $(LD) -v | grep -q gold), gold linker '$(LD)' not supported) +# Get the compiler name, version, and error out if it is not supported. +cc-info := $(shell,$(srctree)/scripts/cc-version.sh $(CC)) +$(error-if,$(success,test -z "$(cc-info)"),Sorry$(comma) this compiler is not supported.) +cc-name := $(shell,set -- $(cc-info) && echo $1) +cc-version := $(shell,set -- $(cc-info) && echo $2) + +# Get the assembler name, version, and error out if it is not supported. +as-info := $(shell,$(srctree)/scripts/as-version.sh $(CC) $(CLANG_FLAGS)) +$(error-if,$(success,test -z "$(as-info)"),Sorry$(comma) this assembler is not supported.) +as-name := $(shell,set -- $(as-info) && echo $1) +as-version := $(shell,set -- $(as-info) && echo $2) + +# Get the linker name, version, and error out if it is not supported. +ld-info := $(shell,$(srctree)/scripts/ld-version.sh $(LD)) +$(error-if,$(success,test -z "$(ld-info)"),Sorry$(comma) this linker is not supported.) +ld-name := $(shell,set -- $(ld-info) && echo $1) +ld-version := $(shell,set -- $(ld-info) && echo $2) # machine bit flags # $(m32-flag): -m32 if the compiler supports it, or an empty string otherwise. diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic index 82ad63dcd62b4733ab7370306ceb4407575651eb..1d501c57f9eff9b595d79433d14a063540ba2f87 100644 --- a/scripts/Makefile.asm-generic +++ b/scripts/Makefile.asm-generic @@ -14,10 +14,10 @@ src := $(subst /generated,,$(obj)) # $(generic)/Kbuild lists mandatory-y. Exclude um since it is a special case. ifneq ($(SRCARCH),um) -include $(generic)/Kbuild +include $(srctree)/$(generic)/Kbuild endif -include scripts/Kbuild.include +include $(srctree)/scripts/Kbuild.include redundant := $(filter $(mandatory-y) $(generated-y), $(generic-y)) redundant += $(foreach f, $(generic-y), $(if $(wildcard $(srctree)/$(src)/$(f)),$(f))) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 1a4aa5edfd1e690f2f8ba2ac83f7862094193ed9..37c628fce5bddebffdc240de302b9cd4f9388d64 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -36,27 +36,28 @@ subdir-ccflags-y := # Read auto.conf if it exists, otherwise ignore -include include/config/auto.conf -include scripts/Kbuild.include +include $(srctree)/scripts/Kbuild.include +include $(srctree)/scripts/Makefile.compiler # The filename Kbuild has precedence over Makefile kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) kbuild-file := $(if $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Kbuild,$(kbuild-dir)/Makefile) include $(kbuild-file) -include scripts/Makefile.lib +include $(srctree)/scripts/Makefile.lib # Do not include hostprogs rules unless needed. # $(sort ...) is used here to remove duplicated words and excessive spaces. hostprogs := $(sort $(hostprogs)) ifneq ($(hostprogs),) -include scripts/Makefile.host +include $(srctree)/scripts/Makefile.host endif # Do not include userprogs rules unless needed. # $(sort ...) is used here to remove duplicated words and excessive spaces. userprogs := $(sort $(userprogs)) ifneq ($(userprogs),) -include scripts/Makefile.userprogs +include $(srctree)/scripts/Makefile.userprogs endif ifndef obj @@ -88,6 +89,10 @@ endif targets-for-modules := $(patsubst %.o, %.mod, $(filter %.o, $(obj-m))) +ifdef CONFIG_LTO_CLANG +targets-for-modules += $(patsubst %.o, %.lto.o, $(filter %.o, $(obj-m))) +endif + ifdef need-modorder targets-for-modules += $(obj)/modules.order endif @@ -273,12 +278,35 @@ $(obj)/%.o: $(src)/%.c $(recordmcount_source) $$(objtool_dep) FORCE $(call if_changed_rule,cc_o_c) $(call cmd,force_checksrc) +ifdef CONFIG_LTO_CLANG +# Module .o files may contain LLVM bitcode, compile them into native code +# before ELF processing +quiet_cmd_cc_lto_link_modules = LTO [M] $@ +cmd_cc_lto_link_modules = \ + $(LD) $(ld_flags) -r -o $@ \ + $(shell [ -s $(@:.lto.o=.o.symversions) ] && \ + echo -T $(@:.lto.o=.o.symversions)) \ + --whole-archive $(filter-out FORCE,$^) + +ifdef CONFIG_STACK_VALIDATION +ifneq ($(SKIP_STACK_VALIDATION),1) +# objtool was skipped for LLVM bitcode, run it now that we have compiled +# modules into native code +cmd_cc_lto_link_modules += ; \ + $(objtree)/tools/objtool/objtool $(objtool_args) --module $@ +endif # SKIP_STACK_VALIDATION +endif + +$(obj)/%.lto.o: $(obj)/%.o FORCE + $(call if_changed,cc_lto_link_modules) +endif + cmd_mod = { \ echo $(if $($*-objs)$($*-y)$($*-m), $(addprefix $(obj)/, $($*-objs) $($*-y) $($*-m)), $(@:.mod=.o)); \ $(undefined_syms) echo; \ } > $@ -$(obj)/%.mod: $(obj)/%.o FORCE +$(obj)/%.mod: $(obj)/%$(mod-prelink-ext).o FORCE $(call if_changed,mod) quiet_cmd_cc_lst_c = MKLST $@ diff --git a/scripts/Makefile.clang b/scripts/Makefile.clang new file mode 100644 index 0000000000000000000000000000000000000000..3ae63bd355824a2d6f0cba982fb46320279c3b06 --- /dev/null +++ b/scripts/Makefile.clang @@ -0,0 +1,35 @@ +# Individual arch/{arch}/Makefiles should use -EL/-EB to set intended +# endianness and -m32/-m64 to set word size based on Kconfigs instead of +# relying on the target triple. +CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi +CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu +CLANG_TARGET_FLAGS_hexagon := hexagon-linux-musl +CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu +CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu +CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu +CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu +CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu +CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu +CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(SRCARCH)) + +ifeq ($(CROSS_COMPILE),) +ifeq ($(CLANG_TARGET_FLAGS),) +$(error Specify CROSS_COMPILE or add '--target=' option to scripts/Makefile.clang) +else +CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS) +endif # CLANG_TARGET_FLAGS +else +CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif # CROSS_COMPILE + +ifeq ($(LLVM_IAS),0) +CLANG_FLAGS += -no-integrated-as +GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit)) +CLANG_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) +else +CLANG_FLAGS += -integrated-as +endif +CLANG_FLAGS += -Werror=unknown-warning-option +KBUILD_CFLAGS += $(CLANG_FLAGS) +KBUILD_AFLAGS += $(CLANG_FLAGS) +export CLANG_FLAGS diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean index d9e0ceace6a64f35cdfe2819a591b6149edd3d38..7cab16b59eb047f2136aaab20a06e8cd941083a5 100644 --- a/scripts/Makefile.clean +++ b/scripts/Makefile.clean @@ -8,7 +8,7 @@ src := $(obj) PHONY := __clean __clean: -include scripts/Kbuild.include +include $(srctree)/scripts/Kbuild.include # The filename Kbuild has precedence over Makefile kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src)) diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler new file mode 100644 index 0000000000000000000000000000000000000000..3f2f3665216fbc8fc76ddd379884388c573ae9c8 --- /dev/null +++ b/scripts/Makefile.compiler @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: GPL-2.0-only + +# cc-cross-prefix +# Usage: CROSS_COMPILE := $(call cc-cross-prefix, m68k-linux-gnu- m68k-linux-) +# Return first where a gcc is found in PATH. +# If no gcc found in PATH with listed prefixes return nothing +# +# Note: '2>/dev/null' is here to force Make to invoke a shell. Otherwise, it +# would try to directly execute the shell builtin 'command'. This workaround +# should be kept for a long time since this issue was fixed only after the +# GNU Make 4.2.1 release. +cc-cross-prefix = $(firstword $(foreach c, $(1), \ + $(if $(shell command -v -- $(c)gcc 2>/dev/null), $(c)))) + +# output directory for tests below +TMPOUT = $(if $(KBUILD_EXTMOD),$(firstword $(KBUILD_EXTMOD))/).tmp_$$$$ + +# try-run +# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) +# Exit code chooses option. "$$TMP" serves as a temporary file and is +# automatically cleaned up. +try-run = $(shell set -e; \ + TMP=$(TMPOUT)/tmp; \ + TMPO=$(TMPOUT)/tmp.o; \ + mkdir -p $(TMPOUT); \ + trap "rm -rf $(TMPOUT)" EXIT; \ + if ($(1)) >/dev/null 2>&1; \ + then echo "$(2)"; \ + else echo "$(3)"; \ + fi) + +# as-option +# Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,) + +as-option = $(call try-run,\ + $(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2)) + +# as-instr +# Usage: cflags-y += $(call as-instr,instr,option1,option2) + +as-instr = $(call try-run,\ + printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3)) + +# __cc-option +# Usage: MY_CFLAGS += $(call __cc-option,$(CC),$(MY_CFLAGS),-march=winchip-c6,-march=i586) +__cc-option = $(call try-run,\ + $(1) -Werror $(2) $(3) -c -x c /dev/null -o "$$TMP",$(3),$(4)) + +# cc-option +# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) + +cc-option = $(call __cc-option, $(CC),\ + $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS),$(1),$(2)) + +# cc-option-yn +# Usage: flag := $(call cc-option-yn,-march=winchip-c6) +cc-option-yn = $(call try-run,\ + $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n) + +# cc-disable-warning +# Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable) +cc-disable-warning = $(call try-run,\ + $(CC) -Werror $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1))) + +# cc-ifversion +# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) +cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || echo $(4)) + +# ld-option +# Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y) +ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3)) + +# ld-ifversion +# Usage: $(call ld-ifversion, -ge, 22252, y) +ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4)) diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst index 50d580d77ae920b9c96d52639ec3b229082707db..459e84edf6cf084949855997bc9ead6314197b47 100644 --- a/scripts/Makefile.dtbinst +++ b/scripts/Makefile.dtbinst @@ -14,7 +14,7 @@ PHONY := __dtbs_install __dtbs_install: include include/config/auto.conf -include scripts/Kbuild.include +include $(srctree)/scripts/Kbuild.include include $(src)/Makefile dtbs := $(addprefix $(dst)/, $(dtb-y) $(if $(CONFIG_OF_ALL_DTBS),$(dtb-))) diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn index 6baee1200615d53ec7d783108a456825b385d5e0..23d3967786b9ff952f4b6fed4cdaf9c46d5cbf1c 100644 --- a/scripts/Makefile.extrawarn +++ b/scripts/Makefile.extrawarn @@ -51,6 +51,7 @@ KBUILD_CFLAGS += -Wno-sign-compare KBUILD_CFLAGS += -Wno-format-zero-length KBUILD_CFLAGS += $(call cc-disable-warning, pointer-to-enum-cast) KBUILD_CFLAGS += -Wno-tautological-constant-out-of-range-compare +KBUILD_CFLAGS += $(call cc-disable-warning, unaligned-access) endif endif diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins index 952e46876329a78a104ee91ef539c7e63d77c68c..4aad28480035582f0c4b99a973826597774d054e 100644 --- a/scripts/Makefile.gcc-plugins +++ b/scripts/Makefile.gcc-plugins @@ -19,6 +19,10 @@ gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF) \ += -fplugin-arg-structleak_plugin-byref gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK_BYREF_ALL) \ += -fplugin-arg-structleak_plugin-byref-all +ifdef CONFIG_GCC_PLUGIN_STRUCTLEAK + DISABLE_STRUCTLEAK_PLUGIN += -fplugin-arg-structleak_plugin-disable +endif +export DISABLE_STRUCTLEAK_PLUGIN gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STRUCTLEAK) \ += -DSTRUCTLEAK_PLUGIN diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst index 708fbd08a2c51d9321ac251605589c1e7a877005..029d85bb0b23794781933b64bb09c6233435e385 100644 --- a/scripts/Makefile.headersinst +++ b/scripts/Makefile.headersinst @@ -12,7 +12,7 @@ PHONY := __headers __headers: -include scripts/Kbuild.include +include $(srctree)/scripts/Kbuild.include src := $(srctree)/$(obj) gen := $(objtree)/$(subst include/,include/generated/,$(obj)) diff --git a/scripts/Makefile.kasan b/scripts/Makefile.kasan index 3d791908ed364ba71f9a589b42a707b72ef3878c..b9e94c5e7097090317275d2a543d06debbce36bc 100644 --- a/scripts/Makefile.kasan +++ b/scripts/Makefile.kasan @@ -33,10 +33,11 @@ else CFLAGS_KASAN := $(CFLAGS_KASAN_SHADOW) \ $(call cc-param,asan-globals=1) \ $(call cc-param,asan-instrumentation-with-call-threshold=$(call_threshold)) \ - $(call cc-param,asan-stack=$(stack_enable)) \ $(call cc-param,asan-instrument-allocas=1) endif +CFLAGS_KASAN += $(call cc-param,asan-stack=$(stack_enable)) + endif # CONFIG_KASAN_GENERIC ifdef CONFIG_KASAN_SW_TAGS @@ -50,6 +51,7 @@ endif CFLAGS_KASAN := -fsanitize=kernel-hwaddress \ $(call cc-param,hwasan-instrument-stack=$(stack_enable)) \ $(call cc-param,hwasan-use-short-granules=0) \ + $(call cc-param,hwasan-inline-all-checks=0) \ $(instrumentation_flags) endif # CONFIG_KASAN_SW_TAGS diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index a4f4906160b749a805e57acb513408bd33fc6cb8..28f73e4355b1f2758e1037beeb92b8a732b4d9cd 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -234,6 +234,13 @@ dtc_cpp_flags = -Wp,-MMD,$(depfile).pre.tmp -nostdinc \ $(addprefix -I,$(DTC_INCLUDE)) \ -undef -D__DTS__ +ifeq ($(CONFIG_LTO_CLANG),y) +# With CONFIG_LTO_CLANG, .o files in modules might be LLVM bitcode, so we +# need to run LTO to compile them into native code (.lto.o) before further +# processing. +mod-prelink-ext := .lto +endif + # Objtool arguments are also needed for modfinal with LTO, so we define # then here to avoid duplication. objtool_args = \ diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 9ae3d9ae3d3674cdc3879ee6f448b3bbaaadad2f..6e2c8f8fbc8dbb671171e87bcb5d431d7b5a5f7b 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -9,7 +9,7 @@ __modfinal: include $(objtree)/include/config/auto.conf include $(srctree)/scripts/Kbuild.include -# for c_flags and objtool_args +# for c_flags and mod-prelink-ext include $(srctree)/scripts/Makefile.lib # find all modules listed in modules.order @@ -32,25 +32,6 @@ quiet_cmd_cc_o_c = CC [M] $@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) -ifdef CONFIG_LTO_CLANG -# With CONFIG_LTO_CLANG, reuse the object file we compiled for modpost to -# avoid a second slow LTO link -prelink-ext := .lto - -# ELF processing was skipped earlier because we didn't have native code, -# so let's now process the prelinked binary before we link the module. - -ifdef CONFIG_STACK_VALIDATION -ifneq ($(SKIP_STACK_VALIDATION),1) -cmd_ld_ko_o += \ - $(objtree)/tools/objtool/objtool $(objtool_args) \ - $(@:.ko=$(prelink-ext).o); - -endif # SKIP_STACK_VALIDATION -endif # CONFIG_STACK_VALIDATION - -endif # CONFIG_LTO_CLANG - quiet_cmd_ld_ko_o = LD [M] $@ cmd_ld_ko_o += \ $(LD) -r $(KBUILD_LDFLAGS) \ @@ -75,7 +56,7 @@ cmd_ld_ko_o += ; \ fi endif -$(modules): %.ko: %$(prelink-ext).o %.mod.o scripts/module.lds FORCE +$(modules): %.ko: %$(mod-prelink-ext).o %.mod.o scripts/module.lds FORCE +$(call if_changed,ld_ko_o) targets += $(modules) $(modules:.ko=.mod.o) diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst index 5a4579e76485c111daef39b7731fff1ff24529e6..ad1981233d0ba5c65f97bb1fe7c59a91829d9937 100644 --- a/scripts/Makefile.modinst +++ b/scripts/Makefile.modinst @@ -6,7 +6,7 @@ PHONY := __modinst __modinst: -include scripts/Kbuild.include +include $(srctree)/scripts/Kbuild.include modules := $(sort $(shell cat $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)modules.order)) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 772e73ee455dd76c8b35d9a5067eb0e5eb066635..5d86843fff93c04ba4710bfcbb3d2185a0173721 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -41,9 +41,9 @@ PHONY := __modpost __modpost: include include/config/auto.conf -include scripts/Kbuild.include +include $(srctree)/scripts/Kbuild.include -# for ld_flags +# for mod-prelink-ext include scripts/Makefile.lib mixed-build-prefix = $(if $(KBUILD_MIXED_TREE),$(KBUILD_MIXED_TREE)/) @@ -142,30 +142,14 @@ $(input-symdump): @echo >&2 'WARNING: Symbol version dump "$@" is missing.' @echo >&2 ' Modules may not have dependencies or modversions.' -ifdef CONFIG_LTO_CLANG -# With CONFIG_LTO_CLANG, .o files might be LLVM bitcode, so we need to run -# LTO to compile them into native code before running modpost -prelink-ext := .lto - -quiet_cmd_cc_lto_link_modules = LTO [M] $@ -cmd_cc_lto_link_modules = \ - $(LD) $(ld_flags) -r -o $@ \ - $(shell [ -s $(@:.lto.o=.o.symversions) ] && \ - echo -T $(@:.lto.o=.o.symversions)) \ - --whole-archive $^ - -%.lto.o: %.o - $(call if_changed,cc_lto_link_modules) -endif - modules := $(sort $(shell cat $(MODORDER))) # Read out modules.order to pass in modpost. # Otherwise, allmodconfig would fail with "Argument list too long". quiet_cmd_modpost = MODPOST $@ - cmd_modpost = sed 's/\.ko$$/$(prelink-ext)\.o/' $< | $(MODPOST) -T - + cmd_modpost = sed 's/\.ko$$/$(mod-prelink-ext)\.o/' $< | $(MODPOST) -T - -$(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(prelink-ext).o) FORCE +$(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(mod-prelink-ext).o) FORCE $(call if_changed,modpost) targets += $(output-symdump) diff --git a/scripts/Makefile.modsign b/scripts/Makefile.modsign index d7325cefe709d34960515b2ff1ad3a76e7f48e44..ddf9b5ca77d74b00c86e0574ad63638a88e51cae 100644 --- a/scripts/Makefile.modsign +++ b/scripts/Makefile.modsign @@ -6,7 +6,7 @@ PHONY := __modsign __modsign: -include scripts/Kbuild.include +include $(srctree)/scripts/Kbuild.include modules := $(sort $(shell cat modules.order)) diff --git a/scripts/as-version.sh b/scripts/as-version.sh new file mode 100755 index 0000000000000000000000000000000000000000..8b9410e329dfdbfafaf5d20c0a959164bfe990e6 --- /dev/null +++ b/scripts/as-version.sh @@ -0,0 +1,82 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only +# +# Print the assembler name and its version in a 5 or 6-digit form. +# Also, perform the minimum version check. +# (If it is the integrated assembler, return 0 as the version, and +# skip the version check.) + +set -e + +# Convert the version string x.y.z to a canonical 5 or 6-digit form. +get_canonical_version() +{ + IFS=. + set -- $1 + + # If the 2nd or 3rd field is missing, fill it with a zero. + # + # The 4th field, if present, is ignored. + # This occurs in development snapshots as in 2.35.1.20201116 + echo $((10000 * $1 + 100 * ${2:-0} + ${3:-0})) +} + +# Clang fails to handle -Wa,--version unless -no-integrated-as is given. +# We check -(f)integrated-as, expecting it is explicitly passed in for the +# integrated assembler case. +check_integrated_as() +{ + while [ $# -gt 0 ]; do + if [ "$1" = -integrated-as -o "$1" = -fintegrated-as ]; then + # For the intergrated assembler, we do not check the + # version here. It is the same as the clang version, and + # it has been already checked by scripts/cc-version.sh. + echo LLVM 0 + exit 0 + fi + shift + done +} + +check_integrated_as "$@" + +orig_args="$@" + +# Get the first line of the --version output. +IFS=' +' +set -- $(LC_ALL=C "$@" -Wa,--version -c -x assembler /dev/null -o /dev/null 2>/dev/null) + +# Split the line on spaces. +IFS=' ' +set -- $1 + +min_tool_version=$(dirname $0)/min-tool-version.sh + +if [ "$1" = GNU -a "$2" = assembler ]; then + shift $(($# - 1)) + version=$1 + min_version=$($min_tool_version binutils) + name=GNU +else + echo "$orig_args: unknown assembler invoked" >&2 + exit 1 +fi + +# Some distributions append a package release number, as in 2.34-4.fc32 +# Trim the hyphen and any characters that follow. +version=${version%-*} + +cversion=$(get_canonical_version $version) +min_cversion=$(get_canonical_version $min_version) + +if [ "$cversion" -lt "$min_cversion" ]; then + echo >&2 "***" + echo >&2 "*** Assembler is too old." + echo >&2 "*** Your $name assembler version: $version" + echo >&2 "*** Minimum $name assembler version: $min_version" + echo >&2 "***" + exit 1 +fi + +echo $name $cversion diff --git a/scripts/cc-version.sh b/scripts/cc-version.sh new file mode 100755 index 0000000000000000000000000000000000000000..f1952c522466244f8b6388eb2cfc3f2c259a30b3 --- /dev/null +++ b/scripts/cc-version.sh @@ -0,0 +1,72 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Print the compiler name and its version in a 5 or 6-digit form. +# Also, perform the minimum version check. + +set -e + +# Print the compiler name and some version components. +get_compiler_info() +{ + cat <<- EOF | "$@" -E -P -x c - 2>/dev/null + #if defined(__clang__) + Clang __clang_major__ __clang_minor__ __clang_patchlevel__ + #elif defined(__INTEL_COMPILER) + ICC __INTEL_COMPILER __INTEL_COMPILER_UPDATE + #elif defined(__GNUC__) + GCC __GNUC__ __GNUC_MINOR__ __GNUC_PATCHLEVEL__ + #else + unknown + #endif + EOF +} + +# Convert the version string x.y.z to a canonical 5 or 6-digit form. +get_canonical_version() +{ + IFS=. + set -- $1 + echo $((10000 * $1 + 100 * $2 + $3)) +} + +# $@ instead of $1 because multiple words might be given, e.g. CC="ccache gcc". +orig_args="$@" +set -- $(get_compiler_info "$@") + +name=$1 + +min_tool_version=$(dirname $0)/min-tool-version.sh + +case "$name" in +GCC) + version=$2.$3.$4 + min_version=$($min_tool_version gcc) + ;; +Clang) + version=$2.$3.$4 + min_version=$($min_tool_version llvm) + ;; +ICC) + version=$(($2 / 100)).$(($2 % 100)).$3 + min_version=$($min_tool_version icc) + ;; +*) + echo "$orig_args: unknown compiler" >&2 + exit 1 + ;; +esac + +cversion=$(get_canonical_version $version) +min_cversion=$(get_canonical_version $min_version) + +if [ "$cversion" -lt "$min_cversion" ]; then + echo >&2 "***" + echo >&2 "*** Compiler is too old." + echo >&2 "*** Your $name version: $version" + echo >&2 "*** Minimum $name version: $min_version" + echo >&2 "***" + exit 1 +fi + +echo $name $cversion diff --git a/scripts/clang-version.sh b/scripts/clang-version.sh deleted file mode 100755 index 6fabf0695761977631bb044c1db44f2d464a45f8..0000000000000000000000000000000000000000 --- a/scripts/clang-version.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# -# clang-version clang-command -# -# Print the compiler version of `clang-command' in a 5 or 6-digit form -# such as `50001' for clang-5.0.1 etc. - -compiler="$*" - -if ! ( $compiler --version | grep -q clang) ; then - echo 0 - exit 1 -fi - -MAJOR=$(echo __clang_major__ | $compiler -E -x c - | tail -n 1) -MINOR=$(echo __clang_minor__ | $compiler -E -x c - | tail -n 1) -PATCHLEVEL=$(echo __clang_patchlevel__ | $compiler -E -x c - | tail -n 1) -printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL diff --git a/scripts/dtc/dtx_diff b/scripts/dtc/dtx_diff index d3422ee15e300bc76af3b15adc682aacf2d4e1ee..f2bbde4bba86bc70b27bb5a992a262c1997db811 100755 --- a/scripts/dtc/dtx_diff +++ b/scripts/dtc/dtx_diff @@ -59,12 +59,8 @@ Otherwise DTx is treated as a dts source file (aka .dts). or '/include/' to be processed. If DTx_1 and DTx_2 are in different architectures, then this script - may not work since \${ARCH} is part of the include path. Two possible - workarounds: - - `basename $0` \\ - <(ARCH=arch_of_dtx_1 `basename $0` DTx_1) \\ - <(ARCH=arch_of_dtx_2 `basename $0` DTx_2) + may not work since \${ARCH} is part of the include path. The following + workaround can be used: `basename $0` ARCH=arch_of_dtx_1 DTx_1 >tmp_dtx_1.dts `basename $0` ARCH=arch_of_dtx_2 DTx_2 >tmp_dtx_2.dts diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc index 11c9f045ee4b938b77e45309193b6fa7be4c2259..9f5e943e7491a04dfd5dfebf93cb6721889a7371 100755 --- a/scripts/dummy-tools/gcc +++ b/scripts/dummy-tools/gcc @@ -67,6 +67,12 @@ if arg_contain -E "$@"; then fi fi +# To set CONFIG_AS_IS_GNU +if arg_contain -Wa,--version "$@"; then + echo "GNU assembler (scripts/dummy-tools) 2.50" + exit 0 +fi + if arg_contain -S "$@"; then # For scripts/gcc-x86-*-has-stack-protector.sh if arg_contain -fstack-protector "$@"; then @@ -75,16 +81,12 @@ if arg_contain -S "$@"; then fi fi -# For scripts/gcc-plugin.sh +# To set GCC_PLUGINS if arg_contain -print-file-name=plugin "$@"; then plugin_dir=$(mktemp -d) - sed -n 's/.*#include "\(.*\)"/\1/p' $(dirname $0)/../gcc-plugins/gcc-common.h | - while read header - do - mkdir -p $plugin_dir/include/$(dirname $header) - touch $plugin_dir/include/$header - done + mkdir -p $plugin_dir/include + touch $plugin_dir/include/plugin-version.h echo $plugin_dir exit 0 diff --git a/scripts/gcc-plugin.sh b/scripts/gcc-plugin.sh deleted file mode 100755 index b79fd0bea83844010355b0daf8fae23bb0435ace..0000000000000000000000000000000000000000 --- a/scripts/gcc-plugin.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -set -e - -srctree=$(dirname "$0") - -gccplugins_dir=$($* -print-file-name=plugin) - -# we need a c++ compiler that supports the designated initializer GNU extension -$HOSTCC -c -x c++ -std=gnu++98 - -fsyntax-only -I $srctree/gcc-plugins -I $gccplugins_dir/include 2>/dev/null <&2 - printf "Usage:\n\t$0 \n" >&2 - exit 1 -fi - -MAJOR=$(echo __GNUC__ | $compiler -E -x c - | tail -n 1) -MINOR=$(echo __GNUC_MINOR__ | $compiler -E -x c - | tail -n 1) -PATCHLEVEL=$(echo __GNUC_PATCHLEVEL__ | $compiler -E -x c - | tail -n 1) -printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL diff --git a/scripts/gen_autoksyms.sh b/scripts/gen_autoksyms.sh index da320151e7c39cd900dabac81c0ab0202c500bfb..6ed0d225c8b1c877abfc4256ddc1ee5e2e96516f 100755 --- a/scripts/gen_autoksyms.sh +++ b/scripts/gen_autoksyms.sh @@ -26,18 +26,6 @@ if [ -n "$CONFIG_MODVERSIONS" ]; then needed_symbols="$needed_symbols module_layout" fi -# With CONFIG_LTO_CLANG, LLVM bitcode has not yet been compiled into a binary -# when the .mod files are generated, which means they don't yet contain -# references to certain symbols that will be present in the final binaries. -if [ -n "$CONFIG_LTO_CLANG" ]; then - # intrinsic functions - needed_symbols="$needed_symbols memcpy memmove memset" - # ftrace - needed_symbols="$needed_symbols _mcount" - # stack protector symbols - needed_symbols="$needed_symbols __stack_chk_fail __stack_chk_guard" -fi - ksym_wl= if [ -n "$CONFIG_UNUSED_KSYMS_WHITELIST" ]; then # Use 'eval' to expand the whitelist path and check if it is relative diff --git a/scripts/gen_gki_modules_headers.sh b/scripts/gen_gki_modules_headers.sh new file mode 100755 index 0000000000000000000000000000000000000000..9fab5185db1923e73da2dbbd3c4927efb3533507 --- /dev/null +++ b/scripts/gen_gki_modules_headers.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright 2021 Google LLC +# Author: ramjiyani@google.com (Ramji Jiyani) +# + +# +# Generates hearder files for GKI modules symbol and export protections +# +# Called By: KERNEL_SRC/kernel/Makefile if CONFIG_MODULE_SIG_PROTECT=y +# +# gki_module_exported.h: Symbols protected from _export_ by unsigned modules +# gki_module_protected.h: Symbols protected from _access_ by unsigned modules +# +# If valid symbol file doesn't exists then still generates valid C header files for +# compilation to proceed with no symbols to protect +# + +# Collect arguments from Makefile +TARGET=$1 +SRCTREE=$2 + +set -e + +# +# Common Definitions +# + +# +# generate_header(): +# Args: $1 = Name of the header file +# $2 = Input symbol list +# $3 = Symbol type (protected/exported) +# +generate_header() { + local header_file=$1 + local symbol_file=$2 + local symbol_type=$3 + + echo " GEN ${header_file}" + if [ -f "${header_file}" ]; then + rm -f -- "${header_file}" + fi + + # Find Maximum symbol name length if valid symbol_file exist + if [ -s "${symbol_file}" ]; then + # Skip 1st line (symbol header), Trim white spaces & +1 for null termination + local max_name_len=$(awk ' + { + $1=$1; + if ( length > L && NR > 1) { + L=length + } + } END { print ++L }' "${symbol_file}") + else + # Set to 1 to generate valid C header file + local max_name_len=1 + fi + + # Header generation + cat > "${header_file}" <<- EOT + /* + * DO NOT EDIT + * + * Build generated header file with GKI module symbols/exports + */ + + #define NO_OF_$(printf ${symbol_type} | tr [:lower:] [:upper:])_SYMBOLS \\ + $(printf '\t')(sizeof(gki_${symbol_type}_symbols) / sizeof(gki_${symbol_type}_symbols[0])) + #define MAX_$(printf ${symbol_type} | tr [:lower:] [:upper:])_NAME_LEN (${max_name_len}) + + static const char gki_${symbol_type}_symbols[][MAX_$(printf ${symbol_type} | + tr [:lower:] [:upper:])_NAME_LEN] = { + EOT + + # If a valid symbol_file present add symbols in an array except the 1st line + if [ -s "${symbol_file}" ]; then + sed -e 1d -e 's/^[ \t]*/\t"/;s/[ \t]*$/",/' "${symbol_file}" >> "${header_file}" + fi + + # Terminate the file + echo "};" >> "${header_file}" +} + +if [ "$(basename "${TARGET}")" = "gki_module_protected.h" ]; then + # Sorted list of protected symbols + GKI_PROTECTED_SYMBOLS="${SRCTREE}/android/abi_gki_modules_protected" + + generate_header "${TARGET}" "${GKI_PROTECTED_SYMBOLS}" "protected" +else + # Sorted list of exported symbols + GKI_EXPORTED_SYMBOLS="${SRCTREE}/android/abi_gki_modules_exports" + + generate_header "${TARGET}" "${GKI_EXPORTED_SYMBOLS}" "exported" +fi diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c index a39d93e3c6ae8d34f617b835d8ebcf5900cbad89..867b06c6d27976b6458dc37e80234ce6bf0dbb2c 100644 --- a/scripts/kconfig/confdata.c +++ b/scripts/kconfig/confdata.c @@ -968,14 +968,19 @@ static int conf_write_dep(const char *name) static int conf_touch_deps(void) { - const char *name; + const char *name, *tmp; struct symbol *sym; int res, i; - strcpy(depfile_path, "include/config/"); - depfile_prefix_len = strlen(depfile_path); - name = conf_get_autoconfig_name(); + tmp = strrchr(name, '/'); + depfile_prefix_len = tmp ? tmp - name + 1 : 0; + if (depfile_prefix_len + 1 > sizeof(depfile_path)) + return -1; + + strncpy(depfile_path, name, depfile_prefix_len); + depfile_path[depfile_prefix_len] = 0; + conf_read_simple(name, S_DEF_AUTO); sym_calc_value(modules_sym); diff --git a/scripts/kconfig/preprocess.c b/scripts/kconfig/preprocess.c index 0590f86df6e40cfd073100904f3ec5cfbe650f5f..748da578b418c4acb53f454e17e2370d2050ceb6 100644 --- a/scripts/kconfig/preprocess.c +++ b/scripts/kconfig/preprocess.c @@ -141,7 +141,7 @@ static char *do_lineno(int argc, char *argv[]) static char *do_shell(int argc, char *argv[]) { FILE *p; - char buf[256]; + char buf[4096]; char *cmd; size_t nread; int i; diff --git a/scripts/ld-version.sh b/scripts/ld-version.sh index f2be0ff9a738aca75c014c80b7535833d1eeb1a1..a78b804b680cf3d46ee3feb335b89689cf9ac5ac 100755 --- a/scripts/ld-version.sh +++ b/scripts/ld-version.sh @@ -1,11 +1,76 @@ -#!/usr/bin/awk -f +#!/bin/sh # SPDX-License-Identifier: GPL-2.0 -# extract linker version number from stdin and turn into single number - { - gsub(".*\\)", ""); - gsub(".*version ", ""); - gsub("-.*", ""); - split($1,a, "."); - print a[1]*100000000 + a[2]*1000000 + a[3]*10000; - exit - } +# +# Print the linker name and its version in a 5 or 6-digit form. +# Also, perform the minimum version check. + +set -e + +# Convert the version string x.y.z to a canonical 5 or 6-digit form. +get_canonical_version() +{ + IFS=. + set -- $1 + + # If the 2nd or 3rd field is missing, fill it with a zero. + # + # The 4th field, if present, is ignored. + # This occurs in development snapshots as in 2.35.1.20201116 + echo $((10000 * $1 + 100 * ${2:-0} + ${3:-0})) +} + +orig_args="$@" + +# Get the first line of the --version output. +IFS=' +' +set -- $(LC_ALL=C "$@" --version) + +# Split the line on spaces. +IFS=' ' +set -- $1 + +min_tool_version=$(dirname $0)/min-tool-version.sh + +if [ "$1" = GNU -a "$2" = ld ]; then + shift $(($# - 1)) + version=$1 + min_version=$($min_tool_version binutils) + name=BFD + disp_name="GNU ld" +elif [ "$1" = GNU -a "$2" = gold ]; then + echo "gold linker is not supported as it is not capable of linking the kernel proper." >&2 + exit 1 +else + while [ $# -gt 1 -a "$1" != "LLD" ]; do + shift + done + + if [ "$1" = LLD ]; then + version=$2 + min_version=$($min_tool_version llvm) + name=LLD + disp_name=LLD + else + echo "$orig_args: unknown linker" >&2 + exit 1 + fi +fi + +# Some distributions append a package release number, as in 2.34-4.fc32 +# Trim the hyphen and any characters that follow. +version=${version%-*} + +cversion=$(get_canonical_version $version) +min_cversion=$(get_canonical_version $min_version) + +if [ "$cversion" -lt "$min_cversion" ]; then + echo >&2 "***" + echo >&2 "*** Linker is too old." + echo >&2 "*** Your $disp_name version: $version" + echo >&2 "*** Minimum $disp_name version: $min_version" + echo >&2 "***" + exit 1 +fi + +echo $name $cversion diff --git a/scripts/leaking_addresses.pl b/scripts/leaking_addresses.pl index b2d8b8aa2d99eca75873bfa1a31fbbcd9f9fe13d..8f636a23bc3f24bf0c56a1f760596b5e724516fc 100755 --- a/scripts/leaking_addresses.pl +++ b/scripts/leaking_addresses.pl @@ -455,8 +455,9 @@ sub parse_file open my $fh, "<", $file or return; while ( <$fh> ) { + chomp; if (may_leak_address($_)) { - print $file . ': ' . $_; + printf("$file: $_\n"); } } close $fh; diff --git a/scripts/lld-version.sh b/scripts/lld-version.sh deleted file mode 100755 index d70edb4d8a4f2d865e3f4498fa31c5b12e6aafe2..0000000000000000000000000000000000000000 --- a/scripts/lld-version.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# -# Usage: $ ./scripts/lld-version.sh ld.lld -# -# Print the linker version of `ld.lld' in a 5 or 6-digit form -# such as `100001' for ld.lld 10.0.1 etc. - -linker_string="$($* --version)" - -if ! ( echo $linker_string | grep -q LLD ); then - echo 0 - exit 1 -fi - -VERSION=$(echo $linker_string | cut -d ' ' -f 2) -MAJOR=$(echo $VERSION | cut -d . -f 1) -MINOR=$(echo $VERSION | cut -d . -f 2) -PATCHLEVEL=$(echo $VERSION | cut -d . -f 3) -printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh new file mode 100755 index 0000000000000000000000000000000000000000..d22cf91212b0b55af91815fa724c5ee6c91620dd --- /dev/null +++ b/scripts/min-tool-version.sh @@ -0,0 +1,39 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only +# +# Print the minimum supported version of the given tool. +# When you raise the minimum version, please update +# Documentation/process/changes.rst as well. + +set -e + +if [ $# != 1 ]; then + echo "Usage: $0 toolname" >&2 + exit 1 +fi + +case "$1" in +binutils) + echo 2.23.0 + ;; +gcc) + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293 + # https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk + if [ "$SRCARCH" = arm64 ]; then + echo 5.1.0 + else + echo 4.9.0 + fi + ;; +icc) + # temporary + echo 16.0.3 + ;; +llvm) + echo 10.0.1 + ;; +*) + echo "$1: unknown tool" >&2 + exit 1 + ;; +esac diff --git a/scripts/module.lds.S b/scripts/module.lds.S index 1713fc5383bbf7fb2367163b671add2d6d78c45e..6cca5d88744eb401cf30079405f284e86c18db41 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -56,22 +56,24 @@ SECTIONS { *(.rodata.._end) } -#ifdef CONFIG_CFI_CLANG - /* - * With CFI_CLANG, ensure __cfi_check is at the beginning of the - * .text section, and that the section is aligned to page size. - */ .text : ALIGN(PAGE_SIZE) { *(.text.._start) +#ifdef CONFIG_CFI_CLANG + /* + * With CFI_CLANG, ensure __cfi_check is at the beginning of + * the .text section, and that the section is aligned to page + * size. + */ *(.text.__cfi_check) +#endif *(.text .text.[0-9a-zA-Z_]*) __cfi_jt_start = .; *(.text..L.cfi.jumptable .text..L.cfi.jumptable.*) __cfi_jt_end = .; *(.text.._end) + *(.text.._fips140_unchecked) } #endif -#endif } /* bring in arch-specific sections */ diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index 4f84657f55c23cbfd15dd405131478e56d35fb5d..dc1d3696af6b88091a4efa0ae87b653b22e787a7 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -222,7 +222,7 @@ if ($arch =~ /(x86(_64)?)|(i386)/) { $local_regex = "^[0-9a-fA-F]+\\s+t\\s+(\\S+)"; $weak_regex = "^[0-9a-fA-F]+\\s+([wW])\\s+(\\S+)"; $section_regex = "Disassembly of section\\s+(\\S+):"; -$function_regex = "^([0-9a-fA-F]+)\\s+<(.*?)>:"; +$function_regex = "^([0-9a-fA-F]+)\\s+<([^^]*?)>:"; $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s(mcount|__fentry__)\$"; $section_type = '@progbits'; $mcount_adjust = 0; @@ -252,7 +252,7 @@ if ($arch eq "x86_64") { } elsif ($arch eq "s390" && $bits == 64) { if ($cc =~ /-DCC_USING_HOTPATCH/) { - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*brcl\\s*0,[0-9a-f]+ <([^\+]*)>\$"; + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*c0 04 00 00 00 00\\s*(brcl\\s*0,|jgnop\\s*)[0-9a-f]+ <([^\+]*)>\$"; $mcount_adjust = 0; } else { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; diff --git a/scripts/setlocalversion b/scripts/setlocalversion index d3479a49dde20d3f5bca4da96206439909bdb8cc..2f51010f999305ba37af7b07aa01ed61c915a7ba 100755 --- a/scripts/setlocalversion +++ b/scripts/setlocalversion @@ -41,8 +41,6 @@ if test $# -gt 0; then kmi_generation=$1 [ $(expr $kmi_generation : '^[0-9]\+$') -eq 0 ] && usage shift - else - usage fi fi if test $# -gt 0 -o ! -d "$srctree"; then @@ -68,6 +66,8 @@ scm_version() if [ -n "$android_release" ] && [ -n "$kmi_generation" ]; then printf '%s' "-$android_release-$kmi_generation" + elif [ -n "$android_release" ]; then + printf '%s' "-$android_release" fi # If we are at a tagged commit (like "v2.6.30-rc6"), we ignore diff --git a/scripts/sign-file.c b/scripts/sign-file.c index fbd34b8e8f578aba003533348c907899779fb62c..8f06328d1cd1116e0045f587beb7d6ccf72d162b 100644 --- a/scripts/sign-file.c +++ b/scripts/sign-file.c @@ -92,6 +92,7 @@ static void display_openssl_errors(int l) } } +#ifndef OPENSSL_NO_ENGINE static void drain_openssl_errors(void) { const char *file; @@ -101,6 +102,7 @@ static void drain_openssl_errors(void) return; while (ERR_get_error_line(&file, &line)) {} } +#endif #define ERR(cond, fmt, ...) \ do { \ @@ -135,7 +137,9 @@ static int pem_pw_cb(char *buf, int len, int w, void *v) static EVP_PKEY *read_private_key(const char *private_key_name) { EVP_PKEY *private_key; + BIO *b; +#ifndef OPENSSL_NO_ENGINE if (!strncmp(private_key_name, "pkcs11:", 7)) { ENGINE *e; @@ -153,17 +157,16 @@ static EVP_PKEY *read_private_key(const char *private_key_name) private_key = ENGINE_load_private_key(e, private_key_name, NULL, NULL); ERR(!private_key, "%s", private_key_name); - } else { - BIO *b; - - b = BIO_new_file(private_key_name, "rb"); - ERR(!b, "%s", private_key_name); - private_key = PEM_read_bio_PrivateKey(b, NULL, pem_pw_cb, - NULL); - ERR(!private_key, "%s", private_key_name); - BIO_free(b); + return private_key; } +#endif + b = BIO_new_file(private_key_name, "rb"); + ERR(!b, "%s", private_key_name); + private_key = PEM_read_bio_PrivateKey(b, NULL, pem_pw_cb, + NULL); + ERR(!private_key, "%s", private_key_name); + BIO_free(b); return private_key; } diff --git a/scripts/sphinx-pre-install b/scripts/sphinx-pre-install index 828a8615a9181ccdb4a9883df20ea58c9762984d..8fcea769d44f50dcf3a85054d33871cc2898c911 100755 --- a/scripts/sphinx-pre-install +++ b/scripts/sphinx-pre-install @@ -76,6 +76,7 @@ my %texlive = ( 'ucs.sty' => 'texlive-ucs', 'upquote.sty' => 'texlive-upquote', 'wrapfig.sty' => 'texlive-wrapfig', + 'ctexhook.sty' => 'texlive-ctex', ); # @@ -370,6 +371,9 @@ sub give_debian_hints() ); if ($pdf) { + check_missing_file(["/usr/share/texlive/texmf-dist/tex/latex/ctex/ctexhook.sty"], + "texlive-lang-chinese", 2); + check_missing_file(["/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"], "fonts-dejavu", 2); diff --git a/security/Kconfig b/security/Kconfig index 7561f6f99f1d294d354f6c58cd46f58006e5f29b..0548db16c49dca4db75afd69de21540335403072 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -191,6 +191,9 @@ config HARDENED_USERCOPY_PAGESPAN config FORTIFY_SOURCE bool "Harden common str/mem functions against buffer overflows" depends on ARCH_HAS_FORTIFY_SOURCE + # https://bugs.llvm.org/show_bug.cgi?id=50322 + # https://bugs.llvm.org/show_bug.cgi?id=41459 + depends on !CC_IS_CLANG help Detect overflows of buffers in common string and memory functions where the compiler can determine and validate the buffer sizes. diff --git a/security/apparmor/label.c b/security/apparmor/label.c index e68bcedca976b9340919216cfd7bdfa55deb268b..6222fdfebe4e55a905a55ed554f505b9bb34d216 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -1454,7 +1454,7 @@ bool aa_update_label_name(struct aa_ns *ns, struct aa_label *label, gfp_t gfp) if (label->hname || labels_ns(label) != ns) return res; - if (aa_label_acntsxprint(&name, ns, label, FLAGS_NONE, gfp) == -1) + if (aa_label_acntsxprint(&name, ns, label, FLAGS_NONE, gfp) < 0) return res; ls = labels_set(label); @@ -1704,7 +1704,7 @@ int aa_label_asxprint(char **strp, struct aa_ns *ns, struct aa_label *label, /** * aa_label_acntsxprint - allocate a __counted string buffer and print label - * @strp: buffer to write to. (MAY BE NULL if @size == 0) + * @strp: buffer to write to. * @ns: namespace profile is being viewed from * @label: label to view (NOT NULL) * @flags: flags controlling what label info is printed diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 9672b59cb9ff7824f07cf5d68b3e0e46061be405..fab2dc30b5f47c54a9b1e6370495216caac144f4 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -54,7 +54,7 @@ static struct xattr_list evm_config_default_xattrnames[] = { LIST_HEAD(evm_config_xattrnames); -static int evm_fixmode; +static int evm_fixmode __ro_after_init; static int __init evm_set_fixmode(char *str) { if (strncmp(str, "fix", 3) == 0) diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index ea8ff8a07b36b4b80cbb664126614643e628d1e7..98d5a800fe5b01dfc3b538ab21ba1c516eb9fa5a 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -496,12 +496,12 @@ int __init ima_fs_init(void) return 0; out: + securityfs_remove(ima_policy); securityfs_remove(violations); securityfs_remove(runtime_measurements_count); securityfs_remove(ascii_runtime_measurements); securityfs_remove(binary_runtime_measurements); securityfs_remove(ima_symlink); securityfs_remove(ima_dir); - securityfs_remove(ima_policy); return -1; } diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c index 9b5adeaa47fc819c4c9fc6396871db2848770505..e737c216efc4971133f09731ac6a3fba968c1b0e 100644 --- a/security/integrity/ima/ima_policy.c +++ b/security/integrity/ima/ima_policy.c @@ -1636,6 +1636,14 @@ int ima_policy_show(struct seq_file *m, void *v) rcu_read_lock(); + /* Do not print rules with inactive LSM labels */ + for (i = 0; i < MAX_LSM_RULES; i++) { + if (entry->lsm[i].args_p && !entry->lsm[i].rule) { + rcu_read_unlock(); + return 0; + } + } + if (entry->action & MEASURE) seq_puts(m, pt(Opt_measure)); if (entry->action & DONT_MEASURE) diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c index f83255a39e6538574617592587b5067a456b26c8..f64c01d53e96aabf0b00d55266a3be2ce4e12725 100644 --- a/security/integrity/ima/ima_template.c +++ b/security/integrity/ima/ima_template.c @@ -27,6 +27,7 @@ static struct ima_template_desc builtin_templates[] = { static LIST_HEAD(defined_templates); static DEFINE_SPINLOCK(template_list); +static int template_setup_done; static const struct ima_template_field supported_fields[] = { {.field_id = "d", .field_init = ima_eventdigest_init, @@ -80,10 +81,11 @@ static int __init ima_template_setup(char *str) struct ima_template_desc *template_desc; int template_len = strlen(str); - if (ima_template) + if (template_setup_done) return 1; - ima_init_template_list(); + if (!ima_template) + ima_init_template_list(); /* * Verify that a template with the supplied name exists. @@ -107,6 +109,7 @@ static int __init ima_template_setup(char *str) } ima_template = template_desc; + template_setup_done = 1; return 1; } __setup("ima_template=", ima_template_setup); @@ -115,7 +118,7 @@ static int __init ima_template_fmt_setup(char *str) { int num_templates = ARRAY_SIZE(builtin_templates); - if (ima_template) + if (template_setup_done) return 1; if (template_desc_init_fields(str, NULL, NULL) < 0) { @@ -126,6 +129,7 @@ static int __init ima_template_fmt_setup(char *str) builtin_templates[num_templates - 1].fmt = str; ima_template = builtin_templates + num_templates - 1; + template_setup_done = 1; return 1; } diff --git a/security/integrity/integrity_audit.c b/security/integrity/integrity_audit.c index 29220056207f4e51e83afda6965816d7032d5f6e..0ec5e4c22cb2a1066c2b897776ead6d3db72635c 100644 --- a/security/integrity/integrity_audit.c +++ b/security/integrity/integrity_audit.c @@ -45,6 +45,8 @@ void integrity_audit_message(int audit_msgno, struct inode *inode, return; ab = audit_log_start(audit_context(), GFP_KERNEL, audit_msgno); + if (!ab) + return; audit_log_format(ab, "pid=%d uid=%u auid=%u ses=%u", task_pid_nr(current), from_kuid(&init_user_ns, current_uid()), diff --git a/security/security.c b/security/security.c index cf8ddcae1c6b5baeb5e6f44d77eb7094274f9919..1e0f79c665a34478f301bffa48d6dd9e81139802 100644 --- a/security/security.c +++ b/security/security.c @@ -689,25 +689,25 @@ static void __init lsm_early_task(struct task_struct *task) /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr) +int security_binder_set_context_mgr(const struct cred *mgr) { return call_int_hook(binder_set_context_mgr, 0, mgr); } -int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +int security_binder_transaction(const struct cred *from, + const struct cred *to) { return call_int_hook(binder_transaction, 0, from, to); } -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return call_int_hook(binder_transfer_binder, 0, from, to); } -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file) +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return call_int_hook(binder_transfer_file, 0, from, to, file); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a490c8449cacf9e99920bcb817bf236e3c99f291..56a6a7056388f177c6fd88d9c91645fe30783dd6 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -948,18 +948,22 @@ out: static int selinux_add_opt(int token, const char *s, void **mnt_opts) { struct selinux_mnt_opts *opts = *mnt_opts; + bool is_alloc_opts = false; if (token == Opt_seclabel) /* eaten and completely ignored */ return 0; + if (!s) + return -ENOMEM; + if (!opts) { opts = kzalloc(sizeof(struct selinux_mnt_opts), GFP_KERNEL); if (!opts) return -ENOMEM; *mnt_opts = opts; + is_alloc_opts = true; } - if (!s) - return -ENOMEM; + switch (token) { case Opt_context: if (opts->context || opts->defcontext) @@ -984,6 +988,10 @@ static int selinux_add_opt(int token, const char *s, void **mnt_opts) } return 0; Einval: + if (is_alloc_opts) { + kfree(opts); + *mnt_opts = NULL; + } pr_warn(SEL_MOUNT_FAIL_MSG); return -EINVAL; } @@ -2007,22 +2015,19 @@ static inline u32 open_file_to_av(struct file *file) /* Hook functions begin here. */ -static int selinux_binder_set_context_mgr(struct task_struct *mgr) +static int selinux_binder_set_context_mgr(const struct cred *mgr) { - u32 mysid = current_sid(); - u32 mgrsid = task_sid(mgr); - return avc_has_perm(&selinux_state, - mysid, mgrsid, SECCLASS_BINDER, + current_sid(), cred_sid(mgr), SECCLASS_BINDER, BINDER__SET_CONTEXT_MGR, NULL); } -static int selinux_binder_transaction(struct task_struct *from, - struct task_struct *to) +static int selinux_binder_transaction(const struct cred *from, + const struct cred *to) { u32 mysid = current_sid(); - u32 fromsid = task_sid(from); - u32 tosid = task_sid(to); + u32 fromsid = cred_sid(from); + u32 tosid = cred_sid(to); int rc; if (mysid != fromsid) { @@ -2033,27 +2038,24 @@ static int selinux_binder_transaction(struct task_struct *from, return rc; } - return avc_has_perm(&selinux_state, - fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, - NULL); + return avc_has_perm(&selinux_state, fromsid, tosid, + SECCLASS_BINDER, BINDER__CALL, NULL); } -static int selinux_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static int selinux_binder_transfer_binder(const struct cred *from, + const struct cred *to) { - u32 fromsid = task_sid(from); - u32 tosid = task_sid(to); - return avc_has_perm(&selinux_state, - fromsid, tosid, SECCLASS_BINDER, BINDER__TRANSFER, + cred_sid(from), cred_sid(to), + SECCLASS_BINDER, BINDER__TRANSFER, NULL); } -static int selinux_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static int selinux_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { - u32 sid = task_sid(to); + u32 sid = cred_sid(to); struct file_security_struct *fsec = selinux_file(file); struct dentry *dentry = file->f_path.dentry; struct inode_security_struct *isec; @@ -5730,7 +5732,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, struct common_audit_data ad; struct lsm_network_audit net = {0,}; char *addrp; - u8 proto; + u8 proto = 0; if (sk == NULL) return NF_ACCEPT; diff --git a/security/selinux/ss/conditional.c b/security/selinux/ss/conditional.c index 1ef74c085f2b0f288ca689097e38ce03be6090c7..865611127357ee83b8016b65f26dd36791a18727 100644 --- a/security/selinux/ss/conditional.c +++ b/security/selinux/ss/conditional.c @@ -152,6 +152,8 @@ static void cond_list_destroy(struct policydb *p) for (i = 0; i < p->cond_list_len; i++) cond_node_destroy(&p->cond_list[i]); kfree(p->cond_list); + p->cond_list = NULL; + p->cond_list_len = 0; } void cond_policydb_destroy(struct policydb *p) @@ -440,7 +442,6 @@ int cond_read_list(struct policydb *p, void *fp) return 0; err: cond_list_destroy(p); - p->cond_list = NULL; return rc; } diff --git a/security/selinux/ss/hashtab.c b/security/selinux/ss/hashtab.c index dab8c25c739b91e082156a5a47a73a852786b105..7335f67ce54eb1f2a0b3a00512f7a4bd2ac3c168 100644 --- a/security/selinux/ss/hashtab.c +++ b/security/selinux/ss/hashtab.c @@ -30,13 +30,20 @@ static u32 hashtab_compute_size(u32 nel) int hashtab_init(struct hashtab *h, u32 nel_hint) { - h->size = hashtab_compute_size(nel_hint); + u32 size = hashtab_compute_size(nel_hint); + + /* should already be zeroed, but better be safe */ h->nel = 0; - if (!h->size) - return 0; + h->size = 0; + h->htable = NULL; - h->htable = kcalloc(h->size, sizeof(*h->htable), GFP_KERNEL); - return h->htable ? 0 : -ENOMEM; + if (size) { + h->htable = kcalloc(size, sizeof(*h->htable), GFP_KERNEL); + if (!h->htable) + return -ENOMEM; + h->size = size; + } + return 0; } int __hashtab_insert(struct hashtab *h, struct hashtab_node **dst, diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index a2fcde88ce0f8132f6b6976d1e735d02bad3ef33..5df46e23824497d06863362790d4cfa14cef07d0 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2375,6 +2375,43 @@ err_policy: return rc; } +/** + * ocontext_to_sid - Helper to safely get sid for an ocontext + * @sidtab: SID table + * @c: ocontext structure + * @index: index of the context entry (0 or 1) + * @out_sid: pointer to the resulting SID value + * + * For all ocontexts except OCON_ISID the SID fields are populated + * on-demand when needed. Since updating the SID value is an SMP-sensitive + * operation, this helper must be used to do that safely. + * + * WARNING: This function may return -ESTALE, indicating that the caller + * must retry the operation after re-acquiring the policy pointer! + */ +static int ocontext_to_sid(struct sidtab *sidtab, struct ocontext *c, + size_t index, u32 *out_sid) +{ + int rc; + u32 sid; + + /* Ensure the associated sidtab entry is visible to this thread. */ + sid = smp_load_acquire(&c->sid[index]); + if (!sid) { + rc = sidtab_context_to_sid(sidtab, &c->context[index], &sid); + if (rc) + return rc; + + /* + * Ensure the new sidtab entry is visible to other threads + * when they see the SID. + */ + smp_store_release(&c->sid[index], sid); + } + *out_sid = sid; + return 0; +} + /** * security_port_sid - Obtain the SID for a port. * @protocol: protocol number @@ -2412,17 +2449,13 @@ retry: } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - *out_sid = c->sid[0]; + if (rc) + goto out; } else { *out_sid = SECINITSID_PORT; } @@ -2470,18 +2503,13 @@ retry: } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - *out_sid = c->sid[0]; + if (rc) + goto out; } else *out_sid = SECINITSID_UNLABELED; @@ -2529,17 +2557,13 @@ retry: } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - *out_sid = c->sid[0]; + if (rc) + goto out; } else *out_sid = SECINITSID_UNLABELED; @@ -2582,25 +2606,13 @@ retry: } if (c) { - if (!c->sid[0] || !c->sid[1]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; - rc = sidtab_context_to_sid(sidtab, &c->context[1], - &c->sid[1]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, if_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - *if_sid = c->sid[0]; + if (rc) + goto out; } else *if_sid = SECINITSID_NETIF; @@ -2691,18 +2703,13 @@ retry: } if (c) { - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, - &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, out_sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - *out_sid = c->sid[0]; + if (rc) + goto out; } else { *out_sid = SECINITSID_NODE; } @@ -2866,7 +2873,7 @@ static inline int __security_genfs_sid(struct selinux_policy *policy, u16 sclass; struct genfs *genfs; struct ocontext *c; - int rc, cmp = 0; + int cmp = 0; while (path[0] == '/' && path[1] == '/') path++; @@ -2880,9 +2887,8 @@ static inline int __security_genfs_sid(struct selinux_policy *policy, break; } - rc = -ENOENT; if (!genfs || cmp) - goto out; + return -ENOENT; for (c = genfs->head; c; c = c->next) { len = strlen(c->u.name); @@ -2891,20 +2897,10 @@ static inline int __security_genfs_sid(struct selinux_policy *policy, break; } - rc = -ENOENT; if (!c) - goto out; - - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], &c->sid[0]); - if (rc) - goto out; - } + return -ENOENT; - *sid = c->sid[0]; - rc = 0; -out: - return rc; + return ocontext_to_sid(sidtab, c, 0, sid); } /** @@ -2987,17 +2983,13 @@ retry: if (c) { sbsec->behavior = c->v.behavior; - if (!c->sid[0]) { - rc = sidtab_context_to_sid(sidtab, &c->context[0], - &c->sid[0]); - if (rc == -ESTALE) { - rcu_read_unlock(); - goto retry; - } - if (rc) - goto out; + rc = ocontext_to_sid(sidtab, c, 0, &sbsec->sid); + if (rc == -ESTALE) { + rcu_read_unlock(); + goto retry; } - sbsec->sid = c->sid[0]; + if (rc) + goto out; } else { rc = __security_genfs_sid(policy, fstype, "/", SECCLASS_DIR, &sbsec->sid); diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index b88c1a953833464ed9baa3c3f4a518c354b7affa..3eabcc469669e74b71ab6ed22e5e6bdd685b4b12 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -693,9 +693,7 @@ static void smk_cipso_doi(void) printk(KERN_WARNING "%s:%d remove rc = %d\n", __func__, __LINE__, rc); - doip = kmalloc(sizeof(struct cipso_v4_doi), GFP_KERNEL); - if (doip == NULL) - panic("smack: Failed to initialize cipso DOI.\n"); + doip = kmalloc(sizeof(struct cipso_v4_doi), GFP_KERNEL | __GFP_NOFAIL); doip->map.std = NULL; doip->doi = smk_cipso_doi_value; doip->type = CIPSO_V4_MAP_PASS; @@ -714,7 +712,7 @@ static void smk_cipso_doi(void) if (rc != 0) { printk(KERN_WARNING "%s:%d map add rc = %d\n", __func__, __LINE__, rc); - kfree(doip); + netlbl_cfg_cipsov4_del(doip->doi, &nai); return; } } @@ -831,6 +829,7 @@ static int smk_open_cipso(struct inode *inode, struct file *file) static ssize_t smk_set_cipso(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int format) { + struct netlbl_lsm_catmap *old_cat; struct smack_known *skp; struct netlbl_lsm_secattr ncats; char mapcatset[SMK_CIPSOLEN]; @@ -920,9 +919,11 @@ static ssize_t smk_set_cipso(struct file *file, const char __user *buf, rc = smk_netlbl_mls(maplevel, mapcatset, &ncats, SMK_CIPSOLEN); if (rc >= 0) { - netlbl_catmap_free(skp->smk_netlabel.attr.mls.cat); + old_cat = skp->smk_netlabel.attr.mls.cat; skp->smk_netlabel.attr.mls.cat = ncats.attr.mls.cat; skp->smk_netlabel.attr.mls.lvl = ncats.attr.mls.lvl; + synchronize_rcu(); + netlbl_catmap_free(old_cat); rc = count; /* * This mapping may have been cached, so clear the cache. diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index cd458e10cf2af9ee5c69a47a7ddf95f1a315186d..11dd8260c9cc73cd62595a7eec55ac496517783e 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -1046,10 +1046,11 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) return false; if (!domain) return true; + if (READ_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED])) + return false; list_for_each_entry_rcu(ptr, &domain->acl_info_list, list, srcu_read_lock_held(&tomoyo_ss)) { u16 perm; - u8 i; if (ptr->is_deleted) continue; @@ -1060,23 +1061,23 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) */ switch (ptr->type) { case TOMOYO_TYPE_PATH_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_path_acl, head)->perm); break; case TOMOYO_TYPE_PATH2_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path2_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_path2_acl, head)->perm); break; case TOMOYO_TYPE_PATH_NUMBER_ACL: - data_race(perm = container_of(ptr, struct tomoyo_path_number_acl, head) + perm = data_race(container_of(ptr, struct tomoyo_path_number_acl, head) ->perm); break; case TOMOYO_TYPE_MKDEV_ACL: - data_race(perm = container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); break; case TOMOYO_TYPE_INET_ACL: - data_race(perm = container_of(ptr, struct tomoyo_inet_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_inet_acl, head)->perm); break; case TOMOYO_TYPE_UNIX_ACL: - data_race(perm = container_of(ptr, struct tomoyo_unix_acl, head)->perm); + perm = data_race(container_of(ptr, struct tomoyo_unix_acl, head)->perm); break; case TOMOYO_TYPE_MANUAL_TASK_ACL: perm = 0; @@ -1084,21 +1085,17 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) default: perm = 1; } - for (i = 0; i < 16; i++) - if (perm & (1 << i)) - count++; + count += hweight16(perm); } if (count < tomoyo_profile(domain->ns, domain->profile)-> pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) return true; - if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) { - domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true; - /* r->granted = false; */ - tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); + WRITE_ONCE(domain->flags[TOMOYO_DIF_QUOTA_WARNED], true); + /* r->granted = false; */ + tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); #ifndef CONFIG_SECURITY_TOMOYO_INSECURE_BUILTIN_SETTING - pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", - domain->domainname->name); + pr_warn("WARNING: Domain '%s' has too many ACLs to hold. Stopped learning mode.\n", + domain->domainname->name); #endif - } return false; } diff --git a/sound/core/Makefile b/sound/core/Makefile index ee4a4a6b99ba77dab73026df41df852fd48ab559..d123587c0fd8f553f0a8af8de2f9f4c8545d82bf 100644 --- a/sound/core/Makefile +++ b/sound/core/Makefile @@ -9,7 +9,9 @@ ifneq ($(CONFIG_SND_PROC_FS),) snd-y += info.o snd-$(CONFIG_SND_OSSEMUL) += info_oss.o endif +ifneq ($(CONFIG_M68K),y) snd-$(CONFIG_ISA_DMA_API) += isadma.o +endif snd-$(CONFIG_SND_OSSEMUL) += sound_oss.o snd-$(CONFIG_SND_VMASTER) += vmaster.o snd-$(CONFIG_SND_JACK) += ctljack.o jack.o diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index 1d708aab9c98ea1c1a9af79c61b2f54a9a868139..97467f6a32a135790d62dd036c0d55aa9f53828f 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -264,6 +264,7 @@ static int copy_ctl_value_to_user(void __user *userdata, struct snd_ctl_elem_value *data, int type, int count) { + struct snd_ctl_elem_value32 __user *data32 = userdata; int i, size; if (type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || @@ -280,6 +281,8 @@ static int copy_ctl_value_to_user(void __user *userdata, if (copy_to_user(valuep, data->value.bytes.data, size)) return -EFAULT; } + if (copy_to_user(&data32->id, &data->id, sizeof(data32->id))) + return -EFAULT; return 0; } diff --git a/sound/core/jack.c b/sound/core/jack.c index 503c8af79d550d5c59e4dc523afe926232e370b5..dc2e06ae2414960beec0a04a1c0042e13407440f 100644 --- a/sound/core/jack.c +++ b/sound/core/jack.c @@ -54,10 +54,13 @@ static int snd_jack_dev_free(struct snd_device *device) struct snd_card *card = device->card; struct snd_jack_kctl *jack_kctl, *tmp_jack_kctl; + down_write(&card->controls_rwsem); list_for_each_entry_safe(jack_kctl, tmp_jack_kctl, &jack->kctl_list, list) { list_del_init(&jack_kctl->list); snd_ctl_remove(card, jack_kctl->kctl); } + up_write(&card->controls_rwsem); + if (jack->private_free) jack->private_free(jack); @@ -220,6 +223,10 @@ int snd_jack_new(struct snd_card *card, const char *id, int type, return -ENOMEM; jack->id = kstrdup(id, GFP_KERNEL); + if (jack->id == NULL) { + kfree(jack); + return -ENOMEM; + } /* don't creat input device for phantom jack */ if (!phantom_jack) { diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 0f335162f87c710760f7efa3f85a9185dcd8b2d1..966bef5acc750650a9e7b30f878601158e8733c7 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -133,6 +133,7 @@ int snd_dma_alloc_pages(int type, struct device *device, size_t size, if (WARN_ON(!dmab)) return -ENXIO; + size = PAGE_ALIGN(size); dmab->dev.type = type; dmab->dev.dev = device; dmab->bytes = 0; diff --git a/sound/core/oss/mixer_oss.c b/sound/core/oss/mixer_oss.c index f702c96a7478ff1ef3be32e62d4a16ba6fdb4b49..bfed82a3a1881a432440c54460dde252ac0b2481 100644 --- a/sound/core/oss/mixer_oss.c +++ b/sound/core/oss/mixer_oss.c @@ -130,11 +130,13 @@ static int snd_mixer_oss_devmask(struct snd_mixer_oss_file *fmixer) if (mixer == NULL) return -EIO; + mutex_lock(&mixer->reg_mutex); for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_volume || pslot->put_recsrc) result |= 1 << chn; } + mutex_unlock(&mixer->reg_mutex); return result; } @@ -146,11 +148,13 @@ static int snd_mixer_oss_stereodevs(struct snd_mixer_oss_file *fmixer) if (mixer == NULL) return -EIO; + mutex_lock(&mixer->reg_mutex); for (chn = 0; chn < 31; chn++) { pslot = &mixer->slots[chn]; if (pslot->put_volume && pslot->stereo) result |= 1 << chn; } + mutex_unlock(&mixer->reg_mutex); return result; } @@ -161,6 +165,7 @@ static int snd_mixer_oss_recmask(struct snd_mixer_oss_file *fmixer) if (mixer == NULL) return -EIO; + mutex_lock(&mixer->reg_mutex); if (mixer->put_recsrc && mixer->get_recsrc) { /* exclusive */ result = mixer->mask_recsrc; } else { @@ -172,6 +177,7 @@ static int snd_mixer_oss_recmask(struct snd_mixer_oss_file *fmixer) result |= 1 << chn; } } + mutex_unlock(&mixer->reg_mutex); return result; } @@ -182,11 +188,12 @@ static int snd_mixer_oss_get_recsrc(struct snd_mixer_oss_file *fmixer) if (mixer == NULL) return -EIO; + mutex_lock(&mixer->reg_mutex); if (mixer->put_recsrc && mixer->get_recsrc) { /* exclusive */ - int err; unsigned int index; - if ((err = mixer->get_recsrc(fmixer, &index)) < 0) - return err; + result = mixer->get_recsrc(fmixer, &index); + if (result < 0) + goto unlock; result = 1 << index; } else { struct snd_mixer_oss_slot *pslot; @@ -201,7 +208,10 @@ static int snd_mixer_oss_get_recsrc(struct snd_mixer_oss_file *fmixer) } } } - return mixer->oss_recsrc = result; + mixer->oss_recsrc = result; + unlock: + mutex_unlock(&mixer->reg_mutex); + return result; } static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsrc) @@ -214,6 +224,7 @@ static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsr if (mixer == NULL) return -EIO; + mutex_lock(&mixer->reg_mutex); if (mixer->get_recsrc && mixer->put_recsrc) { /* exclusive input */ if (recsrc & ~mixer->oss_recsrc) recsrc &= ~mixer->oss_recsrc; @@ -239,6 +250,7 @@ static int snd_mixer_oss_set_recsrc(struct snd_mixer_oss_file *fmixer, int recsr } } } + mutex_unlock(&mixer->reg_mutex); return result; } @@ -250,6 +262,7 @@ static int snd_mixer_oss_get_volume(struct snd_mixer_oss_file *fmixer, int slot) if (mixer == NULL || slot > 30) return -EIO; + mutex_lock(&mixer->reg_mutex); pslot = &mixer->slots[slot]; left = pslot->volume[0]; right = pslot->volume[1]; @@ -257,15 +270,21 @@ static int snd_mixer_oss_get_volume(struct snd_mixer_oss_file *fmixer, int slot) result = pslot->get_volume(fmixer, pslot, &left, &right); if (!pslot->stereo) right = left; - if (snd_BUG_ON(left < 0 || left > 100)) - return -EIO; - if (snd_BUG_ON(right < 0 || right > 100)) - return -EIO; + if (snd_BUG_ON(left < 0 || left > 100)) { + result = -EIO; + goto unlock; + } + if (snd_BUG_ON(right < 0 || right > 100)) { + result = -EIO; + goto unlock; + } if (result >= 0) { pslot->volume[0] = left; pslot->volume[1] = right; result = (left & 0xff) | ((right & 0xff) << 8); } + unlock: + mutex_unlock(&mixer->reg_mutex); return result; } @@ -278,6 +297,7 @@ static int snd_mixer_oss_set_volume(struct snd_mixer_oss_file *fmixer, if (mixer == NULL || slot > 30) return -EIO; + mutex_lock(&mixer->reg_mutex); pslot = &mixer->slots[slot]; if (left > 100) left = 100; @@ -288,10 +308,13 @@ static int snd_mixer_oss_set_volume(struct snd_mixer_oss_file *fmixer, if (pslot->put_volume) result = pslot->put_volume(fmixer, pslot, left, right); if (result < 0) - return result; + goto unlock; pslot->volume[0] = left; pslot->volume[1] = right; - return (left & 0xff) | ((right & 0xff) << 8); + result = (left & 0xff) | ((right & 0xff) << 8); + unlock: + mutex_unlock(&mixer->reg_mutex); + return result; } static int snd_mixer_oss_ioctl1(struct snd_mixer_oss_file *fmixer, unsigned int cmd, unsigned long arg) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 142fc751a84770111679f23d740a919c3377144c..d79febeebf0c591725703112330e020426ab7669 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -147,7 +147,7 @@ snd_pcm_hw_param_value_min(const struct snd_pcm_hw_params *params, * * Return the maximum value for field PAR. */ -static unsigned int +static int snd_pcm_hw_param_value_max(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir) { @@ -682,18 +682,24 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *oss_params, struct snd_pcm_hw_params *slave_params) { - size_t s; - size_t oss_buffer_size, oss_period_size, oss_periods; - size_t min_period_size, max_period_size; + ssize_t s; + ssize_t oss_buffer_size; + ssize_t oss_period_size, oss_periods; + ssize_t min_period_size, max_period_size; struct snd_pcm_runtime *runtime = substream->runtime; size_t oss_frame_size; oss_frame_size = snd_pcm_format_physical_width(params_format(oss_params)) * params_channels(oss_params) / 8; + oss_buffer_size = snd_pcm_hw_param_value_max(slave_params, + SNDRV_PCM_HW_PARAM_BUFFER_SIZE, + NULL); + if (oss_buffer_size <= 0) + return -EINVAL; oss_buffer_size = snd_pcm_plug_client_size(substream, - snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_BUFFER_SIZE, NULL)) * oss_frame_size; - if (!oss_buffer_size) + oss_buffer_size * oss_frame_size); + if (oss_buffer_size <= 0) return -EINVAL; oss_buffer_size = rounddown_pow_of_two(oss_buffer_size); if (atomic_read(&substream->mmap_count)) { @@ -730,7 +736,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, min_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_min(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - if (min_period_size) { + if (min_period_size > 0) { min_period_size *= oss_frame_size; min_period_size = roundup_pow_of_two(min_period_size); if (oss_period_size < min_period_size) @@ -739,7 +745,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, max_period_size = snd_pcm_plug_client_size(substream, snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, NULL)); - if (max_period_size) { + if (max_period_size > 0) { max_period_size *= oss_frame_size; max_period_size = rounddown_pow_of_two(max_period_size); if (oss_period_size > max_period_size) @@ -752,7 +758,7 @@ static int snd_pcm_oss_period_size(struct snd_pcm_substream *substream, oss_periods = substream->oss.setup.periods; s = snd_pcm_hw_param_value_max(slave_params, SNDRV_PCM_HW_PARAM_PERIODS, NULL); - if (runtime->oss.maxfrags && s > runtime->oss.maxfrags) + if (s > 0 && runtime->oss.maxfrags && s > runtime->oss.maxfrags) s = runtime->oss.maxfrags; if (oss_periods > s) oss_periods = s; @@ -878,8 +884,15 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) err = -EINVAL; goto failure; } - choose_rate(substream, sparams, runtime->oss.rate); - snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_CHANNELS, runtime->oss.channels, NULL); + + err = choose_rate(substream, sparams, runtime->oss.rate); + if (err < 0) + goto failure; + err = snd_pcm_hw_param_near(substream, sparams, + SNDRV_PCM_HW_PARAM_CHANNELS, + runtime->oss.channels, NULL); + if (err < 0) + goto failure; format = snd_pcm_oss_format_from(runtime->oss.format); @@ -1947,7 +1960,7 @@ static int snd_pcm_oss_set_fragment1(struct snd_pcm_substream *substream, unsign if (runtime->oss.subdivision || runtime->oss.fragshift) return -EINVAL; fragshift = val & 0xffff; - if (fragshift >= 31) + if (fragshift >= 25) /* should be large enough */ return -EINVAL; runtime->oss.fragshift = fragshift; runtime->oss.maxfrags = (val >> 16) & 0xffff; @@ -2043,7 +2056,7 @@ static int snd_pcm_oss_set_trigger(struct snd_pcm_oss_file *pcm_oss_file, int tr int err, cmd; #ifdef OSS_DEBUG - pcm_dbg(substream->pcm, "pcm_oss: trigger = 0x%x\n", trigger); + pr_debug("pcm_oss: trigger = 0x%x\n", trigger); #endif psubstream = pcm_oss_file->streams[SNDRV_PCM_STREAM_PLAYBACK]; diff --git a/sound/core/pcm.c b/sound/core/pcm.c index 41cbdac5b1cfaa6e93fc0d992727c04dc070f1c4..a8ae5928decdad11d29699114ea8dea3757a37c2 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -810,7 +810,11 @@ EXPORT_SYMBOL(snd_pcm_new_internal); static void free_chmap(struct snd_pcm_str *pstr) { if (pstr->chmap_kctl) { - snd_ctl_remove(pstr->pcm->card, pstr->chmap_kctl); + struct snd_card *card = pstr->pcm->card; + + down_write(&card->controls_rwsem); + snd_ctl_remove(card, pstr->chmap_kctl); + up_write(&card->controls_rwsem); pstr->chmap_kctl = NULL; } } diff --git a/sound/core/pcm_compat.c b/sound/core/pcm_compat.c index 590a46a9e78de4bf2028c33ba66a7b427be4771b..a226d8f240287eb357224c78cc6e83b999ad21e2 100644 --- a/sound/core/pcm_compat.c +++ b/sound/core/pcm_compat.c @@ -466,6 +466,76 @@ static int snd_pcm_ioctl_sync_ptr_x32(struct snd_pcm_substream *substream, } #endif /* CONFIG_X86_X32 */ +#ifdef __BIG_ENDIAN +typedef char __pad_before_u32[4]; +typedef char __pad_after_u32[0]; +#else +typedef char __pad_before_u32[0]; +typedef char __pad_after_u32[4]; +#endif + +/* PCM 2.0.15 API definition had a bug in mmap control; it puts the avail_min + * at the wrong offset due to a typo in padding type. + * The bug hits only 32bit. + * A workaround for incorrect read/write is needed only in 32bit compat mode. + */ +struct __snd_pcm_mmap_control64_buggy { + __pad_before_u32 __pad1; + __u32 appl_ptr; + __pad_before_u32 __pad2; /* SiC! here is the bug */ + __pad_before_u32 __pad3; + __u32 avail_min; + __pad_after_uframe __pad4; +}; + +static int snd_pcm_ioctl_sync_ptr_buggy(struct snd_pcm_substream *substream, + struct snd_pcm_sync_ptr __user *_sync_ptr) +{ + struct snd_pcm_runtime *runtime = substream->runtime; + struct snd_pcm_sync_ptr sync_ptr; + struct __snd_pcm_mmap_control64_buggy *sync_cp; + volatile struct snd_pcm_mmap_status *status; + volatile struct snd_pcm_mmap_control *control; + int err; + + memset(&sync_ptr, 0, sizeof(sync_ptr)); + sync_cp = (struct __snd_pcm_mmap_control64_buggy *)&sync_ptr.c.control; + if (get_user(sync_ptr.flags, (unsigned __user *)&(_sync_ptr->flags))) + return -EFAULT; + if (copy_from_user(sync_cp, &(_sync_ptr->c.control), sizeof(*sync_cp))) + return -EFAULT; + status = runtime->status; + control = runtime->control; + if (sync_ptr.flags & SNDRV_PCM_SYNC_PTR_HWSYNC) { + err = snd_pcm_hwsync(substream); + if (err < 0) + return err; + } + snd_pcm_stream_lock_irq(substream); + if (!(sync_ptr.flags & SNDRV_PCM_SYNC_PTR_APPL)) { + err = pcm_lib_apply_appl_ptr(substream, sync_cp->appl_ptr); + if (err < 0) { + snd_pcm_stream_unlock_irq(substream); + return err; + } + } else { + sync_cp->appl_ptr = control->appl_ptr; + } + if (!(sync_ptr.flags & SNDRV_PCM_SYNC_PTR_AVAIL_MIN)) + control->avail_min = sync_cp->avail_min; + else + sync_cp->avail_min = control->avail_min; + sync_ptr.s.status.state = status->state; + sync_ptr.s.status.hw_ptr = status->hw_ptr; + sync_ptr.s.status.tstamp = status->tstamp; + sync_ptr.s.status.suspended_state = status->suspended_state; + sync_ptr.s.status.audio_tstamp = status->audio_tstamp; + snd_pcm_stream_unlock_irq(substream); + if (copy_to_user(_sync_ptr, &sync_ptr, sizeof(sync_ptr))) + return -EFAULT; + return 0; +} + /* */ enum { @@ -535,7 +605,7 @@ static long snd_pcm_ioctl_compat(struct file *file, unsigned int cmd, unsigned l if (in_x32_syscall()) return snd_pcm_ioctl_sync_ptr_x32(substream, argp); #endif /* CONFIG_X86_X32 */ - return snd_pcm_common_ioctl(file, substream, cmd, argp); + return snd_pcm_ioctl_sync_ptr_buggy(substream, argp); case SNDRV_PCM_IOCTL_HW_REFINE32: return snd_pcm_ioctl_hw_params_compat(substream, 1, argp); case SNDRV_PCM_IOCTL_HW_PARAMS32: diff --git a/sound/core/seq/seq_queue.c b/sound/core/seq/seq_queue.c index 71a6ea62c3be7c8a0a1885da0b32b2b527ef8644..4ff0b927230c2f5ee4efe89046b61e68d0880eb9 100644 --- a/sound/core/seq/seq_queue.c +++ b/sound/core/seq/seq_queue.c @@ -234,12 +234,15 @@ struct snd_seq_queue *snd_seq_queue_find_name(char *name) /* -------------------------------------------------------- */ +#define MAX_CELL_PROCESSES_IN_QUEUE 1000 + void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) { unsigned long flags; struct snd_seq_event_cell *cell; snd_seq_tick_time_t cur_tick; snd_seq_real_time_t cur_time; + int processed = 0; if (q == NULL) return; @@ -262,6 +265,8 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); + if (++processed >= MAX_CELL_PROCESSES_IN_QUEUE) + goto out; /* the rest processed at the next batch */ } /* Process time queue... */ @@ -271,14 +276,19 @@ void snd_seq_check_queue(struct snd_seq_queue *q, int atomic, int hop) if (!cell) break; snd_seq_dispatch_event(cell, atomic, hop); + if (++processed >= MAX_CELL_PROCESSES_IN_QUEUE) + goto out; /* the rest processed at the next batch */ } + out: /* free lock */ spin_lock_irqsave(&q->check_lock, flags); if (q->check_again) { q->check_again = 0; - spin_unlock_irqrestore(&q->check_lock, flags); - goto __again; + if (processed < MAX_CELL_PROCESSES_IN_QUEUE) { + spin_unlock_irqrestore(&q->check_lock, flags); + goto __again; + } } q->check_blocked = 0; spin_unlock_irqrestore(&q->check_lock, flags); diff --git a/sound/core/seq_device.c b/sound/core/seq_device.c index 7ed13cb32ef82bf73db73330b434c244e02f7de8..f22d3bbfcaa54b2ccc5495e0d4806a38d2897d73 100644 --- a/sound/core/seq_device.c +++ b/sound/core/seq_device.c @@ -147,6 +147,8 @@ static int snd_seq_device_dev_free(struct snd_device *device) struct snd_seq_device *dev = device->device_data; cancel_autoload_drivers(); + if (dev->private_free) + dev->private_free(dev); put_device(&dev->dev); return 0; } @@ -174,11 +176,7 @@ static int snd_seq_device_dev_disconnect(struct snd_device *device) static void snd_seq_dev_release(struct device *dev) { - struct snd_seq_device *sdev = to_seq_dev(dev); - - if (sdev->private_free) - sdev->private_free(sdev); - kfree(sdev); + kfree(to_seq_dev(dev)); } /* diff --git a/sound/core/timer.c b/sound/core/timer.c index c15c8314671b7a5ba2339d7fb22aa5dcab5fa682..04cd8953605ab33ab80589ac576e8fa2f2be8da0 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -624,13 +624,13 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) if (!timer) return -EINVAL; spin_lock_irqsave(&timer->lock, flags); + list_del_init(&timeri->ack_list); + list_del_init(&timeri->active_list); if (!(timeri->flags & (SNDRV_TIMER_IFLG_RUNNING | SNDRV_TIMER_IFLG_START))) { result = -EBUSY; goto unlock; } - list_del_init(&timeri->ack_list); - list_del_init(&timeri->active_list); if (timer->card && timer->card->shutdown) goto unlock; if (stop) { @@ -665,23 +665,22 @@ static int snd_timer_stop1(struct snd_timer_instance *timeri, bool stop) static int snd_timer_stop_slave(struct snd_timer_instance *timeri, bool stop) { unsigned long flags; + bool running; spin_lock_irqsave(&slave_active_lock, flags); - if (!(timeri->flags & SNDRV_TIMER_IFLG_RUNNING)) { - spin_unlock_irqrestore(&slave_active_lock, flags); - return -EBUSY; - } + running = timeri->flags & SNDRV_TIMER_IFLG_RUNNING; timeri->flags &= ~SNDRV_TIMER_IFLG_RUNNING; if (timeri->timer) { spin_lock(&timeri->timer->lock); list_del_init(&timeri->ack_list); list_del_init(&timeri->active_list); - snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : - SNDRV_TIMER_EVENT_PAUSE); + if (running) + snd_timer_notify1(timeri, stop ? SNDRV_TIMER_EVENT_STOP : + SNDRV_TIMER_EVENT_PAUSE); spin_unlock(&timeri->timer->lock); } spin_unlock_irqrestore(&slave_active_lock, flags); - return 0; + return running ? 0 : -EBUSY; } /* diff --git a/sound/drivers/opl3/opl3_midi.c b/sound/drivers/opl3/opl3_midi.c index eb23c55323ae1d5166d8093ef5d967088b4929db..a2583a30c45ad1ec9763bbd1306f569d2b0de618 100644 --- a/sound/drivers/opl3/opl3_midi.c +++ b/sound/drivers/opl3/opl3_midi.c @@ -398,7 +398,7 @@ void snd_opl3_note_on(void *p, int note, int vel, struct snd_midi_channel *chan) } if (instr_4op) { vp2 = &opl3->voices[voice + 3]; - if (vp->state > 0) { + if (vp2->state > 0) { opl3_reg = reg_side | (OPL3_REG_KEYON_BLOCK + voice_offset + 3); reg_val = vp->keyon_reg & ~OPL3_KEYON_BIT; diff --git a/sound/hda/ext/hdac_ext_stream.c b/sound/hda/ext/hdac_ext_stream.c index c4d54a838773cdcf91565624aecd594eac4c5f91..1e6e4cf428cdab3f81764ba7b0c85de9ed8f6fa7 100644 --- a/sound/hda/ext/hdac_ext_stream.c +++ b/sound/hda/ext/hdac_ext_stream.c @@ -106,20 +106,14 @@ void snd_hdac_stream_free_all(struct hdac_bus *bus) } EXPORT_SYMBOL_GPL(snd_hdac_stream_free_all); -/** - * snd_hdac_ext_stream_decouple - decouple the hdac stream - * @bus: HD-audio core bus - * @stream: HD-audio ext core stream object to initialize - * @decouple: flag to decouple - */ -void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, - struct hdac_ext_stream *stream, bool decouple) +void snd_hdac_ext_stream_decouple_locked(struct hdac_bus *bus, + struct hdac_ext_stream *stream, + bool decouple) { struct hdac_stream *hstream = &stream->hstream; u32 val; int mask = AZX_PPCTL_PROCEN(hstream->index); - spin_lock_irq(&bus->reg_lock); val = readw(bus->ppcap + AZX_REG_PP_PPCTL) & mask; if (decouple && !val) @@ -128,6 +122,20 @@ void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, snd_hdac_updatel(bus->ppcap, AZX_REG_PP_PPCTL, mask, 0); stream->decoupled = decouple; +} +EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple_locked); + +/** + * snd_hdac_ext_stream_decouple - decouple the hdac stream + * @bus: HD-audio core bus + * @stream: HD-audio ext core stream object to initialize + * @decouple: flag to decouple + */ +void snd_hdac_ext_stream_decouple(struct hdac_bus *bus, + struct hdac_ext_stream *stream, bool decouple) +{ + spin_lock_irq(&bus->reg_lock); + snd_hdac_ext_stream_decouple_locked(bus, stream, decouple); spin_unlock_irq(&bus->reg_lock); } EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple); @@ -252,6 +260,7 @@ hdac_ext_link_stream_assign(struct hdac_bus *bus, return NULL; } + spin_lock_irq(&bus->reg_lock); list_for_each_entry(stream, &bus->stream_list, list) { struct hdac_ext_stream *hstream = container_of(stream, struct hdac_ext_stream, @@ -266,17 +275,16 @@ hdac_ext_link_stream_assign(struct hdac_bus *bus, } if (!hstream->link_locked) { - snd_hdac_ext_stream_decouple(bus, hstream, true); + snd_hdac_ext_stream_decouple_locked(bus, hstream, true); res = hstream; break; } } if (res) { - spin_lock_irq(&bus->reg_lock); res->link_locked = 1; res->link_substream = substream; - spin_unlock_irq(&bus->reg_lock); } + spin_unlock_irq(&bus->reg_lock); return res; } @@ -292,6 +300,7 @@ hdac_ext_host_stream_assign(struct hdac_bus *bus, return NULL; } + spin_lock_irq(&bus->reg_lock); list_for_each_entry(stream, &bus->stream_list, list) { struct hdac_ext_stream *hstream = container_of(stream, struct hdac_ext_stream, @@ -301,18 +310,17 @@ hdac_ext_host_stream_assign(struct hdac_bus *bus, if (!stream->opened) { if (!hstream->decoupled) - snd_hdac_ext_stream_decouple(bus, hstream, true); + snd_hdac_ext_stream_decouple_locked(bus, hstream, true); res = hstream; break; } } if (res) { - spin_lock_irq(&bus->reg_lock); res->hstream.opened = 1; res->hstream.running = 0; res->hstream.substream = substream; - spin_unlock_irq(&bus->reg_lock); } + spin_unlock_irq(&bus->reg_lock); return res; } @@ -378,15 +386,17 @@ void snd_hdac_ext_stream_release(struct hdac_ext_stream *stream, int type) break; case HDAC_EXT_STREAM_TYPE_HOST: + spin_lock_irq(&bus->reg_lock); if (stream->decoupled && !stream->link_locked) - snd_hdac_ext_stream_decouple(bus, stream, false); + snd_hdac_ext_stream_decouple_locked(bus, stream, false); + spin_unlock_irq(&bus->reg_lock); snd_hdac_stream_release(&stream->hstream); break; case HDAC_EXT_STREAM_TYPE_LINK: - if (stream->decoupled && !stream->hstream.opened) - snd_hdac_ext_stream_decouple(bus, stream, false); spin_lock_irq(&bus->reg_lock); + if (stream->decoupled && !stream->hstream.opened) + snd_hdac_ext_stream_decouple_locked(bus, stream, false); stream->link_locked = 0; stream->link_substream = NULL; spin_unlock_irq(&bus->reg_lock); diff --git a/sound/hda/hdac_controller.c b/sound/hda/hdac_controller.c index b98449fd92f3b184d0d4312eef1e84f652528318..522d1897659cbd032421257662ece16e201fd53e 100644 --- a/sound/hda/hdac_controller.c +++ b/sound/hda/hdac_controller.c @@ -421,8 +421,9 @@ int snd_hdac_bus_reset_link(struct hdac_bus *bus, bool full_reset) if (!full_reset) goto skip_reset; - /* clear STATESTS */ - snd_hdac_chip_writew(bus, STATESTS, STATESTS_INT_MASK); + /* clear STATESTS if not in reset */ + if (snd_hdac_chip_readb(bus, GCTL) & AZX_GCTL_RESET) + snd_hdac_chip_writew(bus, STATESTS, STATESTS_INT_MASK); /* reset controller */ snd_hdac_bus_enter_link_reset(bus); diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c index abe7a1b16fe1eacbc5a312232a2818eebf848ef5..ce77a53201639d05a32143c636b84586dcdfdfbd 100644 --- a/sound/hda/hdac_stream.c +++ b/sound/hda/hdac_stream.c @@ -296,6 +296,7 @@ struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus, int key = (substream->pcm->device << 16) | (substream->number << 2) | (substream->stream + 1); + spin_lock_irq(&bus->reg_lock); list_for_each_entry(azx_dev, &bus->stream_list, list) { if (azx_dev->direction != substream->stream) continue; @@ -309,13 +310,12 @@ struct hdac_stream *snd_hdac_stream_assign(struct hdac_bus *bus, res = azx_dev; } if (res) { - spin_lock_irq(&bus->reg_lock); res->opened = 1; res->running = 0; res->assigned_key = key; res->substream = substream; - spin_unlock_irq(&bus->reg_lock); } + spin_unlock_irq(&bus->reg_lock); return res; } EXPORT_SYMBOL_GPL(snd_hdac_stream_assign); diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c index 61e1de6d7be0a20330645533a75b4d7ecf6d96dd..2a5ba9dca6b08bba5398862739bcbaee8d8987f2 100644 --- a/sound/hda/intel-dsp-config.c +++ b/sound/hda/intel-dsp-config.c @@ -30,6 +30,7 @@ struct config_entry { u32 flags; u16 device; const struct dmi_system_id *dmi_table; + u8 codec_hid[ACPI_ID_LEN]; }; /* @@ -55,7 +56,7 @@ static const struct config_entry config_table[] = { /* * Apollolake (Broxton-P) * the legacy HDAudio driver is used except on Up Squared (SOF) and - * Chromebooks (SST) + * Chromebooks (SST), as well as devices based on the ES8336 codec */ #if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE) { @@ -72,6 +73,11 @@ static const struct config_entry config_table[] = { {} } }, + { + .flags = FLAG_SOF, + .device = 0x5a98, + .codec_hid = "ESSX8336", + }, #endif #if IS_ENABLED(CONFIG_SND_SOC_INTEL_APL) { @@ -136,7 +142,7 @@ static const struct config_entry config_table[] = { /* * Geminilake uses legacy HDAudio driver except for Google - * Chromebooks + * Chromebooks and devices based on the ES8336 codec */ /* Geminilake */ #if IS_ENABLED(CONFIG_SND_SOC_SOF_GEMINILAKE) @@ -153,6 +159,11 @@ static const struct config_entry config_table[] = { {} } }, + { + .flags = FLAG_SOF, + .device = 0x3198, + .codec_hid = "ESSX8336", + }, #endif /* @@ -240,6 +251,11 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x02c8, }, + { + .flags = FLAG_SOF, + .device = 0x02c8, + .codec_hid = "ESSX8336", + }, /* Cometlake-H */ { .flags = FLAG_SOF, @@ -264,6 +280,11 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x06c8, }, + { + .flags = FLAG_SOF, + .device = 0x06c8, + .codec_hid = "ESSX8336", + }, #endif /* Icelake */ @@ -287,6 +308,15 @@ static const struct config_entry config_table[] = { }, #endif +/* JasperLake */ +#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE) + { + .flags = FLAG_SOF, + .device = 0x4dc8, + .codec_hid = "ESSX8336", + }, +#endif + /* Tigerlake */ #if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE) { @@ -310,6 +340,11 @@ static const struct config_entry config_table[] = { .flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE, .device = 0x43c8, }, + { + .flags = FLAG_SOF, + .device = 0xa0c8, + .codec_hid = "ESSX8336", + }, #endif /* Elkhart Lake */ @@ -337,6 +372,8 @@ static const struct config_entry *snd_intel_dsp_find_config continue; if (table->dmi_table && !dmi_check_system(table->dmi_table)) continue; + if (table->codec_hid[0] && !acpi_dev_present(table->codec_hid, NULL, -1)) + continue; return table; } return NULL; diff --git a/sound/isa/Kconfig b/sound/isa/Kconfig index 6ffa48dd5983035684a5359196b53d93ac1d6b18..570b88e0b201835b8daa2fdd1ec3b08d3003ec89 100644 --- a/sound/isa/Kconfig +++ b/sound/isa/Kconfig @@ -22,7 +22,7 @@ config SND_SB16_DSP menuconfig SND_ISA bool "ISA sound devices" depends on ISA || COMPILE_TEST - depends on ISA_DMA_API + depends on ISA_DMA_API && !M68K default y help Support for sound devices connected via the ISA bus. diff --git a/sound/isa/gus/gus_dma.c b/sound/isa/gus/gus_dma.c index a1c770d826ddab1d6ed6d2f6c34aadcdf25d2701..6d664dd8dde0b4fe5b54bf3ea35bca454d81d493 100644 --- a/sound/isa/gus/gus_dma.c +++ b/sound/isa/gus/gus_dma.c @@ -126,6 +126,8 @@ static void snd_gf1_dma_interrupt(struct snd_gus_card * gus) } block = snd_gf1_dma_next_block(gus); spin_unlock(&gus->dma_lock); + if (!block) + return; snd_gf1_dma_program(gus, block->addr, block->buf_addr, block->count, (unsigned short) block->cmd); kfree(block); #if 0 diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index 93bc9bef7641fa1839e1bdd9fb58f86681b35c84..41ce12597177757bf8ef3f0e81817feba753e5de 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -279,6 +279,7 @@ config SND_CS46XX_NEW_DSP config SND_CS5530 tristate "CS5530 Audio" depends on ISA_DMA_API && (X86_32 || COMPILE_TEST) + depends on !M68K select SND_SB16_DSP help Say Y here to include support for audio on Cyrix/NatSemi CS5530 chips. diff --git a/sound/pci/ctxfi/ctamixer.c b/sound/pci/ctxfi/ctamixer.c index d4ff377eb3a3405fab75c044e92603d2387e1a1d..6d636bdcaa5a3bf984fd382abd7c2a6929c1a1e6 100644 --- a/sound/pci/ctxfi/ctamixer.c +++ b/sound/pci/ctxfi/ctamixer.c @@ -23,16 +23,15 @@ #define BLANK_SLOT 4094 -static int amixer_master(struct rsc *rsc) +static void amixer_master(struct rsc *rsc) { rsc->conj = 0; - return rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0]; + rsc->idx = container_of(rsc, struct amixer, rsc)->idx[0]; } -static int amixer_next_conj(struct rsc *rsc) +static void amixer_next_conj(struct rsc *rsc) { rsc->conj++; - return container_of(rsc, struct amixer, rsc)->idx[rsc->conj]; } static int amixer_index(const struct rsc *rsc) @@ -331,16 +330,15 @@ int amixer_mgr_destroy(struct amixer_mgr *amixer_mgr) /* SUM resource management */ -static int sum_master(struct rsc *rsc) +static void sum_master(struct rsc *rsc) { rsc->conj = 0; - return rsc->idx = container_of(rsc, struct sum, rsc)->idx[0]; + rsc->idx = container_of(rsc, struct sum, rsc)->idx[0]; } -static int sum_next_conj(struct rsc *rsc) +static void sum_next_conj(struct rsc *rsc) { rsc->conj++; - return container_of(rsc, struct sum, rsc)->idx[rsc->conj]; } static int sum_index(const struct rsc *rsc) diff --git a/sound/pci/ctxfi/ctdaio.c b/sound/pci/ctxfi/ctdaio.c index 4cb47b5a792c54d5a1c1f3bcc149443dd6a8fad3..aae544dff88684d590ef36f8e57122de194cb5d5 100644 --- a/sound/pci/ctxfi/ctdaio.c +++ b/sound/pci/ctxfi/ctdaio.c @@ -51,12 +51,12 @@ static const struct daio_rsc_idx idx_20k2[NUM_DAIOTYP] = { [SPDIFIO] = {.left = 0x05, .right = 0x85}, }; -static int daio_master(struct rsc *rsc) +static void daio_master(struct rsc *rsc) { /* Actually, this is not the resource index of DAIO. * For DAO, it is the input mapper index. And, for DAI, * it is the output time-slot index. */ - return rsc->conj = rsc->idx; + rsc->conj = rsc->idx; } static int daio_index(const struct rsc *rsc) @@ -64,19 +64,19 @@ static int daio_index(const struct rsc *rsc) return rsc->conj; } -static int daio_out_next_conj(struct rsc *rsc) +static void daio_out_next_conj(struct rsc *rsc) { - return rsc->conj += 2; + rsc->conj += 2; } -static int daio_in_next_conj_20k1(struct rsc *rsc) +static void daio_in_next_conj_20k1(struct rsc *rsc) { - return rsc->conj += 0x200; + rsc->conj += 0x200; } -static int daio_in_next_conj_20k2(struct rsc *rsc) +static void daio_in_next_conj_20k2(struct rsc *rsc) { - return rsc->conj += 0x100; + rsc->conj += 0x100; } static const struct rsc_ops daio_out_rsc_ops = { diff --git a/sound/pci/ctxfi/ctresource.c b/sound/pci/ctxfi/ctresource.c index 61e51e35ba168547d000e99b19769ee170a4d5fa..edf9d9ef9b8489b3e6a1061b5d49283843e2fda0 100644 --- a/sound/pci/ctxfi/ctresource.c +++ b/sound/pci/ctxfi/ctresource.c @@ -109,18 +109,17 @@ static int audio_ring_slot(const struct rsc *rsc) return (rsc->conj << 4) + offset_in_audio_slot_block[rsc->type]; } -static int rsc_next_conj(struct rsc *rsc) +static void rsc_next_conj(struct rsc *rsc) { unsigned int i; for (i = 0; (i < 8) && (!(rsc->msr & (0x1 << i))); ) i++; rsc->conj += (AUDIO_SLOT_BLOCK_NUM >> i); - return rsc->conj; } -static int rsc_master(struct rsc *rsc) +static void rsc_master(struct rsc *rsc) { - return rsc->conj = rsc->idx; + rsc->conj = rsc->idx; } static const struct rsc_ops rsc_generic_ops = { diff --git a/sound/pci/ctxfi/ctresource.h b/sound/pci/ctxfi/ctresource.h index 93e47488a1c1c6fa175a121bcd1addfe327f650b..92146054af582512945ed0a28d25b4e9a71e5d02 100644 --- a/sound/pci/ctxfi/ctresource.h +++ b/sound/pci/ctxfi/ctresource.h @@ -39,8 +39,8 @@ struct rsc { }; struct rsc_ops { - int (*master)(struct rsc *rsc); /* Move to master resource */ - int (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */ + void (*master)(struct rsc *rsc); /* Move to master resource */ + void (*next_conj)(struct rsc *rsc); /* Move to next conjugate resource */ int (*index)(const struct rsc *rsc); /* Return the index of resource */ /* Return the output slot number */ int (*output_slot)(const struct rsc *rsc); diff --git a/sound/pci/ctxfi/ctsrc.c b/sound/pci/ctxfi/ctsrc.c index 37c18ce84974ab549af2a4fd60b859d09b3c3970..7d2bda0c3d3de771fa1a1ea67d45a23e0cdc8235 100644 --- a/sound/pci/ctxfi/ctsrc.c +++ b/sound/pci/ctxfi/ctsrc.c @@ -590,16 +590,15 @@ int src_mgr_destroy(struct src_mgr *src_mgr) /* SRCIMP resource manager operations */ -static int srcimp_master(struct rsc *rsc) +static void srcimp_master(struct rsc *rsc) { rsc->conj = 0; - return rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0]; + rsc->idx = container_of(rsc, struct srcimp, rsc)->idx[0]; } -static int srcimp_next_conj(struct rsc *rsc) +static void srcimp_next_conj(struct rsc *rsc) { rsc->conj++; - return container_of(rsc, struct srcimp, rsc)->idx[rsc->conj]; } static int srcimp_index(const struct rsc *rsc) diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 17a25e453f60cc31cfdf5fca04d1369c400e235c..4efbcc41fdfb7431a3fc07124083784476399d55 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -301,29 +301,31 @@ int snd_hda_codec_configure(struct hda_codec *codec) { int err; + if (codec->configured) + return 0; + if (is_generic_config(codec)) codec->probe_id = HDA_CODEC_ID_GENERIC; else codec->probe_id = 0; - err = snd_hdac_device_register(&codec->core); - if (err < 0) - return err; + if (!device_is_registered(&codec->core.dev)) { + err = snd_hdac_device_register(&codec->core); + if (err < 0) + return err; + } if (!codec->preset) codec_bind_module(codec); if (!codec->preset) { err = codec_bind_generic(codec); if (err < 0) { - codec_err(codec, "Unable to bind the codec\n"); - goto error; + codec_dbg(codec, "Unable to bind the codec\n"); + return err; } } + codec->configured = 1; return 0; - - error: - snd_hdac_device_unregister(&codec->core); - return err; } EXPORT_SYMBOL_GPL(snd_hda_codec_configure); diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 4cec1bd77e6fe66c4d5d75ba08ef5cdaf0223184..39281106477ebf2df18b9ecd273c272076bc3783 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -791,6 +791,7 @@ void snd_hda_codec_cleanup_for_unbind(struct hda_codec *codec) snd_array_free(&codec->nids); remove_conn_list(codec); snd_hdac_regmap_exit(&codec->core); + codec->configured = 0; } EXPORT_SYMBOL_GPL(snd_hda_codec_cleanup_for_unbind); @@ -1726,8 +1727,11 @@ void snd_hda_ctls_clear(struct hda_codec *codec) { int i; struct hda_nid_item *items = codec->mixers.list; + + down_write(&codec->card->controls_rwsem); for (i = 0; i < codec->mixers.used; i++) snd_ctl_remove(codec->card, items[i].kctl); + up_write(&codec->card->controls_rwsem); snd_array_free(&codec->mixers); snd_array_free(&codec->nids); } diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index b972d59eb1ec23a9eab333c07fa5d868294d1bd3..3de7dc34def2437f9a2f56dc3f539a24b07023c4 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -25,6 +25,7 @@ #include #include #include "hda_controller.h" +#include "hda_local.h" #define CREATE_TRACE_POINTS #include "hda_controller_trace.h" @@ -1259,17 +1260,24 @@ EXPORT_SYMBOL_GPL(azx_probe_codecs); int azx_codec_configure(struct azx *chip) { struct hda_codec *codec, *next; + int success = 0; - /* use _safe version here since snd_hda_codec_configure() deregisters - * the device upon error and deletes itself from the bus list. - */ - list_for_each_codec_safe(codec, next, &chip->bus) { - snd_hda_codec_configure(codec); + list_for_each_codec(codec, &chip->bus) { + if (!snd_hda_codec_configure(codec)) + success++; } - if (!azx_bus(chip)->num_codecs) - return -ENODEV; - return 0; + if (success) { + /* unregister failed codecs if any codec has been probed */ + list_for_each_codec_safe(codec, next, &chip->bus) { + if (!codec->configured) { + codec_err(codec, "Unable to configure, disabling\n"); + snd_hdac_device_unregister(&codec->core); + } + } + } + + return success ? 0 : -ENODEV; } EXPORT_SYMBOL_GPL(azx_codec_configure); diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index 68f9668788ea2d469041390d79f9c5d1d52e895d..324cba13c7bacd2c4349ef18e50ddbeb5422bc32 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -41,7 +41,7 @@ /* 24 unused */ #define AZX_DCAPS_COUNT_LPIB_DELAY (1 << 25) /* Take LPIB as delay */ #define AZX_DCAPS_PM_RUNTIME (1 << 26) /* runtime PM support */ -/* 27 unused */ +#define AZX_DCAPS_RETRY_PROBE (1 << 27) /* retry probe if no codec is configured */ #define AZX_DCAPS_CORBRP_SELF_CLEAR (1 << 28) /* CORBRP clears itself after reset */ #define AZX_DCAPS_NO_MSI64 (1 << 29) /* Stick to 32-bit MSIs */ #define AZX_DCAPS_SEPARATE_STREAM_TAG (1 << 30) /* capture and playback use separate stream tag */ diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 323df011b94a3de5188ea3b1f1d4a3975b3657d9..8ee3be7bbd24e7676231eed11ede07ee4bdebd23 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -91,6 +91,12 @@ static void snd_hda_gen_spec_free(struct hda_gen_spec *spec) free_kctls(spec); snd_array_free(&spec->paths); snd_array_free(&spec->loopback_list); +#ifdef CONFIG_SND_HDA_GENERIC_LEDS + if (spec->led_cdevs[LED_AUDIO_MUTE]) + led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MUTE]); + if (spec->led_cdevs[LED_AUDIO_MICMUTE]) + led_classdev_unregister(spec->led_cdevs[LED_AUDIO_MICMUTE]); +#endif } /* @@ -3911,7 +3917,10 @@ static int create_mute_led_cdev(struct hda_codec *codec, enum led_brightness), bool micmute) { + struct hda_gen_spec *spec = codec->spec; struct led_classdev *cdev; + int idx = micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE; + int err; cdev = devm_kzalloc(&codec->core.dev, sizeof(*cdev), GFP_KERNEL); if (!cdev) @@ -3921,10 +3930,14 @@ static int create_mute_led_cdev(struct hda_codec *codec, cdev->max_brightness = 1; cdev->default_trigger = micmute ? "audio-micmute" : "audio-mute"; cdev->brightness_set_blocking = callback; - cdev->brightness = ledtrig_audio_get(micmute ? LED_AUDIO_MICMUTE : LED_AUDIO_MUTE); + cdev->brightness = ledtrig_audio_get(idx); cdev->flags = LED_CORE_SUSPENDRESUME; - return devm_led_classdev_register(&codec->core.dev, cdev); + err = led_classdev_register(&codec->core.dev, cdev); + if (err < 0) + return err; + spec->led_cdevs[idx] = cdev; + return 0; } static void vmaster_update_mute_led(void *private_data, int enabled) diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 0886bc81f40be64901b50d6db1a21145891dc810..578faa9adcdcdba43563efddd5fc9e40856c6cb7 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -305,6 +305,9 @@ struct hda_gen_spec { struct hda_jack_callback *cb); void (*mic_autoswitch_hook)(struct hda_codec *codec, struct hda_jack_callback *cb); + + /* leds */ + struct led_classdev *led_cdevs[NUM_AUDIO_LEDS]; }; /* values for add_stereo_mix_input flag */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4c8b281c39921cbba025450742b92a1a2a078c63..600ea241ead79630bdfcd91da02be4259e401140 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -341,7 +341,8 @@ enum { /* quirks for AMD SB */ #define AZX_DCAPS_PRESET_AMD_SB \ (AZX_DCAPS_NO_TCSEL | AZX_DCAPS_AMD_WORKAROUND |\ - AZX_DCAPS_SNOOP_TYPE(ATI) | AZX_DCAPS_PM_RUNTIME) + AZX_DCAPS_SNOOP_TYPE(ATI) | AZX_DCAPS_PM_RUNTIME |\ + AZX_DCAPS_RETRY_PROBE) /* quirks for Nvidia */ #define AZX_DCAPS_PRESET_NVIDIA \ @@ -368,7 +369,10 @@ enum { ((pci)->device == 0x0c0c) || \ ((pci)->device == 0x0d0c) || \ ((pci)->device == 0x160c) || \ - ((pci)->device == 0x490d)) + ((pci)->device == 0x490d) || \ + ((pci)->device == 0x4f90) || \ + ((pci)->device == 0x4f91) || \ + ((pci)->device == 0x4f92)) #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98) @@ -671,13 +675,17 @@ static int azx_position_check(struct azx *chip, struct azx_dev *azx_dev) * the update-IRQ timing. The IRQ is issued before actually the * data is processed. So, we need to process it afterwords in a * workqueue. + * + * Returns 1 if OK to proceed, 0 for delay handling, -1 for skipping update */ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) { struct snd_pcm_substream *substream = azx_dev->core.substream; + struct snd_pcm_runtime *runtime = substream->runtime; int stream = substream->stream; u32 wallclk; unsigned int pos; + snd_pcm_uframes_t hwptr, target; wallclk = azx_readl(chip, WALLCLK) - azx_dev->core.start_wallclk; if (wallclk < (azx_dev->core.period_wallclk * 2) / 3) @@ -714,6 +722,24 @@ static int azx_position_ok(struct azx *chip, struct azx_dev *azx_dev) /* NG - it's below the first next period boundary */ return chip->bdl_pos_adj ? 0 : -1; azx_dev->core.start_wallclk += wallclk; + + if (azx_dev->core.no_period_wakeup) + return 1; /* OK, no need to check period boundary */ + + if (runtime->hw_ptr_base != runtime->hw_ptr_interrupt) + return 1; /* OK, already in hwptr updating process */ + + /* check whether the period gets really elapsed */ + pos = bytes_to_frames(runtime, pos); + hwptr = runtime->hw_ptr_base + pos; + if (hwptr < runtime->status->hw_ptr) + hwptr += runtime->buffer_size; + target = runtime->hw_ptr_interrupt + runtime->period_size; + if (hwptr < target) { + /* too early wakeup, process it later */ + return chip->bdl_pos_adj ? 0 : -1; + } + return 1; /* OK, it's fine */ } @@ -892,35 +918,24 @@ static int azx_get_delay_from_fifo(struct azx *chip, struct azx_dev *azx_dev, return substream->runtime->delay; } -static unsigned int azx_skl_get_dpib_pos(struct azx *chip, - struct azx_dev *azx_dev) +static void __azx_shutdown_chip(struct azx *chip, bool skip_link_reset) { - return _snd_hdac_chip_readl(azx_bus(chip), - AZX_REG_VS_SDXDPIB_XBASE + - (AZX_REG_VS_SDXDPIB_XINTERVAL * - azx_dev->core.index)); -} - -/* get the current DMA position with correction on SKL+ chips */ -static unsigned int azx_get_pos_skl(struct azx *chip, struct azx_dev *azx_dev) -{ - /* DPIB register gives a more accurate position for playback */ - if (azx_dev->core.substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - return azx_skl_get_dpib_pos(chip, azx_dev); - - /* For capture, we need to read posbuf, but it requires a delay - * for the possible boundary overlap; the read of DPIB fetches the - * actual posbuf - */ - udelay(20); - azx_skl_get_dpib_pos(chip, azx_dev); - return azx_get_pos_posbuf(chip, azx_dev); + azx_stop_chip(chip); + if (!skip_link_reset) + azx_enter_link_reset(chip); + azx_clear_irq_pending(chip); + display_power(chip, false); } #ifdef CONFIG_PM static DEFINE_MUTEX(card_list_lock); static LIST_HEAD(card_list); +static void azx_shutdown_chip(struct azx *chip) +{ + __azx_shutdown_chip(chip, false); +} + static void azx_add_card_list(struct azx *chip) { struct hda_intel *hda = container_of(chip, struct hda_intel, chip); @@ -976,14 +991,6 @@ static bool azx_is_pm_ready(struct snd_card *card) return true; } -static void __azx_runtime_suspend(struct azx *chip) -{ - azx_stop_chip(chip); - azx_enter_link_reset(chip); - azx_clear_irq_pending(chip); - display_power(chip, false); -} - static void __azx_runtime_resume(struct azx *chip) { struct hda_intel *hda = container_of(chip, struct hda_intel, chip); @@ -1062,7 +1069,7 @@ static int azx_suspend(struct device *dev) chip = card->private_data; bus = azx_bus(chip); - __azx_runtime_suspend(chip); + azx_shutdown_chip(chip); if (bus->irq >= 0) { free_irq(bus->irq, chip); bus->irq = -1; @@ -1141,7 +1148,7 @@ static int azx_runtime_suspend(struct device *dev) /* enable controller wake up event */ azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) | STATESTS_INT_MASK); - __azx_runtime_suspend(chip); + azx_shutdown_chip(chip); trace_azx_runtime_suspend(chip); return 0; } @@ -1607,7 +1614,7 @@ static void assign_position_fix(struct azx *chip, int fix) [POS_FIX_POSBUF] = azx_get_pos_posbuf, [POS_FIX_VIACOMBO] = azx_via_get_position, [POS_FIX_COMBO] = azx_get_pos_lpib, - [POS_FIX_SKL] = azx_get_pos_skl, + [POS_FIX_SKL] = azx_get_pos_posbuf, [POS_FIX_FIFO] = azx_get_pos_fifo, }; @@ -1645,6 +1652,7 @@ static const struct snd_pci_quirk probe_mask_list[] = { /* forced codec slots */ SND_PCI_QUIRK(0x1043, 0x1262, "ASUS W5Fm", 0x103), SND_PCI_QUIRK(0x1046, 0x1262, "ASUS W5F", 0x103), + SND_PCI_QUIRK(0x1558, 0x0351, "Schenker Dock 15", 0x105), /* WinFast VP200 H (Teradici) user reported broken communication */ SND_PCI_QUIRK(0x3a21, 0x040d, "WinFast VP200 H", 0x101), {} @@ -1758,7 +1766,7 @@ static void azx_check_snoop_available(struct azx *chip) static void azx_probe_work(struct work_struct *work) { - struct hda_intel *hda = container_of(work, struct hda_intel, probe_work); + struct hda_intel *hda = container_of(work, struct hda_intel, probe_work.work); azx_probe_continue(&hda->chip); } @@ -1830,8 +1838,6 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, assign_position_fix(chip, check_position_fix(chip, position_fix[dev])); - check_probe_mask(chip, dev); - if (single_cmd < 0) /* allow fallback to single_cmd at errors */ chip->fallback_to_single_cmd = 1; else /* explicitly set to single_cmd or not */ @@ -1859,6 +1865,8 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, chip->bus.core.needs_damn_long_delay = 1; } + check_probe_mask(chip, dev); + err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); if (err < 0) { dev_err(card->dev, "Error creating device [card]!\n"); @@ -1867,7 +1875,7 @@ static int azx_create(struct snd_card *card, struct pci_dev *pci, } /* continue probing in work context as may trigger request module */ - INIT_WORK(&hda->probe_work, azx_probe_work); + INIT_DELAYED_WORK(&hda->probe_work, azx_probe_work); *rchip = chip; @@ -2202,7 +2210,7 @@ static int azx_probe(struct pci_dev *pci, #endif if (schedule_probe) - schedule_work(&hda->probe_work); + schedule_delayed_work(&hda->probe_work, 0); dev++; if (chip->disabled) @@ -2290,6 +2298,11 @@ static int azx_probe_continue(struct azx *chip) int dev = chip->dev_index; int err; + if (chip->disabled || hda->init_failed) + return -EIO; + if (hda->probe_retry) + goto probe_retry; + to_hda_bus(bus)->bus_probing = 1; hda->probe_continued = 1; @@ -2351,10 +2364,20 @@ static int azx_probe_continue(struct azx *chip) #endif } #endif + + probe_retry: if (bus->codec_mask && !(probe_only[dev] & 1)) { err = azx_codec_configure(chip); - if (err < 0) + if (err) { + if ((chip->driver_caps & AZX_DCAPS_RETRY_PROBE) && + ++hda->probe_retry < 60) { + schedule_delayed_work(&hda->probe_work, + msecs_to_jiffies(1000)); + return 0; /* keep things up */ + } + dev_err(chip->card->dev, "Cannot probe codecs, giving up\n"); goto out_free; + } } err = snd_card_register(chip->card); @@ -2376,7 +2399,8 @@ static int azx_probe_continue(struct azx *chip) out_free: if (err < 0) { - azx_free(chip); + pci_set_drvdata(pci, NULL); + snd_card_free(chip->card); return err; } @@ -2384,6 +2408,7 @@ out_free: display_power(chip, false); complete_all(&hda->probe_wait); to_hda_bus(bus)->bus_probing = 0; + hda->probe_retry = 0; return 0; } @@ -2409,7 +2434,7 @@ static void azx_remove(struct pci_dev *pci) * device during cancel_work_sync() call. */ device_unlock(&pci->dev); - cancel_work_sync(&hda->probe_work); + cancel_delayed_work_sync(&hda->probe_work); device_lock(&pci->dev); snd_card_free(card); @@ -2425,7 +2450,7 @@ static void azx_shutdown(struct pci_dev *pci) return; chip = card->private_data; if (chip && chip->running) - azx_stop_chip(chip); + __azx_shutdown_chip(chip, true); } /* PCI IDs */ @@ -2519,6 +2544,13 @@ static const struct pci_device_id azx_ids[] = { /* DG1 */ { PCI_DEVICE(0x8086, 0x490d), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + /* DG2 */ + { PCI_DEVICE(0x8086, 0x4f90), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + { PCI_DEVICE(0x8086, 0x4f91), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + { PCI_DEVICE(0x8086, 0x4f92), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* Alderlake-S */ { PCI_DEVICE(0x8086, 0x7ad0), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, diff --git a/sound/pci/hda/hda_intel.h b/sound/pci/hda/hda_intel.h index 3fb119f090408865e2a380292807469b9bd8d369..0f39418f9328b08e7c34c2db540eed30f734100c 100644 --- a/sound/pci/hda/hda_intel.h +++ b/sound/pci/hda/hda_intel.h @@ -14,7 +14,7 @@ struct hda_intel { /* sync probing */ struct completion probe_wait; - struct work_struct probe_work; + struct delayed_work probe_work; /* card list (for power_save trigger) */ struct list_head list; @@ -30,6 +30,8 @@ struct hda_intel { unsigned int freed:1; /* resources already released */ bool need_i915_power:1; /* the hda controller needs i915 power */ + + int probe_retry; /* being probe-retry */ }; #endif diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index c65144715af782b6aac6bd6154b6f7f07638a91c..fe725f0f093124d8f9bbe867a7034d4c796a04a8 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -4362,10 +4362,11 @@ HDA_CODEC_ENTRY(0x8086280f, "Icelake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x80862812, "Tigerlake HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862814, "DG1 HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862815, "Alderlake HDMI", patch_i915_tgl_hdmi), -HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862816, "Rocketlake HDMI", patch_i915_tgl_hdmi), +HDA_CODEC_ENTRY(0x80862819, "DG2 HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x8086281a, "Jasperlake HDMI", patch_i915_icl_hdmi), HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI", patch_i915_icl_hdmi), +HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_tgl_hdmi), HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi), HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi), HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f47f639980dbba166ca8700f50ec17396c94b7db..ed0cfcb05ef0d4ee0243a1ed4d47ea17a494313f 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -97,6 +97,7 @@ struct alc_spec { unsigned int gpio_mic_led_mask; struct alc_coef_led mute_led_coef; struct alc_coef_led mic_led_coef; + struct mutex coef_mutex; hda_nid_t headset_mic_pin; hda_nid_t headphone_mic_pin; @@ -133,8 +134,24 @@ struct alc_spec { * COEF access helper functions */ -static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, - unsigned int coef_idx) +static void coef_mutex_lock(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + snd_hda_power_up_pm(codec); + mutex_lock(&spec->coef_mutex); +} + +static void coef_mutex_unlock(struct hda_codec *codec) +{ + struct alc_spec *spec = codec->spec; + + mutex_unlock(&spec->coef_mutex); + snd_hda_power_down_pm(codec); +} + +static int __alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, + unsigned int coef_idx) { unsigned int val; @@ -143,28 +160,56 @@ static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, return val; } +static int alc_read_coefex_idx(struct hda_codec *codec, hda_nid_t nid, + unsigned int coef_idx) +{ + unsigned int val; + + coef_mutex_lock(codec); + val = __alc_read_coefex_idx(codec, nid, coef_idx); + coef_mutex_unlock(codec); + return val; +} + #define alc_read_coef_idx(codec, coef_idx) \ alc_read_coefex_idx(codec, 0x20, coef_idx) -static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid, - unsigned int coef_idx, unsigned int coef_val) +static void __alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid, + unsigned int coef_idx, unsigned int coef_val) { snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_COEF_INDEX, coef_idx); snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PROC_COEF, coef_val); } +static void alc_write_coefex_idx(struct hda_codec *codec, hda_nid_t nid, + unsigned int coef_idx, unsigned int coef_val) +{ + coef_mutex_lock(codec); + __alc_write_coefex_idx(codec, nid, coef_idx, coef_val); + coef_mutex_unlock(codec); +} + #define alc_write_coef_idx(codec, coef_idx, coef_val) \ alc_write_coefex_idx(codec, 0x20, coef_idx, coef_val) +static void __alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid, + unsigned int coef_idx, unsigned int mask, + unsigned int bits_set) +{ + unsigned int val = __alc_read_coefex_idx(codec, nid, coef_idx); + + if (val != -1) + __alc_write_coefex_idx(codec, nid, coef_idx, + (val & ~mask) | bits_set); +} + static void alc_update_coefex_idx(struct hda_codec *codec, hda_nid_t nid, unsigned int coef_idx, unsigned int mask, unsigned int bits_set) { - unsigned int val = alc_read_coefex_idx(codec, nid, coef_idx); - - if (val != -1) - alc_write_coefex_idx(codec, nid, coef_idx, - (val & ~mask) | bits_set); + coef_mutex_lock(codec); + __alc_update_coefex_idx(codec, nid, coef_idx, mask, bits_set); + coef_mutex_unlock(codec); } #define alc_update_coef_idx(codec, coef_idx, mask, bits_set) \ @@ -197,13 +242,15 @@ struct coef_fw { static void alc_process_coef_fw(struct hda_codec *codec, const struct coef_fw *fw) { + coef_mutex_lock(codec); for (; fw->nid; fw++) { if (fw->mask == (unsigned short)-1) - alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val); + __alc_write_coefex_idx(codec, fw->nid, fw->idx, fw->val); else - alc_update_coefex_idx(codec, fw->nid, fw->idx, - fw->mask, fw->val); + __alc_update_coefex_idx(codec, fw->nid, fw->idx, + fw->mask, fw->val); } + coef_mutex_unlock(codec); } /* @@ -527,6 +574,8 @@ static void alc_shutup_pins(struct hda_codec *codec) struct alc_spec *spec = codec->spec; switch (codec->core.vendor_id) { + case 0x10ec0236: + case 0x10ec0256: case 0x10ec0283: case 0x10ec0286: case 0x10ec0288: @@ -1158,6 +1207,7 @@ static int alc_alloc_spec(struct hda_codec *codec, hda_nid_t mixer_nid) codec->spdif_status_reset = 1; codec->forced_resume = 1; codec->patch_ops = alc_patch_ops; + mutex_init(&spec->coef_mutex); err = alc_codec_rename_from_preset(codec); if (err < 0) { @@ -1934,6 +1984,7 @@ enum { ALC887_FIXUP_ASUS_BASS, ALC887_FIXUP_BASS_CHMAP, ALC1220_FIXUP_GB_DUAL_CODECS, + ALC1220_FIXUP_GB_X570, ALC1220_FIXUP_CLEVO_P950, ALC1220_FIXUP_CLEVO_PB51ED, ALC1220_FIXUP_CLEVO_PB51ED_PINS, @@ -2123,6 +2174,30 @@ static void alc1220_fixup_gb_dual_codecs(struct hda_codec *codec, } } +static void alc1220_fixup_gb_x570(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + static const hda_nid_t conn1[] = { 0x0c }; + static const struct coef_fw gb_x570_coefs[] = { + WRITE_COEF(0x07, 0x03c0), + WRITE_COEF(0x1a, 0x01c1), + WRITE_COEF(0x1b, 0x0202), + WRITE_COEF(0x43, 0x3005), + {} + }; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_override_conn_list(codec, 0x14, ARRAY_SIZE(conn1), conn1); + snd_hda_override_conn_list(codec, 0x1b, ARRAY_SIZE(conn1), conn1); + break; + case HDA_FIXUP_ACT_INIT: + alc_process_coef_fw(codec, gb_x570_coefs); + break; + } +} + static void alc1220_fixup_clevo_p950(struct hda_codec *codec, const struct hda_fixup *fix, int action) @@ -2425,6 +2500,10 @@ static const struct hda_fixup alc882_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc1220_fixup_gb_dual_codecs, }, + [ALC1220_FIXUP_GB_X570] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc1220_fixup_gb_x570, + }, [ALC1220_FIXUP_CLEVO_P950] = { .type = HDA_FIXUP_FUNC, .v.func = alc1220_fixup_clevo_p950, @@ -2527,8 +2606,9 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x13fe, 0x1009, "Advantech MIT-W101", ALC886_FIXUP_EAPD), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), - SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_CLEVO_P950), - SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_GB_X570), + SND_PCI_QUIRK(0x1458, 0xa0ce, "Gigabyte X570 Aorus Xtreme", ALC1220_FIXUP_GB_X570), + SND_PCI_QUIRK(0x1458, 0xa0d5, "Gigabyte X570S Aorus Master", ALC1220_FIXUP_GB_X570), SND_PCI_QUIRK(0x1462, 0x11f7, "MSI-GE63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1228, "MSI-GP63", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1229, "MSI-GP73", ALC1220_FIXUP_CLEVO_P950), @@ -2545,11 +2625,14 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x65d2, "Clevo PB51R[CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e1, "Clevo PB51[ED][DF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x65e5, "Clevo PC50D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x65f1, "Clevo PC50HS", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67d1, "Clevo PB71[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x67f1, "Clevo PC70H[PRS]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS), - SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170SM", ALC1220_FIXUP_CLEVO_PB51ED_PINS), + SND_PCI_QUIRK(0x1558, 0x7715, "Clevo X170KM-G", ALC1220_FIXUP_CLEVO_PB51ED), SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x9506, "Clevo P955HQ", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1558, 0x950a, "Clevo P955H[PR]", ALC1220_FIXUP_CLEVO_P950), @@ -2600,6 +2683,7 @@ static const struct hda_model_fixup alc882_fixup_models[] = { {.id = ALC882_FIXUP_NO_PRIMARY_HP, .name = "no-primary-hp"}, {.id = ALC887_FIXUP_ASUS_BASS, .name = "asus-bass"}, {.id = ALC1220_FIXUP_GB_DUAL_CODECS, .name = "dual-codecs"}, + {.id = ALC1220_FIXUP_GB_X570, .name = "gb-x570"}, {.id = ALC1220_FIXUP_CLEVO_P950, .name = "clevo-p950"}, {} }; @@ -3531,7 +3615,8 @@ static void alc256_shutup(struct hda_codec *codec) /* If disable 3k pulldown control for alc257, the Mic detection will not work correctly * when booting with headset plugged. So skip setting it for the codec alc257 */ - if (codec->core.vendor_id != 0x10ec0257) + if (spec->codec_variant != ALC269_TYPE_ALC257 && + spec->codec_variant != ALC269_TYPE_ALC256) alc_update_coef_idx(codec, 0x46, 0, 3 << 12); if (!spec->no_shutup_pins) @@ -4295,6 +4380,16 @@ static void alc287_fixup_hp_gpio_led(struct hda_codec *codec, alc_fixup_hp_gpio_led(codec, action, 0x10, 0); } +static void alc245_fixup_hp_gpio_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + if (action == HDA_FIXUP_ACT_PRE_PROBE) + spec->micmute_led_polarity = 1; + alc_fixup_hp_gpio_led(codec, action, 0, 0x04); +} + /* turn on/off mic-mute LED per capture hook via VREF change */ static int vref_micmute_led_set(struct led_classdev *led_cdev, enum led_brightness brightness) @@ -6347,6 +6442,44 @@ static void alc_fixup_no_int_mic(struct hda_codec *codec, } } +/* GPIO1 = amplifier on/off + * GPIO3 = mic mute LED + */ +static void alc285_fixup_hp_spectre_x360_eb1(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + static const hda_nid_t conn[] = { 0x02 }; + + struct alc_spec *spec = codec->spec; + static const struct hda_pintbl pincfgs[] = { + { 0x14, 0x90170110 }, /* front/high speakers */ + { 0x17, 0x90170130 }, /* back/bass speakers */ + { } + }; + + //enable micmute led + alc_fixup_hp_gpio_led(codec, action, 0x00, 0x04); + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->micmute_led_polarity = 1; + /* needed for amp of back speakers */ + spec->gpio_mask |= 0x01; + spec->gpio_dir |= 0x01; + snd_hda_apply_pincfgs(codec, pincfgs); + /* share DAC to have unified volume control */ + snd_hda_override_conn_list(codec, 0x14, ARRAY_SIZE(conn), conn); + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + break; + case HDA_FIXUP_ACT_INIT: + /* need to toggle GPIO to enable the amp of back speakers */ + alc_update_gpio_data(codec, 0x01, true); + msleep(100); + alc_update_gpio_data(codec, 0x01, false); + break; + } +} + static void alc285_fixup_hp_spectre_x360(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -6375,12 +6508,86 @@ static void alc_fixup_thinkpad_acpi(struct hda_codec *codec, hda_fixup_thinkpad_acpi(codec, fix, action); } +/* Fixup for Lenovo Legion 15IMHg05 speaker output on headset removal. */ +static void alc287_fixup_legion_15imhg05_speakers(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + struct alc_spec *spec = codec->spec; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->gen.suppress_auto_mute = 1; + break; + } +} + /* for alc295_fixup_hp_top_speakers */ #include "hp_x360_helper.c" /* for alc285_fixup_ideapad_s740_coef() */ #include "ideapad_s740_helper.c" +static const struct coef_fw alc256_fixup_set_coef_defaults_coefs[] = { + WRITE_COEF(0x10, 0x0020), WRITE_COEF(0x24, 0x0000), + WRITE_COEF(0x26, 0x0000), WRITE_COEF(0x29, 0x3000), + WRITE_COEF(0x37, 0xfe05), WRITE_COEF(0x45, 0x5089), + {} +}; + +static void alc256_fixup_set_coef_defaults(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* + * A certain other OS sets these coeffs to different values. On at least + * one TongFang barebone these settings might survive even a cold + * reboot. So to restore a clean slate the values are explicitly reset + * to default here. Without this, the external microphone is always in a + * plugged-in state, while the internal microphone is always in an + * unplugged state, breaking the ability to use the internal microphone. + */ + alc_process_coef_fw(codec, alc256_fixup_set_coef_defaults_coefs); +} + +static const struct coef_fw alc233_fixup_no_audio_jack_coefs[] = { + WRITE_COEF(0x1a, 0x9003), WRITE_COEF(0x1b, 0x0e2b), WRITE_COEF(0x37, 0xfe06), + WRITE_COEF(0x38, 0x4981), WRITE_COEF(0x45, 0xd489), WRITE_COEF(0x46, 0x0074), + WRITE_COEF(0x49, 0x0149), + {} +}; + +static void alc233_fixup_no_audio_jack(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* + * The audio jack input and output is not detected on the ASRock NUC Box + * 1100 series when cold booting without this fix. Warm rebooting from a + * certain other OS makes the audio functional, as COEF settings are + * preserved in this case. This fix sets these altered COEF values as + * the default. + */ + alc_process_coef_fw(codec, alc233_fixup_no_audio_jack_coefs); +} + +static void alc256_fixup_mic_no_presence_and_resume(struct hda_codec *codec, + const struct hda_fixup *fix, + int action) +{ + /* + * The Clevo NJ51CU comes either with the ALC293 or the ALC256 codec, + * but uses the 0x8686 subproduct id in both cases. The ALC256 codec + * needs an additional quirk for sound working after suspend and resume. + */ + if (codec->core.vendor_id == 0x10ec0256) { + alc_update_coef_idx(codec, 0x10, 1<<9, 0); + snd_hda_codec_set_pincfg(codec, 0x19, 0x04a11120); + } else { + snd_hda_codec_set_pincfg(codec, 0x1a, 0x04a1113c); + } +} + enum { ALC269_FIXUP_GPIO2, ALC269_FIXUP_SONY_VAIO, @@ -6467,6 +6674,7 @@ enum { ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, ALC280_FIXUP_HP_9480M, ALC245_FIXUP_HP_X360_AMP, + ALC285_FIXUP_HP_SPECTRE_X360_EB1, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13, @@ -6579,6 +6787,7 @@ enum { ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK, ALC287_FIXUP_HP_GPIO_LED, ALC256_FIXUP_HP_HEADSET_MIC, + ALC245_FIXUP_HP_GPIO_LED, ALC236_FIXUP_DELL_AIO_HEADSET_MIC, ALC282_FIXUP_ACER_DISABLE_LINEOUT, ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST, @@ -6591,6 +6800,16 @@ enum { ALC623_FIXUP_LENOVO_THINKSTATION_P340, ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, ALC236_FIXUP_HP_LIMIT_INT_MIC_BOOST, + ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS, + ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE, + ALC287_FIXUP_YOGA7_14ITL_SPEAKERS, + ALC287_FIXUP_13S_GEN2_SPEAKERS, + ALC256_FIXUP_SET_COEF_DEFAULTS, + ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE, + ALC233_FIXUP_NO_AUDIO_JACK, + ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME, + ALC285_FIXUP_LEGION_Y9000X_SPEAKERS, + ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, }; static const struct hda_fixup alc269_fixups[] = { @@ -7197,6 +7416,8 @@ static const struct hda_fixup alc269_fixups[] = { [ALC245_FIXUP_HP_X360_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc245_fixup_hp_x360_amp, + .chained = true, + .chain_id = ALC245_FIXUP_HP_GPIO_LED }, [ALC288_FIXUP_DELL_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, @@ -8148,6 +8369,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_spectre_x360, }, + [ALC285_FIXUP_HP_SPECTRE_X360_EB1] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_hp_spectre_x360_eb1 + }, [ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_ideapad_s740_coef, @@ -8175,6 +8400,152 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF, }, + [ALC285_FIXUP_LEGION_Y9000X_SPEAKERS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc285_fixup_ideapad_s740_coef, + .chained = true, + .chain_id = ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE, + }, + [ALC285_FIXUP_LEGION_Y9000X_AUTOMUTE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_legion_15imhg05_speakers, + .chained = true, + .chain_id = ALC269_FIXUP_THINKPAD_ACPI, + }, + [ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS] = { + .type = HDA_FIXUP_VERBS, + //.v.verbs = legion_15imhg05_coefs, + .v.verbs = (const struct hda_verb[]) { + // set left speaker Legion 7i. + { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x41 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xc }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x1a }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + // set right speaker Legion 7i. + { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x42 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xc }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2a }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + {} + }, + .chained = true, + .chain_id = ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE, + }, + [ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_legion_15imhg05_speakers, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE, + }, + [ALC287_FIXUP_YOGA7_14ITL_SPEAKERS] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + // set left speaker Yoga 7i. + { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x41 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xc }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x1a }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + // set right speaker Yoga 7i. + { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x46 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xc }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2a }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + {} + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE, + }, + [ALC287_FIXUP_13S_GEN2_SPEAKERS] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x41 }, + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x42 }, + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + {} + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE, + }, + [ALC256_FIXUP_SET_COEF_DEFAULTS] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc256_fixup_set_coef_defaults, + }, + [ALC245_FIXUP_HP_GPIO_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc245_fixup_hp_gpio_led, + }, + [ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x03a11120 }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC, + }, + [ALC233_FIXUP_NO_AUDIO_JACK] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc233_fixup_no_audio_jack, + }, + [ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc256_fixup_mic_no_presence_and_resume, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -8211,6 +8582,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x141f, "Acer Spin SP513-54N", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x142b, "Acer Swift SF314-42", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC), @@ -8266,6 +8638,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1028, 0x0a30, "Dell", ALC236_FIXUP_DELL_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a58, "Dell", ALC255_FIXUP_DELL_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0a61, "Dell XPS 15 9510", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0a62, "Dell Precision 5560", ALC289_FIXUP_DUAL_SPK), + SND_PCI_QUIRK(0x1028, 0x0a9d, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1028, 0x0a9e, "Dell Latitude 5430", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2), @@ -8341,14 +8716,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN), SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3), SND_PCI_QUIRK(0x103c, 0x8519, "HP Spectre x360 15-df0xxx", ALC285_FIXUP_HP_SPECTRE_X360), + SND_PCI_QUIRK(0x103c, 0x860f, "HP ZBook 15 G6", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8728, "HP EliteBook 840 G7", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), + SND_PCI_QUIRK(0x103c, 0x8735, "HP ProBook 435 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF), SND_PCI_QUIRK(0x103c, 0x8736, "HP", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8760, "HP", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x877a, "HP", ALC285_FIXUP_HP_MUTE_LED), @@ -8357,6 +8735,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87e7, "HP ProBook 450 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), @@ -8368,6 +8747,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), SND_PCI_QUIRK(0x103c, 0x8805, "HP ProBook 650 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8811, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x8812, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8847, "HP EliteBook x360 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x884b, "HP EliteBook 840 Aero G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), @@ -8404,6 +8785,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x18f1, "Asus FX505DT", ALC256_FIXUP_ASUS_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x194e, "ASUS UX563FD", ALC294_FIXUP_ASUS_HPE), + SND_PCI_QUIRK(0x1043, 0x1970, "ASUS UX550VE", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1982, "ASUS B1400CEPE", ALC256_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19ce, "ASUS B9450FA", ALC294_FIXUP_ASUS_HPE), SND_PCI_QUIRK(0x1043, 0x19e1, "ASUS UX581LV", ALC295_FIXUP_ASUS_MIC_NO_PRESENCE), @@ -8419,6 +8801,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x1e51, "ASUS Zephyrus M15", ALC294_FIXUP_ASUS_GU502_PINS), SND_PCI_QUIRK(0x1043, 0x1e8e, "ASUS Zephyrus G15", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x1f11, "ASUS Zephyrus G14", ALC289_FIXUP_ASUS_GA401), + SND_PCI_QUIRK(0x1043, 0x16b2, "ASUS GU603", ALC289_FIXUP_ASUS_GA401), SND_PCI_QUIRK(0x1043, 0x3030, "ASUS ZN270IE", ALC256_FIXUP_ASUS_AIO_GPIO2), SND_PCI_QUIRK(0x1043, 0x831a, "ASUS P901", ALC269_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x1043, 0x834a, "ASUS S101", ALC269_FIXUP_STEREO_DMIC), @@ -8467,11 +8850,15 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x40a1, "Clevo NL40GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x40c1, "Clevo NL40[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x40d1, "Clevo NL41DU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5015, "Clevo NH5[58]H[HJK]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x5017, "Clevo NH7[79]H[HJK]Q", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50a3, "Clevo NJ51GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50b3, "Clevo NK50S[BEZ]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50b6, "Clevo NK50S5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50b8, "Clevo NK50SZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50d5, "Clevo NP50D5", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50e1, "Clevo NH5[58]HPQ", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x50e2, "Clevo NH7[79]HPQ", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50f0, "Clevo NH50A[CDF]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50f2, "Clevo NH50E[PR]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x50f3, "Clevo NH58DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -8498,7 +8885,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[5|7][0-9]RZ[Q]", ALC269_FIXUP_DMIC), SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC256_FIXUP_MIC_NO_PRESENCE_AND_RESUME), SND_PCI_QUIRK(0x1558, 0x8a20, "Clevo NH55DCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8a51, "Clevo NH70RCQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8d50, "Clevo NH55RCQ-M", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -8564,9 +8951,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), + SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME), + SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3827, "Ideapad S740", ALC285_FIXUP_IDEAPAD_S740_COEF), + SND_PCI_QUIRK(0x17aa, 0x3834, "Lenovo IdeaPad Slim 9i 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x383d, "Legion Y9000X 2019", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3843, "Yoga 9i", ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP), + SND_PCI_QUIRK(0x17aa, 0x384a, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x3852, "Lenovo Yoga 7 14ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), + SND_PCI_QUIRK(0x17aa, 0x3853, "Lenovo Yoga 7 15ITL5", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3902, "Lenovo E50-80", ALC269_FIXUP_DMIC_THINKPAD_ACPI), SND_PCI_QUIRK(0x17aa, 0x3977, "IdeaPad S210", ALC283_FIXUP_INT_MIC), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo B50-70", ALC269_FIXUP_DMIC_THINKPAD_ACPI), @@ -8590,6 +8985,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x511e, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x511f, "Thinkpad", ALC298_FIXUP_TPT470_DOCK), SND_PCI_QUIRK(0x17aa, 0x9e54, "LENOVO NB", ALC269_FIXUP_LENOVO_EAPD), + SND_PCI_QUIRK(0x1849, 0x1233, "ASRock NUC Box 1100", ALC233_FIXUP_NO_AUDIO_JACK), SND_PCI_QUIRK(0x19e5, 0x3204, "Huawei MACH-WX9", ALC256_FIXUP_HUAWEI_MACH_WX9_PINS), SND_PCI_QUIRK(0x1b35, 0x1235, "CZC B20", ALC269_FIXUP_CZC_B20), SND_PCI_QUIRK(0x1b35, 0x1236, "CZC TMI", ALC269_FIXUP_CZC_TMI), @@ -8597,6 +8993,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), + SND_PCI_QUIRK(0x1d05, 0x1132, "TongFang PHxTxX1", ALC256_FIXUP_SET_COEF_DEFAULTS), SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), @@ -8782,9 +9179,11 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"}, {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, + {.id = ALC285_FIXUP_HP_SPECTRE_X360_EB1, .name = "alc285-hp-spectre-x360-eb1"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"}, + {.id = ALC285_FIXUP_HP_GPIO_AMP_INIT, .name = "alc285-hp-amp-init"}, {} }; #define ALC225_STANDARD_PINS \ @@ -9892,6 +10291,27 @@ static void alc671_fixup_hp_headset_mic2(struct hda_codec *codec, } } +static void alc897_hp_automute_hook(struct hda_codec *codec, + struct hda_jack_callback *jack) +{ + struct alc_spec *spec = codec->spec; + int vref; + + snd_hda_gen_hp_automute(codec, jack); + vref = spec->gen.hp_jack_present ? (PIN_HP | AC_PINCTL_VREF_100) : PIN_HP; + snd_hda_codec_write(codec, 0x1b, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, + vref); +} + +static void alc897_fixup_lenovo_headset_mic(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->gen.hp_automute_hook = alc897_hp_automute_hook; + } +} + static const struct coef_fw alc668_coefs[] = { WRITE_COEF(0x01, 0xbebe), WRITE_COEF(0x02, 0xaaaa), WRITE_COEF(0x03, 0x0), WRITE_COEF(0x04, 0x0180), WRITE_COEF(0x06, 0x0), WRITE_COEF(0x07, 0x0f80), @@ -9969,6 +10389,11 @@ enum { ALC671_FIXUP_HP_HEADSET_MIC2, ALC662_FIXUP_ACER_X2660G_HEADSET_MODE, ALC662_FIXUP_ACER_NITRO_HEADSET_MODE, + ALC668_FIXUP_ASUS_NO_HEADSET_MIC, + ALC668_FIXUP_HEADSET_MIC, + ALC668_FIXUP_MIC_DET_COEF, + ALC897_FIXUP_LENOVO_HEADSET_MIC, + ALC897_FIXUP_HEADSET_MIC_PIN, }; static const struct hda_fixup alc662_fixups[] = { @@ -10352,6 +10777,42 @@ static const struct hda_fixup alc662_fixups[] = { .chained = true, .chain_id = ALC662_FIXUP_USI_FUNC }, + [ALC668_FIXUP_ASUS_NO_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1b, 0x04a1112c }, + { } + }, + .chained = true, + .chain_id = ALC668_FIXUP_HEADSET_MIC + }, + [ALC668_FIXUP_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc269_fixup_headset_mic, + .chained = true, + .chain_id = ALC668_FIXUP_MIC_DET_COEF + }, + [ALC668_FIXUP_MIC_DET_COEF] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x15 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0d60 }, + {} + }, + }, + [ALC897_FIXUP_LENOVO_HEADSET_MIC] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc897_fixup_lenovo_headset_mic, + }, + [ALC897_FIXUP_HEADSET_MIC_PIN] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x03a11050 }, + { } + }, + .chained = true, + .chain_id = ALC897_FIXUP_LENOVO_HEADSET_MIC + }, }; static const struct snd_pci_quirk alc662_fixup_tbl[] = { @@ -10387,6 +10848,7 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x15a7, "ASUS UX51VZH", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x177d, "ASUS N551", ALC668_FIXUP_ASUS_Nx51), SND_PCI_QUIRK(0x1043, 0x17bd, "ASUS N751", ALC668_FIXUP_ASUS_Nx51), + SND_PCI_QUIRK(0x1043, 0x185d, "ASUS G551JW", ALC668_FIXUP_ASUS_NO_HEADSET_MIC), SND_PCI_QUIRK(0x1043, 0x1963, "ASUS X71SL", ALC662_FIXUP_ASUS_MODE8), SND_PCI_QUIRK(0x1043, 0x1b73, "ASUS N55SF", ALC662_FIXUP_BASS_16), SND_PCI_QUIRK(0x1043, 0x1bf3, "ASUS N76VZ", ALC662_FIXUP_BASS_MODE4_CHMAP), @@ -10395,6 +10857,10 @@ static const struct snd_pci_quirk alc662_fixup_tbl[] = { SND_PCI_QUIRK(0x144d, 0xc051, "Samsung R720", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x14cd, 0x5003, "USI", ALC662_FIXUP_USI_HEADSET_MODE), SND_PCI_QUIRK(0x17aa, 0x1036, "Lenovo P520", ALC662_FIXUP_LENOVO_MULTI_CODECS), + SND_PCI_QUIRK(0x17aa, 0x32ca, "Lenovo ThinkCentre M80", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32cb, "Lenovo ThinkCentre M70", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32cf, "Lenovo ThinkCentre M950", ALC897_FIXUP_HEADSET_MIC_PIN), + SND_PCI_QUIRK(0x17aa, 0x32f7, "Lenovo ThinkCentre M90", ALC897_FIXUP_HEADSET_MIC_PIN), SND_PCI_QUIRK(0x17aa, 0x38af, "Lenovo Ideapad Y550P", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Ideapad Y550", ALC662_FIXUP_IDEAPAD), SND_PCI_QUIRK(0x1849, 0x5892, "ASRock B150M", ALC892_FIXUP_ASROCK_MOBO), diff --git a/sound/soc/codecs/cpcap.c b/sound/soc/codecs/cpcap.c index c0425e3707d9c2d91c56370f7e62d9ed74493ef1..a3597137fee3e40eda5d52d9bca9a31d9725a814 100644 --- a/sound/soc/codecs/cpcap.c +++ b/sound/soc/codecs/cpcap.c @@ -1544,6 +1544,8 @@ static int cpcap_codec_probe(struct platform_device *pdev) { struct device_node *codec_node = of_get_child_by_name(pdev->dev.parent->of_node, "audio-codec"); + if (!codec_node) + return -ENODEV; pdev->dev.of_node = codec_node; diff --git a/sound/soc/codecs/cs4265.c b/sound/soc/codecs/cs4265.c index d76be44f46b406df325c9cda1d6384a99642dbce..36b9e4fab099b971c9f6d4d131f0f86ec40a40b4 100644 --- a/sound/soc/codecs/cs4265.c +++ b/sound/soc/codecs/cs4265.c @@ -150,7 +150,6 @@ static const struct snd_kcontrol_new cs4265_snd_controls[] = { SOC_SINGLE("E to F Buffer Disable Switch", CS4265_SPDIF_CTL1, 6, 1, 0), SOC_ENUM("C Data Access", cam_mode_enum), - SOC_SINGLE("SPDIF Switch", CS4265_SPDIF_CTL2, 5, 1, 1), SOC_SINGLE("Validity Bit Control Switch", CS4265_SPDIF_CTL2, 3, 1, 0), SOC_ENUM("SPDIF Mono/Stereo", spdif_mono_stereo_enum), @@ -186,7 +185,7 @@ static const struct snd_soc_dapm_widget cs4265_dapm_widgets[] = { SND_SOC_DAPM_SWITCH("Loopback", SND_SOC_NOPM, 0, 0, &loopback_ctl), - SND_SOC_DAPM_SWITCH("SPDIF", SND_SOC_NOPM, 0, 0, + SND_SOC_DAPM_SWITCH("SPDIF", CS4265_SPDIF_CTL2, 5, 1, &spdif_switch), SND_SOC_DAPM_SWITCH("DAC", CS4265_PWRCTL, 1, 1, &dac_switch), diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 828dc78202e8bca4df4de4126919a7042a22565a..54c1ede59b8b758830f4b3bd2840676628fe7dac 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -20,10 +20,9 @@ #include #include #include +#include #include #include -#include -#include #include #include #include @@ -91,7 +90,7 @@ static const struct reg_default cs42l42_reg_defaults[] = { { CS42L42_ASP_RX_INT_MASK, 0x1F }, { CS42L42_ASP_TX_INT_MASK, 0x0F }, { CS42L42_CODEC_INT_MASK, 0x03 }, - { CS42L42_SRCPL_INT_MASK, 0xFF }, + { CS42L42_SRCPL_INT_MASK, 0x7F }, { CS42L42_VPMON_INT_MASK, 0x01 }, { CS42L42_PLL_LOCK_INT_MASK, 0x01 }, { CS42L42_TSRS_PLUG_INT_MASK, 0x0F }, @@ -128,7 +127,7 @@ static const struct reg_default cs42l42_reg_defaults[] = { { CS42L42_MIXER_CHA_VOL, 0x3F }, { CS42L42_MIXER_ADC_VOL, 0x3F }, { CS42L42_MIXER_CHB_VOL, 0x3F }, - { CS42L42_EQ_COEF_IN0, 0x22 }, + { CS42L42_EQ_COEF_IN0, 0x00 }, { CS42L42_EQ_COEF_IN1, 0x00 }, { CS42L42_EQ_COEF_IN2, 0x00 }, { CS42L42_EQ_COEF_IN3, 0x00 }, @@ -1530,12 +1529,15 @@ static void cs42l42_setup_hs_type_detect(struct cs42l42_private *cs42l42) (1 << CS42L42_HS_CLAMP_DISABLE_SHIFT)); /* Enable the tip sense circuit */ + regmap_update_bits(cs42l42->regmap, CS42L42_TSENSE_CTL, + CS42L42_TS_INV_MASK, CS42L42_TS_INV_MASK); + regmap_update_bits(cs42l42->regmap, CS42L42_TIPSENSE_CTL, CS42L42_TIP_SENSE_CTRL_MASK | CS42L42_TIP_SENSE_INV_MASK | CS42L42_TIP_SENSE_DEBOUNCE_MASK, (3 << CS42L42_TIP_SENSE_CTRL_SHIFT) | - (0 << CS42L42_TIP_SENSE_INV_SHIFT) | + (!cs42l42->ts_inv << CS42L42_TIP_SENSE_INV_SHIFT) | (2 << CS42L42_TIP_SENSE_DEBOUNCE_SHIFT)); /* Save the initial status of the tip sense */ @@ -1554,17 +1556,15 @@ static const unsigned int threshold_defaults[] = { CS42L42_HS_DET_LEVEL_1 }; -static int cs42l42_handle_device_data(struct i2c_client *i2c_client, +static int cs42l42_handle_device_data(struct device *dev, struct cs42l42_private *cs42l42) { - struct device_node *np = i2c_client->dev.of_node; unsigned int val; - unsigned int thresholds[CS42L42_NUM_BIASES]; + u32 thresholds[CS42L42_NUM_BIASES]; int ret; int i; - ret = of_property_read_u32(np, "cirrus,ts-inv", &val); - + ret = device_property_read_u32(dev, "cirrus,ts-inv", &val); if (!ret) { switch (val) { case CS42L42_TS_INV_EN: @@ -1572,7 +1572,7 @@ static int cs42l42_handle_device_data(struct i2c_client *i2c_client, cs42l42->ts_inv = val; break; default: - dev_err(&i2c_client->dev, + dev_err(dev, "Wrong cirrus,ts-inv DT value %d\n", val); cs42l42->ts_inv = CS42L42_TS_INV_DIS; @@ -1581,12 +1581,7 @@ static int cs42l42_handle_device_data(struct i2c_client *i2c_client, cs42l42->ts_inv = CS42L42_TS_INV_DIS; } - regmap_update_bits(cs42l42->regmap, CS42L42_TSENSE_CTL, - CS42L42_TS_INV_MASK, - (cs42l42->ts_inv << CS42L42_TS_INV_SHIFT)); - - ret = of_property_read_u32(np, "cirrus,ts-dbnc-rise", &val); - + ret = device_property_read_u32(dev, "cirrus,ts-dbnc-rise", &val); if (!ret) { switch (val) { case CS42L42_TS_DBNCE_0: @@ -1600,7 +1595,7 @@ static int cs42l42_handle_device_data(struct i2c_client *i2c_client, cs42l42->ts_dbnc_rise = val; break; default: - dev_err(&i2c_client->dev, + dev_err(dev, "Wrong cirrus,ts-dbnc-rise DT value %d\n", val); cs42l42->ts_dbnc_rise = CS42L42_TS_DBNCE_1000; @@ -1614,8 +1609,7 @@ static int cs42l42_handle_device_data(struct i2c_client *i2c_client, (cs42l42->ts_dbnc_rise << CS42L42_TS_RISE_DBNCE_TIME_SHIFT)); - ret = of_property_read_u32(np, "cirrus,ts-dbnc-fall", &val); - + ret = device_property_read_u32(dev, "cirrus,ts-dbnc-fall", &val); if (!ret) { switch (val) { case CS42L42_TS_DBNCE_0: @@ -1629,7 +1623,7 @@ static int cs42l42_handle_device_data(struct i2c_client *i2c_client, cs42l42->ts_dbnc_fall = val; break; default: - dev_err(&i2c_client->dev, + dev_err(dev, "Wrong cirrus,ts-dbnc-fall DT value %d\n", val); cs42l42->ts_dbnc_fall = CS42L42_TS_DBNCE_0; @@ -1643,13 +1637,12 @@ static int cs42l42_handle_device_data(struct i2c_client *i2c_client, (cs42l42->ts_dbnc_fall << CS42L42_TS_FALL_DBNCE_TIME_SHIFT)); - ret = of_property_read_u32(np, "cirrus,btn-det-init-dbnce", &val); - + ret = device_property_read_u32(dev, "cirrus,btn-det-init-dbnce", &val); if (!ret) { if (val <= CS42L42_BTN_DET_INIT_DBNCE_MAX) cs42l42->btn_det_init_dbnce = val; else { - dev_err(&i2c_client->dev, + dev_err(dev, "Wrong cirrus,btn-det-init-dbnce DT value %d\n", val); cs42l42->btn_det_init_dbnce = @@ -1660,14 +1653,13 @@ static int cs42l42_handle_device_data(struct i2c_client *i2c_client, CS42L42_BTN_DET_INIT_DBNCE_DEFAULT; } - ret = of_property_read_u32(np, "cirrus,btn-det-event-dbnce", &val); - + ret = device_property_read_u32(dev, "cirrus,btn-det-event-dbnce", &val); if (!ret) { if (val <= CS42L42_BTN_DET_EVENT_DBNCE_MAX) cs42l42->btn_det_event_dbnce = val; else { - dev_err(&i2c_client->dev, - "Wrong cirrus,btn-det-event-dbnce DT value %d\n", val); + dev_err(dev, + "Wrong cirrus,btn-det-event-dbnce DT value %d\n", val); cs42l42->btn_det_event_dbnce = CS42L42_BTN_DET_EVENT_DBNCE_DEFAULT; } @@ -1676,19 +1668,17 @@ static int cs42l42_handle_device_data(struct i2c_client *i2c_client, CS42L42_BTN_DET_EVENT_DBNCE_DEFAULT; } - ret = of_property_read_u32_array(np, "cirrus,bias-lvls", - (u32 *)thresholds, CS42L42_NUM_BIASES); - + ret = device_property_read_u32_array(dev, "cirrus,bias-lvls", + thresholds, ARRAY_SIZE(thresholds)); if (!ret) { for (i = 0; i < CS42L42_NUM_BIASES; i++) { if (thresholds[i] <= CS42L42_HS_DET_LEVEL_MAX) cs42l42->bias_thresholds[i] = thresholds[i]; else { - dev_err(&i2c_client->dev, - "Wrong cirrus,bias-lvls[%d] DT value %d\n", i, + dev_err(dev, + "Wrong cirrus,bias-lvls[%d] DT value %d\n", i, thresholds[i]); - cs42l42->bias_thresholds[i] = - threshold_defaults[i]; + cs42l42->bias_thresholds[i] = threshold_defaults[i]; } } } else { @@ -1696,8 +1686,7 @@ static int cs42l42_handle_device_data(struct i2c_client *i2c_client, cs42l42->bias_thresholds[i] = threshold_defaults[i]; } - ret = of_property_read_u32(np, "cirrus,hs-bias-ramp-rate", &val); - + ret = device_property_read_u32(dev, "cirrus,hs-bias-ramp-rate", &val); if (!ret) { switch (val) { case CS42L42_HSBIAS_RAMP_FAST_RISE_SLOW_FALL: @@ -1717,7 +1706,7 @@ static int cs42l42_handle_device_data(struct i2c_client *i2c_client, cs42l42->hs_bias_ramp_time = CS42L42_HSBIAS_RAMP_TIME3; break; default: - dev_err(&i2c_client->dev, + dev_err(dev, "Wrong cirrus,hs-bias-ramp-rate DT value %d\n", val); cs42l42->hs_bias_ramp_rate = CS42L42_HSBIAS_RAMP_SLOW; @@ -1781,8 +1770,10 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, /* Reset the Device */ cs42l42->reset_gpio = devm_gpiod_get_optional(&i2c_client->dev, "reset", GPIOD_OUT_LOW); - if (IS_ERR(cs42l42->reset_gpio)) - return PTR_ERR(cs42l42->reset_gpio); + if (IS_ERR(cs42l42->reset_gpio)) { + ret = PTR_ERR(cs42l42->reset_gpio); + goto err_disable; + } if (cs42l42->reset_gpio) { dev_dbg(&i2c_client->dev, "Found reset GPIO\n"); @@ -1796,8 +1787,9 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, NULL, cs42l42_irq_thread, IRQF_ONESHOT | IRQF_TRIGGER_LOW, "cs42l42", cs42l42); - - if (ret != 0) + if (ret == -EPROBE_DEFER) + goto err_disable; + else if (ret != 0) dev_err(&i2c_client->dev, "Failed to request IRQ: %d\n", ret); @@ -1816,13 +1808,13 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, dev_err(&i2c_client->dev, "CS42L42 Device ID (%X). Expected %X\n", devid, CS42L42_CHIP_ID); - return ret; + goto err_disable; } ret = regmap_read(cs42l42->regmap, CS42L42_REVID, ®); if (ret < 0) { dev_err(&i2c_client->dev, "Get Revision ID failed\n"); - return ret; + goto err_disable; } dev_info(&i2c_client->dev, @@ -1845,11 +1837,9 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client, (1 << CS42L42_ADC_PDN_SHIFT) | (0 << CS42L42_PDN_ALL_SHIFT)); - if (i2c_client->dev.of_node) { - ret = cs42l42_handle_device_data(i2c_client, cs42l42); - if (ret != 0) - return ret; - } + ret = cs42l42_handle_device_data(&i2c_client->dev, cs42l42); + if (ret != 0) + goto err_disable; /* Setup headset detection */ cs42l42_setup_hs_type_detect(cs42l42); diff --git a/sound/soc/codecs/max9759.c b/sound/soc/codecs/max9759.c index 00e9d4fd1651fe570a49dc89a85b7ad37bd25126..0c261335c8a16cbb680738e41484b1a2a91545a9 100644 --- a/sound/soc/codecs/max9759.c +++ b/sound/soc/codecs/max9759.c @@ -64,7 +64,8 @@ static int speaker_gain_control_put(struct snd_kcontrol *kcontrol, struct snd_soc_component *c = snd_soc_kcontrol_component(kcontrol); struct max9759 *priv = snd_soc_component_get_drvdata(c); - if (ucontrol->value.integer.value[0] > 3) + if (ucontrol->value.integer.value[0] < 0 || + ucontrol->value.integer.value[0] > 3) return -EINVAL; priv->gain = ucontrol->value.integer.value[0]; diff --git a/sound/soc/codecs/nau8824.c b/sound/soc/codecs/nau8824.c index 15bd8335f6678203527ee08c46c855f61e4cb44e..c8ccfa2fff848d9fad78929b10a35ec3aa3587de 100644 --- a/sound/soc/codecs/nau8824.c +++ b/sound/soc/codecs/nau8824.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -27,6 +28,12 @@ #include "nau8824.h" +#define NAU8824_JD_ACTIVE_HIGH BIT(0) + +static int nau8824_quirk; +static int quirk_override = -1; +module_param_named(quirk, quirk_override, uint, 0444); +MODULE_PARM_DESC(quirk, "Board-specific quirk override"); static int nau8824_config_sysclk(struct nau8824 *nau8824, int clk_id, unsigned int freq); @@ -1875,6 +1882,34 @@ static int nau8824_read_device_properties(struct device *dev, return 0; } +/* Please keep this list alphabetically sorted */ +static const struct dmi_system_id nau8824_quirk_table[] = { + { + /* Cyberbook T116 rugged tablet */ + .matches = { + DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Default string"), + DMI_EXACT_MATCH(DMI_BOARD_NAME, "Cherry Trail CR"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "20170531"), + }, + .driver_data = (void *)(NAU8824_JD_ACTIVE_HIGH), + }, + {} +}; + +static void nau8824_check_quirks(void) +{ + const struct dmi_system_id *dmi_id; + + if (quirk_override != -1) { + nau8824_quirk = quirk_override; + return; + } + + dmi_id = dmi_first_match(nau8824_quirk_table); + if (dmi_id) + nau8824_quirk = (unsigned long)dmi_id->driver_data; +} + static int nau8824_i2c_probe(struct i2c_client *i2c, const struct i2c_device_id *id) { @@ -1899,6 +1934,11 @@ static int nau8824_i2c_probe(struct i2c_client *i2c, nau8824->irq = i2c->irq; sema_init(&nau8824->jd_sem, 1); + nau8824_check_quirks(); + + if (nau8824_quirk & NAU8824_JD_ACTIVE_HIGH) + nau8824->jkdet_polarity = 0; + nau8824_print_device_properties(nau8824); ret = regmap_read(nau8824->regmap, NAU8824_REG_I2C_DEVICE_ID, &value); diff --git a/sound/soc/codecs/rt5663.c b/sound/soc/codecs/rt5663.c index 619fb9a031e39a8a03e5134d7b8006e88292618a..db8a41aaa3859c15a5735852126c4383575fee92 100644 --- a/sound/soc/codecs/rt5663.c +++ b/sound/soc/codecs/rt5663.c @@ -3461,6 +3461,7 @@ static void rt5663_calibrate(struct rt5663_priv *rt5663) static int rt5663_parse_dp(struct rt5663_priv *rt5663, struct device *dev) { int table_size; + int ret; device_property_read_u32(dev, "realtek,dc_offset_l_manual", &rt5663->pdata.dc_offset_l_manual); @@ -3477,9 +3478,11 @@ static int rt5663_parse_dp(struct rt5663_priv *rt5663, struct device *dev) table_size = sizeof(struct impedance_mapping_table) * rt5663->pdata.impedance_sensing_num; rt5663->imp_table = devm_kzalloc(dev, table_size, GFP_KERNEL); - device_property_read_u32_array(dev, + ret = device_property_read_u32_array(dev, "realtek,impedance_sensing_table", (u32 *)rt5663->imp_table, table_size); + if (ret) + return ret; } return 0; @@ -3504,8 +3507,11 @@ static int rt5663_i2c_probe(struct i2c_client *i2c, if (pdata) rt5663->pdata = *pdata; - else - rt5663_parse_dp(rt5663, &i2c->dev); + else { + ret = rt5663_parse_dp(rt5663, &i2c->dev); + if (ret) + return ret; + } for (i = 0; i < ARRAY_SIZE(rt5663->supplies); i++) rt5663->supplies[i].supply = rt5663_supply_names[i]; diff --git a/sound/soc/codecs/rt5668.c b/sound/soc/codecs/rt5668.c index bc69adc9c8b707bb8c3e292e35cd7a9ec474e093..e625df57c69e5a99d2195d259a4ed939e455bd1b 100644 --- a/sound/soc/codecs/rt5668.c +++ b/sound/soc/codecs/rt5668.c @@ -1022,11 +1022,13 @@ static void rt5668_jack_detect_handler(struct work_struct *work) container_of(work, struct rt5668_priv, jack_detect_work.work); int val, btn_type; - while (!rt5668->component) - usleep_range(10000, 15000); - - while (!rt5668->component->card->instantiated) - usleep_range(10000, 15000); + if (!rt5668->component || !rt5668->component->card || + !rt5668->component->card->instantiated) { + /* card not yet ready, try later */ + mod_delayed_work(system_power_efficient_wq, + &rt5668->jack_detect_work, msecs_to_jiffies(15)); + return; + } mutex_lock(&rt5668->calibrate_mutex); diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c index 0486b1469799489699099ff2ad2b0967c9a83e83..113ed00ddf1e5839574ed53c251a148ca0fad55c 100644 --- a/sound/soc/codecs/rt5682.c +++ b/sound/soc/codecs/rt5682.c @@ -924,6 +924,8 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) unsigned int val, count; if (jack_insert) { + snd_soc_dapm_mutex_lock(dapm); + snd_soc_component_update_bits(component, RT5682_PWR_ANLG_1, RT5682_PWR_VREF2 | RT5682_PWR_MB, RT5682_PWR_VREF2 | RT5682_PWR_MB); @@ -968,6 +970,8 @@ int rt5682_headset_detect(struct snd_soc_component *component, int jack_insert) snd_soc_component_update_bits(component, RT5682_MICBIAS_2, RT5682_PWR_CLK25M_MASK | RT5682_PWR_CLK1M_MASK, RT5682_PWR_CLK25M_PU | RT5682_PWR_CLK1M_PU); + + snd_soc_dapm_mutex_unlock(dapm); } else { rt5682_enable_push_button_irq(component, false); snd_soc_component_update_bits(component, RT5682_CBJ_CTRL_1, @@ -1077,11 +1081,13 @@ void rt5682_jack_detect_handler(struct work_struct *work) container_of(work, struct rt5682_priv, jack_detect_work.work); int val, btn_type; - while (!rt5682->component) - usleep_range(10000, 15000); - - while (!rt5682->component->card->instantiated) - usleep_range(10000, 15000); + if (!rt5682->component || !rt5682->component->card || + !rt5682->component->card->instantiated) { + /* card not yet ready, try later */ + mod_delayed_work(system_power_efficient_wq, + &rt5682->jack_detect_work, msecs_to_jiffies(15)); + return; + } mutex_lock(&rt5682->calibrate_mutex); @@ -2797,6 +2803,8 @@ static int rt5682_register_dai_clks(struct snd_soc_component *component) for (i = 0; i < RT5682_DAI_NUM_CLKS; ++i) { struct clk_init_data init = { }; + struct clk_parent_data parent_data; + const struct clk_hw *parent; dai_clk_hw = &rt5682->dai_clks_hw[i]; @@ -2804,17 +2812,17 @@ static int rt5682_register_dai_clks(struct snd_soc_component *component) case RT5682_DAI_WCLK_IDX: /* Make MCLK the parent of WCLK */ if (rt5682->mclk) { - init.parent_data = &(struct clk_parent_data){ + parent_data = (struct clk_parent_data){ .fw_name = "mclk", }; + init.parent_data = &parent_data; init.num_parents = 1; } break; case RT5682_DAI_BCLK_IDX: /* Make WCLK the parent of BCLK */ - init.parent_hws = &(const struct clk_hw *){ - &rt5682->dai_clks_hw[RT5682_DAI_WCLK_IDX] - }; + parent = &rt5682->dai_clks_hw[RT5682_DAI_WCLK_IDX]; + init.parent_hws = &parent; init.num_parents = 1; break; default: diff --git a/sound/soc/codecs/tas2770.c b/sound/soc/codecs/tas2770.c index a91a0a31e74d1c07f7489cf1a948d214e09a95be..315fd9d971c8c3661b0acfb5ca2c86e05a1ee678 100644 --- a/sound/soc/codecs/tas2770.c +++ b/sound/soc/codecs/tas2770.c @@ -38,10 +38,12 @@ static void tas2770_reset(struct tas2770_priv *tas2770) gpiod_set_value_cansleep(tas2770->reset_gpio, 0); msleep(20); gpiod_set_value_cansleep(tas2770->reset_gpio, 1); + usleep_range(1000, 2000); } snd_soc_component_write(tas2770->component, TAS2770_SW_RST, TAS2770_RST); + usleep_range(1000, 2000); } static int tas2770_set_bias_level(struct snd_soc_component *component, @@ -110,6 +112,7 @@ static int tas2770_codec_resume(struct snd_soc_component *component) if (tas2770->sdz_gpio) { gpiod_set_value_cansleep(tas2770->sdz_gpio, 1); + usleep_range(1000, 2000); } else { ret = snd_soc_component_update_bits(component, TAS2770_PWR_CTRL, TAS2770_PWR_CTRL_MASK, @@ -291,11 +294,11 @@ static int tas2770_set_samplerate(struct tas2770_priv *tas2770, int samplerate) ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_44_1KHZ | TAS2770_TDM_CFG_REG0_31_88_2_96KHZ; break; - case 19200: + case 192000: ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_48KHZ | TAS2770_TDM_CFG_REG0_31_176_4_192KHZ; break; - case 17640: + case 176400: ramp_rate_val = TAS2770_TDM_CFG_REG0_SMP_44_1KHZ | TAS2770_TDM_CFG_REG0_31_176_4_192KHZ; break; @@ -510,8 +513,10 @@ static int tas2770_codec_probe(struct snd_soc_component *component) tas2770->component = component; - if (tas2770->sdz_gpio) + if (tas2770->sdz_gpio) { gpiod_set_value_cansleep(tas2770->sdz_gpio, 1); + usleep_range(1000, 2000); + } tas2770_reset(tas2770); diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index d18ae5e3ee809ddf31fd957d42d57b9f0f5c124f..01df3f4e045a9e18bcd5a7a396348d2fcd776c6b 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -1812,9 +1812,8 @@ static int wcd934x_hw_params(struct snd_pcm_substream *substream, } wcd->dai[dai->id].sconfig.rate = params_rate(params); - wcd934x_slim_set_hw_params(wcd, &wcd->dai[dai->id], substream->stream); - return 0; + return wcd934x_slim_set_hw_params(wcd, &wcd->dai[dai->id], substream->stream); } static int wcd934x_hw_free(struct snd_pcm_substream *substream, @@ -2471,6 +2470,9 @@ static int wcd934x_compander_set(struct snd_kcontrol *kc, int value = ucontrol->value.integer.value[0]; int sel; + if (wcd->comp_enabled[comp] == value) + return 0; + wcd->comp_enabled[comp] = value; sel = value ? WCD934X_HPH_GAIN_SRC_SEL_COMPANDER : WCD934X_HPH_GAIN_SRC_SEL_REGISTER; @@ -2494,10 +2496,10 @@ static int wcd934x_compander_set(struct snd_kcontrol *kc, case COMPANDER_8: break; default: - break; + return 0; } - return 0; + return 1; } static int wcd934x_rx_hph_mode_get(struct snd_kcontrol *kc, @@ -2541,6 +2543,31 @@ static int slim_rx_mux_get(struct snd_kcontrol *kc, return 0; } +static int slim_rx_mux_to_dai_id(int mux) +{ + int aif_id; + + switch (mux) { + case 1: + aif_id = AIF1_PB; + break; + case 2: + aif_id = AIF2_PB; + break; + case 3: + aif_id = AIF3_PB; + break; + case 4: + aif_id = AIF4_PB; + break; + default: + aif_id = -1; + break; + } + + return aif_id; +} + static int slim_rx_mux_put(struct snd_kcontrol *kc, struct snd_ctl_elem_value *ucontrol) { @@ -2548,43 +2575,59 @@ static int slim_rx_mux_put(struct snd_kcontrol *kc, struct wcd934x_codec *wcd = dev_get_drvdata(w->dapm->dev); struct soc_enum *e = (struct soc_enum *)kc->private_value; struct snd_soc_dapm_update *update = NULL; + struct wcd934x_slim_ch *ch, *c; u32 port_id = w->shift; + bool found = false; + int mux_idx; + int prev_mux_idx = wcd->rx_port_value[port_id]; + int aif_id; - if (wcd->rx_port_value[port_id] == ucontrol->value.enumerated.item[0]) - return 0; + mux_idx = ucontrol->value.enumerated.item[0]; - wcd->rx_port_value[port_id] = ucontrol->value.enumerated.item[0]; + if (mux_idx == prev_mux_idx) + return 0; - switch (wcd->rx_port_value[port_id]) { + switch(mux_idx) { case 0: - list_del_init(&wcd->rx_chs[port_id].list); - break; - case 1: - list_add_tail(&wcd->rx_chs[port_id].list, - &wcd->dai[AIF1_PB].slim_ch_list); - break; - case 2: - list_add_tail(&wcd->rx_chs[port_id].list, - &wcd->dai[AIF2_PB].slim_ch_list); - break; - case 3: - list_add_tail(&wcd->rx_chs[port_id].list, - &wcd->dai[AIF3_PB].slim_ch_list); + aif_id = slim_rx_mux_to_dai_id(prev_mux_idx); + if (aif_id < 0) + return 0; + + list_for_each_entry_safe(ch, c, &wcd->dai[aif_id].slim_ch_list, list) { + if (ch->port == port_id + WCD934X_RX_START) { + found = true; + list_del_init(&ch->list); + break; + } + } + if (!found) + return 0; + break; - case 4: - list_add_tail(&wcd->rx_chs[port_id].list, - &wcd->dai[AIF4_PB].slim_ch_list); + case 1 ... 4: + aif_id = slim_rx_mux_to_dai_id(mux_idx); + if (aif_id < 0) + return 0; + + if (list_empty(&wcd->rx_chs[port_id].list)) { + list_add_tail(&wcd->rx_chs[port_id].list, + &wcd->dai[aif_id].slim_ch_list); + } else { + dev_err(wcd->dev ,"SLIM_RX%d PORT is busy\n", port_id); + return 0; + } break; + default: - dev_err(wcd->dev, "Unknown AIF %d\n", - wcd->rx_port_value[port_id]); + dev_err(wcd->dev, "Unknown AIF %d\n", mux_idx); goto err; } + wcd->rx_port_value[port_id] = mux_idx; snd_soc_dapm_mux_update_power(w->dapm, kc, wcd->rx_port_value[port_id], e, update); - return 0; + return 1; err: return -EINVAL; } @@ -3030,6 +3073,7 @@ static int slim_tx_mixer_put(struct snd_kcontrol *kc, struct soc_mixer_control *mixer = (struct soc_mixer_control *)kc->private_value; int enable = ucontrol->value.integer.value[0]; + struct wcd934x_slim_ch *ch, *c; int dai_id = widget->shift; int port_id = mixer->shift; @@ -3037,17 +3081,32 @@ static int slim_tx_mixer_put(struct snd_kcontrol *kc, if (enable == wcd->tx_port_value[port_id]) return 0; - wcd->tx_port_value[port_id] = enable; - - if (enable) - list_add_tail(&wcd->tx_chs[port_id].list, - &wcd->dai[dai_id].slim_ch_list); - else - list_del_init(&wcd->tx_chs[port_id].list); + if (enable) { + if (list_empty(&wcd->tx_chs[port_id].list)) { + list_add_tail(&wcd->tx_chs[port_id].list, + &wcd->dai[dai_id].slim_ch_list); + } else { + dev_err(wcd->dev ,"SLIM_TX%d PORT is busy\n", port_id); + return 0; + } + } else { + bool found = false; + + list_for_each_entry_safe(ch, c, &wcd->dai[dai_id].slim_ch_list, list) { + if (ch->port == port_id) { + found = true; + list_del_init(&wcd->tx_chs[port_id].list); + break; + } + } + if (!found) + return 0; + } + wcd->tx_port_value[port_id] = enable; snd_soc_dapm_mixer_update_power(widget->dapm, kc, enable, update); - return 0; + return 1; } static const struct snd_kcontrol_new aif1_slim_cap_mixer[] = { diff --git a/sound/soc/codecs/wm8960.c b/sound/soc/codecs/wm8960.c index 9d325555e21910ed40183485e0124b1d17c5edcd..618692e2e0e4106d30cf125aef3b5245af5a32d1 100644 --- a/sound/soc/codecs/wm8960.c +++ b/sound/soc/codecs/wm8960.c @@ -742,9 +742,16 @@ static int wm8960_configure_clocking(struct snd_soc_component *component) int i, j, k; int ret; - if (!(iface1 & (1<<6))) { - dev_dbg(component->dev, - "Codec is slave mode, no need to configure clock\n"); + /* + * For Slave mode clocking should still be configured, + * so this if statement should be removed, but some platform + * may not work if the sysclk is not configured, to avoid such + * compatible issue, just add '!wm8960->sysclk' condition in + * this if statement. + */ + if (!(iface1 & (1 << 6)) && !wm8960->sysclk) { + dev_warn(component->dev, + "slave mode, but proceeding with no clock configuration\n"); return 0; } diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index db87e07b11c94eca195731b0b6a8cccb712302f6..601525c77bbaf3c5a98a1481ca7162116592a94a 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -772,7 +772,8 @@ static int wsa881x_put_pa_gain(struct snd_kcontrol *kc, usleep_range(1000, 1010); } - return 0; + + return 1; } static int wsa881x_get_port(struct snd_kcontrol *kcontrol, @@ -816,15 +817,22 @@ static int wsa881x_set_port(struct snd_kcontrol *kcontrol, (struct soc_mixer_control *)kcontrol->private_value; int portidx = mixer->reg; - if (ucontrol->value.integer.value[0]) + if (ucontrol->value.integer.value[0]) { + if (data->port_enable[portidx]) + return 0; + data->port_enable[portidx] = true; - else + } else { + if (!data->port_enable[portidx]) + return 0; + data->port_enable[portidx] = false; + } if (portidx == WSA881X_PORT_BOOST) /* Boost Switch */ wsa881x_boost_ctrl(comp, data->port_enable[portidx]); - return 0; + return 1; } static const char * const smart_boost_lvl_text[] = { diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 02c81d2e34ad00f5437a46d5f264679552507a24..5e3c71f025f4531faf9df246b538b8414029423d 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -19,6 +19,7 @@ #include "fsl_asrc.h" #define IDEAL_RATIO_DECIMAL_DEPTH 26 +#define DIVIDER_NUM 64 #define pair_err(fmt, ...) \ dev_err(&asrc->pdev->dev, "Pair %c: " fmt, 'A' + index, ##__VA_ARGS__) @@ -101,6 +102,55 @@ static unsigned char clk_map_imx8qxp[2][ASRC_CLK_MAP_LEN] = { }, }; +/* + * According to RM, the divider range is 1 ~ 8, + * prescaler is power of 2 from 1 ~ 128. + */ +static int asrc_clk_divider[DIVIDER_NUM] = { + 1, 2, 4, 8, 16, 32, 64, 128, /* divider = 1 */ + 2, 4, 8, 16, 32, 64, 128, 256, /* divider = 2 */ + 3, 6, 12, 24, 48, 96, 192, 384, /* divider = 3 */ + 4, 8, 16, 32, 64, 128, 256, 512, /* divider = 4 */ + 5, 10, 20, 40, 80, 160, 320, 640, /* divider = 5 */ + 6, 12, 24, 48, 96, 192, 384, 768, /* divider = 6 */ + 7, 14, 28, 56, 112, 224, 448, 896, /* divider = 7 */ + 8, 16, 32, 64, 128, 256, 512, 1024, /* divider = 8 */ +}; + +/* + * Check if the divider is available for internal ratio mode + */ +static bool fsl_asrc_divider_avail(int clk_rate, int rate, int *div) +{ + u32 rem, i; + u64 n; + + if (div) + *div = 0; + + if (clk_rate == 0 || rate == 0) + return false; + + n = clk_rate; + rem = do_div(n, rate); + + if (div) + *div = n; + + if (rem != 0) + return false; + + for (i = 0; i < DIVIDER_NUM; i++) { + if (n == asrc_clk_divider[i]) + break; + } + + if (i == DIVIDER_NUM) + return false; + + return true; +} + /** * fsl_asrc_sel_proc - Select the pre-processing and post-processing options * @inrate: input sample rate @@ -330,12 +380,12 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate) enum asrc_word_width input_word_width; enum asrc_word_width output_word_width; u32 inrate, outrate, indiv, outdiv; - u32 clk_index[2], div[2], rem[2]; + u32 clk_index[2], div[2]; u64 clk_rate; int in, out, channels; int pre_proc, post_proc; struct clk *clk; - bool ideal; + bool ideal, div_avail; if (!config) { pair_err("invalid pair config\n"); @@ -415,8 +465,7 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate) clk = asrc_priv->asrck_clk[clk_index[ideal ? OUT : IN]]; clk_rate = clk_get_rate(clk); - rem[IN] = do_div(clk_rate, inrate); - div[IN] = (u32)clk_rate; + div_avail = fsl_asrc_divider_avail(clk_rate, inrate, &div[IN]); /* * The divider range is [1, 1024], defined by the hardware. For non- @@ -425,7 +474,7 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate) * only result in different converting speeds. So remainder does not * matter, as long as we keep the divider within its valid range. */ - if (div[IN] == 0 || (!ideal && (div[IN] > 1024 || rem[IN] != 0))) { + if (div[IN] == 0 || (!ideal && !div_avail)) { pair_err("failed to support input sample rate %dHz by asrck_%x\n", inrate, clk_index[ideal ? OUT : IN]); return -EINVAL; @@ -436,13 +485,12 @@ static int fsl_asrc_config_pair(struct fsl_asrc_pair *pair, bool use_ideal_rate) clk = asrc_priv->asrck_clk[clk_index[OUT]]; clk_rate = clk_get_rate(clk); if (ideal && use_ideal_rate) - rem[OUT] = do_div(clk_rate, IDEAL_RATIO_RATE); + div_avail = fsl_asrc_divider_avail(clk_rate, IDEAL_RATIO_RATE, &div[OUT]); else - rem[OUT] = do_div(clk_rate, outrate); - div[OUT] = clk_rate; + div_avail = fsl_asrc_divider_avail(clk_rate, outrate, &div[OUT]); /* Output divider has the same limitation as the input one */ - if (div[OUT] == 0 || (!ideal && (div[OUT] > 1024 || rem[OUT] != 0))) { + if (div[OUT] == 0 || (!ideal && !div_avail)) { pair_err("failed to support output sample rate %dHz by asrck_%x\n", outrate, clk_index[OUT]); return -EINVAL; @@ -621,8 +669,7 @@ static void fsl_asrc_select_clk(struct fsl_asrc_priv *asrc_priv, clk_index = asrc_priv->clk_map[j][i]; clk_rate = clk_get_rate(asrc_priv->asrck_clk[clk_index]); /* Only match a perfect clock source with no remainder */ - if (clk_rate != 0 && (clk_rate / rate[j]) <= 1024 && - (clk_rate % rate[j]) == 0) + if (fsl_asrc_divider_avail(clk_rate, rate[j], NULL)) break; } diff --git a/sound/soc/fsl/fsl_mqs.c b/sound/soc/fsl/fsl_mqs.c index 69aeb0e71844d9f614e156f4e8a0cbcdb41d728f..0d4efbed41dab6cac2e413f332fa4d1dae55b7bf 100644 --- a/sound/soc/fsl/fsl_mqs.c +++ b/sound/soc/fsl/fsl_mqs.c @@ -337,4 +337,4 @@ module_platform_driver(fsl_mqs_driver); MODULE_AUTHOR("Shengjiu Wang "); MODULE_DESCRIPTION("MQS codec driver"); MODULE_LICENSE("GPL v2"); -MODULE_ALIAS("platform: fsl-mqs"); +MODULE_ALIAS("platform:fsl-mqs"); diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c index af3c3b90c0acab59c88d0050838edcc23a848c0b..83b4a22bf15ac0b8883f3144c55f34d5959f6012 100644 --- a/sound/soc/fsl/pcm030-audio-fabric.c +++ b/sound/soc/fsl/pcm030-audio-fabric.c @@ -93,16 +93,21 @@ static int pcm030_fabric_probe(struct platform_device *op) dev_err(&op->dev, "platform_device_alloc() failed\n"); ret = platform_device_add(pdata->codec_device); - if (ret) + if (ret) { dev_err(&op->dev, "platform_device_add() failed: %d\n", ret); + platform_device_put(pdata->codec_device); + } ret = snd_soc_register_card(card); - if (ret) + if (ret) { dev_err(&op->dev, "snd_soc_register_card() failed: %d\n", ret); + platform_device_del(pdata->codec_device); + platform_device_put(pdata->codec_device); + } platform_set_drvdata(op, pdata); - return ret; + } static int pcm030_fabric_remove(struct platform_device *op) diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 2770e8179983a71a9d1c9906f5e41d7964f0764b..25548555d8d790304a9ce7a384cb460847d85224 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -847,6 +847,11 @@ static int create_sdw_dailink(struct device *dev, int *be_index, cpus + *cpu_id, cpu_dai_num, codecs, codec_num, NULL, &sdw_ops); + /* + * SoundWire DAILINKs use 'stream' functions and Bank Switch operations + * based on wait_for_completion(), tag them as 'nonatomic'. + */ + dai_links[*be_index].nonatomic = true; ret = set_codec_init_func(link, dai_links + (*be_index)++, playback, group_id); diff --git a/sound/soc/intel/catpt/dsp.c b/sound/soc/intel/catpt/dsp.c index 9e807b94173219c687758db9b5510bd9d423beab..38a92bbc1ed5682c72db8825ee34101560348fa3 100644 --- a/sound/soc/intel/catpt/dsp.c +++ b/sound/soc/intel/catpt/dsp.c @@ -65,6 +65,7 @@ static int catpt_dma_memcpy(struct catpt_dev *cdev, struct dma_chan *chan, { struct dma_async_tx_descriptor *desc; enum dma_status status; + int ret; desc = dmaengine_prep_dma_memcpy(chan, dst_addr, src_addr, size, DMA_CTRL_ACK); @@ -77,13 +78,22 @@ static int catpt_dma_memcpy(struct catpt_dev *cdev, struct dma_chan *chan, catpt_updatel_shim(cdev, HMDC, CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id), CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id)); - dmaengine_submit(desc); + + ret = dma_submit_error(dmaengine_submit(desc)); + if (ret) { + dev_err(cdev->dev, "submit tx failed: %d\n", ret); + goto clear_hdda; + } + status = dma_wait_for_async_tx(desc); + ret = (status == DMA_COMPLETE) ? 0 : -EPROTO; + +clear_hdda: /* regardless of status, disable access to HOST memory in demand mode */ catpt_updatel_shim(cdev, HMDC, CATPT_HMDC_HDDA(CATPT_DMA_DEVID, chan->chan_id), 0); - return (status == DMA_COMPLETE) ? 0 : -EPROTO; + return ret; } int catpt_dma_memcpy_todsp(struct catpt_dev *cdev, struct dma_chan *chan, diff --git a/sound/soc/mediatek/mt8173/mt8173-max98090.c b/sound/soc/mediatek/mt8173/mt8173-max98090.c index fc94314bfc02ffd40dd9863fe29dc80419f5e04b..3bdd4931316cd0e624e327e2372d609eb738af68 100644 --- a/sound/soc/mediatek/mt8173/mt8173-max98090.c +++ b/sound/soc/mediatek/mt8173/mt8173-max98090.c @@ -180,6 +180,9 @@ static int mt8173_max98090_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(codec_node); + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c index 0f28dc2217c091e174f8fbe132d51dac25c03ba7..390da5bf727ebefedec2c9d92c9b5fa1f963e733 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5514.c @@ -218,6 +218,8 @@ static int mt8173_rt5650_rt5514_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c index 077c6ee0678067c3ac2a2a6836a4b40625c9f5e4..c8e4e85e105752ac8db257a3d768181c10178103 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c @@ -285,6 +285,8 @@ static int mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c index c28ebf891cb05f77b50e88869dc75424edaad51e..e168d31f44459cf75cf242c81bf847a698d2b4fc 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c @@ -323,6 +323,8 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev) if (ret) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c index 20d31b69a5c00bf74354ea60aff898377fb3942d..9cc0f26b08fbcb296d0c657bb006ef241bf6afc5 100644 --- a/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-da7219-max98357.c @@ -787,7 +787,11 @@ static int mt8183_da7219_max98357_dev_probe(struct platform_device *pdev) return ret; } - return devm_snd_soc_register_card(&pdev->dev, card); + ret = devm_snd_soc_register_card(&pdev->dev, card); + + of_node_put(platform_node); + of_node_put(hdmi_codec); + return ret; } #ifdef CONFIG_OF diff --git a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c index 79ba2f2d845223b4603f3ca3862f8ba41ed1dc88..14ce8b93597f3521fe7071aadf3f78a2aaef97ad 100644 --- a/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c +++ b/sound/soc/mediatek/mt8183/mt8183-mt6358-ts3a227-max98357.c @@ -720,7 +720,12 @@ mt8183_mt6358_ts3a227_max98357_dev_probe(struct platform_device *pdev) __func__, ret); } - return devm_snd_soc_register_card(&pdev->dev, card); + ret = devm_snd_soc_register_card(&pdev->dev, card); + + of_node_put(platform_node); + of_node_put(ec_codec); + of_node_put(hdmi_codec); + return ret; } #ifdef CONFIG_OF diff --git a/sound/soc/meson/aiu-encoder-i2s.c b/sound/soc/meson/aiu-encoder-i2s.c index 9322245521463712fccb693448f0fa416aa7e12d..67729de41a73ef5981a0bb757a71817cfd41fd17 100644 --- a/sound/soc/meson/aiu-encoder-i2s.c +++ b/sound/soc/meson/aiu-encoder-i2s.c @@ -18,7 +18,6 @@ #define AIU_RST_SOFT_I2S_FAST BIT(0) #define AIU_I2S_DAC_CFG_MSB_FIRST BIT(2) -#define AIU_I2S_MISC_HOLD_EN BIT(2) #define AIU_CLK_CTRL_I2S_DIV_EN BIT(0) #define AIU_CLK_CTRL_I2S_DIV GENMASK(3, 2) #define AIU_CLK_CTRL_AOCLK_INVERT BIT(6) @@ -36,37 +35,6 @@ static void aiu_encoder_i2s_divider_enable(struct snd_soc_component *component, enable ? AIU_CLK_CTRL_I2S_DIV_EN : 0); } -static void aiu_encoder_i2s_hold(struct snd_soc_component *component, - bool enable) -{ - snd_soc_component_update_bits(component, AIU_I2S_MISC, - AIU_I2S_MISC_HOLD_EN, - enable ? AIU_I2S_MISC_HOLD_EN : 0); -} - -static int aiu_encoder_i2s_trigger(struct snd_pcm_substream *substream, int cmd, - struct snd_soc_dai *dai) -{ - struct snd_soc_component *component = dai->component; - - switch (cmd) { - case SNDRV_PCM_TRIGGER_START: - case SNDRV_PCM_TRIGGER_RESUME: - case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: - aiu_encoder_i2s_hold(component, false); - return 0; - - case SNDRV_PCM_TRIGGER_STOP: - case SNDRV_PCM_TRIGGER_SUSPEND: - case SNDRV_PCM_TRIGGER_PAUSE_PUSH: - aiu_encoder_i2s_hold(component, true); - return 0; - - default: - return -EINVAL; - } -} - static int aiu_encoder_i2s_setup_desc(struct snd_soc_component *component, struct snd_pcm_hw_params *params) { @@ -353,7 +321,6 @@ static void aiu_encoder_i2s_shutdown(struct snd_pcm_substream *substream, } const struct snd_soc_dai_ops aiu_encoder_i2s_dai_ops = { - .trigger = aiu_encoder_i2s_trigger, .hw_params = aiu_encoder_i2s_hw_params, .hw_free = aiu_encoder_i2s_hw_free, .set_fmt = aiu_encoder_i2s_set_fmt, diff --git a/sound/soc/meson/aiu-fifo-i2s.c b/sound/soc/meson/aiu-fifo-i2s.c index d91b0d874342fbfa80f8f741f98c7137ecb17de2..2cbd127101d353e55fd908c973a271be454c76af 100644 --- a/sound/soc/meson/aiu-fifo-i2s.c +++ b/sound/soc/meson/aiu-fifo-i2s.c @@ -20,6 +20,8 @@ #define AIU_MEM_I2S_CONTROL_MODE_16BIT BIT(6) #define AIU_MEM_I2S_BUF_CNTL_INIT BIT(0) #define AIU_RST_SOFT_I2S_FAST BIT(0) +#define AIU_I2S_MISC_HOLD_EN BIT(2) +#define AIU_I2S_MISC_FORCE_LEFT_RIGHT BIT(4) #define AIU_FIFO_I2S_BLOCK 256 @@ -90,6 +92,10 @@ static int aiu_fifo_i2s_hw_params(struct snd_pcm_substream *substream, unsigned int val; int ret; + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_HOLD_EN, + AIU_I2S_MISC_HOLD_EN); + ret = aiu_fifo_hw_params(substream, params, dai); if (ret) return ret; @@ -117,6 +123,19 @@ static int aiu_fifo_i2s_hw_params(struct snd_pcm_substream *substream, snd_soc_component_update_bits(component, AIU_MEM_I2S_MASKS, AIU_MEM_I2S_MASKS_IRQ_BLOCK, val); + /* + * Most (all?) supported SoCs have this bit set by default. The vendor + * driver however sets it manually (depending on the version either + * while un-setting AIU_I2S_MISC_HOLD_EN or right before that). Follow + * the same approach for consistency with the vendor driver. + */ + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_FORCE_LEFT_RIGHT, + AIU_I2S_MISC_FORCE_LEFT_RIGHT); + + snd_soc_component_update_bits(component, AIU_I2S_MISC, + AIU_I2S_MISC_HOLD_EN, 0); + return 0; } diff --git a/sound/soc/meson/aiu-fifo.c b/sound/soc/meson/aiu-fifo.c index aa88aae8e517dc7c2f566bff3f75aee7e25aaa0e..3efc3cad0b4ecbc84e66c602d21fbff1379cfb26 100644 --- a/sound/soc/meson/aiu-fifo.c +++ b/sound/soc/meson/aiu-fifo.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -192,6 +193,11 @@ int aiu_fifo_pcm_new(struct snd_soc_pcm_runtime *rtd, struct snd_card *card = rtd->card->snd_card; struct aiu_fifo *fifo = dai->playback_dma_data; size_t size = fifo->pcm->buffer_bytes_max; + int ret; + + ret = dma_coerce_mask_and_coherent(card->dev, DMA_BIT_MASK(32)); + if (ret) + return ret; snd_pcm_lib_preallocate_pages(substream, SNDRV_DMA_TYPE_DEV, diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index 9766725c29166e1031bae0ab038d84ea6039f750..84cf190aa01a64ecce93ddca9b418757ed0fd78e 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -269,9 +269,7 @@ static int q6asm_dai_prepare(struct snd_soc_component *component, if (ret < 0) { dev_err(dev, "%s: q6asm_open_write failed\n", __func__); - q6asm_audio_client_free(prtd->audio_client); - prtd->audio_client = NULL; - return -ENOMEM; + goto open_err; } prtd->session_id = q6asm_get_session_id(prtd->audio_client); @@ -279,7 +277,7 @@ static int q6asm_dai_prepare(struct snd_soc_component *component, prtd->session_id, substream->stream); if (ret) { dev_err(dev, "%s: stream reg failed ret:%d\n", __func__, ret); - return ret; + goto routing_err; } if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { @@ -301,10 +299,19 @@ static int q6asm_dai_prepare(struct snd_soc_component *component, } if (ret < 0) dev_info(dev, "%s: CMD Format block failed\n", __func__); + else + prtd->state = Q6ASM_STREAM_RUNNING; - prtd->state = Q6ASM_STREAM_RUNNING; + return ret; - return 0; +routing_err: + q6asm_cmd(prtd->audio_client, prtd->stream_id, CMD_CLOSE); +open_err: + q6asm_unmap_memory_regions(substream->stream, prtd->audio_client); + q6asm_audio_client_free(prtd->audio_client); + prtd->audio_client = NULL; + + return ret; } static int q6asm_dai_trigger(struct snd_soc_component *component, diff --git a/sound/soc/qcom/qdsp6/q6routing.c b/sound/soc/qcom/qdsp6/q6routing.c index 0a6b9433f6acfdaf645efe287537e976d32cc0ce..2026fa59029278f1075a1974af8eaca56628a9a4 100644 --- a/sound/soc/qcom/qdsp6/q6routing.c +++ b/sound/soc/qcom/qdsp6/q6routing.c @@ -488,9 +488,15 @@ static int msm_routing_put_audio_mixer(struct snd_kcontrol *kcontrol, struct session_data *session = &data->sessions[session_id]; if (ucontrol->value.integer.value[0]) { + if (session->port_id == be_id) + return 0; + session->port_id = be_id; snd_soc_dapm_mixer_update_power(dapm, kcontrol, 1, update); } else { + if (session->port_id == -1 || session->port_id != be_id) + return 0; + session->port_id = -1; snd_soc_dapm_mixer_update_power(dapm, kcontrol, 0, update); } diff --git a/sound/soc/samsung/idma.c b/sound/soc/samsung/idma.c index 66bcc2f97544bfecf0f905c363a85c37fe3a34a1..c3f1b054e2389c5e233258541d614d759c20a795 100644 --- a/sound/soc/samsung/idma.c +++ b/sound/soc/samsung/idma.c @@ -360,6 +360,8 @@ static int preallocate_idma_buffer(struct snd_pcm *pcm, int stream) buf->addr = idma.lp_tx_addr; buf->bytes = idma_hardware.buffer_bytes_max; buf->area = (unsigned char * __force)ioremap(buf->addr, buf->bytes); + if (!buf->area) + return -ENOMEM; return 0; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 678547b939e55fea38f5bfbf2af938016f058ba6..19f6ff047cddc9fdb47dd2d27dd87bfb4439be6a 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2454,6 +2454,7 @@ int snd_soc_component_initialize(struct snd_soc_component *component, INIT_LIST_HEAD(&component->dai_list); INIT_LIST_HEAD(&component->dobj_list); INIT_LIST_HEAD(&component->card_list); + INIT_LIST_HEAD(&component->list); mutex_init(&component->io_mutex); component->name = fmt_single_name(dev, &component->id); diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 148c095df27b1209af72dd712d0d7d7efadacd67..2924d89bf0daf109730e49e5169fc9ccfc8ed264 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -2528,9 +2528,20 @@ static struct snd_soc_dapm_widget *dapm_find_widget( { struct snd_soc_dapm_widget *w; struct snd_soc_dapm_widget *fallback = NULL; + char prefixed_pin[80]; + const char *pin_name; + const char *prefix = soc_dapm_prefix(dapm); + + if (prefix) { + snprintf(prefixed_pin, sizeof(prefixed_pin), "%s %s", + prefix, pin); + pin_name = prefixed_pin; + } else { + pin_name = pin; + } for_each_card_widgets(dapm->card, w) { - if (!strcmp(w->name, pin)) { + if (!strcmp(w->name, pin_name)) { if (w->dapm == dapm) return w; else @@ -2544,10 +2555,16 @@ static struct snd_soc_dapm_widget *dapm_find_widget( return NULL; } -static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm, - const char *pin, int status) +/* + * set the DAPM pin status: + * returns 1 when the value has been updated, 0 when unchanged, or a negative + * error code; called from kcontrol put callback + */ +static int __snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm, + const char *pin, int status) { struct snd_soc_dapm_widget *w = dapm_find_widget(dapm, pin, true); + int ret = 0; dapm_assert_locked(dapm); @@ -2560,13 +2577,26 @@ static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm, dapm_mark_dirty(w, "pin configuration"); dapm_widget_invalidate_input_paths(w); dapm_widget_invalidate_output_paths(w); + ret = 1; } w->connected = status; if (status == 0) w->force = 0; - return 0; + return ret; +} + +/* + * similar as __snd_soc_dapm_set_pin(), but returns 0 when successful; + * called from several API functions below + */ +static int snd_soc_dapm_set_pin(struct snd_soc_dapm_context *dapm, + const char *pin, int status) +{ + int ret = __snd_soc_dapm_set_pin(dapm, pin, status); + + return ret < 0 ? ret : 0; } /** @@ -3571,14 +3601,15 @@ int snd_soc_dapm_put_pin_switch(struct snd_kcontrol *kcontrol, { struct snd_soc_card *card = snd_kcontrol_chip(kcontrol); const char *pin = (const char *)kcontrol->private_value; + int ret; - if (ucontrol->value.integer.value[0]) - snd_soc_dapm_enable_pin(&card->dapm, pin); - else - snd_soc_dapm_disable_pin(&card->dapm, pin); + mutex_lock_nested(&card->dapm_mutex, SND_SOC_DAPM_CLASS_RUNTIME); + ret = __snd_soc_dapm_set_pin(&card->dapm, pin, + !!ucontrol->value.integer.value[0]); + mutex_unlock(&card->dapm_mutex); snd_soc_dapm_sync(&card->dapm); - return 0; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_dapm_put_pin_switch); @@ -4024,7 +4055,7 @@ static int snd_soc_dapm_dai_link_put(struct snd_kcontrol *kcontrol, rtd->params_select = ucontrol->value.enumerated.item[0]; - return 0; + return 1; } static void diff --git a/sound/soc/soc-ops.c b/sound/soc/soc-ops.c index 10f48827bb0e0fee2a6a7d7565d97114f4f06963..2bc9fa6a34b8faf645ef36dcce95d1a38778671c 100644 --- a/sound/soc/soc-ops.c +++ b/sound/soc/soc-ops.c @@ -308,7 +308,7 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, unsigned int sign_bit = mc->sign_bit; unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; - int err; + int err, ret; bool type_2r = false; unsigned int val2 = 0; unsigned int val, val_mask; @@ -316,13 +316,27 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, if (sign_bit) mask = BIT(sign_bit + 1) - 1; - val = ((ucontrol->value.integer.value[0] + min) & mask); + val = ucontrol->value.integer.value[0]; + if (mc->platform_max && ((int)val + min) > mc->platform_max) + return -EINVAL; + if (val > max - min) + return -EINVAL; + if (val < 0) + return -EINVAL; + val = (val + min) & mask; if (invert) val = max - val; val_mask = mask << shift; val = val << shift; if (snd_soc_volsw_is_stereo(mc)) { - val2 = ((ucontrol->value.integer.value[1] + min) & mask); + val2 = ucontrol->value.integer.value[1]; + if (mc->platform_max && ((int)val2 + min) > mc->platform_max) + return -EINVAL; + if (val2 > max - min) + return -EINVAL; + if (val2 < 0) + return -EINVAL; + val2 = (val2 + min) & mask; if (invert) val2 = max - val2; if (reg == reg2) { @@ -336,12 +350,18 @@ int snd_soc_put_volsw(struct snd_kcontrol *kcontrol, err = snd_soc_component_update_bits(component, reg, val_mask, val); if (err < 0) return err; + ret = err; - if (type_2r) + if (type_2r) { err = snd_soc_component_update_bits(component, reg2, val_mask, - val2); + val2); + /* Don't discard any error code or drop change flag */ + if (ret == 0 || err < 0) { + ret = err; + } + } - return err; + return ret; } EXPORT_SYMBOL_GPL(snd_soc_put_volsw); @@ -409,8 +429,15 @@ int snd_soc_put_volsw_sx(struct snd_kcontrol *kcontrol, int err = 0; unsigned int val, val_mask, val2 = 0; + val = ucontrol->value.integer.value[0]; + if (mc->platform_max && val > mc->platform_max) + return -EINVAL; + if (val > max - min) + return -EINVAL; + if (val < 0) + return -EINVAL; val_mask = mask << shift; - val = (ucontrol->value.integer.value[0] + min) & mask; + val = (val + min) & mask; val = val << shift; err = snd_soc_component_update_bits(component, reg, val_mask, val); @@ -483,7 +510,7 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, unsigned int mask = (1 << fls(max)) - 1; unsigned int invert = mc->invert; unsigned int val, val_mask; - int ret; + int err, ret; if (invert) val = (max - ucontrol->value.integer.value[0]) & mask; @@ -492,9 +519,10 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; - ret = snd_soc_component_update_bits(component, reg, val_mask, val); - if (ret < 0) - return ret; + err = snd_soc_component_update_bits(component, reg, val_mask, val); + if (err < 0) + return err; + ret = err; if (snd_soc_volsw_is_stereo(mc)) { if (invert) @@ -504,8 +532,12 @@ int snd_soc_put_volsw_range(struct snd_kcontrol *kcontrol, val_mask = mask << shift; val = val << shift; - ret = snd_soc_component_update_bits(component, rreg, val_mask, + err = snd_soc_component_update_bits(component, rreg, val_mask, val); + /* Don't discard any error code or drop change flag */ + if (ret == 0 || err < 0) { + ret = err; + } } return ret; @@ -859,6 +891,8 @@ int snd_soc_put_xr_sx(struct snd_kcontrol *kcontrol, unsigned int i, regval, regmask; int err; + if (val < mc->min || val > mc->max) + return -EINVAL; if (invert) val = max - val; val &= mask; diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c index 1030e11017b2746527377daba04b5d737be08371..4d24ac255d2532a64def67a2b04a9011a05ef087 100644 --- a/sound/soc/soc-topology.c +++ b/sound/soc/soc-topology.c @@ -2873,6 +2873,7 @@ EXPORT_SYMBOL_GPL(snd_soc_tplg_widget_remove_all); /* remove dynamic controls from the component driver */ int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index) { + struct snd_card *card = comp->card->snd_card; struct snd_soc_dobj *dobj, *next_dobj; int pass = SOC_TPLG_PASS_END; @@ -2880,6 +2881,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index) while (pass >= SOC_TPLG_PASS_START) { /* remove mixer controls */ + down_write(&card->controls_rwsem); list_for_each_entry_safe(dobj, next_dobj, &comp->dobj_list, list) { @@ -2923,6 +2925,7 @@ int snd_soc_tplg_component_remove(struct snd_soc_component *comp, u32 index) break; } } + up_write(&card->controls_rwsem); pass--; } diff --git a/sound/soc/sof/core.c b/sound/soc/sof/core.c index adc7c37145d6407c173c942cd58f61103f0c3bbb..feced9077dfe19fa6a82cf31b0606456a603328b 100644 --- a/sound/soc/sof/core.c +++ b/sound/soc/sof/core.c @@ -354,7 +354,6 @@ int snd_sof_device_remove(struct device *dev) dev_warn(dev, "error: %d failed to prepare DSP for device removal", ret); - snd_sof_fw_unload(sdev); snd_sof_ipc_free(sdev); snd_sof_free_debug(sdev); snd_sof_free_trace(sdev); @@ -377,8 +376,7 @@ int snd_sof_device_remove(struct device *dev) snd_sof_remove(sdev); /* release firmware */ - release_firmware(pdata->fw); - pdata->fw = NULL; + snd_sof_fw_unload(sdev); return 0; } diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c index c6cb8c212eca59e84f3ff9e7092b339a8f5a6d8e..ef316311e959a11df198df8dd47e89a9cca4f71a 100644 --- a/sound/soc/sof/intel/hda-dai.c +++ b/sound/soc/sof/intel/hda-dai.c @@ -68,6 +68,7 @@ static struct hdac_ext_stream * return NULL; } + spin_lock_irq(&bus->reg_lock); list_for_each_entry(stream, &bus->stream_list, list) { struct hdac_ext_stream *hstream = stream_to_hdac_ext_stream(stream); @@ -107,12 +108,12 @@ static struct hdac_ext_stream * * is updated in snd_hdac_ext_stream_decouple(). */ if (!res->decoupled) - snd_hdac_ext_stream_decouple(bus, res, true); - spin_lock_irq(&bus->reg_lock); + snd_hdac_ext_stream_decouple_locked(bus, res, true); + res->link_locked = 1; res->link_substream = substream; - spin_unlock_irq(&bus->reg_lock); } + spin_unlock_irq(&bus->reg_lock); return res; } diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c index ba9ed66f98bc7607741fefa0a00ae3509116d71e..2d5c3fc93bc5cdcdd86b3d60b16fea3c6f0c9e2e 100644 --- a/sound/soc/sof/loader.c +++ b/sound/soc/sof/loader.c @@ -830,5 +830,7 @@ EXPORT_SYMBOL(snd_sof_run_firmware); void snd_sof_fw_unload(struct snd_sof_dev *sdev) { /* TODO: support module unloading at runtime */ + release_firmware(sdev->pdata->fw); + sdev->pdata->fw = NULL; } EXPORT_SYMBOL(snd_sof_fw_unload); diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c index 69313fbdb636a72052da9b2c3dcec1c372f67b5d..b6327c30c2b5a8ed3e1e9ef16cf70213003acf55 100644 --- a/sound/soc/sof/topology.c +++ b/sound/soc/sof/topology.c @@ -2590,6 +2590,15 @@ static int sof_widget_unload(struct snd_soc_component *scomp, /* power down the pipeline schedule core */ pipeline = swidget->private; + + /* + * Runtime PM should still function normally if topology loading fails and + * it's components are unloaded. Do not power down the primary core so that the + * CTX_SAVE IPC can succeed during runtime suspend. + */ + if (pipeline->core == SOF_DSP_PRIMARY_CORE) + break; + ret = snd_sof_dsp_core_power_down(sdev, 1 << pipeline->core); if (ret < 0) dev_err(scomp->dev, "error: powering down pipeline schedule core %d\n", diff --git a/sound/soc/tegra/tegra186_dspk.c b/sound/soc/tegra/tegra186_dspk.c index 0cbe31e2c7e9c4cc14a2d4ddc4fee6cd4acc648a..373189e5907b98d55883f3c4894d4b0ab418fa66 100644 --- a/sound/soc/tegra/tegra186_dspk.c +++ b/sound/soc/tegra/tegra186_dspk.c @@ -26,51 +26,162 @@ static const struct reg_default tegra186_dspk_reg_defaults[] = { { TEGRA186_DSPK_CODEC_CTRL, 0x03000000 }, }; -static int tegra186_dspk_get_control(struct snd_kcontrol *kcontrol, +static int tegra186_dspk_get_fifo_th(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); - if (strstr(kcontrol->id.name, "FIFO Threshold")) - ucontrol->value.integer.value[0] = dspk->rx_fifo_th; - else if (strstr(kcontrol->id.name, "OSR Value")) - ucontrol->value.integer.value[0] = dspk->osr_val; - else if (strstr(kcontrol->id.name, "LR Polarity Select")) - ucontrol->value.integer.value[0] = dspk->lrsel; - else if (strstr(kcontrol->id.name, "Channel Select")) - ucontrol->value.integer.value[0] = dspk->ch_sel; - else if (strstr(kcontrol->id.name, "Mono To Stereo")) - ucontrol->value.integer.value[0] = dspk->mono_to_stereo; - else if (strstr(kcontrol->id.name, "Stereo To Mono")) - ucontrol->value.integer.value[0] = dspk->stereo_to_mono; + ucontrol->value.integer.value[0] = dspk->rx_fifo_th; return 0; } -static int tegra186_dspk_put_control(struct snd_kcontrol *kcontrol, +static int tegra186_dspk_put_fifo_th(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); - int val = ucontrol->value.integer.value[0]; - - if (strstr(kcontrol->id.name, "FIFO Threshold")) - dspk->rx_fifo_th = val; - else if (strstr(kcontrol->id.name, "OSR Value")) - dspk->osr_val = val; - else if (strstr(kcontrol->id.name, "LR Polarity Select")) - dspk->lrsel = val; - else if (strstr(kcontrol->id.name, "Channel Select")) - dspk->ch_sel = val; - else if (strstr(kcontrol->id.name, "Mono To Stereo")) - dspk->mono_to_stereo = val; - else if (strstr(kcontrol->id.name, "Stereo To Mono")) - dspk->stereo_to_mono = val; + int value = ucontrol->value.integer.value[0]; + + if (value == dspk->rx_fifo_th) + return 0; + + dspk->rx_fifo_th = value; + + return 1; +} + +static int tegra186_dspk_get_osr_val(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); + struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = dspk->osr_val; return 0; } +static int tegra186_dspk_put_osr_val(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); + struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == dspk->osr_val) + return 0; + + dspk->osr_val = value; + + return 1; +} + +static int tegra186_dspk_get_pol_sel(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); + struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = dspk->lrsel; + + return 0; +} + +static int tegra186_dspk_put_pol_sel(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); + struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == dspk->lrsel) + return 0; + + dspk->lrsel = value; + + return 1; +} + +static int tegra186_dspk_get_ch_sel(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); + struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = dspk->ch_sel; + + return 0; +} + +static int tegra186_dspk_put_ch_sel(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); + struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == dspk->ch_sel) + return 0; + + dspk->ch_sel = value; + + return 1; +} + +static int tegra186_dspk_get_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); + struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = dspk->mono_to_stereo; + + return 0; +} + +static int tegra186_dspk_put_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); + struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == dspk->mono_to_stereo) + return 0; + + dspk->mono_to_stereo = value; + + return 1; +} + +static int tegra186_dspk_get_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); + struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); + + ucontrol->value.enumerated.item[0] = dspk->stereo_to_mono; + + return 0; +} + +static int tegra186_dspk_put_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *codec = snd_soc_kcontrol_component(kcontrol); + struct tegra186_dspk *dspk = snd_soc_component_get_drvdata(codec); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == dspk->stereo_to_mono) + return 0; + + dspk->stereo_to_mono = value; + + return 1; +} + static int __maybe_unused tegra186_dspk_runtime_suspend(struct device *dev) { struct tegra186_dspk *dspk = dev_get_drvdata(dev); @@ -279,17 +390,19 @@ static const struct soc_enum tegra186_dspk_lrsel_enum = static const struct snd_kcontrol_new tegrat186_dspk_controls[] = { SOC_SINGLE_EXT("FIFO Threshold", SND_SOC_NOPM, 0, TEGRA186_DSPK_RX_FIFO_DEPTH - 1, 0, - tegra186_dspk_get_control, tegra186_dspk_put_control), + tegra186_dspk_get_fifo_th, tegra186_dspk_put_fifo_th), SOC_ENUM_EXT("OSR Value", tegra186_dspk_osr_enum, - tegra186_dspk_get_control, tegra186_dspk_put_control), + tegra186_dspk_get_osr_val, tegra186_dspk_put_osr_val), SOC_ENUM_EXT("LR Polarity Select", tegra186_dspk_lrsel_enum, - tegra186_dspk_get_control, tegra186_dspk_put_control), + tegra186_dspk_get_pol_sel, tegra186_dspk_put_pol_sel), SOC_ENUM_EXT("Channel Select", tegra186_dspk_ch_sel_enum, - tegra186_dspk_get_control, tegra186_dspk_put_control), + tegra186_dspk_get_ch_sel, tegra186_dspk_put_ch_sel), SOC_ENUM_EXT("Mono To Stereo", tegra186_dspk_mono_conv_enum, - tegra186_dspk_get_control, tegra186_dspk_put_control), + tegra186_dspk_get_mono_to_stereo, + tegra186_dspk_put_mono_to_stereo), SOC_ENUM_EXT("Stereo To Mono", tegra186_dspk_stereo_conv_enum, - tegra186_dspk_get_control, tegra186_dspk_put_control), + tegra186_dspk_get_stereo_to_mono, + tegra186_dspk_put_stereo_to_mono), }; static const struct snd_soc_component_driver tegra186_dspk_cmpnt = { diff --git a/sound/soc/tegra/tegra210_admaif.c b/sound/soc/tegra/tegra210_admaif.c index 1268046b345d9f0d9c685754a48052781c86898d..d610cbe4a7c4a79bf78d323895479790f9f317c4 100644 --- a/sound/soc/tegra/tegra210_admaif.c +++ b/sound/soc/tegra/tegra210_admaif.c @@ -424,46 +424,122 @@ static const struct snd_soc_dai_ops tegra_admaif_dai_ops = { .trigger = tegra_admaif_trigger, }; -static int tegra_admaif_get_control(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) +static int tegra210_admaif_pget_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); + struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt); + struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value; + + ucontrol->value.enumerated.item[0] = + admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg]; + + return 0; +} + +static int tegra210_admaif_pput_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); + struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt); + struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value; + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg]) + return 0; + + admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg] = value; + + return 1; +} + +static int tegra210_admaif_cget_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); + struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt); + struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value; + + ucontrol->value.enumerated.item[0] = + admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg]; + + return 0; +} + +static int tegra210_admaif_cput_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); + struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt); struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value; + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg]) + return 0; + + admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg] = value; + + return 1; +} + +static int tegra210_admaif_pget_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt); - long *uctl_val = &ucontrol->value.integer.value[0]; + struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value; - if (strstr(kcontrol->id.name, "Playback Mono To Stereo")) - *uctl_val = admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg]; - else if (strstr(kcontrol->id.name, "Capture Mono To Stereo")) - *uctl_val = admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg]; - else if (strstr(kcontrol->id.name, "Playback Stereo To Mono")) - *uctl_val = admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg]; - else if (strstr(kcontrol->id.name, "Capture Stereo To Mono")) - *uctl_val = admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg]; + ucontrol->value.enumerated.item[0] = + admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg]; return 0; } -static int tegra_admaif_put_control(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) +static int tegra210_admaif_pput_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); + struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt); struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value; + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg]) + return 0; + + admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg] = value; + + return 1; +} + +static int tegra210_admaif_cget_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt); - int value = ucontrol->value.integer.value[0]; + struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value; - if (strstr(kcontrol->id.name, "Playback Mono To Stereo")) - admaif->mono_to_stereo[ADMAIF_TX_PATH][ec->reg] = value; - else if (strstr(kcontrol->id.name, "Capture Mono To Stereo")) - admaif->mono_to_stereo[ADMAIF_RX_PATH][ec->reg] = value; - else if (strstr(kcontrol->id.name, "Playback Stereo To Mono")) - admaif->stereo_to_mono[ADMAIF_TX_PATH][ec->reg] = value; - else if (strstr(kcontrol->id.name, "Capture Stereo To Mono")) - admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg] = value; + ucontrol->value.enumerated.item[0] = + admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg]; return 0; } +static int tegra210_admaif_cput_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *cmpnt = snd_soc_kcontrol_component(kcontrol); + struct tegra_admaif *admaif = snd_soc_component_get_drvdata(cmpnt); + struct soc_enum *ec = (struct soc_enum *)kcontrol->private_value; + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg]) + return 0; + + admaif->stereo_to_mono[ADMAIF_RX_PATH][ec->reg] = value; + + return 1; +} + static int tegra_admaif_dai_probe(struct snd_soc_dai *dai) { struct tegra_admaif *admaif = snd_soc_dai_get_drvdata(dai); @@ -559,17 +635,21 @@ static const char * const tegra_admaif_mono_conv_text[] = { } #define TEGRA_ADMAIF_CIF_CTRL(reg) \ - NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Mono To Stereo", reg - 1,\ - tegra_admaif_get_control, tegra_admaif_put_control, \ + NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Mono To Stereo", reg - 1, \ + tegra210_admaif_pget_mono_to_stereo, \ + tegra210_admaif_pput_mono_to_stereo, \ tegra_admaif_mono_conv_text), \ - NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Stereo To Mono", reg - 1,\ - tegra_admaif_get_control, tegra_admaif_put_control, \ + NV_SOC_ENUM_EXT("ADMAIF" #reg " Playback Stereo To Mono", reg - 1, \ + tegra210_admaif_pget_stereo_to_mono, \ + tegra210_admaif_pput_stereo_to_mono, \ tegra_admaif_stereo_conv_text), \ - NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Mono To Stereo", reg - 1, \ - tegra_admaif_get_control, tegra_admaif_put_control, \ + NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Mono To Stereo", reg - 1, \ + tegra210_admaif_cget_mono_to_stereo, \ + tegra210_admaif_cput_mono_to_stereo, \ tegra_admaif_mono_conv_text), \ - NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Stereo To Mono", reg - 1, \ - tegra_admaif_get_control, tegra_admaif_put_control, \ + NV_SOC_ENUM_EXT("ADMAIF" #reg " Capture Stereo To Mono", reg - 1, \ + tegra210_admaif_cget_stereo_to_mono, \ + tegra210_admaif_cput_stereo_to_mono, \ tegra_admaif_stereo_conv_text) static struct snd_kcontrol_new tegra210_admaif_controls[] = { diff --git a/sound/soc/tegra/tegra210_ahub.c b/sound/soc/tegra/tegra210_ahub.c index 66287a7c9865dc6971c778f90a6f2e438b83d660..1b2f7cb8c6adc2b2168906253c111f6faa74def2 100644 --- a/sound/soc/tegra/tegra210_ahub.c +++ b/sound/soc/tegra/tegra210_ahub.c @@ -62,6 +62,7 @@ static int tegra_ahub_put_value_enum(struct snd_kcontrol *kctl, unsigned int *item = uctl->value.enumerated.item; unsigned int value = e->values[item[0]]; unsigned int i, bit_pos, reg_idx = 0, reg_val = 0; + int change = 0; if (item[0] >= e->items) return -EINVAL; @@ -86,12 +87,14 @@ static int tegra_ahub_put_value_enum(struct snd_kcontrol *kctl, /* Update widget power if state has changed */ if (snd_soc_component_test_bits(cmpnt, update[i].reg, - update[i].mask, update[i].val)) - snd_soc_dapm_mux_update_power(dapm, kctl, item[0], e, - &update[i]); + update[i].mask, + update[i].val)) + change |= snd_soc_dapm_mux_update_power(dapm, kctl, + item[0], e, + &update[i]); } - return 0; + return change; } static struct snd_soc_dai_driver tegra210_ahub_dais[] = { diff --git a/sound/soc/tegra/tegra210_dmic.c b/sound/soc/tegra/tegra210_dmic.c index a661f40bc41c77121af07557beeabc51d5e61ce1..dd3481ae3372f8799a8c3f5a80c4f3154147ee4c 100644 --- a/sound/soc/tegra/tegra210_dmic.c +++ b/sound/soc/tegra/tegra210_dmic.c @@ -156,51 +156,162 @@ static int tegra210_dmic_hw_params(struct snd_pcm_substream *substream, return 0; } -static int tegra210_dmic_get_control(struct snd_kcontrol *kcontrol, +static int tegra210_dmic_get_boost_gain(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); + struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); + + ucontrol->value.integer.value[0] = dmic->boost_gain; + + return 0; +} + +static int tegra210_dmic_put_boost_gain(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); + struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); + int value = ucontrol->value.integer.value[0]; + + if (value == dmic->boost_gain) + return 0; + + dmic->boost_gain = value; + + return 1; +} + +static int tegra210_dmic_get_ch_select(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); + struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); + + ucontrol->value.enumerated.item[0] = dmic->ch_select; + + return 0; +} + +static int tegra210_dmic_put_ch_select(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); + struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == dmic->ch_select) + return 0; + + dmic->ch_select = value; + + return 1; +} + +static int tegra210_dmic_get_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); + struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); + + ucontrol->value.enumerated.item[0] = dmic->mono_to_stereo; + + return 0; +} + +static int tegra210_dmic_put_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); + struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == dmic->mono_to_stereo) + return 0; + + dmic->mono_to_stereo = value; + + return 1; +} + +static int tegra210_dmic_get_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); + struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); + + ucontrol->value.enumerated.item[0] = dmic->stereo_to_mono; + + return 0; +} + +static int tegra210_dmic_put_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); + struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == dmic->stereo_to_mono) + return 0; + + dmic->stereo_to_mono = value; + + return 1; +} + +static int tegra210_dmic_get_osr_val(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); - if (strstr(kcontrol->id.name, "Boost Gain Volume")) - ucontrol->value.integer.value[0] = dmic->boost_gain; - else if (strstr(kcontrol->id.name, "Channel Select")) - ucontrol->value.integer.value[0] = dmic->ch_select; - else if (strstr(kcontrol->id.name, "Mono To Stereo")) - ucontrol->value.integer.value[0] = dmic->mono_to_stereo; - else if (strstr(kcontrol->id.name, "Stereo To Mono")) - ucontrol->value.integer.value[0] = dmic->stereo_to_mono; - else if (strstr(kcontrol->id.name, "OSR Value")) - ucontrol->value.integer.value[0] = dmic->osr_val; - else if (strstr(kcontrol->id.name, "LR Polarity Select")) - ucontrol->value.integer.value[0] = dmic->lrsel; + ucontrol->value.enumerated.item[0] = dmic->osr_val; return 0; } -static int tegra210_dmic_put_control(struct snd_kcontrol *kcontrol, +static int tegra210_dmic_put_osr_val(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); - int value = ucontrol->value.integer.value[0]; + unsigned int value = ucontrol->value.enumerated.item[0]; - if (strstr(kcontrol->id.name, "Boost Gain Volume")) - dmic->boost_gain = value; - else if (strstr(kcontrol->id.name, "Channel Select")) - dmic->ch_select = ucontrol->value.integer.value[0]; - else if (strstr(kcontrol->id.name, "Mono To Stereo")) - dmic->mono_to_stereo = value; - else if (strstr(kcontrol->id.name, "Stereo To Mono")) - dmic->stereo_to_mono = value; - else if (strstr(kcontrol->id.name, "OSR Value")) - dmic->osr_val = value; - else if (strstr(kcontrol->id.name, "LR Polarity Select")) - dmic->lrsel = value; + if (value == dmic->osr_val) + return 0; + + dmic->osr_val = value; + + return 1; +} + +static int tegra210_dmic_get_pol_sel(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); + struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); + + ucontrol->value.enumerated.item[0] = dmic->lrsel; return 0; } +static int tegra210_dmic_put_pol_sel(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *comp = snd_soc_kcontrol_component(kcontrol); + struct tegra210_dmic *dmic = snd_soc_component_get_drvdata(comp); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == dmic->lrsel) + return 0; + + dmic->lrsel = value; + + return 1; +} + static const struct snd_soc_dai_ops tegra210_dmic_dai_ops = { .hw_params = tegra210_dmic_hw_params, }; @@ -287,19 +398,22 @@ static const struct soc_enum tegra210_dmic_lrsel_enum = static const struct snd_kcontrol_new tegra210_dmic_controls[] = { SOC_SINGLE_EXT("Boost Gain Volume", 0, 0, MAX_BOOST_GAIN, 0, - tegra210_dmic_get_control, tegra210_dmic_put_control), + tegra210_dmic_get_boost_gain, + tegra210_dmic_put_boost_gain), SOC_ENUM_EXT("Channel Select", tegra210_dmic_ch_enum, - tegra210_dmic_get_control, tegra210_dmic_put_control), + tegra210_dmic_get_ch_select, tegra210_dmic_put_ch_select), SOC_ENUM_EXT("Mono To Stereo", - tegra210_dmic_mono_conv_enum, tegra210_dmic_get_control, - tegra210_dmic_put_control), + tegra210_dmic_mono_conv_enum, + tegra210_dmic_get_mono_to_stereo, + tegra210_dmic_put_mono_to_stereo), SOC_ENUM_EXT("Stereo To Mono", - tegra210_dmic_stereo_conv_enum, tegra210_dmic_get_control, - tegra210_dmic_put_control), + tegra210_dmic_stereo_conv_enum, + tegra210_dmic_get_stereo_to_mono, + tegra210_dmic_put_stereo_to_mono), SOC_ENUM_EXT("OSR Value", tegra210_dmic_osr_enum, - tegra210_dmic_get_control, tegra210_dmic_put_control), + tegra210_dmic_get_osr_val, tegra210_dmic_put_osr_val), SOC_ENUM_EXT("LR Polarity Select", tegra210_dmic_lrsel_enum, - tegra210_dmic_get_control, tegra210_dmic_put_control), + tegra210_dmic_get_pol_sel, tegra210_dmic_put_pol_sel), }; static const struct snd_soc_component_driver tegra210_dmic_compnt = { diff --git a/sound/soc/tegra/tegra210_i2s.c b/sound/soc/tegra/tegra210_i2s.c index a383bd5c51cd42e3b7e98b5ad8aa1ef9bdbb22ce..33fb37ad1391d223a709ca7ed4d8ce98191023eb 100644 --- a/sound/soc/tegra/tegra210_i2s.c +++ b/sound/soc/tegra/tegra210_i2s.c @@ -302,85 +302,235 @@ static int tegra210_i2s_set_tdm_slot(struct snd_soc_dai *dai, return 0; } -static int tegra210_i2s_set_dai_bclk_ratio(struct snd_soc_dai *dai, - unsigned int ratio) +static int tegra210_i2s_get_loopback(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) { - struct tegra210_i2s *i2s = snd_soc_dai_get_drvdata(dai); + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); - i2s->bclk_ratio = ratio; + ucontrol->value.integer.value[0] = i2s->loopback; return 0; } -static int tegra210_i2s_get_control(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) +static int tegra210_i2s_put_loopback(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + int value = ucontrol->value.integer.value[0]; + + if (value == i2s->loopback) + return 0; + + i2s->loopback = value; + + regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL, I2S_CTRL_LPBK_MASK, + i2s->loopback << I2S_CTRL_LPBK_SHIFT); + + return 1; +} + +static int tegra210_i2s_get_fsync_width(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); - long *uctl_val = &ucontrol->value.integer.value[0]; - - if (strstr(kcontrol->id.name, "Loopback")) - *uctl_val = i2s->loopback; - else if (strstr(kcontrol->id.name, "FSYNC Width")) - *uctl_val = i2s->fsync_width; - else if (strstr(kcontrol->id.name, "Capture Stereo To Mono")) - *uctl_val = i2s->stereo_to_mono[I2S_TX_PATH]; - else if (strstr(kcontrol->id.name, "Capture Mono To Stereo")) - *uctl_val = i2s->mono_to_stereo[I2S_TX_PATH]; - else if (strstr(kcontrol->id.name, "Playback Stereo To Mono")) - *uctl_val = i2s->stereo_to_mono[I2S_RX_PATH]; - else if (strstr(kcontrol->id.name, "Playback Mono To Stereo")) - *uctl_val = i2s->mono_to_stereo[I2S_RX_PATH]; - else if (strstr(kcontrol->id.name, "Playback FIFO Threshold")) - *uctl_val = i2s->rx_fifo_th; - else if (strstr(kcontrol->id.name, "BCLK Ratio")) - *uctl_val = i2s->bclk_ratio; + + ucontrol->value.integer.value[0] = i2s->fsync_width; return 0; } -static int tegra210_i2s_put_control(struct snd_kcontrol *kcontrol, - struct snd_ctl_elem_value *ucontrol) +static int tegra210_i2s_put_fsync_width(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) { struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); int value = ucontrol->value.integer.value[0]; - if (strstr(kcontrol->id.name, "Loopback")) { - i2s->loopback = value; + if (value == i2s->fsync_width) + return 0; - regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL, - I2S_CTRL_LPBK_MASK, - i2s->loopback << I2S_CTRL_LPBK_SHIFT); + i2s->fsync_width = value; - } else if (strstr(kcontrol->id.name, "FSYNC Width")) { - /* - * Frame sync width is used only for FSYNC modes and not - * applicable for LRCK modes. Reset value for this field is "0", - * which means the width is one bit clock wide. - * The width requirement may depend on the codec and in such - * cases mixer control is used to update custom values. A value - * of "N" here means, width is "N + 1" bit clock wide. - */ - i2s->fsync_width = value; - - regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL, - I2S_CTRL_FSYNC_WIDTH_MASK, - i2s->fsync_width << I2S_FSYNC_WIDTH_SHIFT); - - } else if (strstr(kcontrol->id.name, "Capture Stereo To Mono")) { - i2s->stereo_to_mono[I2S_TX_PATH] = value; - } else if (strstr(kcontrol->id.name, "Capture Mono To Stereo")) { - i2s->mono_to_stereo[I2S_TX_PATH] = value; - } else if (strstr(kcontrol->id.name, "Playback Stereo To Mono")) { - i2s->stereo_to_mono[I2S_RX_PATH] = value; - } else if (strstr(kcontrol->id.name, "Playback Mono To Stereo")) { - i2s->mono_to_stereo[I2S_RX_PATH] = value; - } else if (strstr(kcontrol->id.name, "Playback FIFO Threshold")) { - i2s->rx_fifo_th = value; - } else if (strstr(kcontrol->id.name, "BCLK Ratio")) { - i2s->bclk_ratio = value; - } + /* + * Frame sync width is used only for FSYNC modes and not + * applicable for LRCK modes. Reset value for this field is "0", + * which means the width is one bit clock wide. + * The width requirement may depend on the codec and in such + * cases mixer control is used to update custom values. A value + * of "N" here means, width is "N + 1" bit clock wide. + */ + regmap_update_bits(i2s->regmap, TEGRA210_I2S_CTRL, + I2S_CTRL_FSYNC_WIDTH_MASK, + i2s->fsync_width << I2S_FSYNC_WIDTH_SHIFT); + + return 1; +} + +static int tegra210_i2s_cget_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + + ucontrol->value.enumerated.item[0] = i2s->stereo_to_mono[I2S_TX_PATH]; + + return 0; +} + +static int tegra210_i2s_cput_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == i2s->stereo_to_mono[I2S_TX_PATH]) + return 0; + + i2s->stereo_to_mono[I2S_TX_PATH] = value; + + return 1; +} + +static int tegra210_i2s_cget_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + + ucontrol->value.enumerated.item[0] = i2s->mono_to_stereo[I2S_TX_PATH]; + + return 0; +} + +static int tegra210_i2s_cput_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == i2s->mono_to_stereo[I2S_TX_PATH]) + return 0; + + i2s->mono_to_stereo[I2S_TX_PATH] = value; + + return 1; +} + +static int tegra210_i2s_pget_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + + ucontrol->value.enumerated.item[0] = i2s->stereo_to_mono[I2S_RX_PATH]; + + return 0; +} + +static int tegra210_i2s_pput_stereo_to_mono(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == i2s->stereo_to_mono[I2S_RX_PATH]) + return 0; + + i2s->stereo_to_mono[I2S_RX_PATH] = value; + + return 1; +} + +static int tegra210_i2s_pget_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + + ucontrol->value.enumerated.item[0] = i2s->mono_to_stereo[I2S_RX_PATH]; + + return 0; +} + +static int tegra210_i2s_pput_mono_to_stereo(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + unsigned int value = ucontrol->value.enumerated.item[0]; + + if (value == i2s->mono_to_stereo[I2S_RX_PATH]) + return 0; + + i2s->mono_to_stereo[I2S_RX_PATH] = value; + + return 1; +} + +static int tegra210_i2s_pget_fifo_th(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + + ucontrol->value.integer.value[0] = i2s->rx_fifo_th; + + return 0; +} + +static int tegra210_i2s_pput_fifo_th(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + int value = ucontrol->value.integer.value[0]; + + if (value == i2s->rx_fifo_th) + return 0; + + i2s->rx_fifo_th = value; + + return 1; +} + +static int tegra210_i2s_get_bclk_ratio(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + + ucontrol->value.integer.value[0] = i2s->bclk_ratio; + + return 0; +} + +static int tegra210_i2s_put_bclk_ratio(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct snd_soc_component *compnt = snd_soc_kcontrol_component(kcontrol); + struct tegra210_i2s *i2s = snd_soc_component_get_drvdata(compnt); + int value = ucontrol->value.integer.value[0]; + + if (value == i2s->bclk_ratio) + return 0; + + i2s->bclk_ratio = value; + + return 1; +} + +static int tegra210_i2s_set_dai_bclk_ratio(struct snd_soc_dai *dai, + unsigned int ratio) +{ + struct tegra210_i2s *i2s = snd_soc_dai_get_drvdata(dai); + + i2s->bclk_ratio = ratio; return 0; } @@ -598,22 +748,28 @@ static const struct soc_enum tegra210_i2s_stereo_conv_enum = tegra210_i2s_stereo_conv_text); static const struct snd_kcontrol_new tegra210_i2s_controls[] = { - SOC_SINGLE_EXT("Loopback", 0, 0, 1, 0, tegra210_i2s_get_control, - tegra210_i2s_put_control), - SOC_SINGLE_EXT("FSYNC Width", 0, 0, 255, 0, tegra210_i2s_get_control, - tegra210_i2s_put_control), + SOC_SINGLE_EXT("Loopback", 0, 0, 1, 0, tegra210_i2s_get_loopback, + tegra210_i2s_put_loopback), + SOC_SINGLE_EXT("FSYNC Width", 0, 0, 255, 0, + tegra210_i2s_get_fsync_width, + tegra210_i2s_put_fsync_width), SOC_ENUM_EXT("Capture Stereo To Mono", tegra210_i2s_stereo_conv_enum, - tegra210_i2s_get_control, tegra210_i2s_put_control), + tegra210_i2s_cget_stereo_to_mono, + tegra210_i2s_cput_stereo_to_mono), SOC_ENUM_EXT("Capture Mono To Stereo", tegra210_i2s_mono_conv_enum, - tegra210_i2s_get_control, tegra210_i2s_put_control), + tegra210_i2s_cget_mono_to_stereo, + tegra210_i2s_cput_mono_to_stereo), SOC_ENUM_EXT("Playback Stereo To Mono", tegra210_i2s_stereo_conv_enum, - tegra210_i2s_get_control, tegra210_i2s_put_control), + tegra210_i2s_pget_mono_to_stereo, + tegra210_i2s_pput_mono_to_stereo), SOC_ENUM_EXT("Playback Mono To Stereo", tegra210_i2s_mono_conv_enum, - tegra210_i2s_get_control, tegra210_i2s_put_control), + tegra210_i2s_pget_stereo_to_mono, + tegra210_i2s_pput_stereo_to_mono), SOC_SINGLE_EXT("Playback FIFO Threshold", 0, 0, I2S_RX_FIFO_DEPTH - 1, - 0, tegra210_i2s_get_control, tegra210_i2s_put_control), - SOC_SINGLE_EXT("BCLK Ratio", 0, 0, INT_MAX, 0, tegra210_i2s_get_control, - tegra210_i2s_put_control), + 0, tegra210_i2s_pget_fifo_th, tegra210_i2s_pput_fifo_th), + SOC_SINGLE_EXT("BCLK Ratio", 0, 0, INT_MAX, 0, + tegra210_i2s_get_bclk_ratio, + tegra210_i2s_put_bclk_ratio), }; static const struct snd_soc_dapm_widget tegra210_i2s_widgets[] = { diff --git a/sound/soc/uniphier/Kconfig b/sound/soc/uniphier/Kconfig index aa3592ee1358b06466dd9a05017142681b9669cc..ddfa6424c656bde0df43e24f80d7739c6fb6c94e 100644 --- a/sound/soc/uniphier/Kconfig +++ b/sound/soc/uniphier/Kconfig @@ -23,7 +23,6 @@ config SND_SOC_UNIPHIER_LD11 tristate "UniPhier LD11/LD20 Device Driver" depends on SND_SOC_UNIPHIER select SND_SOC_UNIPHIER_AIO - select SND_SOC_UNIPHIER_AIO_DMA help This adds ASoC driver for Socionext UniPhier LD11/LD20 input and output that can be used with other codecs. @@ -34,7 +33,6 @@ config SND_SOC_UNIPHIER_PXS2 tristate "UniPhier PXs2 Device Driver" depends on SND_SOC_UNIPHIER select SND_SOC_UNIPHIER_AIO - select SND_SOC_UNIPHIER_AIO_DMA help This adds ASoC driver for Socionext UniPhier PXs2 input and output that can be used with other codecs. diff --git a/sound/soc/xilinx/xlnx_formatter_pcm.c b/sound/soc/xilinx/xlnx_formatter_pcm.c index 91afea9d5de6787906ac5e817ba3406488fa3f58..ce19a6058b27964dfcd45336770b2e7cdd417cec 100644 --- a/sound/soc/xilinx/xlnx_formatter_pcm.c +++ b/sound/soc/xilinx/xlnx_formatter_pcm.c @@ -37,6 +37,7 @@ #define XLNX_AUD_XFER_COUNT 0x28 #define XLNX_AUD_CH_STS_START 0x2C #define XLNX_BYTES_PER_CH 0x44 +#define XLNX_AUD_ALIGN_BYTES 64 #define AUD_STS_IOC_IRQ_MASK BIT(31) #define AUD_STS_CH_STS_MASK BIT(29) @@ -368,12 +369,32 @@ static int xlnx_formatter_pcm_open(struct snd_soc_component *component, snd_soc_set_runtime_hwparams(substream, &xlnx_pcm_hardware); runtime->private_data = stream_data; - /* Resize the period size divisible by 64 */ + /* Resize the period bytes as divisible by 64 */ err = snd_pcm_hw_constraint_step(runtime, 0, - SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 64); + SNDRV_PCM_HW_PARAM_PERIOD_BYTES, + XLNX_AUD_ALIGN_BYTES); if (err) { dev_err(component->dev, - "unable to set constraint on period bytes\n"); + "Unable to set constraint on period bytes\n"); + return err; + } + + /* Resize the buffer bytes as divisible by 64 */ + err = snd_pcm_hw_constraint_step(runtime, 0, + SNDRV_PCM_HW_PARAM_BUFFER_BYTES, + XLNX_AUD_ALIGN_BYTES); + if (err) { + dev_err(component->dev, + "Unable to set constraint on buffer bytes\n"); + return err; + } + + /* Set periods as integer multiple */ + err = snd_pcm_hw_constraint_integer(runtime, + SNDRV_PCM_HW_PARAM_PERIODS); + if (err < 0) { + dev_err(component->dev, + "Unable to set constraint on periods to be integer\n"); return err; } diff --git a/sound/synth/emux/emux.c b/sound/synth/emux/emux.c index f65e6c7b139f51bc91ff6bdd4142318a1cd017d2..6695530bba9b38c7ceda4fe40688fefdb55fea71 100644 --- a/sound/synth/emux/emux.c +++ b/sound/synth/emux/emux.c @@ -88,7 +88,7 @@ int snd_emux_register(struct snd_emux *emu, struct snd_card *card, int index, ch emu->name = kstrdup(name, GFP_KERNEL); emu->voices = kcalloc(emu->max_voices, sizeof(struct snd_emux_voice), GFP_KERNEL); - if (emu->voices == NULL) + if (emu->name == NULL || emu->voices == NULL) return -ENOMEM; /* create soundfont list */ diff --git a/sound/usb/6fire/comm.c b/sound/usb/6fire/comm.c index 43a2a62d66f7ea8b11e4ee67a23115c21939da35..49629d4bb327aed41f566079d328d865774207ea 100644 --- a/sound/usb/6fire/comm.c +++ b/sound/usb/6fire/comm.c @@ -95,7 +95,7 @@ static int usb6fire_comm_send_buffer(u8 *buffer, struct usb_device *dev) int actual_len; ret = usb_interrupt_msg(dev, usb_sndintpipe(dev, COMM_EP), - buffer, buffer[1] + 2, &actual_len, HZ); + buffer, buffer[1] + 2, &actual_len, 1000); if (ret < 0) return ret; else if (actual_len != buffer[1] + 2) diff --git a/sound/usb/6fire/firmware.c b/sound/usb/6fire/firmware.c index 8981e61f2da4a5dc7c7a9458f1fcffa515c8c1b5..c51abc54d2f844f515aecb0a6bed16a9eba58aba 100644 --- a/sound/usb/6fire/firmware.c +++ b/sound/usb/6fire/firmware.c @@ -160,7 +160,7 @@ static int usb6fire_fw_ezusb_write(struct usb_device *device, { return usb_control_msg_send(device, 0, type, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, 0, data, len, HZ, GFP_KERNEL); + value, 0, data, len, 1000, GFP_KERNEL); } static int usb6fire_fw_ezusb_read(struct usb_device *device, @@ -168,7 +168,7 @@ static int usb6fire_fw_ezusb_read(struct usb_device *device, { return usb_control_msg_recv(device, 0, type, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, 0, data, len, HZ, GFP_KERNEL); + value, 0, data, len, 1000, GFP_KERNEL); } static int usb6fire_fw_fpga_write(struct usb_device *device, @@ -178,7 +178,7 @@ static int usb6fire_fw_fpga_write(struct usb_device *device, int ret; ret = usb_bulk_msg(device, usb_sndbulkpipe(device, FPGA_EP), data, len, - &actual_len, HZ); + &actual_len, 1000); if (ret < 0) return ret; else if (actual_len != len) diff --git a/sound/usb/format.c b/sound/usb/format.c index 5c5b76c611480fe0343114f81c0229dbb4da936b..e8a63ea2189d1728c20b7439ddbdbc9506b06108 100644 --- a/sound/usb/format.c +++ b/sound/usb/format.c @@ -365,7 +365,7 @@ static int parse_uac2_sample_rate_range(struct snd_usb_audio *chip, for (rate = min; rate <= max; rate += res) { /* Filter out invalid rates on Presonus Studio 1810c */ - if (chip->usb_id == USB_ID(0x0194f, 0x010c) && + if (chip->usb_id == USB_ID(0x194f, 0x010c) && !s1810c_valid_sample_rate(fp, rate)) goto skip_rate; @@ -410,6 +410,7 @@ static int line6_parse_audio_format_rates_quirk(struct snd_usb_audio *chip, case USB_ID(0x0e41, 0x4242): /* Line6 Helix Rack */ case USB_ID(0x0e41, 0x4244): /* Line6 Helix LT */ case USB_ID(0x0e41, 0x4246): /* Line6 HX-Stomp */ + case USB_ID(0x0e41, 0x4253): /* Line6 HX-Stomp XL */ case USB_ID(0x0e41, 0x4247): /* Line6 Pod Go */ case USB_ID(0x0e41, 0x4248): /* Line6 Helix >= fw 2.82 */ case USB_ID(0x0e41, 0x4249): /* Line6 Helix Rack >= fw 2.82 */ diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index 9602929b7de9001df963e137c7f4c77a50b05ed8..59faa5a9a714154c0a387799a2a09a4a1baac091 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -113,12 +113,12 @@ int line6_send_raw_message(struct usb_line6 *line6, const char *buffer, retval = usb_interrupt_msg(line6->usbdev, usb_sndintpipe(line6->usbdev, properties->ep_ctrl_w), (char *)frag_buf, frag_size, - &partial, LINE6_TIMEOUT * HZ); + &partial, LINE6_TIMEOUT); } else { retval = usb_bulk_msg(line6->usbdev, usb_sndbulkpipe(line6->usbdev, properties->ep_ctrl_w), (char *)frag_buf, frag_size, - &partial, LINE6_TIMEOUT * HZ); + &partial, LINE6_TIMEOUT); } if (retval) { @@ -347,7 +347,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg_send(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, (datalen << 8) | 0x21, address, NULL, 0, - LINE6_TIMEOUT * HZ, GFP_KERNEL); + LINE6_TIMEOUT, GFP_KERNEL); if (ret) { dev_err(line6->ifcdev, "read request failed (error %d)\n", ret); goto exit; @@ -360,7 +360,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg_recv(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x0012, 0x0000, &len, 1, - LINE6_TIMEOUT * HZ, GFP_KERNEL); + LINE6_TIMEOUT, GFP_KERNEL); if (ret) { dev_err(line6->ifcdev, "receive length failed (error %d)\n", ret); @@ -387,7 +387,7 @@ int line6_read_data(struct usb_line6 *line6, unsigned address, void *data, /* receive the result: */ ret = usb_control_msg_recv(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0x0013, 0x0000, data, datalen, LINE6_TIMEOUT * HZ, + 0x0013, 0x0000, data, datalen, LINE6_TIMEOUT, GFP_KERNEL); if (ret) dev_err(line6->ifcdev, "read failed (error %d)\n", ret); @@ -417,7 +417,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg_send(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - 0x0022, address, data, datalen, LINE6_TIMEOUT * HZ, + 0x0022, address, data, datalen, LINE6_TIMEOUT, GFP_KERNEL); if (ret) { dev_err(line6->ifcdev, @@ -430,7 +430,7 @@ int line6_write_data(struct usb_line6 *line6, unsigned address, void *data, ret = usb_control_msg_recv(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, - 0x0012, 0x0000, status, 1, LINE6_TIMEOUT * HZ, + 0x0012, 0x0000, status, 1, LINE6_TIMEOUT, GFP_KERNEL); if (ret) { dev_err(line6->ifcdev, diff --git a/sound/usb/line6/driver.h b/sound/usb/line6/driver.h index 71d3da1db8c815ac6c11a094657b744602e2a152..ecf3a2b39c7eb44129e6acc82363d4e2b728e44b 100644 --- a/sound/usb/line6/driver.h +++ b/sound/usb/line6/driver.h @@ -27,7 +27,7 @@ #define LINE6_FALLBACK_INTERVAL 10 #define LINE6_FALLBACK_MAXPACKETSIZE 16 -#define LINE6_TIMEOUT 1 +#define LINE6_TIMEOUT 1000 #define LINE6_BUFSIZE_LISTEN 64 #define LINE6_MIDI_MESSAGE_MAXLEN 256 diff --git a/sound/usb/line6/podhd.c b/sound/usb/line6/podhd.c index 28794a35949d40293ca9a4e4638dab294addada3..b24bc82f89e37c3911353fb8ce112f59953c613b 100644 --- a/sound/usb/line6/podhd.c +++ b/sound/usb/line6/podhd.c @@ -190,7 +190,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod) ret = usb_control_msg_send(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, 0x11, 0, - NULL, 0, LINE6_TIMEOUT * HZ, GFP_KERNEL); + NULL, 0, LINE6_TIMEOUT, GFP_KERNEL); if (ret) { dev_err(pod->line6.ifcdev, "read request failed (error %d)\n", ret); goto exit; @@ -200,7 +200,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod) ret = usb_control_msg_recv(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, 0x11, 0x0, - init_bytes, 3, LINE6_TIMEOUT * HZ, GFP_KERNEL); + init_bytes, 3, LINE6_TIMEOUT, GFP_KERNEL); if (ret) { dev_err(pod->line6.ifcdev, "receive length failed (error %d)\n", ret); @@ -220,7 +220,7 @@ static int podhd_dev_start(struct usb_line6_podhd *pod) USB_REQ_SET_FEATURE, USB_TYPE_STANDARD | USB_RECIP_DEVICE | USB_DIR_OUT, 1, 0, - NULL, 0, LINE6_TIMEOUT * HZ, GFP_KERNEL); + NULL, 0, LINE6_TIMEOUT, GFP_KERNEL); exit: return ret; } diff --git a/sound/usb/line6/toneport.c b/sound/usb/line6/toneport.c index 4e5693c97aa42165c40014f3b703aa084f8c3c95..e33df58740a91c834c8a25d3835fefa9837475cc 100644 --- a/sound/usb/line6/toneport.c +++ b/sound/usb/line6/toneport.c @@ -128,7 +128,7 @@ static int toneport_send_cmd(struct usb_device *usbdev, int cmd1, int cmd2) ret = usb_control_msg_send(usbdev, 0, 0x67, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, - cmd1, cmd2, NULL, 0, LINE6_TIMEOUT * HZ, + cmd1, cmd2, NULL, 0, LINE6_TIMEOUT, GFP_KERNEL); if (ret) { diff --git a/sound/usb/misc/ua101.c b/sound/usb/misc/ua101.c index 6b30155964ec0b48b93f2b3756ba98344fe777ba..5dd1cbe29d12bbca43e58d72e4b7b4d4d302a538 100644 --- a/sound/usb/misc/ua101.c +++ b/sound/usb/misc/ua101.c @@ -1001,7 +1001,7 @@ static int detect_usb_format(struct ua101 *ua) fmt_playback->bSubframeSize * ua->playback.channels; epd = &ua->intf[INTF_CAPTURE]->altsetting[1].endpoint[0].desc; - if (!usb_endpoint_is_isoc_in(epd)) { + if (!usb_endpoint_is_isoc_in(epd) || usb_endpoint_maxp(epd) == 0) { dev_err(&ua->dev->dev, "invalid capture endpoint\n"); return -ENXIO; } @@ -1009,7 +1009,7 @@ static int detect_usb_format(struct ua101 *ua) ua->capture.max_packet_bytes = usb_endpoint_maxp(epd); epd = &ua->intf[INTF_PLAYBACK]->altsetting[1].endpoint[0].desc; - if (!usb_endpoint_is_isoc_out(epd)) { + if (!usb_endpoint_is_isoc_out(epd) || usb_endpoint_maxp(epd) == 0) { dev_err(&ua->dev->dev, "invalid playback endpoint\n"); return -ENXIO; } diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index c5794e83fd800d93b18c4c3d29e5787a5d29fc10..8f6823df944fffe5af6e91b83a6fb799d0445a2f 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -528,6 +528,10 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x2573, 0x0008), .map = maya44_map, }, + { + .id = USB_ID(0x2708, 0x0002), /* Audient iD14 */ + .ignore_ctl_error = 1, + }, { /* KEF X300A */ .id = USB_ID(0x27ac, 0x1000), @@ -538,6 +542,10 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x25c4, 0x0003), .map = scms_usb3318_map, }, + { + .id = USB_ID(0x30be, 0x0101), /* Schiit Hel */ + .ignore_ctl_error = 1, + }, { /* Bose Companion 5 */ .id = USB_ID(0x05a7, 0x1020), diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index 8297117f4766e79b0d84431dad3aef2582efe35f..86fdd669f3fd76694234ddfd2c9b36dde2acc744 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -3033,7 +3033,7 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) err = snd_rme_controls_create(mixer); break; - case USB_ID(0x0194f, 0x010c): /* Presonus Studio 1810c */ + case USB_ID(0x194f, 0x010c): /* Presonus Studio 1810c */ err = snd_sc1810_init_mixer(mixer); break; case USB_ID(0x2a39, 0x3fb0): /* RME Babyface Pro FS */ diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 5728bf722c88a5f66ba9bbf4644c0f012da9be19..aabd3a10ec5b46981f0edd2e3996e79779bea655 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -77,6 +77,48 @@ /* E-Mu 0204 USB */ { USB_DEVICE_VENDOR_SPEC(0x041e, 0x3f19) }, +/* + * Creative Technology, Ltd Live! Cam Sync HD [VF0770] + * The device advertises 8 formats, but only a rate of 48kHz is honored by the + * hardware and 24 bits give chopped audio, so only report the one working + * combination. + */ +{ + USB_AUDIO_DEVICE(0x041e, 0x4095), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + { + .ifnum = 2, + .type = QUIRK_AUDIO_STANDARD_MIXER, + }, + { + .ifnum = 3, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S16_LE, + .channels = 2, + .fmt_bits = 16, + .iface = 3, + .altsetting = 4, + .altset_idx = 4, + .endpoint = 0x82, + .ep_attr = 0x05, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 48000 }, + }, + }, + { + .ifnum = -1 + }, + }, + }, +}, + /* * HP Wireless Audio * When not ignored, causes instability issues for some users, forcing them to @@ -3656,6 +3698,38 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } } }, +{ + /* + * Sennheiser GSP670 + * Change order of interfaces loaded + */ + USB_DEVICE(0x1395, 0x0300), + .bInterfaceClass = USB_CLASS_PER_INTERFACE, + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = &(const struct snd_usb_audio_quirk[]) { + // Communication + { + .ifnum = 3, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + // Recording + { + .ifnum = 4, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + // Main + { + .ifnum = 1, + .type = QUIRK_AUDIO_STANDARD_INTERFACE + }, + { + .ifnum = -1 + } + } + } +}, #undef USB_DEVICE_VENDOR_SPEC #undef USB_AUDIO_DEVICE diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index a45b27a2ed4ec14d2d65620c029c128cf96998b2..6333a2ecb848aa4600b1c89d67a3c9ab545fb650 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1310,7 +1310,7 @@ int snd_usb_apply_interface_quirk(struct snd_usb_audio *chip, if (chip->usb_id == USB_ID(0x0763, 0x2012)) return fasttrackpro_skip_setting_quirk(chip, iface, altno); /* presonus studio 1810c: skip altsets incompatible with device_setup */ - if (chip->usb_id == USB_ID(0x0194f, 0x010c)) + if (chip->usb_id == USB_ID(0x194f, 0x010c)) return s1810c_skip_setting_quirk(chip, iface, altno); @@ -1897,6 +1897,7 @@ static const struct registration_quirk registration_quirks[] = { REG_QUIRK_ENTRY(0x0951, 0x16ea, 2), /* Kingston HyperX Cloud Flight S */ REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2), /* JBL Quantum 600 */ REG_QUIRK_ENTRY(0x0ecb, 0x1f47, 2), /* JBL Quantum 800 */ + REG_QUIRK_ENTRY(0x0ecb, 0x1f4c, 2), /* JBL Quantum 400 */ REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2), /* JBL Quantum 400 */ REG_QUIRK_ENTRY(0x0ecb, 0x203c, 2), /* JBL Quantum 600 */ REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2), /* JBL Quantum 800 */ diff --git a/sound/x86/intel_hdmi_audio.c b/sound/x86/intel_hdmi_audio.c index 9f9fcd2749f224d8ec09e8fae677991f60cc0c9f..dbaa43ffbbd2dca3001a9aec772a6e4ade9a369e 100644 --- a/sound/x86/intel_hdmi_audio.c +++ b/sound/x86/intel_hdmi_audio.c @@ -1276,7 +1276,7 @@ static int had_pcm_mmap(struct snd_pcm_substream *substream, { vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); return remap_pfn_range(vma, vma->vm_start, - substream->dma_buffer.addr >> PAGE_SHIFT, + substream->runtime->dma_addr >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); } diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index dad350d42ecfbf3063c64138375e81591729b092..b58730cc12e836ababc811f52b228d1181564074 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -204,7 +204,7 @@ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCEs for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile index f33cb02de95cf1da1e9a783208d65314ad523ac7..3601b1d1974caac3373459398baf1283f34f5d0a 100644 --- a/tools/bpf/bpftool/Documentation/Makefile +++ b/tools/bpf/bpftool/Documentation/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only include ../../../scripts/Makefile.include -include ../../../scripts/utilities.mak INSTALL ?= install RM ?= rm -f diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index f60e6ad3a1dffaffe040655315ad735df6adeea2..1896ef69b4492b666a8d79adc9a04d506b78f2bb 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only include ../../scripts/Makefile.include -include ../../scripts/utilities.mak ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(CURDIR))) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index c58a135dc355e68e82de8c00363acf3c95f3757f..1854d6b978604013b2decac6b983d7468dfe345c 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -396,6 +396,8 @@ int main(int argc, char **argv) }; int opt, ret; + setlinebuf(stdout); + last_do_help = do_help; pretty_output = false; json_output = false; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 14237ffb90bae40fd0279f3d4a49be076a33be3d..592536904dde2d59a152c838a39273111df77495 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -304,18 +304,12 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (printed_header) jsonw_end_object(json_wtr); } else { - json_writer_t *btf_wtr = jsonw_new(stdout); + json_writer_t *btf_wtr; struct btf_dumper d = { .btf = btf, - .jw = btf_wtr, .is_plain_text = true, }; - if (!btf_wtr) { - p_err("jsonw alloc failed"); - goto out_free; - } - for (i = 0; i < vlen; i++, vsi++) { t_var = btf__type_by_id(btf, vsi->type); name = btf__name_by_offset(btf, t_var->name_off); @@ -325,6 +319,14 @@ static void show_prog_metadata(int fd, __u32 num_maps) if (!printed_header) { printf("\tmetadata:"); + + btf_wtr = jsonw_new(stdout); + if (!btf_wtr) { + p_err("jsonw alloc failed"); + goto out_free; + } + d.jw = btf_wtr, + printed_header = true; } diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index bb9fa8de7e625a559f8edfa58272f082ec32c58a..35f092065c4fbf5089fcd24e39325440f6fb85b2 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -9,7 +9,11 @@ ifeq ($(V),1) msg = else Q = @ - msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; + ifeq ($(silent),1) + msg = + else + msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; + endif MAKEFLAGS=--no-print-directory endif @@ -19,6 +23,8 @@ CC = $(HOSTCC) LD = $(HOSTLD) ARCH = $(HOSTARCH) RM ?= rm +CFLAGS := $(KBUILD_HOSTCFLAGS) +LDFLAGS := $(KBUILD_HOSTLDFLAGS) OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/ @@ -41,9 +47,10 @@ $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd $(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf - $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@) + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(abspath $(dir $@))/ \ + EXTRA_CFLAGS="$(CFLAGS)" $(abspath $@) -CFLAGS := -g \ +CFLAGS += -g \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ -I$(LIBBPF_SRC) \ diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index f32c059fbfb4f8b3a50ce86f0e8421e35993aa76..b3c4c503b8013e9f8bbd897ad8eed78e99a14b99 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -115,10 +115,10 @@ struct object { static int verbose; -int eprintf(int level, int var, const char *fmt, ...) +static int eprintf(int level, int var, const char *fmt, ...) { va_list args; - int ret; + int ret = 0; if (var >= level) { va_start(args, fmt); @@ -383,7 +383,7 @@ static int elf_collect(struct object *obj) static int symbols_collect(struct object *obj) { Elf_Scn *scn = NULL; - int n, i, err = 0; + int n, i; GElf_Shdr sh; char *name; @@ -400,11 +400,10 @@ static int symbols_collect(struct object *obj) * Scan symbols and look for the ones starting with * __BTF_ID__* over .BTF_ids section. */ - for (i = 0; !err && i < n; i++) { - char *tmp, *prefix; + for (i = 0; i < n; i++) { + char *prefix; struct btf_id *id; GElf_Sym sym; - int err = -1; if (!gelf_getsym(obj->efile.symbols, i, &sym)) return -1; diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 97cbfb31b762592a4e9ff5f3558d7c750a5de904..e1d2c255669e6d3854df1d61feaaf9edfd6da3fa 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -49,7 +49,6 @@ FEATURE_TESTS_BASIC := \ numa_num_possible_cpus \ libperl \ libpython \ - libpython-version \ libslang \ libslang-include-subdir \ libcrypto \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 89ba522e377dc7d815507ae5dbafb8eb589aaa64..22ea350dab588ada1ee922bfb54c753b18e17e34 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -31,7 +31,6 @@ FILES= \ test-numa_num_possible_cpus.bin \ test-libperl.bin \ test-libpython.bin \ - test-libpython-version.bin \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libcrypto.bin \ @@ -220,9 +219,6 @@ $(OUTPUT)test-libperl.bin: $(OUTPUT)test-libpython.bin: $(BUILD) $(FLAGS_PYTHON_EMBED) -$(OUTPUT)test-libpython-version.bin: - $(BUILD) - $(OUTPUT)test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 464873883396e9c7e85795174f019ed8106a3192..09517ff2fad5651e57f68f29d5fbb5b7cd2819ed 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -14,10 +14,6 @@ # include "test-libpython.c" #undef main -#define main main_test_libpython_version -# include "test-libpython-version.c" -#undef main - #define main main_test_libperl # include "test-libperl.c" #undef main @@ -181,7 +177,6 @@ int main(int argc, char *argv[]) { main_test_libpython(); - main_test_libpython_version(); main_test_libperl(); main_test_hello(); main_test_libelf(); diff --git a/tools/build/feature/test-libpython-version.c b/tools/build/feature/test-libpython-version.c deleted file mode 100644 index 47714b942d4d35bf61d4646d797d474e836a8ba9..0000000000000000000000000000000000000000 --- a/tools/build/feature/test-libpython-version.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include - -#if PY_VERSION_HEX >= 0x03000000 - #error -#endif - -int main(void) -{ - return 0; -} diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 2551e9b71167b3526bcf692bb29cc00658f502ea..b8cecb66d28b7f236a1afc3436f8ce315c27500b 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -422,16 +422,22 @@ struct stat { }) /* startup code */ +/* + * x86-64 System V ABI mandates: + * 1) %rsp must be 16-byte aligned right before the function call. + * 2) The deepest stack frame should be zero (the %rbp). + * + */ asm(".section .text\n" ".global _start\n" "_start:\n" "pop %rdi\n" // argc (first arg, %rdi) "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) - "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when - "sub $8, %rsp\n" // entering the callee + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned before call "call main\n" // main() returns the status code, we'll exit with it. - "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits + "mov %eax, %edi\n" // retrieve exit code (32 bit) "mov $60, %rax\n" // NR_exit == 60 "syscall\n" // really exit "hlt\n" // ensure it does not return @@ -600,20 +606,28 @@ struct sys_stat_struct { }) /* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. + * 2) The deepest stack frame should be set to zero + * + */ asm(".section .text\n" ".global _start\n" "_start:\n" "pop %eax\n" // argc (first arg, %eax) "mov %esp, %ebx\n" // argv[] (second arg, %ebx) "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) - "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when + "xor %ebp, %ebp\n" // zero the stack frame + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned before + "sub $4, %esp\n" // the call instruction (args are aligned) "push %ecx\n" // push all registers on the stack so that we "push %ebx\n" // support both regparm and plain stack modes "push %eax\n" "call main\n" // main() returns the status code in %eax - "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits - "movl $1, %eax\n" // NR_exit == 1 - "int $0x80\n" // exit now + "mov %eax, %ebx\n" // retrieve exit code (32-bit int) + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now "hlt\n" // ensure it does not ""); @@ -797,7 +811,6 @@ asm(".section .text\n" "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) "bl main\n" // main() returns the status code, we'll exit with it. - "and %r0, %r0, $0xff\n" // limit exit code to 8 bits "movs r7, $1\n" // NR_exit == 1 "svc $0x00\n" ""); @@ -994,7 +1007,6 @@ asm(".section .text\n" "add x2, x2, x1\n" // + argv "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee "bl main\n" // main() returns the status code, we'll exit with it. - "and x0, x0, 0xff\n" // limit exit code to 8 bits "mov x8, 93\n" // NR_exit == 93 "svc #0\n" ""); @@ -1199,7 +1211,7 @@ asm(".section .text\n" "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! "jal main\n" // main() returns the status code, we'll exit with it. "nop\n" // delayed slot - "and $a0, $v0, 0xff\n" // limit exit code to 8 bits + "move $a0, $v0\n" // retrieve 32-bit exit code from v0 "li $v0, 4001\n" // NR_exit == 4001 "syscall\n" ".end __start\n" @@ -1397,7 +1409,6 @@ asm(".section .text\n" "add a2,a2,a1\n" // + argv "andi sp,a1,-16\n" // sp must be 16-byte aligned "call main\n" // main() returns the status code, we'll exit with it. - "andi a0, a0, 0xff\n" // limit exit code to 8 bits "li a7, 93\n" // NR_exit == 93 "ecall\n" ""); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 762bf87c26a3efa2d4bdf2bc3f9d218b951cbd1c..26b6bee72039a995a5faa06e698201002520d548 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -199,6 +199,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_EXT, BPF_PROG_TYPE_LSM, BPF_PROG_TYPE_SK_LOOKUP, + BPF_PROG_TYPE_FUSE, }; enum bpf_attach_type { diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index 8a81b3679d2b909613ff1a5a8d0a381846b8b67d..5d4cfac671d5402924b1b203b74f6f43ea7ed3ab 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,7 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only libbpf_version.h libbpf.pc -FEATURE-DUMP.libbpf libbpf.so.* TAGS tags diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 154b75fc1373efab8c64de0775372e917c89e6f7..6841c86bfe2162552c2668f4044f8d2c81e4ccf5 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -59,28 +59,7 @@ ifndef VERBOSE VERBOSE = 0 endif -FEATURE_USER = .libbpf -FEATURE_TESTS = libelf zlib bpf -FEATURE_DISPLAY = libelf zlib bpf - INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi -FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) - -check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean TAGS tags cscope help -ifdef MAKECMDGOALS -ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) - check_feat := 0 -endif -endif - -ifeq ($(check_feat),1) -ifeq ($(FEATURES_DUMP),) -include $(srctree)/tools/build/Makefile.feature -else -include $(FEATURES_DUMP) -endif -endif export prefix libdir src obj @@ -157,7 +136,7 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_SHARED): force $(BPF_HELPER_DEFS) @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -175,7 +154,7 @@ $(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) +$(BPF_IN_STATIC): force $(BPF_HELPER_DEFS) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h @@ -264,34 +243,16 @@ install_pkgconfig: $(PC_FILE) install: install_lib install_pkgconfig install_headers -### Cleaning rules - -config-clean: - $(call QUIET_CLEAN, feature-detect) - $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null - -clean: config-clean +clean: $(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \ *~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ $(SHARED_OBJDIR) $(STATIC_OBJDIR) \ $(addprefix $(OUTPUT), \ *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc) - $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf - - -PHONY += force elfdep zdep bpfdep cscope tags +PHONY += force cscope tags force: -elfdep: - @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi - -zdep: - @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi - -bpfdep: - @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi - cscope: ls *.c *.h > cscope.files cscope -b -q -I $(srctree)/include -f cscope.out diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 4538ed762a209156f9534255c9efcb613e3997a5..f05cfc082915dcec308e3d17f309406f34399b8c 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -40,7 +40,7 @@ enum bpf_enum_value_kind { #define __CORE_RELO(src, field, info) \ __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) -#if __BYTE_ORDER == __LITTLE_ENDIAN +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ bpf_probe_read_kernel( \ (void *)dst, \ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 231b07203e3d2755d867f5a67c074cddf154d15f..e6f644cdc9f15d2c95aa701d910e2fe929d1f353 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -205,32 +205,29 @@ static int btf_parse_hdr(struct btf *btf) } btf_bswap_hdr(hdr); } else if (hdr->magic != BTF_MAGIC) { - pr_debug("Invalid BTF magic:%x\n", hdr->magic); + pr_debug("Invalid BTF magic: %x\n", hdr->magic); return -EINVAL; } - meta_left = btf->raw_size - sizeof(*hdr); - if (!meta_left) { - pr_debug("BTF has no data\n"); + if (btf->raw_size < hdr->hdr_len) { + pr_debug("BTF header len %u larger than data size %u\n", + hdr->hdr_len, btf->raw_size); return -EINVAL; } - if (meta_left < hdr->type_off) { - pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); + meta_left = btf->raw_size - hdr->hdr_len; + if (meta_left < (long long)hdr->str_off + hdr->str_len) { + pr_debug("Invalid BTF total size: %u\n", btf->raw_size); return -EINVAL; } - if (meta_left < hdr->str_off) { - pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); + if ((long long)hdr->type_off + hdr->type_len > hdr->str_off) { + pr_debug("Invalid BTF data sections layout: type data at %u + %u, strings data at %u + %u\n", + hdr->type_off, hdr->type_len, hdr->str_off, hdr->str_len); return -EINVAL; } - if (hdr->type_off >= hdr->str_off) { - pr_debug("BTF type section offset >= string section offset. No type?\n"); - return -EINVAL; - } - - if (hdr->type_off & 0x02) { + if (hdr->type_off % 4) { pr_debug("BTF type section is not aligned to 4 bytes\n"); return -EINVAL; } diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index bd19cabddaf62bc0ec43ecd10571c0184a0f9e5a..60b5d1801103bf7cd576b8da57aec9ad1308e4b6 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -38,7 +38,7 @@ static int test_stat_cpu(void) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK, }; - int err, cpu, tmp; + int err, idx; cpus = perf_cpu_map__new(NULL); __T("failed to create cpus", cpus); @@ -64,10 +64,10 @@ static int test_stat_cpu(void) perf_evlist__for_each_evsel(evlist, evsel) { cpus = perf_evsel__cpus(evsel); - perf_cpu_map__for_each_cpu(cpu, tmp, cpus) { + for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++) { struct perf_counts_values counts = { .val = 0 }; - perf_evsel__read(evsel, cpu, 0, &counts); + perf_evsel__read(evsel, idx, 0, &counts); __T("failed to read value for evsel", counts.val != 0); } } diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index 0ad82d7a2a51b690e57d2d28f16531fe7df40609..2de98768d844441d5a641d2a2020a0e58732c178 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -21,7 +21,7 @@ static int test_stat_cpu(void) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, }; - int err, cpu, tmp; + int err, idx; cpus = perf_cpu_map__new(NULL); __T("failed to create cpus", cpus); @@ -32,10 +32,10 @@ static int test_stat_cpu(void) err = perf_evsel__open(evsel, cpus, NULL); __T("failed to open evsel", err == 0); - perf_cpu_map__for_each_cpu(cpu, tmp, cpus) { + for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++) { struct perf_counts_values counts = { .val = 0 }; - perf_evsel__read(evsel, cpu, 0, &counts); + perf_evsel__read(evsel, idx, 0, &counts); __T("failed to read value for evsel", counts.val != 0); } diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index 794a375dad3601e26f870edfa75b3458f1341aa8..b2aec04fce8f674e61eddf6e4820b738f4d4c7e3 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -50,15 +50,8 @@ static NORETURN inline void die(const char *err, ...) static inline void *xrealloc(void *ptr, size_t size) { void *ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) { - ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); - } + if (!ret) + die("Out of memory, realloc failed"); return ret; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f76a74024cc67d29f56ba619de76525d6191f21d..4c114338b75b856f5d2b3b88a927a17ceb3b37f6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -156,6 +156,8 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "machine_real_restart", "rewind_stack_do_exit", "kunit_try_catch_throw", + "xen_start_kernel", + "cpu_bringup_and_idle", }; if (!func) @@ -1065,6 +1067,11 @@ static int add_call_destinations(struct objtool_file *file) } else insn->call_dest = reloc->sym; + if (insn->call_dest && insn->call_dest->static_call_tramp) { + list_add_tail(&insn->static_call_node, + &file->static_call_list); + } + /* * Many compilers cannot disable KCOV with a function attribute * so they need a little help, NOP out any KCOV calls from noinstr @@ -1796,6 +1803,9 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be before add_{jump_call}_destination. + */ ret = read_static_call_tramps(file); if (ret) return ret; @@ -1808,6 +1818,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + /* + * Must be before add_call_destination(); it changes INSN_CALL to + * INSN_JUMP. + */ ret = read_intra_function_calls(file); if (ret) return ret; @@ -2662,12 +2676,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, if (dead_end_function(file, insn->call_dest)) return 0; - if (insn->type == INSN_CALL && insn->call_dest && - insn->call_dest->static_call_tramp) { - list_add_tail(&insn->static_call_node, - &file->static_call_list); - } - break; case INSN_JUMP_CONDITIONAL: diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 014b959575caeeae50f9e18d788707145d2696e1..68408a5ecfed25ee7b6200ba08daf7d0fbd9e71a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -259,8 +259,6 @@ endif FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS) FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) -FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) -FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index bb25d8beb3b852f71def15136fcc5f22de816aca..159bc89e6a79a2c15a0762a8cb91644664909bad 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -226,6 +226,7 @@ int bench_futex_lock_pi(int argc, const char **argv) print_summary(); free(worker); + perf_cpu_map__put(cpu); return ret; err: usage_with_options(bench_futex_lock_pi_usage, options); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 7a15c2e610228f081d8a3e5b8da2e5e283a8a427..105b36cdc42d310dfbbc71e1528a323ead00f937 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -216,6 +216,7 @@ int bench_futex_requeue(int argc, const char **argv) print_summary(); free(worker); + perf_cpu_map__put(cpu); return ret; err: usage_with_options(bench_futex_requeue_usage, options); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index cd2b81a845acb0537f8841fb914b006c69c17772..a129c94eb3fe1a2ccddf0be4d5d26e9e875b952c 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -320,6 +320,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) print_summary(); free(blocked_worker); + perf_cpu_map__put(cpu); return ret; } #endif /* HAVE_PTHREAD_BARRIER */ diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 2dfcef3e371e4b15b9dead17bfa348b00e2a79d1..507ff533612c684f4d0dc387a0b0334eec0f0158 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -210,5 +210,6 @@ int bench_futex_wake(int argc, const char **argv) print_summary(); free(worker); + perf_cpu_map__put(cpu); return ret; } diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 5378a14e38368402452c94215222a2d1d41c348c..8f1a99e2fcd7cbc02fbd0e8f934549cb80041c72 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -752,7 +752,7 @@ static int __cmd_inject(struct perf_inject *inject) inject->tool.ordered_events = true; inject->tool.ordering_requires_timestamps = true; /* Allow space in the header for new attributes */ - output_data_offset = 4096; + output_data_offset = roundup(8192 + session->header.data_offset, 4096); if (inject->strip) strip_init(inject); } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5824aa24acfccfc125fa292c91a2a65174921181..91cab5cdfbc16ff5b7a660dfc2018c13e4dc522f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -610,14 +610,17 @@ static int report__browse_hists(struct report *rep) int ret; struct perf_session *session = rep->session; struct evlist *evlist = session->evlist; - const char *help = perf_tip(system_path(TIPDIR)); + char *help = NULL, *path = NULL; - if (help == NULL) { + path = system_path(TIPDIR); + if (perf_tip(&help, path) || help == NULL) { /* fallback for people who don't install perf ;-) */ - help = perf_tip(DOCDIR); - if (help == NULL) - help = "Cannot load tips.txt file, please install perf!"; + free(path); + path = system_path(DOCDIR); + if (perf_tip(&help, path) || help == NULL) + help = strdup("Cannot load tips.txt file, please install perf!"); } + free(path); switch (use_browser) { case 1: @@ -644,7 +647,7 @@ static int report__browse_hists(struct report *rep) ret = perf_evlist__tty_browse_hists(evlist, rep, help); break; } - + free(help); return ret; } diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 2bb159c1050359d0bfc46c22e576cabd0e076031..5109d01619eedfc1c48135bc99e00d50023363fb 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2354,7 +2354,7 @@ static int process_switch_event(struct perf_tool *tool, if (perf_event__process_switch(tool, event, sample, machine) < 0) return -1; - if (scripting_ops && scripting_ops->process_switch) + if (scripting_ops && scripting_ops->process_switch && !filter_cpu(sample)) scripting_ops->process_switch(event, sample, machine); if (!script->show_switch_events) @@ -3820,11 +3820,15 @@ int cmd_script(int argc, const char **argv) goto out_delete; uname(&uts); - if (data.is_pipe || /* assume pipe_mode indicates native_arch */ - !strcmp(uts.machine, session->header.env.arch) || - (!strcmp(uts.machine, "x86_64") && - !strcmp(session->header.env.arch, "i386"))) + if (data.is_pipe) { /* Assume pipe_mode indicates native_arch */ native_arch = true; + } else if (session->header.env.arch) { + if (!strcmp(uts.machine, session->header.env.arch)) + native_arch = true; + else if (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386")) + native_arch = true; + } script.session = session; script__setup_sample_type(&script); diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index dcfdf6a322dc40615f38ed979b91f6c115c7b2d1..c679a79aef5133c04ec21b7e1582559d8db51365 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1100,12 +1100,13 @@ static int process_one_file(const char *fpath, const struct stat *sb, */ int main(int argc, char *argv[]) { - int rc, ret = 0; + int rc, ret = 0, empty_map = 0; int maxfds; char ldirname[PATH_MAX]; const char *arch; const char *output_file; const char *start_dirname; + char *err_string_ext = ""; struct stat stbuf; prog = basename(argv[0]); @@ -1133,7 +1134,8 @@ int main(int argc, char *argv[]) /* If architecture does not have any event lists, bail out */ if (stat(ldirname, &stbuf) < 0) { pr_info("%s: Arch %s has no PMU event lists\n", prog, arch); - goto empty_map; + empty_map = 1; + goto err_close_eventsfp; } /* Include pmu-events.h first */ @@ -1150,75 +1152,60 @@ int main(int argc, char *argv[]) */ maxfds = get_maxfds(); - mapfile = NULL; rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0); - if (rc && verbose) { - pr_info("%s: Error preprocessing arch standard files %s\n", - prog, ldirname); - goto empty_map; - } else if (rc < 0) { - /* Make build fail */ - fclose(eventsfp); - free_arch_std_events(); - return 1; - } else if (rc) { - goto empty_map; - } + if (rc) + goto err_processing_std_arch_event_dir; rc = nftw(ldirname, process_one_file, maxfds, 0); - if (rc && verbose) { - pr_info("%s: Error walking file tree %s\n", prog, ldirname); - goto empty_map; - } else if (rc < 0) { - /* Make build fail */ - fclose(eventsfp); - free_arch_std_events(); - ret = 1; - goto out_free_mapfile; - } else if (rc) { - goto empty_map; - } + if (rc) + goto err_processing_dir; sprintf(ldirname, "%s/test", start_dirname); rc = nftw(ldirname, process_one_file, maxfds, 0); - if (rc && verbose) { - pr_info("%s: Error walking file tree %s rc=%d for test\n", - prog, ldirname, rc); - goto empty_map; - } else if (rc < 0) { - /* Make build fail */ - free_arch_std_events(); - ret = 1; - goto out_free_mapfile; - } else if (rc) { - goto empty_map; - } + if (rc) + goto err_processing_dir; if (close_table) print_events_table_suffix(eventsfp); if (!mapfile) { pr_info("%s: No CPU->JSON mapping?\n", prog); - goto empty_map; + empty_map = 1; + goto err_close_eventsfp; } - if (process_mapfile(eventsfp, mapfile)) { + rc = process_mapfile(eventsfp, mapfile); + fclose(eventsfp); + if (rc) { pr_info("%s: Error processing mapfile %s\n", prog, mapfile); /* Make build fail */ - fclose(eventsfp); - free_arch_std_events(); ret = 1; + goto err_out; } + free_arch_std_events(); + free(mapfile); + return 0; - goto out_free_mapfile; - -empty_map: +err_processing_std_arch_event_dir: + err_string_ext = " for std arch event"; +err_processing_dir: + if (verbose) { + pr_info("%s: Error walking file tree %s%s\n", prog, ldirname, + err_string_ext); + empty_map = 1; + } else if (rc < 0) { + ret = 1; + } else { + empty_map = 1; + } +err_close_eventsfp: fclose(eventsfp); - create_empty_mapping(output_file); + if (empty_map) + create_empty_mapping(output_file); +err_out: free_arch_std_events(); -out_free_mapfile: free(mapfile); return ret; } diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh index 045723b3d9928f7e25acfee6ed63e9d6b8588931..c62af807198deda8f7634dd67a745e09e49bb5a5 100755 --- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh +++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh @@ -12,7 +12,7 @@ skip_if_no_z_record() { collect_z_record() { echo "Collecting compressed record file:" - [[ "$(uname -m)" != s390x ]] && gflag='-g' + [ "$(uname -m)" != s390x ] && gflag='-g' $perf_tool record -o $trace_file $gflag -z -F 5000 -- \ dd count=500 if=/dev/urandom of=/dev/null } diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index c1f24d00485272bbbd0a87bc9762d6b304b1a349..5075ecead5f3d799a495ce89c93e4be707da5dec 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c @@ -535,6 +535,18 @@ struct perf_hpp_list perf_hpp_list = { #undef __HPP_SORT_ACC_FN #undef __HPP_SORT_RAW_FN +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + /* + * At this point fmt should be completely + * unhooked, if not it's a bug. + */ + BUG_ON(!list_empty(&fmt->list)); + BUG_ON(!list_empty(&fmt->sort_list)); + + if (fmt->free) + fmt->free(fmt); +} void perf_hpp__init(void) { @@ -598,9 +610,10 @@ void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list, list_add(&format->sort_list, &list->sorts); } -void perf_hpp__column_unregister(struct perf_hpp_fmt *format) +static void perf_hpp__column_unregister(struct perf_hpp_fmt *format) { list_del_init(&format->list); + fmt_free(format); } void perf_hpp__cancel_cumulate(void) @@ -672,19 +685,6 @@ next: } -static void fmt_free(struct perf_hpp_fmt *fmt) -{ - /* - * At this point fmt should be completely - * unhooked, if not it's a bug. - */ - BUG_ON(!list_empty(&fmt->list)); - BUG_ON(!list_empty(&fmt->sort_list)); - - if (fmt->free) - fmt->free(fmt); -} - void perf_hpp__reset_output_field(struct perf_hpp_list *list) { struct perf_hpp_fmt *fmt, *tmp; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 3882a5360ada40c18d9368fac1b086553231f90b..0350020acb96f288de39bce58d71db1dff6d05de 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -48,6 +48,7 @@ struct arm_spe { u8 timeless_decoding; u8 data_queued; + u64 sample_type; u8 sample_flc; u8 sample_llc; u8 sample_tlb; @@ -244,6 +245,12 @@ static void arm_spe_prep_sample(struct arm_spe *spe, event->sample.header.size = sizeof(struct perf_event_header); } +static int arm_spe__inject_event(union perf_event *event, struct perf_sample *sample, u64 type) +{ + event->header.size = perf_event__sample_event_size(sample, type, 0); + return perf_event__synthesize_sample(event, type, 0, sample); +} + static inline int arm_spe_deliver_synth_event(struct arm_spe *spe, struct arm_spe_queue *speq __maybe_unused, @@ -252,6 +259,12 @@ arm_spe_deliver_synth_event(struct arm_spe *spe, { int ret; + if (spe->synth_opts.inject) { + ret = arm_spe__inject_event(event, sample, spe->sample_type); + if (ret) + return ret; + } + ret = perf_session__deliver_synth_event(spe->session, event, sample); if (ret) pr_err("ARM SPE: failed to deliver event, error %d\n", ret); @@ -809,6 +822,8 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) else attr.sample_type |= PERF_SAMPLE_TIME; + spe->sample_type = attr.sample_type; + attr.exclude_user = evsel->core.attr.exclude_user; attr.exclude_kernel = evsel->core.attr.exclude_kernel; attr.exclude_hv = evsel->core.attr.exclude_hv; diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 3742511a08d15cc942e041a45caebc32eb5dd1b2..4eb02762104bad04908b80119535aed028cb2f09 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -109,7 +109,11 @@ static int perf_env__fetch_btf(struct perf_env *env, node->data_size = data_size; memcpy(node->data, data, data_size); - perf_env__insert_btf(env, node); + if (!perf_env__insert_btf(env, node)) { + /* Insertion failed because of a duplicate. */ + free(node); + return -1; + } return 0; } @@ -557,7 +561,7 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, 0); fprintf(fp, "# bpf_prog_info %u: %s addr 0x%llx size %u\n", info->id, name, prog_addrs[0], prog_lens[0]); - return; + goto out; } fprintf(fp, "# bpf_prog_info %u:\n", info->id); @@ -567,4 +571,6 @@ void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, fprintf(fp, "# \tsub_prog %u: %s addr 0x%llx size %u\n", i, name, prog_addrs[i], prog_lens[i]); } +out: + btf__free(btf); } diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 0374adcb223c75c418b7b9bb51e0c7215e012160..ac99c0764bee83a87a3f25f8367694bdb0bd3be0 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1215,9 +1215,10 @@ bpf__obj_config_map(struct bpf_object *obj, pr_debug("ERROR: Invalid map config option '%s'\n", map_opt); err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT; out: - free(map_name); if (!err) *key_scan_pos += strlen(map_opt); + + free(map_name); return err; } diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index bcb494dc816a0b47aa1884ba82d7a85d2e176b19..48754083791d8d35594a942809b189a727997689 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -44,10 +44,6 @@ int perf_data__create_dir(struct perf_data *data, int nr) if (!files) return -ENOMEM; - data->dir.version = PERF_DIR_VERSION; - data->dir.files = files; - data->dir.nr = nr; - for (i = 0; i < nr; i++) { struct perf_data_file *file = &files[i]; @@ -62,6 +58,9 @@ int perf_data__create_dir(struct perf_data *data, int nr) file->fd = ret; } + data->dir.version = PERF_DIR_VERSION; + data->dir.files = files; + data->dir.nr = nr; return 0; out_err: diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 5cda5565777a0bfca8f319df7f5a7a79fb0dc4cf..0af163abaa62b87f3d5a4fb841f64e8f84be394d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -145,7 +145,7 @@ static int trace_event_printer(enum binary_printer_ops op, break; case BINARY_PRINT_CHAR_DATA: printed += color_fprintf(fp, color, "%c", - isprint(ch) ? ch : '.'); + isprint(ch) && isascii(ch) ? ch : '.'); break; case BINARY_PRINT_CHAR_PAD: printed += color_fprintf(fp, color, " "); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index f0dceb527ca385e936d8b612177b2dd57231ea05..d81ed1bc14bdc5111efa993ec2dbb6bfe3703e71 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -71,12 +71,13 @@ out: return node; } -void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) +bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) { struct rb_node *parent = NULL; __u32 btf_id = btf_node->id; struct btf_node *node; struct rb_node **p; + bool ret = true; down_write(&env->bpf_progs.lock); p = &env->bpf_progs.btfs.rb_node; @@ -90,6 +91,7 @@ void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) p = &(*p)->rb_right; } else { pr_debug("duplicated btf %u\n", btf_id); + ret = false; goto out; } } @@ -99,6 +101,7 @@ void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) env->bpf_progs.btfs_cnt++; out: up_write(&env->bpf_progs.lock); + return ret; } struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index a1297265200646fa0da1ec983ae6719ee3ed907b..01378a955dd5e8652eb520ecb4a015e496d9b481 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -143,7 +143,7 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); -void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); +bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); int perf_env__numa_node(struct perf_env *env, int cpu); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1cad6051d8b0892c3f04213d93b0f871bd5bea20..1a1cbd16d76d475b3ef06c2455faab6a13a09fb3 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1014,6 +1014,17 @@ struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evs return found_term; } +static void evsel__set_default_freq_period(struct record_opts *opts, + struct perf_event_attr *attr) +{ + if (opts->freq) { + attr->freq = 1; + attr->sample_freq = opts->freq; + } else { + attr->sample_period = opts->default_interval; + } +} + /* * The enable_on_exec/disabled value strategy: * @@ -1080,14 +1091,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, * We default some events to have a default interval. But keep * it a weak assumption overridable by the user. */ - if (!attr->sample_period) { - if (opts->freq) { - attr->freq = 1; - attr->sample_freq = opts->freq; - } else { - attr->sample_period = opts->default_interval; - } - } + if ((evsel->is_libpfm_event && !attr->sample_period) || + (!evsel->is_libpfm_event && (!attr->sample_period || + opts->user_freq != UINT_MAX || + opts->user_interval != ULLONG_MAX))) + evsel__set_default_freq_period(opts, attr); + /* * If attr->freq was set (here or earlier), ask for period * to be sampled. diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 96b1c13bbccc52957def1489b682506d9bf2da04..919f2c6c481424e5b1f58ac15d673567efea26c5 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -362,7 +362,6 @@ enum { }; void perf_hpp__init(void); -void perf_hpp__column_unregister(struct perf_hpp_fmt *format); void perf_hpp__cancel_cumulate(void); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index e6029d4c096fb76f9edf07d80c90892a9761b549..e4c485f92c0289c77d2e8e4fc52433a80234bfbf 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -1114,61 +1114,69 @@ out_no_progress: static bool intel_pt_fup_event(struct intel_pt_decoder *decoder) { + enum intel_pt_sample_type type = decoder->state.type; bool ret = false; + decoder->state.type &= ~INTEL_PT_BRANCH; + if (decoder->set_fup_tx_flags) { decoder->set_fup_tx_flags = false; decoder->tx_flags = decoder->fup_tx_flags; - decoder->state.type = INTEL_PT_TRANSACTION; + decoder->state.type |= INTEL_PT_TRANSACTION; if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX) decoder->state.type |= INTEL_PT_BRANCH; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.flags = decoder->fup_tx_flags; - return true; + ret = true; } if (decoder->set_fup_ptw) { decoder->set_fup_ptw = false; - decoder->state.type = INTEL_PT_PTW; + decoder->state.type |= INTEL_PT_PTW; decoder->state.flags |= INTEL_PT_FUP_IP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.ptw_payload = decoder->fup_ptw_payload; - return true; + ret = true; } if (decoder->set_fup_mwait) { decoder->set_fup_mwait = false; - decoder->state.type = INTEL_PT_MWAIT_OP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; + decoder->state.type |= INTEL_PT_MWAIT_OP; decoder->state.mwait_payload = decoder->fup_mwait_payload; ret = true; } if (decoder->set_fup_pwre) { decoder->set_fup_pwre = false; decoder->state.type |= INTEL_PT_PWR_ENTRY; - decoder->state.type &= ~INTEL_PT_BRANCH; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; decoder->state.pwre_payload = decoder->fup_pwre_payload; ret = true; } if (decoder->set_fup_exstop) { decoder->set_fup_exstop = false; decoder->state.type |= INTEL_PT_EX_STOP; - decoder->state.type &= ~INTEL_PT_BRANCH; decoder->state.flags |= INTEL_PT_FUP_IP; - decoder->state.from_ip = decoder->ip; - decoder->state.to_ip = 0; ret = true; } if (decoder->set_fup_bep) { decoder->set_fup_bep = false; decoder->state.type |= INTEL_PT_BLK_ITEMS; - decoder->state.type &= ~INTEL_PT_BRANCH; + ret = true; + } + if (decoder->overflow) { + decoder->overflow = false; + if (!ret && !decoder->pge) { + if (decoder->hop) { + decoder->state.type = 0; + decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; + } + decoder->pge = true; + decoder->state.type |= INTEL_PT_BRANCH | INTEL_PT_TRACE_BEGIN; + decoder->state.from_ip = 0; + decoder->state.to_ip = decoder->ip; + return true; + } + } + if (ret) { decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; - ret = true; + } else { + decoder->state.type = type; } return ret; } @@ -1486,7 +1494,16 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) intel_pt_log("ERROR: Buffer overflow\n"); intel_pt_clear_tx_flags(decoder); decoder->timestamp_insn_cnt = 0; - decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; + decoder->state.from_ip = decoder->ip; + decoder->ip = 0; + decoder->pge = false; + decoder->set_fup_tx_flags = false; + decoder->set_fup_ptw = false; + decoder->set_fup_mwait = false; + decoder->set_fup_pwre = false; + decoder->set_fup_exstop = false; + decoder->set_fup_bep = false; decoder->overflow = true; return -EOVERFLOW; } @@ -1937,6 +1954,8 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder); /* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err) { + *err = 0; + /* Leap from PSB to PSB, getting ip from FUP within PSB+ */ if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) { *err = intel_pt_scan_for_psb(decoder); @@ -1949,6 +1968,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in return HOP_IGNORE; case INTEL_PT_TIP_PGD: + decoder->pge = false; if (!decoder->packet.count) return HOP_IGNORE; intel_pt_set_ip(decoder); @@ -1970,18 +1990,21 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in if (!decoder->packet.count) return HOP_IGNORE; intel_pt_set_ip(decoder); - if (intel_pt_fup_event(decoder)) - return HOP_RETURN; - if (!decoder->branch_enable) + if (decoder->set_fup_mwait || decoder->set_fup_pwre) + *no_tip = true; + if (!decoder->branch_enable || !decoder->pge) *no_tip = true; if (*no_tip) { decoder->state.type = INTEL_PT_INSTRUCTION; decoder->state.from_ip = decoder->ip; decoder->state.to_ip = 0; + intel_pt_fup_event(decoder); return HOP_RETURN; } + intel_pt_fup_event(decoder); + decoder->state.type |= INTEL_PT_INSTRUCTION | INTEL_PT_BRANCH; *err = intel_pt_walk_fup_tip(decoder); - if (!*err) + if (!*err && decoder->state.to_ip) decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; return HOP_RETURN; @@ -2050,6 +2073,7 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) if (err) return err; next: + err = 0; if (decoder->cyc_threshold) { if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC) decoder->sample_cyc = false; @@ -2088,6 +2112,7 @@ next: case INTEL_PT_TIP_PGE: { decoder->pge = true; + decoder->overflow = false; intel_pt_mtc_cyc_cnt_pge(decoder); if (decoder->packet.count == 0) { intel_pt_log_at("Skipping zero TIP.PGE", @@ -2124,7 +2149,7 @@ next: break; } intel_pt_set_last_ip(decoder); - if (!decoder->branch_enable) { + if (!decoder->branch_enable || !decoder->pge) { decoder->ip = decoder->last_ip; if (intel_pt_fup_event(decoder)) return 0; @@ -2601,10 +2626,10 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) decoder->set_fup_pwre = false; decoder->set_fup_exstop = false; decoder->set_fup_bep = false; + decoder->overflow = false; if (!decoder->branch_enable) { decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->overflow = false; decoder->state.type = 0; /* Do not have a sample */ return 0; } @@ -2619,7 +2644,6 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) decoder->pkt_state = INTEL_PT_STATE_RESAMPLE; else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; - decoder->overflow = false; decoder->state.from_ip = 0; decoder->state.to_ip = decoder->ip; @@ -2732,7 +2756,7 @@ leap: return err; decoder->have_last_ip = true; - decoder->pkt_state = INTEL_PT_STATE_NO_IP; + decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; err = intel_pt_walk_psb(decoder); if (err) @@ -2828,7 +2852,8 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) if (err) { decoder->state.err = intel_pt_ext_err(err); - decoder->state.from_ip = decoder->ip; + if (err != -EOVERFLOW) + decoder->state.from_ip = decoder->ip; intel_pt_update_sample_time(decoder); decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; } else { diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index e5aaf1337be98531e18ac39d12af53f556b14d2d..5163d2ffea70da39bc5890a3d826a59013ffe4e0 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -2271,6 +2271,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) ptq->sync_switch = false; intel_pt_next_tid(pt, ptq); } + ptq->timestamp = state->est_timestamp; if (pt->synth_opts.errors) { err = intel_ptq_synth_error(ptq, state); if (err) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 07db6cfad65b988e3c77642bba4d05c8cff3b268..d103084fcd56cbf5db6df63b305a716a12aea3dc 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -3035,6 +3035,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, for (j = 0; j < num_matched_functions; j++) { sym = syms[j]; + if (sym->type != STT_FUNC) + continue; + /* There can be duplicated symbols in the map */ for (i = 0; i < j; i++) if (sym->start == syms[i]->start) { diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c index 20bacd5972adec4425ad11bef211a9dee5b6d45d..34f1b1b1176c7808f7c08f0f945b59cbb43de27b 100644 --- a/tools/perf/util/smt.c +++ b/tools/perf/util/smt.c @@ -15,7 +15,7 @@ int smt_on(void) if (cached) return cached_result; - if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0) + if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) >= 0) goto done; ncpu = sysconf(_SC_NPROCESSORS_CONF); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index a963b5b8eb72409152596d6b33f0d961aeb1aa40..96fe9c1af33644b71393dffb16d1c5c86a0a5342 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -555,15 +555,16 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { - if (strcmp(evsel__name(alias), evsel__name(counter)) || - alias->scale != counter->scale || - alias->cgrp != counter->cgrp || - strcmp(alias->unit, counter->unit) || - evsel__is_clock(alias) != evsel__is_clock(counter) || - !strcmp(alias->pmu_name, counter->pmu_name)) - break; - alias->merged_stat = true; - cb(config, alias, data, false); + /* Merge events with the same name, etc. but on different PMUs. */ + if (!strcmp(evsel__name(alias), evsel__name(counter)) && + alias->scale == counter->scale && + alias->cgrp == counter->cgrp && + !strcmp(alias->unit, counter->unit) && + evsel__is_clock(alias) == evsel__is_clock(counter) && + strcmp(alias->pmu_name, counter->pmu_name)) { + alias->merged_stat = true; + cb(config, alias, data, false); + } } } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 37a9492edb3ebf094e348c1ab4e72e9dfe9d2378..df3c4671be72afd5ef426ea4f3c79a5f50dc0874 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -379,32 +379,32 @@ fetch_kernel_version(unsigned int *puint, char *str, return 0; } -const char *perf_tip(const char *dirpath) +int perf_tip(char **strp, const char *dirpath) { struct strlist *tips; struct str_node *node; - char *tip = NULL; struct strlist_config conf = { .dirname = dirpath, .file_only = true, }; + int ret = 0; + *strp = NULL; tips = strlist__new("tips.txt", &conf); if (tips == NULL) - return errno == ENOENT ? NULL : - "Tip: check path of tips.txt or get more memory! ;-p"; + return -errno; if (strlist__nr_entries(tips) == 0) goto out; node = strlist__entry(tips, random() % strlist__nr_entries(tips)); - if (asprintf(&tip, "Tip: %s", node->s) < 0) - tip = (char *)"Tip: get more memory! ;-)"; + if (asprintf(strp, "Tip: %s", node->s) < 0) + ret = -ENOMEM; out: strlist__delete(tips); - return tip; + return ret; } char *perf_exe(char *buf, int len) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ad737052e59776dfe122ac633e737e972c5de217..9f0d36ba77f2d1734daced16f71dd3c2c053892c 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -39,7 +39,7 @@ int fetch_kernel_version(unsigned int *puint, #define KVER_FMT "%d.%d.%d" #define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) -const char *perf_tip(const char *dirpath); +int perf_tip(char **strp, const char *dirpath); #ifndef HAVE_SCHED_GETCPU_SUPPORT int sched_getcpu(void); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index b5322d60068c4a09636c2fbca22721d232b4d372..1d915553336082fd178f9ec15f1b206c2077d3f7 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -326,7 +326,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/%.c \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ $$(INCLUDE_DIR)/vmlinux.h \ - $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT) + $(wildcard $(BPFDIR)/bpf_*.h) \ + | $(TRUNNER_OUTPUT) $$(BPFOBJ) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS), \ $(TRUNNER_BPF_LDFLAGS)) diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index 86ccf37e26b3f00ae11ad822bed2050b137d20bf..d16fd888230a5ba6ee8217050ec874c9358701fe 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -90,7 +90,7 @@ static void print_err_line(void) static void test_conn(void) { - int listen_fd = -1, cli_fd = -1, err; + int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; socklen_t addrlen = sizeof(srv_sa6); int srv_port; @@ -112,6 +112,10 @@ static void test_conn(void) if (CHECK_FAIL(cli_fd == -1)) goto done; + srv_fd = accept(listen_fd, NULL, NULL); + if (CHECK_FAIL(srv_fd == -1)) + goto done; + if (CHECK(skel->bss->listen_tp_sport != srv_port || skel->bss->req_sk_sport != srv_port, "Unexpected sk src port", @@ -134,11 +138,13 @@ done: close(listen_fd); if (cli_fd != -1) close(cli_fd); + if (srv_fd != -1) + close(srv_fd); } static void test_syncookie(void) { - int listen_fd = -1, cli_fd = -1, err; + int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; socklen_t addrlen = sizeof(srv_sa6); int srv_port; @@ -161,6 +167,10 @@ static void test_syncookie(void) if (CHECK_FAIL(cli_fd == -1)) goto done; + srv_fd = accept(listen_fd, NULL, NULL); + if (CHECK_FAIL(srv_fd == -1)) + goto done; + if (CHECK(skel->bss->listen_tp_sport != srv_port, "Unexpected tp src port", "listen_tp_sport:%u expected:%u\n", @@ -188,6 +198,8 @@ done: close(listen_fd); if (cli_fd != -1) close(cli_fd); + if (srv_fd != -1) + close(srv_fd); } struct test { diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 8b641c306f26330899fdd57d4d0554cbc06ccc16..5b52985cb60e692f2f5cac77934929feb49161af 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -857,7 +857,7 @@ void test_core_reloc(void) goto cleanup; } - if (!ASSERT_FALSE(test_case->fails, "obj_load_should_fail")) + if (!ASSERT_EQ(test_case->fails, false, "obj_load_should_fail")) goto cleanup; equal = memcmp(data->out, test_case->output, diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index ca9f0895ec84e52b2349a914d81218871e09b854..8d75475408f57367f578b42ff8e6a81fdbc01101 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -107,8 +107,8 @@ void test_perf_buffer(void) "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; - if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt", - "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus)) + if (CHECK(perf_buffer__buffer_cnt(pb) != nr_on_cpus, "buf_cnt", + "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_on_cpus)) goto out_close; for (i = 0; i < nr_cpus; i++) { diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c index 9ff0412e1fd384fab4cd2c16de31a894cedb0e89..b4c9f4a96ae4d37b393ae91158834141d1fbacf1 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c @@ -241,6 +241,48 @@ fail: return -1; } +static __u64 socket_cookie(int fd) +{ + __u64 cookie; + socklen_t cookie_len = sizeof(cookie); + + if (CHECK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len) < 0, + "getsockopt(SO_COOKIE)", "%s\n", strerror(errno))) + return 0; + return cookie; +} + +static int fill_sk_lookup_ctx(struct bpf_sk_lookup *ctx, const char *local_ip, __u16 local_port, + const char *remote_ip, __u16 remote_port) +{ + void *local, *remote; + int err; + + memset(ctx, 0, sizeof(*ctx)); + ctx->local_port = local_port; + ctx->remote_port = htons(remote_port); + + if (is_ipv6(local_ip)) { + ctx->family = AF_INET6; + local = &ctx->local_ip6[0]; + remote = &ctx->remote_ip6[0]; + } else { + ctx->family = AF_INET; + local = &ctx->local_ip4; + remote = &ctx->remote_ip4; + } + + err = inet_pton(ctx->family, local_ip, local); + if (CHECK(err != 1, "inet_pton", "local_ip failed\n")) + return 1; + + err = inet_pton(ctx->family, remote_ip, remote); + if (CHECK(err != 1, "inet_pton", "remote_ip failed\n")) + return 1; + + return 0; +} + static int send_byte(int fd) { ssize_t n; @@ -556,7 +598,7 @@ close: static void run_lookup_prog(const struct test *t) { - int server_fds[MAX_SERVERS] = { -1 }; + int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 }; int client_fd, reuse_conn_fd = -1; struct bpf_link *lookup_link; int i, err; @@ -1009,18 +1051,27 @@ static void test_drop_on_reuseport(struct test_sk_lookup *skel) static void run_sk_assign(struct test_sk_lookup *skel, struct bpf_program *lookup_prog, - const char *listen_ip, const char *connect_ip) + const char *remote_ip, const char *local_ip) { - int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 }; - struct bpf_link *lookup_link; + int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 }; + struct bpf_sk_lookup ctx; + __u64 server_cookie; int i, err; - lookup_link = attach_lookup_prog(lookup_prog); - if (!lookup_link) + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = &ctx, + .ctx_size_in = sizeof(ctx), + .ctx_out = &ctx, + .ctx_size_out = sizeof(ctx), + ); + + if (fill_sk_lookup_ctx(&ctx, local_ip, EXT_PORT, remote_ip, INT_PORT)) return; + ctx.protocol = IPPROTO_TCP; + for (i = 0; i < ARRAY_SIZE(server_fds); i++) { - server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL); + server_fds[i] = make_server(SOCK_STREAM, local_ip, 0, NULL); if (server_fds[i] < 0) goto close_servers; @@ -1030,23 +1081,25 @@ static void run_sk_assign(struct test_sk_lookup *skel, goto close_servers; } - client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT); - if (client_fd < 0) + server_cookie = socket_cookie(server_fds[SERVER_B]); + if (!server_cookie) + return; + + err = bpf_prog_test_run_opts(bpf_program__fd(lookup_prog), &opts); + if (CHECK(err, "test_run", "failed with error %d\n", errno)) + goto close_servers; + + if (CHECK(ctx.cookie == 0, "ctx.cookie", "no socket selected\n")) goto close_servers; - peer_fd = accept(server_fds[SERVER_B], NULL, NULL); - if (CHECK(peer_fd < 0, "accept", "failed\n")) - goto close_client; + CHECK(ctx.cookie != server_cookie, "ctx.cookie", + "selected sk %llu instead of %llu\n", ctx.cookie, server_cookie); - close(peer_fd); -close_client: - close(client_fd); close_servers: for (i = 0; i < ARRAY_SIZE(server_fds); i++) { if (server_fds[i] != -1) close(server_fds[i]); } - bpf_link__destroy(lookup_link); } static void run_sk_assign_v4(struct test_sk_lookup *skel, diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index fafeddaad6a998e33ea99d95be5440fd6ed32558..23915be6172d63bda9e969c6b9b6f079f50f8d25 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -105,4 +105,6 @@ void test_skb_ctx(void) "ctx_out_mark", "skb->mark == %u, expected %d\n", skb.mark, 10); + + bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c new file mode 100644 index 0000000000000000000000000000000000000000..f74b82305da8c88488117b0b908029d2d2364250 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "timer_crash.skel.h" + +enum { + MODE_ARRAY, + MODE_HASH, +}; + +static void test_timer_crash_mode(int mode) +{ + struct timer_crash *skel; + + skel = timer_crash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load")) + return; + skel->bss->pid = getpid(); + skel->bss->crash_map = mode; + if (!ASSERT_OK(timer_crash__attach(skel), "timer_crash__attach")) + goto end; + usleep(1); +end: + timer_crash__destroy(skel); +} + +void test_timer_crash(void) +{ + if (test__start_subtest("array")) + test_timer_crash_mode(MODE_ARRAY); + if (test__start_subtest("hash")) + test_timer_crash_mode(MODE_HASH); +} diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index 7de534f38c3f1f245671731c475ab7cafed30bdb..60c93aee2f4ad00171edb2495b010d3d36c307cf 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -358,7 +358,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg, void *payload) { void *location; - uint32_t len; + uint64_t len; data->str_lens[idx] = 0; location = calc_location(&cfg->str_locs[idx], tls_base); @@ -390,7 +390,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, struct strobe_map_descr* descr = &data->map_descrs[idx]; struct strobe_map_raw map; void *location; - uint32_t len; + uint64_t len; int i; descr->tag_len = 0; /* presume no tag is set */ diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index 1032b292af5b784229c2b2da7fffb753703e12d7..ac6f7f205e25d2f3cc6199a56ef0666a7cff6ea5 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -64,6 +64,10 @@ static const int PROG_DONE = 1; static const __u32 KEY_SERVER_A = SERVER_A; static const __u32 KEY_SERVER_B = SERVER_B; +static const __u16 SRC_PORT = bpf_htons(8008); +static const __u32 SRC_IP4 = IP4(127, 0, 0, 2); +static const __u32 SRC_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000002); + static const __u16 DST_PORT = 7007; /* Host byte order */ static const __u32 DST_IP4 = IP4(127, 0, 0, 1); static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001); @@ -398,11 +402,12 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) if (LSW(ctx->protocol, 0) != IPPROTO_TCP) return SK_DROP; - /* Narrow loads from remote_port field. Expect non-0 value. */ - if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 && - LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0) + /* Narrow loads from remote_port field. Expect SRC_PORT. */ + if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) || + LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) || + LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0) return SK_DROP; - if (LSW(ctx->remote_port, 0) == 0) + if (LSW(ctx->remote_port, 0) != SRC_PORT) return SK_DROP; /* Narrow loads from local_port field. Expect DST_PORT. */ @@ -415,11 +420,14 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) /* Narrow loads from IPv4 fields */ if (v4) { - /* Expect non-0.0.0.0 in remote_ip4 */ - if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 && - LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0) + /* Expect SRC_IP4 in remote_ip4 */ + if (LSB(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xff) || + LSB(ctx->remote_ip4, 1) != ((SRC_IP4 >> 8) & 0xff) || + LSB(ctx->remote_ip4, 2) != ((SRC_IP4 >> 16) & 0xff) || + LSB(ctx->remote_ip4, 3) != ((SRC_IP4 >> 24) & 0xff)) return SK_DROP; - if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0) + if (LSW(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xffff) || + LSW(ctx->remote_ip4, 1) != ((SRC_IP4 >> 16) & 0xffff)) return SK_DROP; /* Expect DST_IP4 in local_ip4 */ @@ -448,20 +456,32 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) /* Narrow loads from IPv6 fields */ if (!v4) { - /* Expect non-:: IP in remote_ip6 */ - if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 && - LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 && - LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 && - LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 && - LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 && - LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 && - LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 && - LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0) + /* Expect SRC_IP6 in remote_ip6 */ + if (LSB(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xff) || + LSB(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 8) & 0xff) || + LSB(ctx->remote_ip6[0], 2) != ((SRC_IP6[0] >> 16) & 0xff) || + LSB(ctx->remote_ip6[0], 3) != ((SRC_IP6[0] >> 24) & 0xff) || + LSB(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xff) || + LSB(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 8) & 0xff) || + LSB(ctx->remote_ip6[1], 2) != ((SRC_IP6[1] >> 16) & 0xff) || + LSB(ctx->remote_ip6[1], 3) != ((SRC_IP6[1] >> 24) & 0xff) || + LSB(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xff) || + LSB(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 8) & 0xff) || + LSB(ctx->remote_ip6[2], 2) != ((SRC_IP6[2] >> 16) & 0xff) || + LSB(ctx->remote_ip6[2], 3) != ((SRC_IP6[2] >> 24) & 0xff) || + LSB(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xff) || + LSB(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 8) & 0xff) || + LSB(ctx->remote_ip6[3], 2) != ((SRC_IP6[3] >> 16) & 0xff) || + LSB(ctx->remote_ip6[3], 3) != ((SRC_IP6[3] >> 24) & 0xff)) return SK_DROP; - if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 && - LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 && - LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 && - LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0) + if (LSW(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xffff) || + LSW(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 16) & 0xffff) || + LSW(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xffff) || + LSW(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 16) & 0xffff) || + LSW(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xffff) || + LSW(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 16) & 0xffff) || + LSW(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xffff) || + LSW(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 16) & 0xffff)) return SK_DROP; /* Expect DST_IP6 in local_ip6 */ if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) || diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 1858435de7aaf91a3f2ef12e1c8a74e9ee934c4e..5cb90ca29218643a08fe5edd832f0675dfaee38e 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -235,7 +235,7 @@ SEC("sk_msg1") int bpf_prog4(struct sk_msg_md *msg) { int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; - int *start, *end, *start_push, *end_push, *start_pop, *pop; + int *start, *end, *start_push, *end_push, *start_pop, *pop, err = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -249,8 +249,11 @@ int bpf_prog4(struct sk_msg_md *msg) bpf_msg_pull_data(msg, *start, *end, 0); start_push = bpf_map_lookup_elem(&sock_bytes, &two); end_push = bpf_map_lookup_elem(&sock_bytes, &three); - if (start_push && end_push) - bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (start_push && end_push) { + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + return SK_DROP; + } start_pop = bpf_map_lookup_elem(&sock_bytes, &four); pop = bpf_map_lookup_elem(&sock_bytes, &five); if (start_pop && pop) @@ -263,6 +266,7 @@ int bpf_prog6(struct sk_msg_md *msg) { int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0; int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f; + int err = 0; __u64 flags = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); @@ -279,8 +283,11 @@ int bpf_prog6(struct sk_msg_md *msg) start_push = bpf_map_lookup_elem(&sock_bytes, &two); end_push = bpf_map_lookup_elem(&sock_bytes, &three); - if (start_push && end_push) - bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (start_push && end_push) { + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + return SK_DROP; + } start_pop = bpf_map_lookup_elem(&sock_bytes, &four); pop = bpf_map_lookup_elem(&sock_bytes, &five); @@ -338,7 +345,7 @@ SEC("sk_msg5") int bpf_prog10(struct sk_msg_md *msg) { int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop; - int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5; + int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, err = 0; bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero); if (bytes) @@ -352,8 +359,11 @@ int bpf_prog10(struct sk_msg_md *msg) bpf_msg_pull_data(msg, *start, *end, 0); start_push = bpf_map_lookup_elem(&sock_bytes, &two); end_push = bpf_map_lookup_elem(&sock_bytes, &three); - if (start_push && end_push) - bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (start_push && end_push) { + err = bpf_msg_push_data(msg, *start_push, *end_push, 0); + if (err) + return SK_PASS; + } start_pop = bpf_map_lookup_elem(&sock_bytes, &four); pop = bpf_map_lookup_elem(&sock_bytes, &five); if (start_pop && pop) diff --git a/tools/testing/selftests/bpf/progs/timer_crash.c b/tools/testing/selftests/bpf/progs/timer_crash.c new file mode 100644 index 0000000000000000000000000000000000000000..f8f7944e70dae40317340ae995ecfed7783cd686 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_crash.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +struct map_elem { + struct bpf_timer timer; + struct bpf_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct map_elem); +} amap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct map_elem); +} hmap SEC(".maps"); + +int pid = 0; +int crash_map = 0; /* 0 for amap, 1 for hmap */ + +SEC("fentry/do_nanosleep") +int sys_enter(void *ctx) +{ + struct map_elem *e, value = {}; + void *map = crash_map ? (void *)&hmap : (void *)&amap; + + if (bpf_get_current_task_btf()->tgid != pid) + return 0; + + *(void **)&value = (void *)0xdeadcaf3; + + bpf_map_update_elem(map, &(int){0}, &value, 0); + /* For array map, doing bpf_map_update_elem will do a + * check_and_free_timer_in_array, which will trigger the crash if timer + * pointer was overwritten, for hmap we need to use bpf_timer_cancel. + */ + if (crash_map == 1) { + e = bpf_map_lookup_elem(map, &(int){0}); + if (!e) + return 0; + bpf_timer_cancel(&e->timer); + } + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index 59ea56945e6cd6819dc12fe337f324d784449e48..b497bb85b667f78f0bfa393e5ffaca0766b345f0 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -112,6 +112,14 @@ setup() ip netns add "${NS2}" ip netns add "${NS3}" + # rp_filter gets confused by what these tests are doing, so disable it + ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip link add veth1 type veth peer name veth2 ip link add veth3 type veth peer name veth4 ip link add veth5 type veth peer name veth6 @@ -236,11 +244,6 @@ setup() ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF} ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF} - # rp_filter gets confused by what these tests are doing, so disable it - ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 - TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX) sleep 1 # reduce flakiness diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 22943b58d752ae12baf9b4db75ceff36ee4b5205..4a13477aef9dd1bafe2c6d2fa77f2cc4c8c26570 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -347,7 +347,7 @@ int extract_build_id(char *build_id, size_t size) if (getline(&line, &len, fp) == -1) goto err; - fclose(fp); + pclose(fp); if (len > size) len = size; @@ -356,7 +356,7 @@ int extract_build_id(char *build_id, size_t size) free(line); return 0; err: - fclose(fp); + pclose(fp); return -1; } diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index 1b1c798e924895387e638d51dad2780eafa114b7..1b138cd2b187d982428092dfd7217d9f32b2f385 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -186,7 +186,7 @@ }, .fixup_map_hash_48b = { 3 }, .errstr_unpriv = "R0 leaks addr", - .errstr = "R0 unbounded memory access", + .errstr = "invalid access to map value, value_size=48 off=44 size=8", .result_unpriv = REJECT, .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index a3e593ddfafc93ec21d67976716950fa5f1a99ca..d8765a4d5bc6bdea761d45cbcedd085923b1f9e2 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -848,6 +848,29 @@ .errstr = "R0 invalid mem access 'inv'", .errstr_unpriv = "R0 pointer -= pointer prohibited", }, +{ + "map access: trying to leak tained dst reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_1, 0xFFFFFFFF), + BPF_MOV32_REG(BPF_REG_1, BPF_REG_1), + BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1), + BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 4 }, + .result = REJECT, + .errstr = "math between map_value pointer and 4294967295 is not allowed", +}, { "32bit pkt_ptr -= scalar", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c index bfb97383e6b5ae2c7e10c0e8294d4dd115b61954..b4ec228eb95d05e02ef416351c4c9578b38b983b 100644 --- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c @@ -35,7 +35,7 @@ .prog_type = BPF_PROG_TYPE_XDP, }, { - "XDP pkt read, pkt_data' > pkt_end, good access", + "XDP pkt read, pkt_data' > pkt_end, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -87,6 +87,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data' > pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_end > pkt_data', good access", .insns = { @@ -106,16 +141,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end > pkt_data', bad access 1", + "XDP pkt read, pkt_end > pkt_data', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -142,6 +177,42 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_end > pkt_data', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end > pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data' < pkt_end, good access", .insns = { @@ -161,16 +232,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' < pkt_end, bad access 1", + "XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -198,7 +269,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end < pkt_data', good access", + "XDP pkt read, pkt_data' < pkt_end, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' < pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -250,6 +357,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_end < pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end < pkt_data', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data' >= pkt_end, good access", .insns = { @@ -268,15 +410,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' >= pkt_end, bad access 1", + "XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -304,7 +446,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end >= pkt_data', good access", + "XDP pkt read, pkt_data' >= pkt_end, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -359,7 +535,44 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data' <= pkt_end, good access", + "XDP pkt read, pkt_end >= pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, @@ -413,6 +626,43 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_end <= pkt_data', good access", .insns = { @@ -431,15 +681,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_end <= pkt_data', bad access 1", + "XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data_end)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -467,7 +717,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' > pkt_data, good access", + "XDP pkt read, pkt_end <= pkt_data', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_end <= pkt_data', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -519,6 +803,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data > pkt_meta', good access", .insns = { @@ -538,16 +857,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data > pkt_meta', bad access 1", + "XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -574,6 +893,42 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data > pkt_meta', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data > pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_meta' < pkt_data, good access", .insns = { @@ -593,16 +948,16 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' < pkt_data, bad access 1", + "XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), BPF_JMP_IMM(BPF_JA, 0, 0, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -630,7 +985,43 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data < pkt_meta', good access", + "XDP pkt read, pkt_meta' < pkt_data, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -682,6 +1073,41 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_meta' >= pkt_data, good access", .insns = { @@ -700,15 +1126,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' >= pkt_data, bad access 1", + "XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -736,7 +1162,41 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data >= pkt_meta', good access", + "XDP pkt read, pkt_meta' >= pkt_data, corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -791,7 +1251,44 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_meta' <= pkt_data, good access", + "XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case, good access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), @@ -845,6 +1342,43 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 9), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -9), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "R1 offset is outside of the packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, { "XDP pkt read, pkt_data <= pkt_meta', good access", .insns = { @@ -863,15 +1397,15 @@ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { - "XDP pkt read, pkt_data <= pkt_meta', bad access 1", + "XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data_meta)), BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6), BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), - BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -6), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, @@ -898,3 +1432,37 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "XDP pkt read, pkt_data <= pkt_meta', corner case, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data_meta)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index 42be3b9258301e87823edfe7284142fca5d49158..cd4582129c7d69c1e45b6a3acbd7a5f2c6c07e67 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -52,6 +52,12 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) size = sizeof(struct __clone_args); switch (test_mode) { + case CLONE3_ARGS_NO_TEST: + /* + * Uses default 'flags' and 'SIGCHLD' + * assignment. + */ + break; case CLONE3_ARGS_ALL_0: args.flags = 0; args.exit_signal = 0; @@ -120,8 +126,6 @@ static void test_clone3(uint64_t flags, size_t size, int expected, int main(int argc, char *argv[]) { - pid_t pid; - uid_t uid = getuid(); ksft_print_header(); diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 575b391ddc78d31bcfc424e40c5a540428d98e09..0a26795842f6f83ad0381542cf1247cc04e30df1 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -33,7 +33,7 @@ static inline int sys_close_range(unsigned int fd, unsigned int max_fd, #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif -TEST(close_range) +TEST(core_close_range) { int i, ret; int open_fds[101]; diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..8a3f2cd9fec0c65b8d88f7ff1b891c7ef6746c0b --- /dev/null +++ b/tools/testing/selftests/damon/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for damon selftests + +TEST_FILES = _chk_dependency.sh +TEST_PROGS = debugfs_attrs.sh + +include ../lib.mk diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh new file mode 100644 index 0000000000000000000000000000000000000000..0189db81550be9f196cee04c6ed3ad1614db4248 --- /dev/null +++ b/tools/testing/selftests/damon/_chk_dependency.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +DBGFS=/sys/kernel/debug/damon + +if [ $EUID -ne 0 ]; +then + echo "Run as root" + exit $ksft_skip +fi + +if [ ! -d "$DBGFS" ] +then + echo "$DBGFS not found" + exit $ksft_skip +fi + +for f in attrs target_ids monitor_on +do + if [ ! -f "$DBGFS/$f" ] + then + echo "$f not found" + exit 1 + fi +done diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh new file mode 100644 index 0000000000000000000000000000000000000000..196b6640bf3783601833f348447d720f71f9f7ec --- /dev/null +++ b/tools/testing/selftests/damon/debugfs_attrs.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +test_write_result() { + file=$1 + content=$2 + orig_content=$3 + expect_reason=$4 + expected=$5 + + echo "$content" > "$file" + if [ $? -ne "$expected" ] + then + echo "writing $content to $file doesn't return $expected" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +test_write_succ() { + test_write_result "$1" "$2" "$3" "$4" 0 +} + +test_write_fail() { + test_write_result "$1" "$2" "$3" "$4" 1 +} + +test_content() { + file=$1 + orig_content=$2 + expected=$3 + expect_reason=$4 + + content=$(cat "$file") + if [ "$content" != "$expected" ] + then + echo "reading $file expected $expected but $content" + echo "expected because: $expect_reason" + echo "$orig_content" > "$file" + exit 1 + fi +} + +source ./_chk_dependency.sh + +# Test attrs file +# =============== + +file="$DBGFS/attrs" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4 5" "$orig_content" "valid input" +test_write_fail "$file" "1 2 3 4" "$orig_content" "no enough fields" +test_write_fail "$file" "1 2 3 5 4" "$orig_content" \ + "min_nr_regions > max_nr_regions" +test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written" +echo "$orig_content" > "$file" + +# Test schemes file +# ================= + +file="$DBGFS/schemes" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \ + "$orig_content" "valid input" +test_write_fail "$file" "1 2 +3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines" +test_write_succ "$file" "" "$orig_content" "disabling" +echo "$orig_content" > "$file" + +# Test target_ids file +# ==================== + +file="$DBGFS/target_ids" +orig_content=$(cat "$file") + +test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input" +test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input" +test_content "$file" "$orig_content" "1 2" "non-integer was there" +test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input" +test_content "$file" "$orig_content" "" "wrong input written" +test_write_succ "$file" "" "$orig_content" "empty input" +test_content "$file" "$orig_content" "" "empty input written" +echo "$orig_content" > "$file" + +echo "PASS" diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh index 3e3e06ea5703cd93e73f619f4c39ffe16115eb80..86e787895f78b19300677529884f01e6eeb65cab 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh @@ -60,7 +60,8 @@ __tc_police_test() tc_police_rules_create $count $should_fail - offload_count=$(tc filter show dev $swp1 ingress | grep in_hw | wc -l) + offload_count=$(tc -j filter show dev $swp1 ingress | + jq "[.[] | select(.options.in_hw == true)] | length") ((offload_count == count)) check_err_fail $should_fail $? "tc police offload count" } diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index dd61118df66edbcde89f92a2626d2e9186122fda..2d7fca446c7f7b418f1be5580ce42f1c205978f2 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -3,9 +3,9 @@ CFLAGS = -Wall CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE -TEST_PROGS := binfmt_script non-regular -TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 -TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe +TEST_PROGS := binfmt_script +TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular +TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 477cbb042f5badec5361ffa6abd1d739fd578edb..0315955ff0f41df9f0a9b168582a84f914a694aa 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -62,6 +62,9 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) struct binder_version version = { 0 }; char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX", device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME]; + static const char * const binder_features[] = { + "oneway_spam_detection", + }; change_mountns(_metadata); @@ -150,6 +153,20 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) } /* success: binder-control device removal failed as expected */ + + for (int i = 0; i < ARRAY_SIZE(binder_features); i++) { + snprintf(device_path, sizeof(device_path), "%s/features/%s", + binderfs_mntpt, binder_features[i]); + fd = open(device_path, O_CLOEXEC | O_RDONLY); + EXPECT_GE(fd, 0) { + TH_LOG("%s - Failed to open binder feature: %s", + strerror(errno), binder_features[i]); + goto umount; + } + close(fd); + } + + /* success: binder feature files found */ result = 0; umount: diff --git a/tools/testing/selftests/filesystems/fuse/.gitignore b/tools/testing/selftests/filesystems/fuse/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..3ee9a27fe66a41621d4ec916829d29c3a8cef019 --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/.gitignore @@ -0,0 +1,2 @@ +fuse_test +*.raw diff --git a/tools/testing/selftests/filesystems/fuse/Makefile b/tools/testing/selftests/filesystems/fuse/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..9e7420a1e825051213e156ddef022ded7962bbea --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/Makefile @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -Werror -I../.. -I../../../../.. +LDLIBS := -lpthread -lelf +TEST_GEN_PROGS := fuse_test fuse_daemon +TEST_GEN_FILES := \ + test_bpf.bpf \ + fd_bpf.bpf \ + fd.sh \ + +EXTRA_CLEAN := *.bpf +BPF_FLAGS = -Wall -Werror -O2 -g -emit-llvm \ + -I ../../../../../include \ + -idirafter /usr/lib/gcc/x86_64-linux-gnu/10/include \ + -idirafter /usr/local/include \ + -idirafter /usr/include/x86_64-linux-gnu \ + -idirafter /usr/include \ + +include ../../lib.mk + +# Put after include ../../lib.mk since that changes $(TEST_GEN_PROGS) +# Otherwise you get multiple targets, this becomes the default, and it's a mess +EXTRA_SOURCES := bpf_loader.c +$(TEST_GEN_PROGS) : $(EXTRA_SOURCES) + +$(OUTPUT)/%.ir: %.c + clang $(BPF_FLAGS) -c $< -o $@ + +$(OUTPUT)/%.bpf: $(OUTPUT)/%.ir + llc -march=bpf -filetype=obj -o $@ $< + +$(OUTPUT)/fd.sh: fd.txt + cp $< $@ + chmod 755 $@ + diff --git a/tools/testing/selftests/filesystems/fuse/OWNERS b/tools/testing/selftests/filesystems/fuse/OWNERS new file mode 100644 index 0000000000000000000000000000000000000000..5eb371e1a5a315c47cd87e7b8b3d85477ee45ab9 --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/OWNERS @@ -0,0 +1,2 @@ +# include OWNERS from the authoritative android-mainline branch +include kernel/common:android-mainline:/tools/testing/selftests/filesystems/incfs/OWNERS diff --git a/tools/testing/selftests/filesystems/fuse/bpf_loader.c b/tools/testing/selftests/filesystems/fuse/bpf_loader.c new file mode 100644 index 0000000000000000000000000000000000000000..6377c18d38c1c0ada37972594495851643173c5a --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/bpf_loader.c @@ -0,0 +1,751 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2021 Google LLC + */ + +#include "test_fuse.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include + +struct _test_options test_options; + +struct s s(const char *s1) +{ + struct s s = {0}; + + if (!s1) + return s; + + s.s = malloc(strlen(s1) + 1); + if (!s.s) + return s; + + strcpy(s.s, s1); + return s; +} + +struct s sn(const char *s1, const char *s2) +{ + struct s s = {0}; + + if (!s1) + return s; + + s.s = malloc(s2 - s1 + 1); + if (!s.s) + return s; + + strncpy(s.s, s1, s2 - s1); + s.s[s2 - s1] = 0; + return s; +} + +int s_cmp(struct s s1, struct s s2) +{ + int result = -1; + + if (!s1.s || !s2.s) + goto out; + result = strcmp(s1.s, s2.s); +out: + free(s1.s); + free(s2.s); + return result; +} + +struct s s_cat(struct s s1, struct s s2) +{ + struct s s = {0}; + + if (!s1.s || !s2.s) + goto out; + + s.s = malloc(strlen(s1.s) + strlen(s2.s) + 1); + if (!s.s) + goto out; + + strcpy(s.s, s1.s); + strcat(s.s, s2.s); +out: + free(s1.s); + free(s2.s); + return s; +} + +struct s s_splitleft(struct s s1, char c) +{ + struct s s = {0}; + char *split; + + if (!s1.s) + return s; + + split = strchr(s1.s, c); + if (split) + s = sn(s1.s, split); + + free(s1.s); + return s; +} + +struct s s_splitright(struct s s1, char c) +{ + struct s s2 = {0}; + char *split; + + if (!s1.s) + return s2; + + split = strchr(s1.s, c); + if (split) + s2 = s(split + 1); + + free(s1.s); + return s2; +} + +struct s s_word(struct s s1, char c, size_t n) +{ + while (n--) + s1 = s_splitright(s1, c); + return s_splitleft(s1, c); +} + +struct s s_path(struct s s1, struct s s2) +{ + return s_cat(s_cat(s1, s("/")), s2); +} + +struct s s_pathn(size_t n, struct s s1, ...) +{ + va_list argp; + + va_start(argp, s1); + while (--n) + s1 = s_path(s1, va_arg(argp, struct s)); + va_end(argp); + return s1; +} + +int s_link(struct s src_pathname, struct s dst_pathname) +{ + int res; + + if (src_pathname.s && dst_pathname.s) { + res = link(src_pathname.s, dst_pathname.s); + } else { + res = -1; + errno = ENOMEM; + } + + free(src_pathname.s); + free(dst_pathname.s); + return res; +} + +int s_symlink(struct s src_pathname, struct s dst_pathname) +{ + int res; + + if (src_pathname.s && dst_pathname.s) { + res = symlink(src_pathname.s, dst_pathname.s); + } else { + res = -1; + errno = ENOMEM; + } + + free(src_pathname.s); + free(dst_pathname.s); + return res; +} + + +int s_mkdir(struct s pathname, mode_t mode) +{ + int res; + + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + res = mkdir(pathname.s, mode); + free(pathname.s); + return res; +} + +int s_rmdir(struct s pathname) +{ + int res; + + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + res = rmdir(pathname.s); + free(pathname.s); + return res; +} + +int s_unlink(struct s pathname) +{ + int res; + + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + res = unlink(pathname.s); + free(pathname.s); + return res; +} + +int s_open(struct s pathname, int flags, ...) +{ + va_list ap; + int res; + + va_start(ap, flags); + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + if (flags & (O_CREAT | O_TMPFILE)) + res = open(pathname.s, flags, va_arg(ap, mode_t)); + else + res = open(pathname.s, flags); + + free(pathname.s); + va_end(ap); + return res; +} + +int s_openat(int dirfd, struct s pathname, int flags, ...) +{ + va_list ap; + int res; + + va_start(ap, flags); + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + if (flags & (O_CREAT | O_TMPFILE)) + res = openat(dirfd, pathname.s, flags, va_arg(ap, mode_t)); + else + res = openat(dirfd, pathname.s, flags); + + free(pathname.s); + va_end(ap); + return res; +} + +int s_creat(struct s pathname, mode_t mode) +{ + int res; + + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + res = open(pathname.s, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, mode); + free(pathname.s); + return res; +} + +int s_mkfifo(struct s pathname, mode_t mode) +{ + int res; + + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + res = mknod(pathname.s, S_IFIFO | mode, 0); + free(pathname.s); + return res; +} + +int s_stat(struct s pathname, struct stat *st) +{ + int res; + + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + res = stat(pathname.s, st); + free(pathname.s); + return res; +} + +int s_statfs(struct s pathname, struct statfs *st) +{ + int res; + + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + res = statfs(pathname.s, st); + free(pathname.s); + return res; +} + +DIR *s_opendir(struct s pathname) +{ + DIR *res; + + res = opendir(pathname.s); + free(pathname.s); + return res; +} + +int s_getxattr(struct s pathname, const char name[], void *value, size_t size, + ssize_t *ret_size) +{ + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + *ret_size = getxattr(pathname.s, name, value, size); + free(pathname.s); + return *ret_size >= 0 ? 0 : -1; +} + +int s_listxattr(struct s pathname, void *list, size_t size, ssize_t *ret_size) +{ + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + *ret_size = listxattr(pathname.s, list, size); + free(pathname.s); + return *ret_size >= 0 ? 0 : -1; +} + +int s_setxattr(struct s pathname, const char name[], const void *value, size_t size, int flags) +{ + int res; + + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + res = setxattr(pathname.s, name, value, size, flags); + free(pathname.s); + return res; +} + +int s_removexattr(struct s pathname, const char name[]) +{ + int res; + + if (!pathname.s) { + errno = ENOMEM; + return -1; + } + + res = removexattr(pathname.s, name); + free(pathname.s); + return res; +} + +int s_rename(struct s oldpathname, struct s newpathname) +{ + int res; + + if (!oldpathname.s || !newpathname.s) { + errno = ENOMEM; + return -1; + } + + res = rename(oldpathname.s, newpathname.s); + free(oldpathname.s); + free(newpathname.s); + return res; +} + +struct s tracing_folder(void) +{ + struct s trace = {0}; + FILE *mounts = NULL; + char *line = NULL; + size_t size = 0; + + TEST(mounts = fopen("/proc/mounts", "re"), mounts); + while (getline(&line, &size, mounts) != -1) { + if (!s_cmp(s_word(sn(line, line + size), ' ', 2), + s("tracefs"))) { + trace = s_word(sn(line, line + size), ' ', 1); + break; + } + + if (!s_cmp(s_word(sn(line, line + size), ' ', 2), s("debugfs"))) + trace = s_path(s_word(sn(line, line + size), ' ', 1), + s("tracing")); + } + +out: + free(line); + fclose(mounts); + return trace; +} + +int tracing_on(void) +{ + int result = TEST_FAILURE; + int tracing_on = -1; + + TEST(tracing_on = s_open(s_path(tracing_folder(), s("tracing_on")), + O_WRONLY | O_CLOEXEC), + tracing_on != -1); + TESTEQUAL(write(tracing_on, "1", 1), 1); + result = TEST_SUCCESS; +out: + close(tracing_on); + return result; +} + +char *concat_file_name(const char *dir, const char *file) +{ + char full_name[FILENAME_MAX] = ""; + + if (snprintf(full_name, ARRAY_SIZE(full_name), "%s/%s", dir, file) < 0) + return NULL; + return strdup(full_name); +} + +char *setup_mount_dir(const char *name) +{ + struct stat st; + char *current_dir = getcwd(NULL, 0); + char *mount_dir = concat_file_name(current_dir, name); + + free(current_dir); + if (stat(mount_dir, &st) == 0) { + if (S_ISDIR(st.st_mode)) + return mount_dir; + + ksft_print_msg("%s is a file, not a dir.\n", mount_dir); + return NULL; + } + + if (mkdir(mount_dir, 0777)) { + ksft_print_msg("Can't create mount dir."); + return NULL; + } + + return mount_dir; +} + +int delete_dir_tree(const char *dir_path, bool remove_root) +{ + DIR *dir = NULL; + struct dirent *dp; + int result = 0; + + dir = opendir(dir_path); + if (!dir) { + result = -errno; + goto out; + } + + while ((dp = readdir(dir))) { + char *full_path; + + if (!strcmp(dp->d_name, ".") || !strcmp(dp->d_name, "..")) + continue; + + full_path = concat_file_name(dir_path, dp->d_name); + if (dp->d_type == DT_DIR) + result = delete_dir_tree(full_path, true); + else + result = unlink(full_path); + free(full_path); + if (result) + goto out; + } + +out: + if (dir) + closedir(dir); + if (!result && remove_root) + rmdir(dir_path); + return result; +} + +static int mount_fuse_maybe_init(const char *mount_dir, int bpf_fd, int dir_fd, + int *fuse_dev_ptr, bool init) +{ + int result = TEST_FAILURE; + int fuse_dev = -1; + char options[FILENAME_MAX]; + uint8_t bytes_in[FUSE_MIN_READ_BUFFER]; + uint8_t bytes_out[FUSE_MIN_READ_BUFFER]; + + DECL_FUSE_IN(init); + + TEST(fuse_dev = open("/dev/fuse", O_RDWR | O_CLOEXEC), fuse_dev != -1); + snprintf(options, FILENAME_MAX, "fd=%d,user_id=0,group_id=0,rootmode=0040000", + fuse_dev); + if (bpf_fd != -1) + snprintf(options + strlen(options), + sizeof(options) - strlen(options), + ",root_bpf=%d", bpf_fd); + if (dir_fd != -1) + snprintf(options + strlen(options), + sizeof(options) - strlen(options), + ",root_dir=%d", dir_fd); + TESTSYSCALL(mount("ABC", mount_dir, "fuse", 0, options)); + + if (init) { + TESTFUSEIN(FUSE_INIT, init_in); + TESTEQUAL(init_in->major, FUSE_KERNEL_VERSION); + TESTEQUAL(init_in->minor, FUSE_KERNEL_MINOR_VERSION); + TESTFUSEOUT1(fuse_init_out, ((struct fuse_init_out) { + .major = FUSE_KERNEL_VERSION, + .minor = FUSE_KERNEL_MINOR_VERSION, + .max_readahead = 4096, + .flags = 0, + .max_background = 0, + .congestion_threshold = 0, + .max_write = 4096, + .time_gran = 1000, + .max_pages = 12, + .map_alignment = 4096, + })); + } + + *fuse_dev_ptr = fuse_dev; + fuse_dev = -1; + result = TEST_SUCCESS; +out: + close(fuse_dev); + return result; +} + +int mount_fuse(const char *mount_dir, int bpf_fd, int dir_fd, int *fuse_dev_ptr) +{ + return mount_fuse_maybe_init(mount_dir, bpf_fd, dir_fd, fuse_dev_ptr, + true); +} + +int mount_fuse_no_init(const char *mount_dir, int bpf_fd, int dir_fd, + int *fuse_dev_ptr) +{ + return mount_fuse_maybe_init(mount_dir, bpf_fd, dir_fd, fuse_dev_ptr, + false); +} + +struct fuse_bpf_map { + unsigned int map_type; + size_t key_size; + size_t value_size; + unsigned int max_entries; +}; + +static int install_maps(Elf_Data *maps, int maps_index, Elf *elf, + Elf_Data *symbols, int symbol_index, + struct map_relocation **mr, size_t *map_count) +{ + int result = TEST_FAILURE; + int i; + GElf_Sym symbol; + + TESTNE((void *)symbols, NULL); + + for (i = 0; i < symbols->d_size / sizeof(symbol); ++i) { + TESTNE((void *)gelf_getsym(symbols, i, &symbol), 0); + if (symbol.st_shndx == maps_index) { + struct fuse_bpf_map *map; + union bpf_attr attr; + int map_fd; + + map = (struct fuse_bpf_map *) + ((char *)maps->d_buf + symbol.st_value); + + attr = (union bpf_attr) { + .map_type = map->map_type, + .key_size = map->key_size, + .value_size = map->value_size, + .max_entries = map->max_entries, + }; + + TEST(*mr = realloc(*mr, ++*map_count * + sizeof(struct fuse_bpf_map)), + *mr); + TEST(map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, + &attr, sizeof(attr)), + map_fd != -1); + (*mr)[*map_count - 1] = (struct map_relocation) { + .name = strdup(elf_strptr(elf, symbol_index, + symbol.st_name)), + .fd = map_fd, + .value = symbol.st_value, + }; + } + } + + result = TEST_SUCCESS; +out: + return result; +} + +static inline int relocate_maps(GElf_Shdr *rel_header, Elf_Data *rel_data, + Elf_Data *prog_data, Elf_Data *symbol_data, + struct map_relocation *map_relocations, + size_t map_count) +{ + int result = TEST_FAILURE; + int i; + struct bpf_insn *insns = (struct bpf_insn *) prog_data->d_buf; + + for (i = 0; i < rel_header->sh_size / rel_header->sh_entsize; ++i) { + GElf_Sym sym; + GElf_Rel rel; + unsigned int insn_idx; + int map_idx; + + gelf_getrel(rel_data, i, &rel); + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; + + gelf_getsym(symbol_data, GELF_R_SYM(rel.r_info), &sym); + for (map_idx = 0; map_idx < map_count; map_idx++) { + if (map_relocations[map_idx].value == sym.st_value) { + insns[insn_idx].imm = + map_relocations[map_idx].fd; + break; + } + } + TESTNE(map_idx, map_count); + } + + result = TEST_SUCCESS; +out: + return result; +} + +int install_elf_bpf(const char *file, const char *section, int *fd, + struct map_relocation **map_relocations, size_t *map_count) +{ + int result = TEST_FAILURE; + char path[PATH_MAX] = {}; + char *last_slash; + int filter_fd = -1; + union bpf_attr bpf_attr; + static char log[1 << 20]; + Elf *elf = NULL; + GElf_Ehdr ehdr; + Elf_Data *data_prog = NULL, *data_maps = NULL, *data_symbols = NULL; + int maps_index, symbol_index, prog_index; + int i; + + TESTNE(readlink("/proc/self/exe", path, PATH_MAX), -1); + TEST(last_slash = strrchr(path, '/'), last_slash); + strcpy(last_slash + 1, file); + TEST(filter_fd = open(path, O_RDONLY | O_CLOEXEC), filter_fd != -1); + TESTNE(elf_version(EV_CURRENT), EV_NONE); + TEST(elf = elf_begin(filter_fd, ELF_C_READ, NULL), elf); + TESTEQUAL((void *) gelf_getehdr(elf, &ehdr), &ehdr); + for (i = 1; i < ehdr.e_shnum; i++) { + char *shname; + GElf_Shdr shdr; + Elf_Scn *scn; + + TEST(scn = elf_getscn(elf, i), scn); + TESTEQUAL((void *)gelf_getshdr(scn, &shdr), &shdr); + TEST(shname = elf_strptr(elf, ehdr.e_shstrndx, shdr.sh_name), + shname); + + if (!strcmp(shname, "maps")) { + TEST(data_maps = elf_getdata(scn, 0), data_maps); + maps_index = i; + } else if (shdr.sh_type == SHT_SYMTAB) { + TEST(data_symbols = elf_getdata(scn, 0), data_symbols); + symbol_index = shdr.sh_link; + } else if (!strcmp(shname, section)) { + TEST(data_prog = elf_getdata(scn, 0), data_prog); + prog_index = i; + } + } + TESTNE((void *) data_prog, NULL); + + if (data_maps) + TESTEQUAL(install_maps(data_maps, maps_index, elf, + data_symbols, symbol_index, + map_relocations, map_count), 0); + + /* Now relocate maps */ + for (i = 1; i < ehdr.e_shnum; i++) { + GElf_Shdr rel_header; + Elf_Scn *scn; + Elf_Data *rel_data; + + TEST(scn = elf_getscn(elf, i), scn); + TESTEQUAL((void *)gelf_getshdr(scn, &rel_header), + &rel_header); + if (rel_header.sh_type != SHT_REL) + continue; + TEST(rel_data = elf_getdata(scn, 0), rel_data); + + if (rel_header.sh_info != prog_index) + continue; + TESTEQUAL(relocate_maps(&rel_header, rel_data, + data_prog, data_symbols, + *map_relocations, *map_count), + 0); + } + + bpf_attr = (union bpf_attr) { + .prog_type = BPF_PROG_TYPE_FUSE, + .insn_cnt = data_prog->d_size / 8, + .insns = ptr_to_u64(data_prog->d_buf), + .license = ptr_to_u64("GPL"), + .log_buf = test_options.verbose ? ptr_to_u64(log) : 0, + .log_size = test_options.verbose ? sizeof(log) : 0, + .log_level = test_options.verbose ? 2 : 0, + }; + *fd = syscall(__NR_bpf, BPF_PROG_LOAD, &bpf_attr, sizeof(bpf_attr)); + if (test_options.verbose) + ksft_print_msg("%s\n", log); + if (*fd == -1 && errno == ENOSPC) + ksft_print_msg("bpf log size too small!\n"); + TESTNE(*fd, -1); + + result = TEST_SUCCESS; +out: + close(filter_fd); + return result; +} + + diff --git a/tools/testing/selftests/filesystems/fuse/fd.txt b/tools/testing/selftests/filesystems/fuse/fd.txt new file mode 100644 index 0000000000000000000000000000000000000000..15ce77180d556cbdd43ab6d1dec588ffe38c78f8 --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fd.txt @@ -0,0 +1,21 @@ +fuse_daemon $* +cd fd-dst +ls +cd show +ls +fsstress -s 123 -d . -p 4 -n 100 -l5 +echo test > wibble +ls +cat wibble +fallocate -l 1000 wobble +mkdir testdir +mkdir tmpdir +rmdir tmpdir +touch tmp +mv tmp tmp2 +rm tmp2 + +# FUSE_LINK +echo "ln_src contents" > ln_src +ln ln_src ln_link +cat ln_link diff --git a/tools/testing/selftests/filesystems/fuse/fd_bpf.c b/tools/testing/selftests/filesystems/fuse/fd_bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..ebf5c92c92972f22779c71faf720ef040a1406fa --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fd_bpf.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2021 Google LLC + +#define __EXPORTED_HEADERS__ +#define __KERNEL__ + +#ifdef __ANDROID__ +#include +#endif + +#include +#include +#include +#include + +struct fuse_bpf_map { + int map_type; + size_t key_size; + size_t value_size; + int max_entries; +}; + +static void *(*bpf_map_lookup_elem)(struct fuse_bpf_map *map, void *key) + = (void *) 1; + +static void *(*bpf_map_update_elem)(struct fuse_bpf_map *map, void *key, + void *value, int flags) + = (void *) 2; + +static long (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) + = (void *) 6; + +static long (*bpf_get_current_pid_tgid)() + = (void *) 14; + +static long (*bpf_get_current_uid_gid)() + = (void *) 15; + +#define bpf_printk(fmt, ...) \ + ({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ + }) + +#define SEC(NAME) __attribute__((section(NAME), used)) + +SEC("dummy") + +inline int strcmp(const char *a, const char *b) +{ + int i; + + for (i = 0; i < __builtin_strlen(b) + 1; ++i) + if (a[i] != b[i]) + return -1; + + return 0; +} + +SEC("maps") struct fuse_bpf_map test_map = { + BPF_MAP_TYPE_ARRAY, + sizeof(uint32_t), + sizeof(uint32_t), + 1000, +}; + +SEC("maps") struct fuse_bpf_map test_map2 = { + BPF_MAP_TYPE_HASH, + sizeof(uint32_t), + sizeof(uint64_t), + 76, +}; + +SEC("test_daemon") + +int trace_daemon(struct fuse_bpf_args *fa) +{ + uint64_t uid_gid = bpf_get_current_uid_gid(); + uint32_t uid = uid_gid & 0xffffffff; + uint64_t pid_tgid = bpf_get_current_pid_tgid(); + uint32_t pid = pid_tgid & 0xffffffff; + uint32_t key = 23; + uint32_t *pvalue; + + + pvalue = bpf_map_lookup_elem(&test_map, &key); + if (pvalue) { + uint32_t value = *pvalue; + + bpf_printk("pid %u uid %u value %u", pid, uid, value); + value++; + bpf_map_update_elem(&test_map, &key, &value, BPF_ANY); + } + + switch (fa->opcode) { + case FUSE_ACCESS | FUSE_PREFILTER: { + bpf_printk("Access: %d", fa->nodeid); + return FUSE_BPF_BACKING; + } + + case FUSE_GETATTR | FUSE_PREFILTER: { + const struct fuse_getattr_in *fgi = fa->in_args[0].value; + + bpf_printk("Get Attr %d", fgi->fh); + return FUSE_BPF_BACKING; + } + + case FUSE_SETATTR | FUSE_PREFILTER: { + const struct fuse_setattr_in *fsi = fa->in_args[0].value; + + bpf_printk("Set Attr %d", fsi->fh); + return FUSE_BPF_BACKING; + } + + case FUSE_OPENDIR | FUSE_PREFILTER: { + bpf_printk("Open Dir: %d", fa->nodeid); + return FUSE_BPF_BACKING; + } + + case FUSE_READDIR | FUSE_PREFILTER: { + const struct fuse_read_in *fri = fa->in_args[0].value; + + bpf_printk("Read Dir: fh: %lu", fri->fh, fri->offset); + return FUSE_BPF_BACKING; + } + + case FUSE_LOOKUP | FUSE_PREFILTER: { + const char *name = fa->in_args[0].value; + + bpf_printk("Lookup: %lx %s", fa->nodeid, name); + if (fa->nodeid == 1) + return FUSE_BPF_USER_FILTER | FUSE_BPF_BACKING; + else + return FUSE_BPF_BACKING; + } + + case FUSE_MKNOD | FUSE_PREFILTER: { + const struct fuse_mknod_in *fmi = fa->in_args[0].value; + const char *name = fa->in_args[1].value; + + bpf_printk("mknod %s %x %x", name, fmi->rdev | fmi->mode, fmi->umask); + return FUSE_BPF_BACKING; + } + + case FUSE_MKDIR | FUSE_PREFILTER: { + const struct fuse_mkdir_in *fmi = fa->in_args[0].value; + const char *name = fa->in_args[1].value; + + bpf_printk("mkdir: %s %x %x", name, fmi->mode, fmi->umask); + return FUSE_BPF_BACKING; + } + + case FUSE_RMDIR | FUSE_PREFILTER: { + const char *name = fa->in_args[0].value; + + bpf_printk("rmdir: %s", name); + return FUSE_BPF_BACKING; + } + + case FUSE_RENAME | FUSE_PREFILTER: { + const char *oldname = fa->in_args[1].value; + const char *newname = fa->in_args[2].value; + + bpf_printk("rename from %s", oldname); + bpf_printk("rename to %s", newname); + return FUSE_BPF_BACKING; + } + + case FUSE_RENAME2 | FUSE_PREFILTER: { + const struct fuse_rename2_in *fri = fa->in_args[0].value; + uint32_t flags = fri->flags; + const char *oldname = fa->in_args[1].value; + const char *newname = fa->in_args[2].value; + + bpf_printk("rename(%x) from %s", flags, oldname); + bpf_printk("rename to %s", newname); + return FUSE_BPF_BACKING; + } + + case FUSE_UNLINK | FUSE_PREFILTER: { + const char *name = fa->in_args[0].value; + + bpf_printk("unlink: %s", name); + return FUSE_BPF_BACKING; + } + + case FUSE_LINK | FUSE_PREFILTER: { + const struct fuse_link_in *fli = fa->in_args[0].value; + const char *dst_name = fa->in_args[1].value; + + bpf_printk("Link: %d %s", fli->oldnodeid, dst_name); + return FUSE_BPF_BACKING; + } + + case FUSE_SYMLINK | FUSE_PREFILTER: { + const char *link_name = fa->in_args[0].value; + const char *link_dest = fa->in_args[1].value; + + bpf_printk("symlink from %s", link_name); + bpf_printk("symlink to %s", link_dest); + return FUSE_BPF_BACKING; + } + + case FUSE_READLINK | FUSE_PREFILTER: { + const char *link_name = fa->in_args[0].value; + + bpf_printk("readlink from %s", link_name); + return FUSE_BPF_BACKING; + } + + case FUSE_RELEASE | FUSE_PREFILTER: { + const struct fuse_release_in *fri = fa->in_args[0].value; + + bpf_printk("Release: %d", fri->fh); + return FUSE_BPF_BACKING; + } + + case FUSE_RELEASEDIR | FUSE_PREFILTER: { + const struct fuse_release_in *fri = fa->in_args[0].value; + + bpf_printk("Release Dir: %d", fri->fh); + return FUSE_BPF_BACKING; + } + + case FUSE_CREATE | FUSE_PREFILTER: { + bpf_printk("Create %s", fa->in_args[1].value); + return FUSE_BPF_BACKING; + } + + case FUSE_OPEN | FUSE_PREFILTER: { + bpf_printk("Open: %d", fa->nodeid); + return FUSE_BPF_BACKING; + } + + case FUSE_READ | FUSE_PREFILTER: { + const struct fuse_read_in *fri = fa->in_args[0].value; + + bpf_printk("Read: fh: %lu, offset %lu, size %lu", + fri->fh, fri->offset, fri->size); + return FUSE_BPF_BACKING; + } + + case FUSE_WRITE | FUSE_PREFILTER: { + const struct fuse_write_in *fwi = fa->in_args[0].value; + + bpf_printk("Write: fh: %lu, offset %lu, size %lu", + fwi->fh, fwi->offset, fwi->size); + return FUSE_BPF_BACKING; + } + + case FUSE_FLUSH | FUSE_PREFILTER: { + const struct fuse_flush_in *ffi = fa->in_args[0].value; + + bpf_printk("Flush %d", ffi->fh); + return FUSE_BPF_BACKING; + } + + case FUSE_FALLOCATE | FUSE_PREFILTER: { + const struct fuse_fallocate_in *ffa = fa->in_args[0].value; + + bpf_printk("Fallocate %d %lu", ffa->fh, ffa->length); + return FUSE_BPF_BACKING; + } + + case FUSE_GETXATTR | FUSE_PREFILTER: { + const char *name = fa->in_args[1].value; + + bpf_printk("Getxattr %d %s", fa->nodeid, name); + return FUSE_BPF_BACKING; + } + + case FUSE_LISTXATTR | FUSE_PREFILTER: { + const char *name = fa->in_args[1].value; + + bpf_printk("Listxattr %d %s", fa->nodeid, name); + return FUSE_BPF_BACKING; + } + + case FUSE_SETXATTR | FUSE_PREFILTER: { + const char *name = fa->in_args[1].value; + + bpf_printk("Setxattr %d %s", fa->nodeid, name); + return FUSE_BPF_BACKING; + } + + case FUSE_STATFS | FUSE_PREFILTER: { + bpf_printk("statfs %d", fa->nodeid); + return FUSE_BPF_BACKING; + } + + case FUSE_LSEEK | FUSE_PREFILTER: { + const struct fuse_lseek_in *fli = fa->in_args[0].value; + + bpf_printk("lseek type:%d, offset:%lld", fli->whence, fli->offset); + return FUSE_BPF_BACKING; + } + + default: + if (fa->opcode & FUSE_PREFILTER) + bpf_printk("prefilter *** UNKNOWN *** opcode: %d", + fa->opcode & FUSE_OPCODE_FILTER); + else if (fa->opcode & FUSE_POSTFILTER) + bpf_printk("postfilter *** UNKNOWN *** opcode: %d", + fa->opcode & FUSE_OPCODE_FILTER); + else + bpf_printk("*** UNKNOWN *** opcode: %d", fa->opcode); + return FUSE_BPF_BACKING; + } +} diff --git a/tools/testing/selftests/filesystems/fuse/fuse_daemon.c b/tools/testing/selftests/filesystems/fuse/fuse_daemon.c new file mode 100644 index 0000000000000000000000000000000000000000..6213606c28c3d50bfec1a86a802d827213d446b9 --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fuse_daemon.c @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2021 Google LLC + */ + +#include "test_fuse.h" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +bool user_messages; +bool kernel_messages; + +static int display_trace(void) +{ + int pid = -1; + int tp = -1; + char c; + ssize_t bytes_read; + static char line[256] = {0}; + + if (!kernel_messages) + return TEST_SUCCESS; + + TEST(pid = fork(), pid != -1); + if (pid != 0) + return pid; + + TESTEQUAL(tracing_on(), 0); + TEST(tp = s_open(s_path(tracing_folder(), s("trace_pipe")), + O_RDONLY | O_CLOEXEC), tp != -1); + for (;;) { + TEST(bytes_read = read(tp, &c, sizeof(c)), + bytes_read == 1); + if (c == '\n') { + printf("%s\n", line); + line[0] = 0; + } else + sprintf(line + strlen(line), "%c", c); + } +out: + if (pid == 0) { + close(tp); + exit(TEST_FAILURE); + } + return pid; +} + +static const char *fuse_opcode_to_string(int opcode) +{ + switch (opcode & FUSE_OPCODE_FILTER) { + case FUSE_LOOKUP: + return "FUSE_LOOKUP"; + case FUSE_FORGET: + return "FUSE_FORGET"; + case FUSE_GETATTR: + return "FUSE_GETATTR"; + case FUSE_SETATTR: + return "FUSE_SETATTR"; + case FUSE_READLINK: + return "FUSE_READLINK"; + case FUSE_SYMLINK: + return "FUSE_SYMLINK"; + case FUSE_MKNOD: + return "FUSE_MKNOD"; + case FUSE_MKDIR: + return "FUSE_MKDIR"; + case FUSE_UNLINK: + return "FUSE_UNLINK"; + case FUSE_RMDIR: + return "FUSE_RMDIR"; + case FUSE_RENAME: + return "FUSE_RENAME"; + case FUSE_LINK: + return "FUSE_LINK"; + case FUSE_OPEN: + return "FUSE_OPEN"; + case FUSE_READ: + return "FUSE_READ"; + case FUSE_WRITE: + return "FUSE_WRITE"; + case FUSE_STATFS: + return "FUSE_STATFS"; + case FUSE_RELEASE: + return "FUSE_RELEASE"; + case FUSE_FSYNC: + return "FUSE_FSYNC"; + case FUSE_SETXATTR: + return "FUSE_SETXATTR"; + case FUSE_GETXATTR: + return "FUSE_GETXATTR"; + case FUSE_LISTXATTR: + return "FUSE_LISTXATTR"; + case FUSE_REMOVEXATTR: + return "FUSE_REMOVEXATTR"; + case FUSE_FLUSH: + return "FUSE_FLUSH"; + case FUSE_INIT: + return "FUSE_INIT"; + case FUSE_OPENDIR: + return "FUSE_OPENDIR"; + case FUSE_READDIR: + return "FUSE_READDIR"; + case FUSE_RELEASEDIR: + return "FUSE_RELEASEDIR"; + case FUSE_FSYNCDIR: + return "FUSE_FSYNCDIR"; + case FUSE_GETLK: + return "FUSE_GETLK"; + case FUSE_SETLK: + return "FUSE_SETLK"; + case FUSE_SETLKW: + return "FUSE_SETLKW"; + case FUSE_ACCESS: + return "FUSE_ACCESS"; + case FUSE_CREATE: + return "FUSE_CREATE"; + case FUSE_INTERRUPT: + return "FUSE_INTERRUPT"; + case FUSE_BMAP: + return "FUSE_BMAP"; + case FUSE_DESTROY: + return "FUSE_DESTROY"; + case FUSE_IOCTL: + return "FUSE_IOCTL"; + case FUSE_POLL: + return "FUSE_POLL"; + case FUSE_NOTIFY_REPLY: + return "FUSE_NOTIFY_REPLY"; + case FUSE_BATCH_FORGET: + return "FUSE_BATCH_FORGET"; + case FUSE_FALLOCATE: + return "FUSE_FALLOCATE"; + case FUSE_READDIRPLUS: + return "FUSE_READDIRPLUS"; + case FUSE_RENAME2: + return "FUSE_RENAME2"; + case FUSE_LSEEK: + return "FUSE_LSEEK"; + case FUSE_COPY_FILE_RANGE: + return "FUSE_COPY_FILE_RANGE"; + case FUSE_SETUPMAPPING: + return "FUSE_SETUPMAPPING"; + case FUSE_REMOVEMAPPING: + return "FUSE_REMOVEMAPPING"; + //case FUSE_SYNCFS: + // return "FUSE_SYNCFS"; + case CUSE_INIT: + return "CUSE_INIT"; + case CUSE_INIT_BSWAP_RESERVED: + return "CUSE_INIT_BSWAP_RESERVED"; + case FUSE_INIT_BSWAP_RESERVED: + return "FUSE_INIT_BSWAP_RESERVED"; + } + return "?"; +} + +static int parse_options(int argc, char *const *argv) +{ + signed char c; + + while ((c = getopt(argc, argv, "kuv")) != -1) + switch (c) { + case 'v': + test_options.verbose = true; + break; + + case 'u': + user_messages = true; + break; + + case 'k': + kernel_messages = true; + break; + + default: + return -EINVAL; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + int result = TEST_FAILURE; + int trace_pid = -1; + char *mount_dir = NULL; + char *src_dir = NULL; + int bpf_fd = -1; + int src_fd = -1; + int fuse_dev = -1; + struct map_relocation *map_relocations = NULL; + size_t map_count = 0; + int i; + + if (geteuid() != 0) + ksft_print_msg("Not a root, might fail to mount.\n"); + TESTEQUAL(parse_options(argc, argv), 0); + + TEST(trace_pid = display_trace(), trace_pid != -1); + + delete_dir_tree("fd-src", true); + TEST(src_dir = setup_mount_dir("fd-src"), src_dir); + delete_dir_tree("fd-dst", true); + TEST(mount_dir = setup_mount_dir("fd-dst"), mount_dir); + + TESTEQUAL(install_elf_bpf("fd_bpf.bpf", "test_daemon", &bpf_fd, + &map_relocations, &map_count), 0); + + TEST(src_fd = open("fd-src", O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTSYSCALL(mkdirat(src_fd, "show", 0777)); + TESTSYSCALL(mkdirat(src_fd, "hide", 0777)); + + for (i = 0; i < map_count; ++i) + if (!strcmp(map_relocations[i].name, "test_map")) { + uint32_t key = 23; + uint32_t value = 1234; + union bpf_attr attr = { + .map_fd = map_relocations[i].fd, + .key = ptr_to_u64(&key), + .value = ptr_to_u64(&value), + .flags = BPF_ANY, + }; + TESTSYSCALL(syscall(__NR_bpf, BPF_MAP_UPDATE_ELEM, + &attr, sizeof(attr))); + } + + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + if (fork()) + return 0; + + for (;;) { + uint8_t bytes_in[FUSE_MIN_READ_BUFFER]; + uint8_t bytes_out[FUSE_MIN_READ_BUFFER] __attribute__((unused)); + struct fuse_in_header *in_header = + (struct fuse_in_header *)bytes_in; + ssize_t res = read(fuse_dev, bytes_in, sizeof(bytes_in)); + + if (res == -1) + break; + + switch (in_header->opcode) { + case FUSE_LOOKUP | FUSE_PREFILTER: { + char *name = (char *)(bytes_in + sizeof(*in_header)); + + if (user_messages) + printf("Lookup %s\n", name); + if (!strcmp(name, "hide")) + TESTFUSEOUTERROR(-ENOENT); + else + TESTFUSEOUTREAD(name, strlen(name) + 1); + break; + } + default: + if (user_messages) { + printf("opcode is %d (%s)\n", in_header->opcode, + fuse_opcode_to_string( + in_header->opcode)); + } + break; + } + } + + result = TEST_SUCCESS; + +out: + for (i = 0; i < map_count; ++i) { + free(map_relocations[i].name); + close(map_relocations[i].fd); + } + free(map_relocations); + umount2(mount_dir, MNT_FORCE); + delete_dir_tree(mount_dir, true); + free(mount_dir); + delete_dir_tree(src_dir, true); + free(src_dir); + if (trace_pid != -1) + kill(trace_pid, SIGKILL); + return result; +} diff --git a/tools/testing/selftests/filesystems/fuse/fuse_test.c b/tools/testing/selftests/filesystems/fuse/fuse_test.c new file mode 100644 index 0000000000000000000000000000000000000000..7adc6bdb2241645f316ffeb6d8b1cd0af14e23cf --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/fuse_test.c @@ -0,0 +1,1624 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2021 Google LLC + */ +#define _GNU_SOURCE + +#include "test_fuse.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +static const char *ft_src = "ft-src"; +static const char *ft_dst = "ft-dst"; + +static void fill_buffer(uint8_t *data, size_t len, int file, int block) +{ + int i; + int seed = 7919 * file + block; + + for (i = 0; i < len; i++) { + seed = 1103515245 * seed + 12345; + data[i] = (uint8_t)(seed >> (i % 13)); + } +} + +static bool test_buffer(uint8_t *data, size_t len, int file, int block) +{ + int i; + int seed = 7919 * file + block; + + for (i = 0; i < len; i++) { + seed = 1103515245 * seed + 12345; + if (data[i] != (uint8_t)(seed >> (i % 13))) + return false; + } + + return true; +} + +static int create_file(int dir, struct s name, int index, size_t blocks) +{ + int result = TEST_FAILURE; + int fd = -1; + int i; + uint8_t data[PAGE_SIZE]; + + TEST(fd = s_openat(dir, name, O_CREAT | O_WRONLY, 0777), fd != -1); + for (i = 0; i < blocks; ++i) { + fill_buffer(data, PAGE_SIZE, index, i); + TESTEQUAL(write(fd, data, sizeof(data)), PAGE_SIZE); + } + TESTSYSCALL(close(fd)); + result = TEST_SUCCESS; + +out: + close(fd); + return result; +} + +static int bpf_clear_trace(void) +{ + int result = TEST_FAILURE; + int tp = -1; + + TEST(tp = s_open(s_path(tracing_folder(), s("trace")), + O_WRONLY | O_TRUNC | O_CLOEXEC), tp != -1); + + result = TEST_SUCCESS; +out: + close(tp); + return result; +} + +static int bpf_test_trace_maybe(const char *substr, bool present) +{ + int result = TEST_FAILURE; + int tp = -1; + char trace_buffer[4096] = {}; + ssize_t bytes_read; + + TEST(tp = s_open(s_path(tracing_folder(), s("trace_pipe")), + O_RDONLY | O_CLOEXEC), + tp != -1); + fcntl(tp, F_SETFL, O_NONBLOCK); + + for (;;) { + bytes_read = read(tp, trace_buffer, sizeof(trace_buffer)); + if (present) + TESTCOND(bytes_read > 0); + else if (bytes_read <= 0) { + result = TEST_SUCCESS; + break; + } + + if (test_options.verbose) + ksft_print_msg("%s\n", trace_buffer); + + if (strstr(trace_buffer, substr)) { + if (present) + result = TEST_SUCCESS; + break; + } + } +out: + close(tp); + return result; +} + +static int bpf_test_trace(const char *substr) +{ + return bpf_test_trace_maybe(substr, true); +} + +static int bpf_test_no_trace(const char *substr) +{ + return bpf_test_trace_maybe(substr, false); +} + +static int basic_test(const char *mount_dir) +{ + const char *test_name = "test"; + const char *test_data = "data"; + + int result = TEST_FAILURE; + int fuse_dev = -1; + char *filename = NULL; + int fd = -1; + int pid = -1; + int status; + + TESTEQUAL(mount_fuse(mount_dir, -1, -1, &fuse_dev), 0); + FUSE_ACTION + char data[256]; + + filename = concat_file_name(mount_dir, test_name); + TESTERR(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(read(fd, data, strlen(test_data)), strlen(test_data)); + TESTCOND(!strcmp(data, test_data)); + TESTSYSCALL(close(fd)); + fd = -1; + FUSE_DAEMON + DECL_FUSE_IN(open); + DECL_FUSE_IN(read); + DECL_FUSE_IN(flush); + DECL_FUSE_IN(release); + + TESTFUSELOOKUP(test_name, 0); + TESTFUSEOUT1(fuse_entry_out, ((struct fuse_entry_out) { + .nodeid = 2, + .generation = 1, + .attr.ino = 100, + .attr.size = 4, + .attr.blksize = 512, + .attr.mode = S_IFREG | 0777, + })); + + TESTFUSEIN(FUSE_OPEN, open_in); + TESTFUSEOUT1(fuse_open_out, ((struct fuse_open_out) { + .fh = 1, + .open_flags = open_in->flags, + })); + + TESTFUSEINNULL(FUSE_CANONICAL_PATH); + TESTFUSEOUTREAD("ignored", 7); + + TESTFUSEIN(FUSE_READ, read_in); + TESTFUSEOUTREAD(test_data, strlen(test_data)); + + TESTFUSEIN(FUSE_FLUSH, flush_in); + TESTFUSEOUTEMPTY(); + + TESTFUSEIN(FUSE_RELEASE, release_in); + TESTFUSEOUTEMPTY(); + FUSE_DONE + + result = TEST_SUCCESS; +out: + if (!pid) + exit(TEST_FAILURE); + close(fuse_dev); + close(fd); + free(filename); + umount(mount_dir); + return result; +} + +static int bpf_test_real(const char *mount_dir) +{ + const char *test_name = "real"; + const char *test_data = "Weebles wobble but they don't fall down"; + int result = TEST_FAILURE; + int bpf_fd = -1; + int src_fd = -1; + int fuse_dev = -1; + char *filename = NULL; + int fd = -1; + char read_buffer[256] = {}; + ssize_t bytes_read; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TEST(fd = openat(src_fd, test_name, O_CREAT | O_RDWR | O_CLOEXEC, 0777), + fd != -1); + TESTEQUAL(write(fd, test_data, strlen(test_data)), strlen(test_data)); + TESTSYSCALL(close(fd)); + fd = -1; + + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + filename = concat_file_name(mount_dir, test_name); + TESTERR(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + bytes_read = read(fd, read_buffer, strlen(test_data)); + TESTEQUAL(bytes_read, strlen(test_data)); + TESTEQUAL(strcmp(test_data, read_buffer), 0); + TESTEQUAL(bpf_test_trace("read"), 0); + + result = TEST_SUCCESS; +out: + close(fuse_dev); + close(fd); + free(filename); + umount(mount_dir); + close(src_fd); + close(bpf_fd); + return result; +} + + +static int bpf_test_partial(const char *mount_dir) +{ + const char *test_name = "partial"; + int result = TEST_FAILURE; + int bpf_fd = -1; + int src_fd = -1; + int fuse_dev = -1; + char *filename = NULL; + int fd = -1; + int pid = -1; + int status; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(create_file(src_fd, s(test_name), 1, 2), 0); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + FUSE_ACTION + uint8_t data[PAGE_SIZE]; + + TEST(filename = concat_file_name(mount_dir, test_name), + filename); + TESTERR(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(read(fd, data, PAGE_SIZE), PAGE_SIZE); + TESTEQUAL(bpf_test_trace("read"), 0); + TESTCOND(test_buffer(data, PAGE_SIZE, 2, 0)); + TESTCOND(!test_buffer(data, PAGE_SIZE, 1, 0)); + TESTEQUAL(read(fd, data, PAGE_SIZE), PAGE_SIZE); + TESTCOND(test_buffer(data, PAGE_SIZE, 1, 1)); + TESTCOND(!test_buffer(data, PAGE_SIZE, 2, 1)); + TESTSYSCALL(close(fd)); + fd = -1; + FUSE_DAEMON + DECL_FUSE(open); + DECL_FUSE(read); + DECL_FUSE(release); + uint8_t data[PAGE_SIZE]; + + TESTFUSEIN2(FUSE_OPEN | FUSE_POSTFILTER, open_in, open_out); + TESTFUSEOUT1(fuse_open_out, ((struct fuse_open_out) { + .fh = 1, + .open_flags = open_in->flags, + })); + + TESTFUSEIN(FUSE_READ, read_in); + fill_buffer(data, PAGE_SIZE, 2, 0); + TESTFUSEOUTREAD(data, PAGE_SIZE); + + TESTFUSEIN(FUSE_RELEASE, release_in); + TESTFUSEOUTEMPTY(); + FUSE_DONE + + result = TEST_SUCCESS; +out: + if (!pid) + exit(TEST_FAILURE); + close(fuse_dev); + close(fd); + free(filename); + umount(mount_dir); + close(src_fd); + close(bpf_fd); + return result; +} + +static int bpf_test_attrs(const char *mount_dir) +{ + const char *test_name = "partial"; + int result = TEST_FAILURE; + int bpf_fd = -1; + int src_fd = -1; + int fuse_dev = -1; + char *filename = NULL; + struct stat st; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(create_file(src_fd, s(test_name), 1, 2), 0); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TEST(filename = concat_file_name(mount_dir, test_name), filename); + TESTSYSCALL(stat(filename, &st)); + TESTSYSCALL(chmod(filename, 0111)); + TESTSYSCALL(stat(filename, &st)); + TESTEQUAL(st.st_mode & 0777, 0111); + TESTSYSCALL(chmod(filename, 0777)); + TESTSYSCALL(stat(filename, &st)); + TESTEQUAL(st.st_mode & 0777, 0777); + TESTSYSCALL(chown(filename, 5, 6)); + TESTSYSCALL(stat(filename, &st)); + TESTEQUAL(st.st_uid, 5); + TESTEQUAL(st.st_gid, 6); + + result = TEST_SUCCESS; +out: + close(fuse_dev); + free(filename); + umount(mount_dir); + close(src_fd); + close(bpf_fd); + return result; +} + +static int bpf_test_readdir(const char *mount_dir) +{ + const char *names[] = {"real", "partial", "fake", ".", ".."}; + int result = TEST_FAILURE; + int bpf_fd = -1; + int src_fd = -1; + int fuse_dev = -1; + int pid = -1; + int status; + DIR *dir = NULL; + struct dirent *dirent; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(create_file(src_fd, s(names[0]), 1, 2), 0); + TESTEQUAL(create_file(src_fd, s(names[1]), 1, 2), 0); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + FUSE_ACTION + int i, j; + + TEST(dir = s_opendir(s(mount_dir)), dir); + TESTEQUAL(bpf_test_trace("opendir"), 0); + + for (i = 0; i < ARRAY_SIZE(names); ++i) { + TEST(dirent = readdir(dir), dirent); + + for (j = 0; j < ARRAY_SIZE(names); ++j) + if (names[j] && + strcmp(names[j], dirent->d_name) == 0) { + names[j] = NULL; + break; + } + TESTNE(j, ARRAY_SIZE(names)); + } + TEST(dirent = readdir(dir), dirent == NULL); + TESTSYSCALL(closedir(dir)); + dir = NULL; + TESTEQUAL(bpf_test_trace("readdir"), 0); + FUSE_DAEMON + struct fuse_in_header *in_header = + (struct fuse_in_header *)bytes_in; + ssize_t res = read(fuse_dev, bytes_in, sizeof(bytes_in)); + struct fuse_read_out *read_out = + (struct fuse_read_out *) (bytes_in + + sizeof(*in_header) + + sizeof(struct fuse_read_in)); + struct fuse_dirent *fuse_dirent = + (struct fuse_dirent *) (bytes_in + res); + + TESTGE(res, sizeof(*in_header) + sizeof(struct fuse_read_in)); + TESTEQUAL(in_header->opcode, FUSE_READDIR | FUSE_POSTFILTER); + *fuse_dirent = (struct fuse_dirent) { + .ino = 100, + .off = 5, + .namelen = strlen("fake"), + .type = DT_REG, + }; + strcpy((char *)(bytes_in + res + sizeof(*fuse_dirent)), "fake"); + res += FUSE_DIRENT_ALIGN(sizeof(*fuse_dirent) + strlen("fake") + + 1); + TESTFUSEDIROUTREAD(read_out, + bytes_in + + sizeof(struct fuse_in_header) + + sizeof(struct fuse_read_in) + + sizeof(struct fuse_read_out), + res - sizeof(struct fuse_in_header) - + sizeof(struct fuse_read_in) - + sizeof(struct fuse_read_out)); + res = read(fuse_dev, bytes_in, sizeof(bytes_in)); + TESTEQUAL(res, sizeof(*in_header) + + sizeof(struct fuse_read_in) + + sizeof(struct fuse_read_out)); + TESTEQUAL(in_header->opcode, FUSE_READDIR | FUSE_POSTFILTER); + TESTFUSEDIROUTREAD(read_out, bytes_in, 0); + FUSE_DONE + + result = TEST_SUCCESS; +out: + closedir(dir); + close(fuse_dev); + umount(mount_dir); + close(src_fd); + close(bpf_fd); + return result; +} + +static int bpf_test_redact_readdir(const char *mount_dir) +{ + const char *names[] = {"f1", "f2", "f3", "f4", "f5", "f6", ".", ".."}; + int num_shown = (ARRAY_SIZE(names) - 2) / 2 + 2; + int result = TEST_FAILURE; + int bpf_fd = -1; + int src_fd = -1; + int fuse_dev = -1; + int pid = -1; + int status; + DIR *dir = NULL; + struct dirent *dirent; + int i; + int count = 0; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + for (i = 0; i < ARRAY_SIZE(names) - 2; i++) + TESTEQUAL(create_file(src_fd, s(names[i]), 1, 2), 0); + + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_readdir_redact", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + FUSE_ACTION + int j; + + TEST(dir = s_opendir(s(mount_dir)), dir); + while ((dirent = readdir(dir))) { + errno = 0; + TESTEQUAL(errno, 0); + + for (j = 0; j < ARRAY_SIZE(names); ++j) + if (names[j] && + strcmp(names[j], dirent->d_name) == 0) { + names[j] = NULL; + count++; + break; + } + TESTNE(j, ARRAY_SIZE(names)); + TESTGE(num_shown, count); + } + TESTEQUAL(count, num_shown); + TESTSYSCALL(closedir(dir)); + dir = NULL; + FUSE_DAEMON + bool skip = true; + for (int i = 0; i < ARRAY_SIZE(names) + 1; i++) { + uint8_t bytes_in[FUSE_MIN_READ_BUFFER]; + uint8_t bytes_out[FUSE_MIN_READ_BUFFER]; + struct fuse_in_header *in_header = + (struct fuse_in_header *)bytes_in; + ssize_t res = read(fuse_dev, bytes_in, sizeof(bytes_in)); + int length_out = 0; + uint8_t *pos; + uint8_t *dirs_in; + uint8_t *dirs_out; + struct fuse_read_in *fuse_read_in; + struct fuse_read_out *fuse_read_out_in; + struct fuse_read_out *fuse_read_out_out; + struct fuse_dirent *fuse_dirent_in = NULL; + struct fuse_dirent *next = NULL; + bool again = false; + int dir_ent_len = 0; + + TESTGE(res, sizeof(struct fuse_in_header) + + sizeof(struct fuse_read_in) + + sizeof(struct fuse_read_out)); + + pos = bytes_in + sizeof(struct fuse_in_header); + fuse_read_in = (struct fuse_read_in *) pos; + pos += sizeof(*fuse_read_in); + fuse_read_out_in = (struct fuse_read_out *) pos; + pos += sizeof(*fuse_read_out_in); + dirs_in = pos; + + pos = bytes_out + sizeof(struct fuse_out_header); + fuse_read_out_out = (struct fuse_read_out *) pos; + pos += sizeof(*fuse_read_out_out); + dirs_out = pos; + + if (dirs_in < bytes_in + res) { + bool is_dot; + + fuse_dirent_in = (struct fuse_dirent *) dirs_in; + is_dot = (fuse_dirent_in->namelen == 1 && + !strncmp(fuse_dirent_in->name, ".", 1)) || + (fuse_dirent_in->namelen == 2 && + !strncmp(fuse_dirent_in->name, "..", 2)); + + dir_ent_len = FUSE_DIRENT_ALIGN( + sizeof(*fuse_dirent_in) + + fuse_dirent_in->namelen); + + if (dirs_in + dir_ent_len < bytes_in + res) + next = (struct fuse_dirent *) + (dirs_in + dir_ent_len); + + if (!skip || is_dot) { + memcpy(dirs_out, fuse_dirent_in, + sizeof(struct fuse_dirent) + + fuse_dirent_in->namelen); + length_out += dir_ent_len; + } + again = ((skip && !is_dot) && next); + + if (!is_dot) + skip = !skip; + } + + fuse_read_out_out->offset = next ? next->off : + fuse_read_out_in->offset; + fuse_read_out_out->again = again; + + { + struct fuse_out_header *out_header = + (struct fuse_out_header *)bytes_out; + + *out_header = (struct fuse_out_header) { + .len = sizeof(*out_header) + + sizeof(*fuse_read_out_out) + length_out, + .unique = in_header->unique, + }; + TESTEQUAL(write(fuse_dev, bytes_out, out_header->len), + out_header->len); + } + } + FUSE_DONE + + result = TEST_SUCCESS; +out: + closedir(dir); + close(fuse_dev); + umount(mount_dir); + close(src_fd); + close(bpf_fd); + return result; +} + +/* + * This test is more to show what classic fuse does with a creat in a subdir + * than a test of any new functionality + */ +static int bpf_test_creat(const char *mount_dir) +{ + const char *dir_name = "show"; + const char *file_name = "file"; + int result = TEST_FAILURE; + int fuse_dev = -1; + int pid = -1; + int status; + int fd = -1; + + TESTEQUAL(mount_fuse(mount_dir, -1, -1, &fuse_dev), 0); + + FUSE_ACTION + TEST(fd = s_creat(s_path(s_path(s(mount_dir), s(dir_name)), + s(file_name)), + 0777), + fd != -1); + TESTSYSCALL(close(fd)); + FUSE_DAEMON + DECL_FUSE_IN(create); + DECL_FUSE_IN(release); + DECL_FUSE_IN(flush); + + TESTFUSELOOKUP(dir_name, 0); + TESTFUSEOUT1(fuse_entry_out, ((struct fuse_entry_out) { + .nodeid = 3, + .generation = 1, + .attr.ino = 100, + .attr.size = 4, + .attr.blksize = 512, + .attr.mode = S_IFDIR | 0777, + })); + + TESTFUSELOOKUP(file_name, 0); + TESTFUSEOUTERROR(-ENOENT); + + TESTFUSEINEXT(FUSE_CREATE, create_in, strlen(file_name) + 1); + TESTFUSEOUT2(fuse_entry_out, ((struct fuse_entry_out) { + .nodeid = 2, + .generation = 1, + .attr.ino = 200, + .attr.size = 4, + .attr.blksize = 512, + .attr.mode = S_IFREG, + }), + fuse_open_out, ((struct fuse_open_out) { + .fh = 1, + .open_flags = create_in->flags, + })); + + TESTFUSEINNULL(FUSE_CANONICAL_PATH); + TESTFUSEOUTREAD("ignored", 7); + + TESTFUSEIN(FUSE_FLUSH, flush_in); + TESTFUSEOUTEMPTY(); + + TESTFUSEIN(FUSE_RELEASE, release_in); + TESTFUSEOUTEMPTY(); + FUSE_DONE + + result = TEST_SUCCESS; +out: + close(fuse_dev); + umount(mount_dir); + return result; +} + +static int bpf_test_hidden_entries(const char *mount_dir) +{ + static const char * const dir_names[] = { + "show", + "hide", + }; + const char *file_name = "file"; + const char *data = "The quick brown fox jumps over the lazy dog\n"; + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + int fd = -1; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTSYSCALL(mkdirat(src_fd, dir_names[0], 0777)); + TESTSYSCALL(mkdirat(src_fd, dir_names[1], 0777)); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_hidden", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TEST(fd = s_creat(s_path(s_path(s(mount_dir), s(dir_names[0])), + s(file_name)), + 0777), + fd != -1); + TESTSYSCALL(fallocate(fd, 0, 0, 4096)); + TEST(write(fd, data, strlen(data)), strlen(data)); + TESTSYSCALL(close(fd)); + TESTEQUAL(bpf_test_trace("Create"), 0); + + result = TEST_SUCCESS; +out: + close(fuse_dev); + umount(mount_dir); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_dir(const char *mount_dir) +{ + const char *dir_name = "dir"; + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + struct stat st; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TESTSYSCALL(s_mkdir(s_path(s(mount_dir), s(dir_name)), 0777)); + TESTEQUAL(bpf_test_trace("mkdir"), 0); + TESTSYSCALL(s_stat(s_path(s(ft_src), s(dir_name)), &st)); + TESTSYSCALL(s_rmdir(s_path(s(mount_dir), s(dir_name)))); + TESTEQUAL(s_stat(s_path(s(ft_src), s(dir_name)), &st), -1); + TESTEQUAL(errno, ENOENT); + result = TEST_SUCCESS; +out: + close(fuse_dev); + umount(mount_dir); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_file(const char *mount_dir, bool close_first) +{ + const char *file_name = "real"; + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + int fd = -1; + struct stat st; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TEST(fd = s_creat(s_path(s(mount_dir), s(file_name)), + 0777), + fd != -1); + TESTEQUAL(bpf_test_trace("Create"), 0); + if (close_first) { + TESTSYSCALL(close(fd)); + fd = -1; + } + TESTSYSCALL(s_stat(s_path(s(ft_src), s(file_name)), &st)); + TESTSYSCALL(s_unlink(s_path(s(mount_dir), s(file_name)))); + TESTEQUAL(bpf_test_trace("unlink"), 0); + TESTEQUAL(s_stat(s_path(s(ft_src), s(file_name)), &st), -1); + TESTEQUAL(errno, ENOENT); + if (!close_first) { + TESTSYSCALL(close(fd)); + fd = -1; + } + result = TEST_SUCCESS; +out: + close(fd); + close(fuse_dev); + umount(mount_dir); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_file_early_close(const char *mount_dir) +{ + return bpf_test_file(mount_dir, true); +} + +static int bpf_test_file_late_close(const char *mount_dir) +{ + return bpf_test_file(mount_dir, false); +} + +static int bpf_test_alter_errcode_bpf(const char *mount_dir) +{ + const char *dir_name = "dir"; + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + struct stat st; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_error", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TESTSYSCALL(s_mkdir(s_path(s(mount_dir), s(dir_name)), 0777)); + //TESTEQUAL(bpf_test_trace("mkdir"), 0); + TESTSYSCALL(s_stat(s_path(s(ft_src), s(dir_name)), &st)); + TESTEQUAL(s_mkdir(s_path(s(mount_dir), s(dir_name)), 0777), -EPERM); + TESTSYSCALL(s_rmdir(s_path(s(mount_dir), s(dir_name)))); + TESTEQUAL(s_stat(s_path(s(ft_src), s(dir_name)), &st), -1); + TESTEQUAL(errno, ENOENT); + result = TEST_SUCCESS; +out: + close(fuse_dev); + umount(mount_dir); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_alter_errcode_userspace(const char *mount_dir) +{ + const char *dir_name = "doesnotexist"; + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + int pid = -1; + int status; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_error", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + FUSE_ACTION + TESTEQUAL(s_unlink(s_path(s(mount_dir), s(dir_name))), + -1); + TESTEQUAL(errno, ENOMEM); + FUSE_DAEMON + TESTFUSELOOKUP("doesnotexist", FUSE_POSTFILTER); + TESTFUSEOUTERROR(-ENOMEM); + FUSE_DONE + result = TEST_SUCCESS; +out: + close(fuse_dev); + umount(mount_dir); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_mknod(const char *mount_dir) +{ + const char *file_name = "real"; + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + struct stat st; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TESTSYSCALL(s_mkfifo(s_path(s(mount_dir), s(file_name)), 0777)); + TESTEQUAL(bpf_test_trace("mknod"), 0); + TESTSYSCALL(s_stat(s_path(s(ft_src), s(file_name)), &st)); + TESTSYSCALL(s_unlink(s_path(s(mount_dir), s(file_name)))); + TESTEQUAL(bpf_test_trace("unlink"), 0); + TESTEQUAL(s_stat(s_path(s(ft_src), s(file_name)), &st), -1); + TESTEQUAL(errno, ENOENT); + result = TEST_SUCCESS; +out: + close(fuse_dev); + umount(mount_dir); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_largedir(const char *mount_dir) +{ + const char *show = "show"; + const int files = 1000; + + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + struct map_relocation *map_relocations = NULL; + size_t map_count = 0; + int pid = -1; + int status; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("fd_bpf.bpf", "test_daemon", + &bpf_fd, &map_relocations, &map_count), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + FUSE_ACTION + int i; + int fd; + DIR *dir = NULL; + struct dirent *dirent; + + TESTSYSCALL(s_mkdir(s_path(s(mount_dir), s(show)), 0777)); + for (i = 0; i < files; ++i) { + char filename[NAME_MAX]; + + sprintf(filename, "%d", i); + TEST(fd = s_creat(s_path(s_path(s(mount_dir), s(show)), + s(filename)), 0777), fd != -1); + TESTSYSCALL(close(fd)); + } + + TEST(dir = s_opendir(s_path(s(mount_dir), s(show))), dir); + for (dirent = readdir(dir); dirent; dirent = readdir(dir)) + ; + closedir(dir); + FUSE_DAEMON + int i; + + for (i = 0; i < files + 2; ++i) { + TESTFUSELOOKUP(show, FUSE_PREFILTER); + TESTFUSEOUTREAD(show, 5); + } + FUSE_DONE + + result = TEST_SUCCESS; +out: + close(fuse_dev); + umount(mount_dir); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_link(const char *mount_dir) +{ + const char *file_name = "real"; + const char *link_name = "partial"; + int result = TEST_FAILURE; + int fd = -1; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + struct stat st; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", &bpf_fd, NULL, + NULL), + 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TEST(fd = s_creat(s_path(s(mount_dir), s(file_name)), 0777), fd != -1); + TESTEQUAL(bpf_test_trace("Create"), 0); + TESTSYSCALL(s_stat(s_path(s(ft_src), s(file_name)), &st)); + + TESTSYSCALL(s_link(s_path(s(mount_dir), s(file_name)), + s_path(s(mount_dir), s(link_name)))); + + TESTEQUAL(bpf_test_trace("link"), 0); + TESTSYSCALL(s_stat(s_path(s(ft_src), s(link_name)), &st)); + + TESTSYSCALL(s_unlink(s_path(s(mount_dir), s(link_name)))); + TESTEQUAL(bpf_test_trace("unlink"), 0); + TESTEQUAL(s_stat(s_path(s(ft_src), s(link_name)), &st), -1); + TESTEQUAL(errno, ENOENT); + + TESTSYSCALL(s_unlink(s_path(s(mount_dir), s(file_name)))); + TESTEQUAL(bpf_test_trace("unlink"), 0); + TESTEQUAL(s_stat(s_path(s(ft_src), s(file_name)), &st), -1); + TESTEQUAL(errno, ENOENT); + + result = TEST_SUCCESS; +out: + close(fd); + close(fuse_dev); + umount(mount_dir); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_symlink(const char *mount_dir) +{ + const char *test_name = "real"; + const char *symlink_name = "partial"; + const char *test_data = "Weebles wobble but they don't fall down"; + int result = TEST_FAILURE; + int bpf_fd = -1; + int src_fd = -1; + int fuse_dev = -1; + int fd = -1; + char read_buffer[256] = {}; + ssize_t bytes_read; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TEST(fd = openat(src_fd, test_name, O_CREAT | O_RDWR | O_CLOEXEC, 0777), + fd != -1); + TESTEQUAL(write(fd, test_data, strlen(test_data)), strlen(test_data)); + TESTSYSCALL(close(fd)); + fd = -1; + + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TESTSYSCALL(s_symlink(s_path(s(mount_dir), s(test_name)), + s_path(s(mount_dir), s(symlink_name)))); + TESTEQUAL(bpf_test_trace("symlink"), 0); + + TESTERR(fd = s_open(s_path(s(mount_dir), s(symlink_name)), O_RDONLY | O_CLOEXEC), fd != -1); + bytes_read = read(fd, read_buffer, strlen(test_data)); + TESTEQUAL(bpf_test_trace("readlink"), 0); + TESTEQUAL(bytes_read, strlen(test_data)); + TESTEQUAL(strcmp(test_data, read_buffer), 0); + + result = TEST_SUCCESS; +out: + close(fuse_dev); + close(fd); + umount(mount_dir); + close(src_fd); + close(bpf_fd); + return result; +} + +static int bpf_test_xattr(const char *mount_dir) +{ + static const char file_name[] = "real"; + static const char xattr_name[] = "user.xattr_test_name"; + static const char xattr_value[] = "this_is_a_test"; + const size_t xattr_size = sizeof(xattr_value); + char xattr_value_ret[256]; + ssize_t xattr_size_ret; + int result = TEST_FAILURE; + int fd = -1; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + struct stat st; + + memset(xattr_value_ret, '\0', sizeof(xattr_value_ret)); + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", &bpf_fd, NULL, + NULL), + 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TEST(fd = s_creat(s_path(s(mount_dir), s(file_name)), 0777), fd != -1); + TESTEQUAL(bpf_test_trace("Create"), 0); + TESTSYSCALL(close(fd)); + + TESTSYSCALL(s_stat(s_path(s(ft_src), s(file_name)), &st)); + TEST(result = s_getxattr(s_path(s(mount_dir), s(file_name)), xattr_name, + xattr_value_ret, sizeof(xattr_value_ret), + &xattr_size_ret), + result == -1); + TESTEQUAL(errno, ENODATA); + TESTEQUAL(bpf_test_trace("getxattr"), 0); + + TESTSYSCALL(s_listxattr(s_path(s(mount_dir), s(file_name)), + xattr_value_ret, sizeof(xattr_value_ret), + &xattr_size_ret)); + TESTEQUAL(bpf_test_trace("listxattr"), 0); + TESTEQUAL(xattr_size_ret, 0); + + TESTSYSCALL(s_setxattr(s_path(s(mount_dir), s(file_name)), xattr_name, + xattr_value, xattr_size, 0)); + TESTEQUAL(bpf_test_trace("setxattr"), 0); + + TESTSYSCALL(s_listxattr(s_path(s(mount_dir), s(file_name)), + xattr_value_ret, sizeof(xattr_value_ret), + &xattr_size_ret)); + TESTEQUAL(bpf_test_trace("listxattr"), 0); + TESTEQUAL(xattr_size_ret, sizeof(xattr_name)); + TESTEQUAL(strcmp(xattr_name, xattr_value_ret), 0); + + TESTSYSCALL(s_getxattr(s_path(s(mount_dir), s(file_name)), xattr_name, + xattr_value_ret, sizeof(xattr_value_ret), + &xattr_size_ret)); + TESTEQUAL(bpf_test_trace("getxattr"), 0); + TESTEQUAL(xattr_size, xattr_size_ret); + TESTEQUAL(strcmp(xattr_value, xattr_value_ret), 0); + + TESTSYSCALL(s_removexattr(s_path(s(mount_dir), s(file_name)), xattr_name)); + TESTEQUAL(bpf_test_trace("removexattr"), 0); + + TESTEQUAL(s_getxattr(s_path(s(mount_dir), s(file_name)), xattr_name, + xattr_value_ret, sizeof(xattr_value_ret), + &xattr_size_ret), -1); + TESTEQUAL(errno, ENODATA); + + TESTSYSCALL(s_unlink(s_path(s(mount_dir), s(file_name)))); + TESTEQUAL(bpf_test_trace("unlink"), 0); + TESTEQUAL(s_stat(s_path(s(ft_src), s(file_name)), &st), -1); + TESTEQUAL(errno, ENOENT); + + result = TEST_SUCCESS; +out: + close(fuse_dev); + umount(mount_dir); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_set_backing(const char *mount_dir) +{ + const char *backing_name = "backing"; + const char *test_data = "data"; + const char *test_name = "test"; + + int result = TEST_FAILURE; + int fuse_dev = -1; + int fd = -1; + int pid = -1; + int status; + + TESTEQUAL(mount_fuse_no_init(mount_dir, -1, -1, &fuse_dev), 0); + FUSE_ACTION + char data[256] = {0}; + + TESTERR(fd = s_open(s_path(s(mount_dir), s(test_name)), + O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(read(fd, data, strlen(test_data)), strlen(test_data)); + TESTCOND(!strcmp(data, test_data)); + TESTSYSCALL(close(fd)); + fd = -1; + TESTSYSCALL(umount(mount_dir)); + FUSE_DAEMON + int bpf_fd = -1; + int backing_fd = -1; + + TESTERR(backing_fd = s_creat(s_path(s(ft_src), s(backing_name)), 0777), + backing_fd != -1); + TESTEQUAL(write(backing_fd, test_data, strlen(test_data)), + strlen(test_data)); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_simple", + &bpf_fd, NULL, NULL), 0); + + TESTFUSEINIT(); + TESTFUSELOOKUP(test_name, 0); + TESTFUSEOUT2(fuse_entry_out, ((struct fuse_entry_out) {0}), + fuse_entry_bpf_out, ((struct fuse_entry_bpf_out) { + .backing_action = FUSE_ACTION_REPLACE, + .backing_fd = backing_fd, + .bpf_action = FUSE_ACTION_REPLACE, + .bpf_fd = bpf_fd, + })); + read(fuse_dev, bytes_in, sizeof(bytes_in)); + TESTSYSCALL(close(bpf_fd)); + TESTSYSCALL(close(backing_fd)); + FUSE_DONE + + result = TEST_SUCCESS; +out: + if (!pid) + exit(TEST_FAILURE); + close(fuse_dev); + close(fd); + umount(mount_dir); + return result; +} + +static int bpf_test_remove_backing(const char *mount_dir) +{ + const char *folder1 = "folder1"; + const char *folder2 = "folder2"; + const char *file = "file1"; + const char *contents1 = "contents1"; + const char *contents2 = "contents2"; + + int result = TEST_FAILURE; + int fuse_dev = -1; + int fd = -1; + int src_fd = -1; + int bpf_fd = -1; + int pid = -1; + int status; + char data[256] = {0}; + + /* + * Create folder1/file + * folder2/file + * + * test will install bpf into mount + * bpf will postfilter root lookup to daemon + * daemon will remove bpf and redirect opens on folder1 to folder2 + * test will open folder1/file which will be redirected to folder2 + * test will check no traces for file, and contents are folder2/file + */ + TESTEQUAL(bpf_clear_trace(), 0); + TESTSYSCALL(s_mkdir(s_path(s(ft_src), s(folder1)), 0777)); + TEST(fd = s_creat(s_pathn(3, s(ft_src), s(folder1), s(file)), 0777), + fd != -1); + TESTEQUAL(write(fd, contents1, strlen(contents1)), strlen(contents1)); + TESTSYSCALL(close(fd)); + TESTSYSCALL(s_mkdir(s_path(s(ft_src), s(folder2)), 0777)); + TEST(fd = s_creat(s_pathn(3, s(ft_src), s(folder2), s(file)), 0777), + fd != -1); + TESTEQUAL(write(fd, contents2, strlen(contents2)), strlen(contents2)); + TESTSYSCALL(close(fd)); + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_passthrough", &bpf_fd, + NULL, NULL), 0); + TESTEQUAL(mount_fuse_no_init(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + FUSE_ACTION + TESTERR(fd = s_open(s_pathn(3, s(mount_dir), s(folder1), + s(file)), + O_RDONLY | O_CLOEXEC), fd != -1); + TESTEQUAL(read(fd, data, sizeof(data)), strlen(contents2)); + TESTCOND(!strcmp(data, contents2)); + TESTEQUAL(bpf_test_no_trace("file"), 0); + TESTSYSCALL(close(fd)); + fd = -1; + TESTSYSCALL(umount(mount_dir)); + FUSE_DAEMON + struct { + char name[8]; + struct fuse_entry_out feo; + struct fuse_entry_bpf_out febo; + } __attribute__((packed)) in; + int backing_fd = -1; + + TESTFUSEINIT(); + TESTFUSEIN(FUSE_LOOKUP | FUSE_POSTFILTER, &in); + TEST(backing_fd = s_open(s_path(s(ft_src), s(folder2)), + O_DIRECTORY | O_RDONLY | O_CLOEXEC), + backing_fd != -1); + TESTFUSEOUT2(fuse_entry_out, ((struct fuse_entry_out) {0}), + fuse_entry_bpf_out, ((struct fuse_entry_bpf_out) { + .bpf_action = FUSE_ACTION_REMOVE, + .backing_action = FUSE_ACTION_REPLACE, + .backing_fd = backing_fd, + })); + + while (read(fuse_dev, bytes_in, sizeof(bytes_in)) != -1) + ; + TESTSYSCALL(close(backing_fd)); + FUSE_DONE + + result = TEST_SUCCESS; +out: + close(fuse_dev); + close(fd); + close(src_fd); + close(bpf_fd); + umount(mount_dir); + return result; +} + +static int bpf_test_dir_rename(const char *mount_dir) +{ + const char *dir_name = "dir"; + const char *dir_name2 = "dir2"; + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + struct stat st; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TESTSYSCALL(s_mkdir(s_path(s(mount_dir), s(dir_name)), 0777)); + TESTEQUAL(bpf_test_trace("mkdir"), 0); + TESTSYSCALL(s_stat(s_path(s(ft_src), s(dir_name)), &st)); + TESTSYSCALL(s_rename(s_path(s(mount_dir), s(dir_name)), + s_path(s(mount_dir), s(dir_name2)))); + TESTEQUAL(s_stat(s_path(s(ft_src), s(dir_name)), &st), -1); + TESTEQUAL(errno, ENOENT); + TESTSYSCALL(s_stat(s_path(s(ft_src), s(dir_name2)), &st)); + result = TEST_SUCCESS; +out: + close(fuse_dev); + umount(mount_dir); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_file_rename(const char *mount_dir) +{ + const char *dir = "dir"; + const char *file1 = "file1"; + const char *file2 = "file2"; + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + int fd = -1; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TESTSYSCALL(s_mkdir(s_path(s(mount_dir), s(dir)), 0777)); + TEST(fd = s_creat(s_pathn(3, s(mount_dir), s(dir), s(file1)), 0777), + fd != -1); + TESTSYSCALL(s_rename(s_pathn(3, s(mount_dir), s(dir), s(file1)), + s_pathn(3, s(mount_dir), s(dir), s(file2)))); + result = TEST_SUCCESS; +out: + close(fd); + umount(mount_dir); + close(fuse_dev); + close(bpf_fd); + close(src_fd); + return result; +} + +static int mmap_test(const char *mount_dir) +{ + const char *file = "file"; + int result = TEST_FAILURE; + int src_fd = -1; + int fuse_dev = -1; + int fd = -1; + char *addr = NULL; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(mount_fuse(mount_dir, -1, src_fd, &fuse_dev), 0); + TEST(fd = s_open(s_path(s(mount_dir), s(file)), + O_CREAT | O_RDWR | O_CLOEXEC, 0777), + fd != -1); + TESTSYSCALL(fallocate(fd, 0, 4096, SEEK_CUR)); + TEST(addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0), + addr != (void *) -1); + memset(addr, 'a', 4096); + + result = TEST_SUCCESS; +out: + munmap(addr, 4096); + close(fd); + umount(mount_dir); + close(fuse_dev); + close(src_fd); + return result; +} + +static int readdir_perms_test(const char *mount_dir) +{ + int result = TEST_FAILURE; + struct __user_cap_header_struct uchs = { _LINUX_CAPABILITY_VERSION_3 }; + struct __user_cap_data_struct ucds[2]; + int src_fd = -1; + int fuse_dev = -1; + DIR *dir = NULL; + + /* Must remove capabilities for this test. */ + TESTSYSCALL(syscall(SYS_capget, &uchs, ucds)); + ucds[0].effective &= ~(1 << CAP_DAC_OVERRIDE | 1 << CAP_DAC_READ_SEARCH); + TESTSYSCALL(syscall(SYS_capset, &uchs, ucds)); + + /* This is what we are testing in fuseland. First test without fuse, */ + TESTSYSCALL(mkdir("test", 0111)); + TEST(dir = opendir("test"), dir == NULL); + closedir(dir); + dir = NULL; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(mount_fuse(mount_dir, -1, src_fd, &fuse_dev), 0); + + TESTSYSCALL(s_mkdir(s_path(s(mount_dir), s("test")), 0111)); + TEST(dir = s_opendir(s_path(s(mount_dir), s("test"))), dir == NULL); + + result = TEST_SUCCESS; +out: + ucds[0].effective |= 1 << CAP_DAC_OVERRIDE | 1 << CAP_DAC_READ_SEARCH; + syscall(SYS_capset, &uchs, ucds); + + closedir(dir); + s_rmdir(s_path(s(mount_dir), s("test"))); + umount(mount_dir); + close(fuse_dev); + close(src_fd); + rmdir("test"); + return result; +} + +static int inotify_test(const char *mount_dir) +{ + int result = TEST_FAILURE; + int src_fd = -1; + int fuse_dev = -1; + struct s dir; + int inotify_fd = -1; + int watch; + int fd = -1; + char buffer[sizeof(struct inotify_event) + NAME_MAX + 1]; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(mount_fuse(mount_dir, -1, src_fd, &fuse_dev), 0); + + TEST(inotify_fd = inotify_init1(IN_CLOEXEC), inotify_fd != -1); + dir = s_path(s(mount_dir), s("dir")); + TESTSYSCALL(mkdir(dir.s, 0777)); + TEST(watch = inotify_add_watch(inotify_fd, dir.s, IN_CREATE), watch); + TEST(fd = s_creat(s_path(s(ft_src), s("dir/file")), 0777), fd != -1); + // buffer will be two struct lengths, as "file" gets rounded up to the + // next multiple of struct inotify_event + TESTEQUAL(read(inotify_fd, &buffer, sizeof(buffer)), + sizeof(struct inotify_event) * 2); + + result = TEST_SUCCESS; +out: + close(fd); + s_unlink(s_path(s(ft_src), s("dir/file"))); + close(inotify_fd); + rmdir(dir.s); + free(dir.s); + umount(mount_dir); + close(fuse_dev); + close(src_fd); + return result; +} + +static int bpf_test_statfs(const char *mount_dir) +{ + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + int fd = -1; + struct statfs st; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TESTSYSCALL(s_statfs(s(mount_dir), &st)); + TESTEQUAL(bpf_test_trace("statfs"), 0); + TESTEQUAL(st.f_type, 0x65735546); + result = TEST_SUCCESS; +out: + close(fd); + umount(mount_dir); + close(fuse_dev); + close(bpf_fd); + close(src_fd); + return result; +} + +static int bpf_test_lseek(const char *mount_dir) +{ + const char *file = "real"; + const char *test_data = "data"; + int result = TEST_FAILURE; + int src_fd = -1; + int bpf_fd = -1; + int fuse_dev = -1; + int fd = -1; + + TEST(src_fd = open(ft_src, O_DIRECTORY | O_RDONLY | O_CLOEXEC), + src_fd != -1); + TEST(fd = openat(src_fd, file, O_CREAT | O_RDWR | O_CLOEXEC, 0777), + fd != -1); + TESTEQUAL(write(fd, test_data, strlen(test_data)), strlen(test_data)); + TESTSYSCALL(close(fd)); + fd = -1; + TESTEQUAL(install_elf_bpf("test_bpf.bpf", "test_trace", + &bpf_fd, NULL, NULL), 0); + TESTEQUAL(mount_fuse(mount_dir, bpf_fd, src_fd, &fuse_dev), 0); + + TEST(fd = s_open(s_path(s(mount_dir), s(file)), O_RDONLY | O_CLOEXEC), + fd != -1); + TESTEQUAL(lseek(fd, 3, SEEK_SET), 3); + TESTEQUAL(bpf_test_trace("lseek"), 0); + TESTEQUAL(lseek(fd, 5, SEEK_END), 9); + TESTEQUAL(bpf_test_trace("lseek"), 0); + TESTEQUAL(lseek(fd, 1, SEEK_CUR), 10); + TESTEQUAL(bpf_test_trace("lseek"), 0); + TESTEQUAL(lseek(fd, 1, SEEK_DATA), 1); + TESTEQUAL(bpf_test_trace("lseek"), 0); + result = TEST_SUCCESS; +out: + close(fd); + umount(mount_dir); + close(fuse_dev); + close(bpf_fd); + close(src_fd); + return result; +} + + +static int parse_options(int argc, char *const *argv) +{ + signed char c; + + while ((c = getopt(argc, argv, "f:t:v")) != -1) + switch (c) { + case 'f': + test_options.file = strtol(optarg, NULL, 10); + break; + + case 't': + test_options.test = strtol(optarg, NULL, 10); + break; + + case 'v': + test_options.verbose = true; + break; + + default: + return -EINVAL; + } + + return 0; +} + +struct test_case { + int (*pfunc)(const char *dir); + const char *name; +}; + +static void run_one_test(const char *mount_dir, struct test_case *test_case) +{ + ksft_print_msg("Running %s\n", test_case->name); + if (test_case->pfunc(mount_dir) == TEST_SUCCESS) + ksft_test_result_pass("%s\n", test_case->name); + else + ksft_test_result_fail("%s\n", test_case->name); +} + +int main(int argc, char *argv[]) +{ + char *mount_dir = NULL; + char *src_dir = NULL; + int i; + int fd, count; + + if (parse_options(argc, argv)) + ksft_exit_fail_msg("Bad options\n"); + + // Seed randomness pool for testing on QEMU + // NOTE - this abuses the concept of randomness - do *not* ever do this + // on a machine for production use - the device will think it has good + // randomness when it does not. + fd = open("/dev/urandom", O_WRONLY | O_CLOEXEC); + count = 4096; + for (int i = 0; i < 128; ++i) + ioctl(fd, RNDADDTOENTCNT, &count); + close(fd); + + ksft_print_header(); + + if (geteuid() != 0) + ksft_print_msg("Not a root, might fail to mount.\n"); + + if (tracing_on() != TEST_SUCCESS) + ksft_exit_fail_msg("Can't turn on tracing\n"); + + src_dir = setup_mount_dir(ft_src); + mount_dir = setup_mount_dir(ft_dst); + if (src_dir == NULL || mount_dir == NULL) + ksft_exit_fail_msg("Can't create a mount dir\n"); + +#define MAKE_TEST(test) \ + { \ + test, #test \ + } + struct test_case cases[] = { + MAKE_TEST(basic_test), + MAKE_TEST(bpf_test_real), + MAKE_TEST(bpf_test_partial), + MAKE_TEST(bpf_test_attrs), + MAKE_TEST(bpf_test_readdir), + MAKE_TEST(bpf_test_creat), + MAKE_TEST(bpf_test_hidden_entries), + MAKE_TEST(bpf_test_dir), + MAKE_TEST(bpf_test_file_early_close), + MAKE_TEST(bpf_test_file_late_close), + MAKE_TEST(bpf_test_mknod), + MAKE_TEST(bpf_test_largedir), + MAKE_TEST(bpf_test_link), + MAKE_TEST(bpf_test_symlink), + MAKE_TEST(bpf_test_xattr), + MAKE_TEST(bpf_test_redact_readdir), + MAKE_TEST(bpf_test_set_backing), + MAKE_TEST(bpf_test_remove_backing), + MAKE_TEST(bpf_test_dir_rename), + MAKE_TEST(bpf_test_file_rename), + MAKE_TEST(bpf_test_alter_errcode_bpf), + MAKE_TEST(bpf_test_alter_errcode_userspace), + MAKE_TEST(mmap_test), + MAKE_TEST(readdir_perms_test), + MAKE_TEST(inotify_test), + MAKE_TEST(bpf_test_statfs), + MAKE_TEST(bpf_test_lseek), + }; +#undef MAKE_TEST + + if (test_options.test) { + if (test_options.test <= 0 || + test_options.test > ARRAY_SIZE(cases)) + ksft_exit_fail_msg("Invalid test\n"); + + ksft_set_plan(1); + delete_dir_tree(mount_dir, false); + delete_dir_tree(src_dir, false); + run_one_test(mount_dir, &cases[test_options.test - 1]); + } else { + ksft_set_plan(ARRAY_SIZE(cases)); + for (i = 0; i < ARRAY_SIZE(cases); ++i) { + delete_dir_tree(mount_dir, false); + delete_dir_tree(src_dir, false); + run_one_test(mount_dir, &cases[i]); + } + } + + umount2(mount_dir, MNT_FORCE); + delete_dir_tree(mount_dir, true); + delete_dir_tree(src_dir, true); + return !ksft_get_fail_cnt() ? ksft_exit_pass() : ksft_exit_fail(); +} diff --git a/tools/testing/selftests/filesystems/fuse/test_bpf.c b/tools/testing/selftests/filesystems/fuse/test_bpf.c new file mode 100644 index 0000000000000000000000000000000000000000..3f4a9e727b6832cafbca0afd112248e9137ab42a --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/test_bpf.c @@ -0,0 +1,516 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2021 Google LLC + +#define __EXPORTED_HEADERS__ + +#include +#include +#include + +#include + +#define SEC(NAME) __attribute__((section(NAME), used)) + +static long (*bpf_trace_printk)(const char *fmt, __u32 fmt_size, ...) + = (void *) 6; + +#define bpf_printk(fmt, ...) \ + ({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ + }) + +SEC("dummy") + +inline int strcmp(const char *a, const char *b) +{ + int i; + + for (i = 0; i < __builtin_strlen(b) + 1; ++i) + if (a[i] != b[i]) + return -1; + + return 0; +} +SEC("test_readdir_redact") +/* return FUSE_BPF_BACKING to use backing fs, 0 to pass to usermode */ +int readdir_test(struct fuse_bpf_args *fa) +{ + switch (fa->opcode) { + case FUSE_READDIR | FUSE_PREFILTER: { + const struct fuse_read_in *fri = fa->in_args[0].value; + + bpf_printk("readdir %d", fri->fh); + return FUSE_BPF_BACKING | FUSE_BPF_POST_FILTER; + } + + case FUSE_READDIR | FUSE_POSTFILTER: { + const struct fuse_read_in *fri = fa->in_args[0].value; + + bpf_printk("readdir postfilter %x", fri->fh); + return FUSE_BPF_USER_FILTER; + } + + default: + bpf_printk("opcode %d", fa->opcode); + return FUSE_BPF_BACKING; + } +} +SEC("test_trace") + +/* return FUSE_BPF_BACKING to use backing fs, 0 to pass to usermode */ +int trace_test(struct fuse_bpf_args *fa) +{ + switch (fa->opcode) { + case FUSE_LOOKUP | FUSE_PREFILTER: { + /* real and partial use backing file */ + const char *name = fa->in_args[0].value; + bool backing = false; + + if (strcmp(name, "real") == 0 || strcmp(name, "partial") == 0) + backing = true; + + if (strcmp(name, "dir") == 0) + backing = true; + if (strcmp(name, "dir2") == 0) + backing = true; + + if (strcmp(name, "file1") == 0) + backing = true; + if (strcmp(name, "file2") == 0) + backing = true; + + bpf_printk("lookup %s %d", name, backing); + return backing ? FUSE_BPF_BACKING | FUSE_BPF_POST_FILTER : 0; + } + + case FUSE_LOOKUP | FUSE_POSTFILTER: { + const char *name = fa->in_args[0].value; + struct fuse_entry_out *feo = fa->out_args[0].value; + + if (strcmp(name, "real") == 0) + feo->nodeid = 5; + else if (strcmp(name, "partial") == 0) + feo->nodeid = 6; + + bpf_printk("post-lookup %s %d", name, feo->nodeid); + return 0; + } + + case FUSE_ACCESS | FUSE_PREFILTER: { + bpf_printk("Access: %d", fa->nodeid); + return FUSE_BPF_BACKING; + } + + case FUSE_CREATE | FUSE_PREFILTER: + bpf_printk("Create: %d", fa->nodeid); + return FUSE_BPF_BACKING; + + case FUSE_MKNOD | FUSE_PREFILTER: { + const struct fuse_mknod_in *fmi = fa->in_args[0].value; + const char *name = fa->in_args[1].value; + + bpf_printk("mknod %s %x %x", name, fmi->rdev | fmi->mode, fmi->umask); + return FUSE_BPF_BACKING; + } + + case FUSE_MKDIR | FUSE_PREFILTER: { + const struct fuse_mkdir_in *fmi = fa->in_args[0].value; + const char *name = fa->in_args[1].value; + + bpf_printk("mkdir %s %x %x", name, fmi->mode, fmi->umask); + return FUSE_BPF_BACKING; + } + + case FUSE_RMDIR | FUSE_PREFILTER: { + const char *name = fa->in_args[0].value; + + bpf_printk("rmdir %s", name); + return FUSE_BPF_BACKING; + } + + case FUSE_RENAME | FUSE_PREFILTER: { + const char *oldname = fa->in_args[1].value; + const char *newname = fa->in_args[2].value; + + bpf_printk("rename from %s", oldname); + bpf_printk("rename to %s", newname); + return FUSE_BPF_BACKING; + } + + case FUSE_RENAME2 | FUSE_PREFILTER: { + const struct fuse_rename2_in *fri = fa->in_args[0].value; + uint32_t flags = fri->flags; + const char *oldname = fa->in_args[1].value; + const char *newname = fa->in_args[2].value; + + bpf_printk("rename(%x) from %s", flags, oldname); + bpf_printk("rename to %s", newname); + return FUSE_BPF_BACKING; + } + + case FUSE_UNLINK | FUSE_PREFILTER: { + const char *name = fa->in_args[0].value; + + bpf_printk("unlink %s", name); + return FUSE_BPF_BACKING; + } + + case FUSE_LINK | FUSE_PREFILTER: { + const struct fuse_link_in *fli = fa->in_args[0].value; + const char *link_name = fa->in_args[1].value; + + bpf_printk("link %d %s", fli->oldnodeid, link_name); + return FUSE_BPF_BACKING; + } + + case FUSE_SYMLINK | FUSE_PREFILTER: { + const char *link_name = fa->in_args[0].value; + const char *link_dest = fa->in_args[1].value; + + bpf_printk("symlink from %s", link_name); + bpf_printk("symlink to %s", link_dest); + return FUSE_BPF_BACKING; + } + + case FUSE_READLINK | FUSE_PREFILTER: { + const char *link_name = fa->in_args[0].value; + + bpf_printk("readlink from", link_name); + return FUSE_BPF_BACKING; + } + + case FUSE_OPEN | FUSE_PREFILTER: { + int backing = 0; + + switch (fa->nodeid) { + case 5: + backing = FUSE_BPF_BACKING; + break; + + case 6: + backing = FUSE_BPF_BACKING | FUSE_BPF_POST_FILTER; + break; + + default: + break; + } + + bpf_printk("open %d %d", fa->nodeid, backing); + return backing; + } + + case FUSE_OPEN | FUSE_POSTFILTER: + bpf_printk("open postfilter"); + return FUSE_BPF_USER_FILTER; + + case FUSE_READ | FUSE_PREFILTER: { + const struct fuse_read_in *fri = fa->in_args[0].value; + + bpf_printk("read %llu %llu", fri->fh, fri->offset); + if (fri->fh == 1 && fri->offset == 0) + return 0; + return FUSE_BPF_BACKING; + } + + case FUSE_GETATTR | FUSE_PREFILTER: { + /* real and partial use backing file */ + int backing = 0; + + switch (fa->nodeid) { + case 1: + case 5: + case 6: + /* + * TODO: Find better solution + * Add 100 to stop clang compiling to jump table which bpf hates + */ + case 100: + backing = FUSE_BPF_BACKING; + break; + } + + bpf_printk("getattr %d %d", fa->nodeid, backing); + return backing; + } + + case FUSE_SETATTR | FUSE_PREFILTER: { + /* real and partial use backing file */ + int backing = 0; + + switch (fa->nodeid) { + case 1: + case 5: + case 6: + /* TODO See above */ + case 100: + backing = FUSE_BPF_BACKING; + break; + } + + bpf_printk("setattr %d %d", fa->nodeid, backing); + return backing; + } + + case FUSE_OPENDIR | FUSE_PREFILTER: { + int backing = 0; + + switch (fa->nodeid) { + case 1: + backing = FUSE_BPF_BACKING | FUSE_BPF_POST_FILTER; + break; + } + + bpf_printk("opendir %d %d", fa->nodeid, backing); + return backing; + } + + case FUSE_OPENDIR | FUSE_POSTFILTER: { + struct fuse_open_out *foo = fa->out_args[0].value; + + foo->fh = 2; + bpf_printk("opendir postfilter"); + return 0; + } + + case FUSE_READDIR | FUSE_PREFILTER: { + const struct fuse_read_in *fri = fa->in_args[0].value; + int backing = 0; + + if (fri->fh == 2) + backing = FUSE_BPF_BACKING | FUSE_BPF_POST_FILTER; + + bpf_printk("readdir %d %d", fri->fh, backing); + return backing; + } + + case FUSE_READDIR | FUSE_POSTFILTER: { + const struct fuse_read_in *fri = fa->in_args[0].value; + int backing = 0; + + if (fri->fh == 2) + backing = FUSE_BPF_USER_FILTER | FUSE_BPF_BACKING | + FUSE_BPF_POST_FILTER; + + bpf_printk("readdir postfilter %d %d", fri->fh, backing); + return backing; + } + + case FUSE_FLUSH | FUSE_PREFILTER: { + const struct fuse_flush_in *ffi = fa->in_args[0].value; + + bpf_printk("Flush %d", ffi->fh); + return FUSE_BPF_BACKING; + } + + case FUSE_GETXATTR | FUSE_PREFILTER: { + const struct fuse_flush_in *ffi = fa->in_args[0].value; + const char *name = fa->in_args[1].value; + + bpf_printk("getxattr %d %s", ffi->fh, name); + return FUSE_BPF_BACKING; + } + + case FUSE_LISTXATTR | FUSE_PREFILTER: { + const struct fuse_flush_in *ffi = fa->in_args[0].value; + const char *name = fa->in_args[1].value; + + bpf_printk("listxattr %d %s", ffi->fh, name); + return FUSE_BPF_BACKING; + } + + case FUSE_SETXATTR | FUSE_PREFILTER: { + const struct fuse_flush_in *ffi = fa->in_args[0].value; + const char *name = fa->in_args[1].value; + unsigned int size = fa->in_args[2].size; + + bpf_printk("setxattr %d %s %u", ffi->fh, name, size); + return FUSE_BPF_BACKING; + } + + case FUSE_REMOVEXATTR | FUSE_PREFILTER: { + const char *name = fa->in_args[0].value; + + bpf_printk("removexattr %s", name); + return FUSE_BPF_BACKING; + } + + case FUSE_CANONICAL_PATH | FUSE_PREFILTER: { + bpf_printk("canonical_path"); + return FUSE_BPF_BACKING; + } + + case FUSE_STATFS | FUSE_PREFILTER: { + bpf_printk("statfs"); + return FUSE_BPF_BACKING; + } + + case FUSE_LSEEK | FUSE_PREFILTER: { + const struct fuse_lseek_in *fli = fa->in_args[0].value; + + bpf_printk("lseek type:%d, offset:%lld", fli->whence, fli->offset); + return FUSE_BPF_BACKING; + } + + default: + bpf_printk("Unknown opcode %d", fa->opcode); + return 0; + } +} + +SEC("test_hidden") + +int trace_hidden(struct fuse_bpf_args *fa) +{ + switch (fa->opcode) { + case FUSE_LOOKUP | FUSE_PREFILTER: { + const char *name = fa->in_args[0].value; + + bpf_printk("Lookup: %s", name); + if (!strcmp(name, "show")) + return FUSE_BPF_BACKING; + if (!strcmp(name, "hide")) + return -ENOENT; + + return FUSE_BPF_BACKING; + } + + case FUSE_ACCESS | FUSE_PREFILTER: { + bpf_printk("Access: %d", fa->nodeid); + return FUSE_BPF_BACKING; + } + + case FUSE_CREATE | FUSE_PREFILTER: + bpf_printk("Create: %d", fa->nodeid); + return FUSE_BPF_BACKING; + + case FUSE_WRITE | FUSE_PREFILTER: + // TODO: Clang combines similar printk calls, causing BPF to complain + // bpf_printk("Write: %d", fa->nodeid); + return FUSE_BPF_BACKING; + + case FUSE_FLUSH | FUSE_PREFILTER: { + // const struct fuse_flush_in *ffi = fa->in_args[0].value; + + // bpf_printk("Flush %d", ffi->fh); + return FUSE_BPF_BACKING; + } + + case FUSE_RELEASE | FUSE_PREFILTER: { + // const struct fuse_release_in *fri = fa->in_args[0].value; + + // bpf_printk("Release %d", fri->fh); + return FUSE_BPF_BACKING; + } + + case FUSE_FALLOCATE | FUSE_PREFILTER: + // bpf_printk("fallocate %d", fa->nodeid); + return FUSE_BPF_BACKING; + + case FUSE_CANONICAL_PATH | FUSE_PREFILTER: { + return FUSE_BPF_BACKING; + } + default: + bpf_printk("Unknown opcode: %d", fa->opcode); + return 0; + } +} + +SEC("test_simple") +int trace_simple(struct fuse_bpf_args *fa) +{ + if (fa->opcode & FUSE_PREFILTER) + bpf_printk("prefilter opcode: %d", + fa->opcode & FUSE_OPCODE_FILTER); + else if (fa->opcode & FUSE_POSTFILTER) + bpf_printk("postfilter opcode: %d", + fa->opcode & FUSE_OPCODE_FILTER); + else + bpf_printk("*** UNKNOWN *** opcode: %d", fa->opcode); + return FUSE_BPF_BACKING; +} + +SEC("test_passthrough") +int trace_daemon(struct fuse_bpf_args *fa) +{ + switch (fa->opcode) { + case FUSE_LOOKUP | FUSE_PREFILTER: { + const char *name = fa->in_args[0].value; + + bpf_printk("Lookup prefilter: %lx %s", fa->nodeid, name); + return FUSE_BPF_BACKING | FUSE_BPF_POST_FILTER; + } + + case FUSE_LOOKUP | FUSE_POSTFILTER: { + const char *name = fa->in_args[0].value; + struct fuse_entry_bpf_out *febo = fa->out_args[1].value; + + bpf_printk("Lookup postfilter: %lx %s %lu", fa->nodeid, name); + febo->bpf_action = FUSE_ACTION_REMOVE; + + return FUSE_BPF_USER_FILTER; + } + + default: + if (fa->opcode & FUSE_PREFILTER) + bpf_printk("prefilter opcode: %d", + fa->opcode & FUSE_OPCODE_FILTER); + else if (fa->opcode & FUSE_POSTFILTER) + bpf_printk("postfilter opcode: %d", + fa->opcode & FUSE_OPCODE_FILTER); + else + bpf_printk("*** UNKNOWN *** opcode: %d", fa->opcode); + return FUSE_BPF_BACKING; + } +} + +SEC("test_error") + +/* return FUSE_BPF_BACKING to use backing fs, 0 to pass to usermode */ +int error_test(struct fuse_bpf_args *fa) +{ + switch (fa->opcode) { + case FUSE_MKDIR | FUSE_PREFILTER: { + bpf_printk("mkdir"); + return FUSE_BPF_BACKING | FUSE_BPF_POST_FILTER; + } + case FUSE_MKDIR | FUSE_POSTFILTER: { + bpf_printk("mkdir postfilter"); + if (fa->error_in == -EEXIST) + return -EPERM; + + return 0; + } + + case FUSE_LOOKUP | FUSE_PREFILTER: { + const char *name = fa->in_args[0].value; + + bpf_printk("lookup prefilter %s", name); + return FUSE_BPF_BACKING | FUSE_BPF_POST_FILTER; + } + case FUSE_LOOKUP | FUSE_POSTFILTER: { + const char *name = fa->in_args[0].value; + + bpf_printk("lookup postfilter %s %d", name, fa->error_in); + if (strcmp(name, "doesnotexist") == 0/* && fa->error_in == -EEXIST*/) { + bpf_printk("lookup postfilter doesnotexist"); + return FUSE_BPF_USER_FILTER; + } + bpf_printk("meh"); + return 0; + } + + default: + if (fa->opcode & FUSE_PREFILTER) + bpf_printk("prefilter opcode: %d", + fa->opcode & FUSE_OPCODE_FILTER); + else if (fa->opcode & FUSE_POSTFILTER) + bpf_printk("postfilter opcode: %d", + fa->opcode & FUSE_OPCODE_FILTER); + else + bpf_printk("*** UNKNOWN *** opcode: %d", fa->opcode); + return FUSE_BPF_BACKING; + } +} + diff --git a/tools/testing/selftests/filesystems/fuse/test_framework.h b/tools/testing/selftests/filesystems/fuse/test_framework.h new file mode 100644 index 0000000000000000000000000000000000000000..945f141e9a667a76cf01d874a3d4ac67aee36964 --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/test_framework.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2021 Google LLC + */ + +#ifndef _TEST_FRAMEWORK_H +#define _TEST_FRAMEWORK_H + +#include +#include + +#ifdef __ANDROID__ +static int test_case_pass; +static int test_case_fail; +#define ksft_print_msg printf +#define ksft_test_result_pass(...) ({test_case_pass++; printf(__VA_ARGS__); }) +#define ksft_test_result_fail(...) ({test_case_fail++; printf(__VA_ARGS__); }) +#define ksft_exit_fail_msg(...) printf(__VA_ARGS__) +#define ksft_print_header() +#define ksft_set_plan(cnt) +#define ksft_get_fail_cnt() test_case_fail +#define ksft_exit_pass() 0 +#define ksft_exit_fail() 1 +#else +#include +#endif + +#define TEST_FAILURE 1 +#define TEST_SUCCESS 0 + +#define ptr_to_u64(p) ((__u64)p) + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define le16_to_cpu(x) (x) +#define le32_to_cpu(x) (x) +#define le64_to_cpu(x) (x) +#else +#error Big endian not supported! +#endif + +struct _test_options { + int file; + int test; + bool verbose; +}; + +extern struct _test_options test_options; + +#define TESTCOND(condition) \ + do { \ + if (!(condition)) { \ + ksft_print_msg("%s failed %d\n", \ + __func__, __LINE__); \ + goto out; \ + } else if (test_options.verbose) \ + ksft_print_msg("%s succeeded %d\n", \ + __func__, __LINE__); \ + } while (false) + +#define TESTCONDERR(condition) \ + do { \ + if (!(condition)) { \ + ksft_print_msg("%s failed %d\n", \ + __func__, __LINE__); \ + ksft_print_msg("Error %d (\"%s\")\n", \ + errno, strerror(errno)); \ + goto out; \ + } else if (test_options.verbose) \ + ksft_print_msg("%s succeeded %d\n", \ + __func__, __LINE__); \ + } while (false) + +#define TEST(statement, condition) \ + do { \ + statement; \ + TESTCOND(condition); \ + } while (false) + +#define TESTERR(statement, condition) \ + do { \ + statement; \ + TESTCONDERR(condition); \ + } while (false) + +enum _operator { + _eq, + _ne, + _ge, +}; + +static const char * const _operator_name[] = { + "==", + "!=", + ">=", +}; + +#define _TEST_OPERATOR(name, _type, format_specifier) \ +static inline int _test_operator_##name(const char *func, int line, \ + _type a, _type b, enum _operator o) \ +{ \ + bool pass; \ + switch (o) { \ + case _eq: pass = a == b; break; \ + case _ne: pass = a != b; break; \ + case _ge: pass = a >= b; break; \ + } \ + \ + if (!pass) \ + ksft_print_msg("Failed: %s at line %d, " \ + format_specifier " %s " \ + format_specifier "\n", \ + func, line, a, _operator_name[o], b); \ + else if (test_options.verbose) \ + ksft_print_msg("Passed: %s at line %d, " \ + format_specifier " %s " \ + format_specifier "\n", \ + func, line, a, _operator_name[o], b); \ + \ + return pass ? TEST_SUCCESS : TEST_FAILURE; \ +} + +_TEST_OPERATOR(i, int, "%d") +_TEST_OPERATOR(ui, unsigned int, "%u") +_TEST_OPERATOR(lui, unsigned long, "%lu") +_TEST_OPERATOR(ss, ssize_t, "%zd") +_TEST_OPERATOR(vp, void *, "%px") +_TEST_OPERATOR(cp, char *, "%px") + +#define _CALL_TO(_type, name, a, b, o) \ + _type:_test_operator_##name(__func__, __LINE__, \ + (_type) (long long) (a), \ + (_type) (long long) (b), o) + +#define TESTOPERATOR(a, b, o) \ + do { \ + if (_Generic((a), \ + _CALL_TO(int, i, a, b, o), \ + _CALL_TO(unsigned int, ui, a, b, o), \ + _CALL_TO(unsigned long, lui, a, b, o), \ + _CALL_TO(ssize_t, ss, a, b, o), \ + _CALL_TO(void *, vp, a, b, o), \ + _CALL_TO(char *, cp, a, b, o) \ + )) \ + goto out; \ + } while (false) + +#define TESTEQUAL(a, b) TESTOPERATOR(a, b, _eq) +#define TESTNE(a, b) TESTOPERATOR(a, b, _ne) +#define TESTGE(a, b) TESTOPERATOR(a, b, _ge) + +/* For testing a syscall that returns 0 on success and sets errno otherwise */ +#define TESTSYSCALL(statement) TESTCONDERR((statement) == 0) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) + +static inline void print_bytes(const void *data, size_t size) +{ + const char *bytes = data; + int i; + + for (i = 0; i < size; ++i) { + if (i % 0x10 == 0) + printf("%08x:", i); + printf("%02x ", (unsigned int) (unsigned char) bytes[i]); + if (i % 0x10 == 0x0f) + printf("\n"); + } + + if (i % 0x10 != 0) + printf("\n"); +} + + + +#endif diff --git a/tools/testing/selftests/filesystems/fuse/test_fuse.h b/tools/testing/selftests/filesystems/fuse/test_fuse.h new file mode 100644 index 0000000000000000000000000000000000000000..2382d16edcc598ddcb726ad76690459493e06701 --- /dev/null +++ b/tools/testing/selftests/filesystems/fuse/test_fuse.h @@ -0,0 +1,320 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2021 Google LLC + */ + +#ifndef TEST_FUSE__H +#define TEST_FUSE__H + +#define _GNU_SOURCE + +#include "test_framework.h" + +#include +#include +#include +#include + +#define PAGE_SIZE 4096 +#define FUSE_POSTFILTER 0x20000 + +extern struct _test_options test_options; + +/* Slow but semantically easy string functions */ + +/* + * struct s just wraps a char pointer + * It is a pointer to a malloc'd string, or null + * All consumers handle null input correctly + * All consumers free the string + */ +struct s { + char *s; +}; + +struct s s(const char *s1); +struct s sn(const char *s1, const char *s2); +int s_cmp(struct s s1, struct s s2); +struct s s_cat(struct s s1, struct s s2); +struct s s_splitleft(struct s s1, char c); +struct s s_splitright(struct s s1, char c); +struct s s_word(struct s s1, char c, size_t n); +struct s s_path(struct s s1, struct s s2); +struct s s_pathn(size_t n, struct s s1, ...); +int s_link(struct s src_pathname, struct s dst_pathname); +int s_symlink(struct s src_pathname, struct s dst_pathname); +int s_mkdir(struct s pathname, mode_t mode); +int s_rmdir(struct s pathname); +int s_unlink(struct s pathname); +int s_open(struct s pathname, int flags, ...); +int s_openat(int dirfd, struct s pathname, int flags, ...); +int s_creat(struct s pathname, mode_t mode); +int s_mkfifo(struct s pathname, mode_t mode); +int s_stat(struct s pathname, struct stat *st); +int s_statfs(struct s pathname, struct statfs *st); +DIR *s_opendir(struct s pathname); +int s_getxattr(struct s pathname, const char name[], void *value, size_t size, + ssize_t *ret_size); +int s_listxattr(struct s pathname, void *list, size_t size, ssize_t *ret_size); +int s_setxattr(struct s pathname, const char name[], const void *value, + size_t size, int flags); +int s_removexattr(struct s pathname, const char name[]); +int s_rename(struct s oldpathname, struct s newpathname); + +struct s tracing_folder(void); +int tracing_on(void); + +char *concat_file_name(const char *dir, const char *file); +char *setup_mount_dir(const char *name); +int delete_dir_tree(const char *dir_path, bool remove_root); + +#define TESTFUSEINNULL(_opcode) \ + do { \ + struct fuse_in_header *in_header = \ + (struct fuse_in_header *)bytes_in; \ + ssize_t res = read(fuse_dev, &bytes_in, \ + sizeof(bytes_in)); \ + \ + TESTEQUAL(in_header->opcode, _opcode); \ + TESTEQUAL(res, sizeof(*in_header)); \ + } while (false) + +#define TESTFUSEIN(_opcode, in_struct) \ + do { \ + struct fuse_in_header *in_header = \ + (struct fuse_in_header *)bytes_in; \ + ssize_t res = read(fuse_dev, &bytes_in, \ + sizeof(bytes_in)); \ + \ + TESTEQUAL(in_header->opcode, _opcode); \ + TESTEQUAL(res, sizeof(*in_header) + sizeof(*in_struct));\ + } while (false) + +#define TESTFUSEIN2(_opcode, in_struct1, in_struct2) \ + do { \ + struct fuse_in_header *in_header = \ + (struct fuse_in_header *)bytes_in; \ + ssize_t res = read(fuse_dev, &bytes_in, \ + sizeof(bytes_in)); \ + \ + TESTEQUAL(in_header->opcode, _opcode); \ + TESTEQUAL(res, sizeof(*in_header) + sizeof(*in_struct1) \ + + sizeof(*in_struct2)); \ + in_struct1 = (void *)(bytes_in + sizeof(*in_header)); \ + in_struct2 = (void *)(bytes_in + sizeof(*in_header) \ + + sizeof(*in_struct1)); \ + } while (false) + +#define TESTFUSEINEXT(_opcode, in_struct, extra) \ + do { \ + struct fuse_in_header *in_header = \ + (struct fuse_in_header *)bytes_in; \ + ssize_t res = read(fuse_dev, &bytes_in, \ + sizeof(bytes_in)); \ + \ + TESTEQUAL(in_header->opcode, _opcode); \ + TESTEQUAL(res, \ + sizeof(*in_header) + sizeof(*in_struct) + extra);\ + } while (false) + +#define TESTFUSEINUNKNOWN() \ + do { \ + struct fuse_in_header *in_header = \ + (struct fuse_in_header *)bytes_in; \ + ssize_t res = read(fuse_dev, &bytes_in, \ + sizeof(bytes_in)); \ + \ + TESTGE(res, sizeof(*in_header)); \ + TESTEQUAL(in_header->opcode, -1); \ + } while (false) + +/* Special case lookup since it is asymmetric */ +#define TESTFUSELOOKUP(expected, filter) \ + do { \ + struct fuse_in_header *in_header = \ + (struct fuse_in_header *)bytes_in; \ + char *name = (char *) (bytes_in + sizeof(*in_header)); \ + ssize_t res; \ + \ + TEST(res = read(fuse_dev, &bytes_in, sizeof(bytes_in)), \ + res != -1); \ + /* TODO once we handle forgets properly, remove */ \ + if (in_header->opcode == FUSE_FORGET) \ + continue; \ + if (in_header->opcode == FUSE_BATCH_FORGET) \ + continue; \ + TESTGE(res, sizeof(*in_header)); \ + TESTEQUAL(in_header->opcode, \ + FUSE_LOOKUP | filter); \ + TESTEQUAL(res, \ + sizeof(*in_header) + strlen(expected) + 1 + \ + (filter == FUSE_POSTFILTER ? \ + sizeof(struct fuse_entry_out) + \ + sizeof(struct fuse_entry_bpf_out) : 0));\ + TESTCOND(!strcmp(name, expected)); \ + break; \ + } while (true) + +#define TESTFUSEOUTEMPTY() \ + do { \ + struct fuse_in_header *in_header = \ + (struct fuse_in_header *)bytes_in; \ + struct fuse_out_header *out_header = \ + (struct fuse_out_header *)bytes_out; \ + \ + *out_header = (struct fuse_out_header) { \ + .len = sizeof(*out_header), \ + .unique = in_header->unique, \ + }; \ + TESTEQUAL(write(fuse_dev, bytes_out, out_header->len), \ + out_header->len); \ + } while (false) + +#define TESTFUSEOUTERROR(errno) \ + do { \ + struct fuse_in_header *in_header = \ + (struct fuse_in_header *)bytes_in; \ + struct fuse_out_header *out_header = \ + (struct fuse_out_header *)bytes_out; \ + \ + *out_header = (struct fuse_out_header) { \ + .len = sizeof(*out_header), \ + .error = errno, \ + .unique = in_header->unique, \ + }; \ + TESTEQUAL(write(fuse_dev, bytes_out, out_header->len), \ + out_header->len); \ + } while (false) + +#define TESTFUSEOUTREAD(data, length) \ + do { \ + struct fuse_in_header *in_header = \ + (struct fuse_in_header *)bytes_in; \ + struct fuse_out_header *out_header = \ + (struct fuse_out_header *)bytes_out; \ + \ + *out_header = (struct fuse_out_header) { \ + .len = sizeof(*out_header) + length, \ + .unique = in_header->unique, \ + }; \ + memcpy(bytes_out + sizeof(*out_header), data, length); \ + TESTEQUAL(write(fuse_dev, bytes_out, out_header->len), \ + out_header->len); \ + } while (false) + +#define TESTFUSEDIROUTREAD(read_out, data, length) \ + do { \ + struct fuse_in_header *in_header = \ + (struct fuse_in_header *)bytes_in; \ + struct fuse_out_header *out_header = \ + (struct fuse_out_header *)bytes_out; \ + \ + *out_header = (struct fuse_out_header) { \ + .len = sizeof(*out_header) + \ + sizeof(*read_out) + length, \ + .unique = in_header->unique, \ + }; \ + memcpy(bytes_out + sizeof(*out_header) + \ + sizeof(*read_out), data, length); \ + memcpy(bytes_out + sizeof(*out_header), \ + read_out, sizeof(*read_out)); \ + TESTEQUAL(write(fuse_dev, bytes_out, out_header->len), \ + out_header->len); \ + } while (false) + +#define TESTFUSEOUT1(type1, obj1) \ + do { \ + *(struct fuse_out_header *) bytes_out \ + = (struct fuse_out_header) { \ + .len = sizeof(struct fuse_out_header) \ + + sizeof(struct type1), \ + .unique = ((struct fuse_in_header *) \ + bytes_in)->unique, \ + }; \ + *(struct type1 *) (bytes_out \ + + sizeof(struct fuse_out_header)) \ + = obj1; \ + TESTEQUAL(write(fuse_dev, bytes_out, \ + ((struct fuse_out_header *)bytes_out)->len), \ + ((struct fuse_out_header *)bytes_out)->len); \ + } while (false) + +#define TESTFUSEOUT2(type1, obj1, type2, obj2) \ + do { \ + *(struct fuse_out_header *) bytes_out \ + = (struct fuse_out_header) { \ + .len = sizeof(struct fuse_out_header) \ + + sizeof(struct type1) \ + + sizeof(struct type2), \ + .unique = ((struct fuse_in_header *) \ + bytes_in)->unique, \ + }; \ + *(struct type1 *) (bytes_out \ + + sizeof(struct fuse_out_header)) \ + = obj1; \ + *(struct type2 *) (bytes_out \ + + sizeof(struct fuse_out_header) \ + + sizeof(struct type1)) \ + = obj2; \ + TESTEQUAL(write(fuse_dev, bytes_out, \ + ((struct fuse_out_header *)bytes_out)->len), \ + ((struct fuse_out_header *)bytes_out)->len); \ + } while (false) + +#define TESTFUSEINIT() \ + do { \ + DECL_FUSE_IN(init); \ + \ + TESTFUSEIN(FUSE_INIT, init_in); \ + TESTEQUAL(init_in->major, FUSE_KERNEL_VERSION); \ + TESTEQUAL(init_in->minor, FUSE_KERNEL_MINOR_VERSION); \ + TESTFUSEOUT1(fuse_init_out, ((struct fuse_init_out) { \ + .major = FUSE_KERNEL_VERSION, \ + .minor = FUSE_KERNEL_MINOR_VERSION, \ + .max_readahead = 4096, \ + .flags = 0, \ + .max_background = 0, \ + .congestion_threshold = 0, \ + .max_write = 4096, \ + .time_gran = 1000, \ + .max_pages = 12, \ + .map_alignment = 4096, \ + })); \ + } while (false) + +#define DECL_FUSE_IN(name) \ + struct fuse_##name##_in *name##_in = \ + (struct fuse_##name##_in *) \ + (bytes_in + sizeof(struct fuse_in_header)) + +#define DECL_FUSE(name) \ + struct fuse_##name##_in *name##_in __attribute__((unused)); \ + struct fuse_##name##_out *name##_out __attribute__((unused)) + +#define FUSE_ACTION TEST(pid = fork(), pid != -1); \ + if (pid) { + +#define FUSE_DAEMON } else { \ + uint8_t bytes_in[FUSE_MIN_READ_BUFFER] \ + __attribute__((unused)); \ + uint8_t bytes_out[FUSE_MIN_READ_BUFFER] \ + __attribute__((unused)); + +#define FUSE_DONE exit(TEST_SUCCESS); \ + } \ + TESTEQUAL(waitpid(pid, &status, 0), pid); \ + TESTEQUAL(status, TEST_SUCCESS); + +struct map_relocation { + char *name; + int fd; + int value; +}; + +int mount_fuse(const char *mount_dir, int bpf_fd, int dir_fd, + int *fuse_dev_ptr); +int mount_fuse_no_init(const char *mount_dir, int bpf_fd, int dir_fd, + int *fuse_dev_ptr); +int install_elf_bpf(const char *file, const char *section, int *fd, + struct map_relocation **map_relocations, size_t *map_count); +#endif diff --git a/tools/testing/selftests/filesystems/incfs/Makefile b/tools/testing/selftests/filesystems/incfs/Makefile index f3798029247a37af19d647328766b82895ca5d1b..5a2f6301c4b27173176e533149667e524dfa12b7 100644 --- a/tools/testing/selftests/filesystems/incfs/Makefile +++ b/tools/testing/selftests/filesystems/incfs/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -Werror -I../.. -I../../../../.. -LDLIBS := -llz4 -lzstd -lcrypto -lpthread +CFLAGS += -D_FILE_OFFSET_BITS=64 -Wall -Werror -I../.. -I../../../../.. -fno-omit-frame-pointer -fsanitize=address -g +LDLIBS := -llz4 -lzstd -lcrypto -lpthread -fsanitize=address TEST_GEN_PROGS := incfs_test incfs_stress incfs_perf include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/incfs/incfs_test.c b/tools/testing/selftests/filesystems/incfs/incfs_test.c index 4c30aec3f5455d9e3c6370de0f8581dbfe1e3805..b3773bd1df91f02840a190d40895bb6c32719dcb 100644 --- a/tools/testing/selftests/filesystems/incfs/incfs_test.c +++ b/tools/testing/selftests/filesystems/incfs/incfs_test.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -43,6 +44,7 @@ #define FS_IOC_GETFLAGS _IOR('f', 1, long) #define FS_VERITY_FL 0x00100000 /* Verity protected inode */ +#define TEST_SKIP 2 #define TEST_FAILURE 1 #define TEST_SUCCESS 0 @@ -2793,14 +2795,16 @@ failure: return TEST_FAILURE; } +#define THREE_GB (3LL * 1024 * 1024 * 1024) +#define FOUR_GB (4LL * 1024 * 1024 * 1024) /* Have 1GB of margin */ static int large_file_test(const char *mount_dir) { char *backing_dir; int cmd_fd = -1; int i; - int result = TEST_FAILURE; + int result = TEST_FAILURE, ret; uint8_t data[INCFS_DATA_FILE_BLOCK_SIZE] = {}; - int block_count = 3LL * 1024 * 1024 * 1024 / INCFS_DATA_FILE_BLOCK_SIZE; + int block_count = THREE_GB / INCFS_DATA_FILE_BLOCK_SIZE; struct incfs_fill_block *block_buf = calloc(block_count, sizeof(struct incfs_fill_block)); struct incfs_fill_blocks fill_blocks = { @@ -2809,6 +2813,22 @@ static int large_file_test(const char *mount_dir) }; incfs_uuid_t id; int fd = -1; + struct statvfs svfs; + unsigned long long free_disksz; + + ret = statvfs(mount_dir, &svfs); + if (ret) { + ksft_print_msg("Can't get disk size. Skipping %s...\n", __func__); + return TEST_SKIP; + } + + free_disksz = (unsigned long long)svfs.f_bavail * svfs.f_bsize; + + if (FOUR_GB > free_disksz) { + ksft_print_msg("Not enough free disk space (%lldMB). Skipping %s...\n", + free_disksz >> 20, __func__); + return TEST_SKIP; + } backing_dir = create_backing_dir(mount_dir); if (!backing_dir) @@ -2849,6 +2869,7 @@ static int large_file_test(const char *mount_dir) failure: close(fd); close(cmd_fd); + unlink("very_large_file"); umount(mount_dir); free(backing_dir); return result; @@ -4409,6 +4430,7 @@ static int sysfs_test(const char *mount_dir) int fd = -1; int pid = -1; char buffer[32]; + char *null_buf = NULL; int status; struct incfs_per_uid_read_timeouts purt_set[] = { { @@ -4437,13 +4459,13 @@ static int sysfs_test(const char *mount_dir) TEST(fd = open(filename, O_RDONLY | O_CLOEXEC), fd != -1); TESTEQUAL(ioctl_test_last_error(cmd_fd, NULL, 0, 0), 0); TESTEQUAL(sysfs_test_value("reads_failed_timed_out", 0), 0); - TEST(read(fd, NULL, 1), -1); + TESTEQUAL(read(fd, null_buf, 1), -1); TESTEQUAL(ioctl_test_last_error(cmd_fd, &file.id, 0, -ETIME), 0); TESTEQUAL(sysfs_test_value("reads_failed_timed_out", 2), 0); TESTEQUAL(emit_test_file_data(mount_dir, &file), 0); TESTEQUAL(sysfs_test_value("reads_failed_hash_verification", 0), 0); - TESTEQUAL(read(fd, NULL, 1), -1); + TESTEQUAL(read(fd, null_buf, 1), -1); TESTEQUAL(sysfs_test_value("reads_failed_hash_verification", 1), 0); TESTSYSCALL(close(fd)); fd = -1; @@ -4609,6 +4631,29 @@ out: return result; } +static int stacked_mount_test(const char *mount_dir) +{ + int result = TEST_FAILURE; + char *backing_dir = NULL; + + /* Mount with no node */ + TEST(backing_dir = create_backing_dir(mount_dir), backing_dir); + TESTEQUAL(mount_fs(mount_dir, backing_dir, 0), 0); + /* Try mounting another instance with same name */ + TESTEQUAL(mount_fs(mount_dir, backing_dir, 0), 0); + /* Try unmounting the first instance */ + TESTEQUAL(umount_fs(mount_dir), 0); + /* Try unmounting the second instance */ + TESTEQUAL(umount_fs(mount_dir), 0); + result = TEST_SUCCESS; +out: + /* Cleanup */ + rmdir(mount_dir); + rmdir(backing_dir); + free(backing_dir); + return result; +} + static char *setup_mount_dir() { struct stat st; @@ -4664,9 +4709,15 @@ struct test_case { void run_one_test(const char *mount_dir, struct test_case *test_case) { + int ret; + ksft_print_msg("Running %s\n", test_case->name); - if (test_case->pfunc(mount_dir) == TEST_SUCCESS) + ret = test_case->pfunc(mount_dir); + + if (ret == TEST_SUCCESS) ksft_test_result_pass("%s\n", test_case->name); + else if (ret == TEST_SKIP) + ksft_test_result_skip("%s\n", test_case->name); else ksft_test_result_fail("%s\n", test_case->name); } @@ -4730,6 +4781,7 @@ int main(int argc, char *argv[]) MAKE_TEST(stat_test), MAKE_TEST(sysfs_test), MAKE_TEST(sysfs_rename_test), + MAKE_TEST(stacked_mount_test), }; #undef MAKE_TEST diff --git a/tools/testing/selftests/filesystems/incfs/utils.c b/tools/testing/selftests/filesystems/incfs/utils.c index e60b0d36f49dfcda9295ddc9ea776a5687c41eee..d7deb5321c3e2af926c4d02bd9319f12bced2fbe 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.c +++ b/tools/testing/selftests/filesystems/incfs/utils.c @@ -81,6 +81,16 @@ int mount_fs(const char *mount_dir, const char *backing_dir, return result; } +int umount_fs(const char *mount_dir) +{ + int result; + + result = umount(mount_dir); + if (result != 0) + perror("Error unmounting fs."); + return result; +} + int mount_fs_opt(const char *mount_dir, const char *backing_dir, const char *opt, bool remount) { @@ -116,6 +126,9 @@ size_t format_signature(void **buf, const char *root_hash, const char *add_data) size_t size = sizeof(struct signature_blob) + strlen(add_data) + 1; struct signature_blob *sb = malloc(size); + if (!sb) + return 0; + *sb = (struct signature_blob){ .version = INCFS_SIGNATURE_VERSION, .hash_section_size = sizeof(struct hash_section), @@ -126,7 +139,7 @@ size_t format_signature(void **buf, const char *root_hash, const char *add_data) .salt_size = 0, .hash_size = SHA256_DIGEST_SIZE, }, - .signing_section_size = sizeof(uint32_t) + strlen(add_data) + 1, + .signing_section_size = strlen(add_data) + 1, }; memcpy(sb->hash_section.hash, root_hash, SHA256_DIGEST_SIZE); diff --git a/tools/testing/selftests/filesystems/incfs/utils.h b/tools/testing/selftests/filesystems/incfs/utils.h index f5ed8dc5f0ff2820cf6a4c7d24f5c72039c59197..17a1ac5eef7d80c1596a4ba5afd324d4e264d38f 100644 --- a/tools/testing/selftests/filesystems/incfs/utils.h +++ b/tools/testing/selftests/filesystems/incfs/utils.h @@ -29,6 +29,8 @@ int drop_caches(void); int mount_fs(const char *mount_dir, const char *backing_dir, int read_timeout_ms); +int umount_fs(const char *mount_dir); + int mount_fs_opt(const char *mount_dir, const char *backing_dir, const char *opt, bool remount); diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index 98166fa3eb91ccbbef854d9ad8c5c5a605b594f9..34fb89b0c61fa35bcc55c81ee2cbdd8f274af492 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -1,6 +1,6 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -# description: Kprobe dynamic event - adding and removing +# description: Kprobe profile # requires: kprobe_events ! grep -q 'myevent' kprobe_profile diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc new file mode 100644 index 0000000000000000000000000000000000000000..05ffba299dbf2f68b9b99353a523b8082af60930 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc @@ -0,0 +1,63 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test histogram expression parsing +# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "=":README + + +fail() { #msg + echo $1 + exit_fail +} + +test_hist_expr() { # test_name expression expected_val + trigger="events/sched/sched_process_fork/trigger" + + reset_trigger_file $trigger + + echo "Test hist trigger expressions - $1" + + echo "hist:keys=common_pid:x=$2" > $trigger + + for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done + + actual=`grep -o 'x=[[:digit:]]*' $trigger | awk -F= '{ print $2 }'` + + if [ $actual != $3 ]; then + fail "Failed hist trigger expression evaluation: Expression: $2 Expected: $3, Actual: $actual" + fi + + reset_trigger_file $trigger +} + +check_error() { # test_name command-with-error-pos-by-^ + trigger="events/sched/sched_process_fork/trigger" + + echo "Test hist trigger expressions - $1" + ftrace_errlog_check 'hist:sched:sched_process_fork' "$2" $trigger +} + +test_hist_expr "Variable assignment" "123" "123" + +test_hist_expr "Subtraction not associative" "16-8-4-2" "2" + +test_hist_expr "Division not associative" "64/8/4/2" "1" + +test_hist_expr "Same precedence operators (+,-) evaluated left to right" "16-8+4+2" "14" + +test_hist_expr "Same precedence operators (*,/) evaluated left to right" "4*3/2*2" "12" + +test_hist_expr "Multiplication evaluated before addition/subtraction" "4+3*2-2" "8" + +test_hist_expr "Division evaluated before addition/subtraction" "4+6/2-2" "5" + +# err pos for "too many subexpressions" is dependent on where +# the last subexpression was detected. This can vary depending +# on how the expression tree was generated. +check_error "Too many subexpressions" 'hist:keys=common_pid:x=32+^10*3/20-4' +check_error "Too many subexpressions" 'hist:keys=common_pid:x=^1+2+3+4+5' + +check_error "Unary minus not supported in subexpression" 'hist:keys=common_pid:x=-(^1)+2' + +check_error "Division by zero" 'hist:keys=common_pid:x=3/^0' + +exit 0 diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index 12631f0076a10274dff566228b05748e9a217be0..11e157d7533b8fbaf5ac691b3df7057e0ff558c1 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -11,7 +11,7 @@ all: @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ if [ -e $$DIR/$(TEST_PROGS) ]; then \ rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \ fi \ @@ -32,6 +32,6 @@ override define CLEAN @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ mkdir $$BUILD_TARGET -p; \ - make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\ done endef diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index edce85420d1934e3892f5ffe1d5c6d06c0df593f..3e7b2e521cde435333b0c1e5fafbc77c0f2f7dc1 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -871,7 +871,8 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext) } t->timed_out = true; - kill(t->pid, SIGKILL); + // signal process group + kill(-(t->pid), SIGKILL); } void __wait_for_test(struct __test_metadata *t) @@ -965,7 +966,7 @@ void __run_test(struct __fixture_metadata *f, t->passed = 1; t->skip = 0; t->trigger = 0; - t->step = 0; + t->step = 1; t->no_print = 0; memset(t->results->reason, 0, sizeof(t->results->reason)); @@ -981,6 +982,7 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); t->passed = 0; } else if (t->pid == 0) { + setpgrp(); t->fn(t, variant); if (t->skip) _exit(255); diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 3d14ef77755e537c504f151decb74ad9a4d35f1c..8470601b89ae84f292076114a37d05b7ed39e5de 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -73,6 +73,7 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += set_memory_region_test TEST_GEN_PROGS_aarch64 += steal_time +TEST_PROGS_aarch64 += aarch64/s2mpu.sh TEST_GEN_PROGS_s390x = s390x/memop TEST_GEN_PROGS_s390x += s390x/resets @@ -83,6 +84,7 @@ TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS_s390x += set_memory_region_test TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) +TEST_PROGS += $(TEST_PROGS_$(UNAME_M)) LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr diff --git a/tools/testing/selftests/kvm/aarch64/s2mpu.sh b/tools/testing/selftests/kvm/aarch64/s2mpu.sh new file mode 100755 index 0000000000000000000000000000000000000000..e822965e7ed9634d0066edae78805026ed773308 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/s2mpu.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# Tests the printf infrastructure using test_printf kernel module. +$(dirname $0)/../kselftest/module.sh "s2mpu" test_kvm_s2mpu diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c index 0299cd81b8ba246180dbcb1f0229465bb6185de0..aa3795cd7bd3d9b41e92da96bf193c7f8d11c169 100644 --- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c +++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "test_util.h" @@ -40,10 +41,39 @@ int main(int argc, char *argv[]) { int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID); int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + /* + * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds + + * an arbitrary number for everything else. + */ + int nr_fds_wanted = kvm_max_vcpus + 100; + struct rlimit rl; pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id); pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus); + /* + * Check that we're allowed to open nr_fds_wanted file descriptors and + * try raising the limits if needed. + */ + TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!"); + + if (rl.rlim_cur < nr_fds_wanted) { + rl.rlim_cur = nr_fds_wanted; + if (rl.rlim_max < nr_fds_wanted) { + int old_rlim_max = rl.rlim_max; + rl.rlim_max = nr_fds_wanted; + + int r = setrlimit(RLIMIT_NOFILE, &rl); + if (r < 0) { + printf("RLIMIT_NOFILE hard limit is too low (%d, wanted %d)\n", + old_rlim_max, nr_fds_wanted); + exit(KSFT_SKIP); + } + } else { + TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!"); + } + } + /* * Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID. * Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c index 3a5c72ed2b79287453224f98fe9cc4689ee82942..a58507a7b5d6db2081c5c703c747b21d193cefb1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/svm.c +++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c @@ -57,6 +57,18 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector, seg->base = base; } +/* + * Avoid using memset to clear the vmcb, since libc may not be + * available in L1 (and, even if it is, features that libc memset may + * want to use, like AVX, may not be enabled). + */ +static void clear_vmcb(struct vmcb *vmcb) +{ + int n = sizeof(*vmcb) / sizeof(u32); + + asm volatile ("rep stosl" : "+c"(n), "+D"(vmcb) : "a"(0) : "memory"); +} + void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp) { struct vmcb *vmcb = svm->vmcb; @@ -73,8 +85,8 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r wrmsr(MSR_EFER, efer | EFER_SVME); wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa); - memset(vmcb, 0, sizeof(*vmcb)); - asm volatile ("vmsave\n\t" : : "a" (vmcb_gpa) : "memory"); + clear_vmcb(vmcb); + asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory"); vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr); vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr); vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr); @@ -131,19 +143,19 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa) { asm volatile ( - "vmload\n\t" + "vmload %[vmcb_gpa]\n\t" "mov rflags, %%r15\n\t" // rflags "mov %%r15, 0x170(%[vmcb])\n\t" "mov guest_regs, %%r15\n\t" // rax "mov %%r15, 0x1f8(%[vmcb])\n\t" LOAD_GPR_C - "vmrun\n\t" + "vmrun %[vmcb_gpa]\n\t" SAVE_GPR_C "mov 0x170(%[vmcb]), %%r15\n\t" // rflags "mov %%r15, rflags\n\t" "mov 0x1f8(%[vmcb]), %%r15\n\t" // rax "mov %%r15, guest_regs\n\t" - "vmsave\n\t" + "vmsave %[vmcb_gpa]\n\t" : : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa) : "r15", "memory"); } diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index fcc840088c919a29c06f074e7515336982aa53c3..7daedee3e7ee7f4e42936db0c444e88e91dee34a 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -120,12 +120,12 @@ struct st_time { uint64_t st_time; }; -static int64_t smccc(uint32_t func, uint32_t arg) +static int64_t smccc(uint32_t func, uint64_t arg) { unsigned long ret; asm volatile( - "mov x0, %1\n" + "mov w0, %w1\n" "mov x1, %2\n" "hvc #0\n" "mov %0, x0\n" diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c index e6480fd5c4bdc44c3de0782aa3ed6441e43388b3..9f55ccd169a13745d1897663784bfb7f9801ee09 100644 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c @@ -82,8 +82,9 @@ int get_warnings_count(void) FILE *f; f = popen("dmesg | grep \"WARNING:\" | wc -l", "r"); - fscanf(f, "%d", &warnings); - fclose(f); + if (fscanf(f, "%d", &warnings) < 1) + warnings = 0; + pclose(f); return warnings; } diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 0af84ad48aa77562cfdd45243e79c76ab013e09e..b7217b5251f57cc819a82cbf87f349359e245ef8 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -48,6 +48,7 @@ ARCH ?= $(SUBARCH) # When local build is done, headers are installed in the default # INSTALL_HDR_PATH usr/include. .PHONY: khdr +.NOTPARALLEL: khdr: ifndef KSFT_KHDR_INSTALL_DONE ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index 334a7eea200428b07d8964555783f9959d028aa0..fba322d1c67a1714d74e84d545087cc0aec955c6 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -455,6 +455,7 @@ static void mfd_fail_write(int fd) printf("mmap()+mprotect() didn't fail as expected\n"); abort(); } + munmap(p, mfd_def_size); } /* verify PUNCH_HOLE fails */ diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index 5a1e85ff5d32a4a54fe5578e655692c15750f454..2cf6f2f277ab8a22951e57f906a4880f5f236480 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -208,15 +208,21 @@ TEST(check_file_mmap) errno = 0; fd = open(".", O_TMPFILE | O_RDWR, 0600); - ASSERT_NE(-1, fd) { - TH_LOG("Can't create temporary file: %s", - strerror(errno)); + if (fd < 0) { + ASSERT_EQ(errno, EOPNOTSUPP) { + TH_LOG("Can't create temporary file: %s", + strerror(errno)); + } + SKIP(goto out_free, "O_TMPFILE not supported by filesystem."); } errno = 0; retval = fallocate(fd, 0, 0, FILE_SIZE); - ASSERT_EQ(0, retval) { - TH_LOG("Error allocating space for the temporary file: %s", - strerror(errno)); + if (retval) { + ASSERT_EQ(errno, EOPNOTSUPP) { + TH_LOG("Error allocating space for the temporary file: %s", + strerror(errno)); + } + SKIP(goto out_close, "fallocate not supported by filesystem."); } /* @@ -272,7 +278,9 @@ TEST(check_file_mmap) } munmap(addr, FILE_SIZE); +out_close: close(fd); +out_free: free(vec); } diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 02b0b9ead40b9658c7d838fa868f594ec5795430..ace976d8912528e88b43ab73fae8c1a5671f0015 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -436,13 +436,32 @@ cleanup() ip -netns ${NSA} link set dev ${NSA_DEV} down ip -netns ${NSA} link del dev ${NSA_DEV} + ip netns pids ${NSA} | xargs kill 2>/dev/null ip netns del ${NSA} fi + ip netns pids ${NSB} | xargs kill 2>/dev/null ip netns del ${NSB} + ip netns pids ${NSC} | xargs kill 2>/dev/null ip netns del ${NSC} >/dev/null 2>&1 } +cleanup_vrf_dup() +{ + ip link del ${NSA_DEV2} >/dev/null 2>&1 + ip netns pids ${NSC} | xargs kill 2>/dev/null + ip netns del ${NSC} >/dev/null 2>&1 +} + +setup_vrf_dup() +{ + # some VRF tests use ns-C which has the same config as + # ns-B but for a device NOT in the VRF + create_ns ${NSC} "-" "-" + connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ + ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 +} + setup() { local with_vrf=${1} @@ -472,12 +491,6 @@ setup() ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV} ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV} - - # some VRF tests use ns-C which has the same config as - # ns-B but for a device NOT in the VRF - create_ns ${NSC} "-" "-" - connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \ - ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64 else ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV} ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV} @@ -1174,7 +1187,9 @@ ipv4_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests + setup_vrf_dup ipv4_tcp_md5 + cleanup_vrf_dup # # enable VRF global server @@ -1732,8 +1747,9 @@ ipv4_addr_bind_vrf() for a in ${NSA_IP} ${VRF_IP} do log_start + show_hint "Socket not bound to VRF, but address is in VRF" run_cmd nettest -s -R -P icmp -l ${a} -b - log_test_addr ${a} $? 0 "Raw socket bind to local address" + log_test_addr ${a} $? 1 "Raw socket bind to local address" log_start run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b @@ -2125,7 +2141,7 @@ ipv6_ping_vrf() log_start show_hint "Fails since VRF device does not support linklocal or multicast" run_cmd ${ping6} -c1 -w1 ${a} - log_test_addr ${a} $? 2 "ping out, VRF bind" + log_test_addr ${a} $? 1 "ping out, VRF bind" done for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV} @@ -2653,7 +2669,9 @@ ipv6_tcp_vrf() log_test_addr ${a} $? 1 "Global server, local connection" # run MD5 tests + setup_vrf_dup ipv6_tcp_md5 + cleanup_vrf_dup # # enable VRF global server @@ -3348,11 +3366,14 @@ ipv6_addr_bind_novrf() run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. So this test passes + # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Should fail with 'Cannot assign requested address'" - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address" + show_hint "Tecnically should fail since address is not on device but kernel allows" + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } ipv6_addr_bind_vrf() @@ -3393,10 +3414,15 @@ ipv6_addr_bind_vrf() run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind" + # Sadly, the kernel allows binding a socket to a device and then + # binding to an address not on the device. The only restriction + # is that the address is valid in the L3 domain. So this test + # passes when it really should not a=${VRF_IP6} log_start - run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b - log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind" + show_hint "Tecnically should fail since address is not on device but kernel allows" + run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b + log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind" a=${NSA_LO_IP6} log_start @@ -3908,8 +3934,8 @@ EOF ################################################################################ # main -TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter" -TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter" +TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" +TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" PAUSE_ON_FAIL=no diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 6fad54c7ecb4ab41346ea3dd0308dcccbbf9d1da..a7f53c2a9580aa1dadac523accc079eb78ad6c9f 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -444,24 +444,63 @@ fib_rp_filter_test() setup set -e + ip netns add ns2 + ip netns set ns2 auto + + ip -netns ns2 link set dev lo up + + $IP link add name veth1 type veth peer name veth2 + $IP link set dev veth2 netns ns2 + $IP address add 192.0.2.1/24 dev veth1 + ip -netns ns2 address add 192.0.2.1/24 dev veth2 + $IP link set dev veth1 up + ip -netns ns2 link set dev veth2 up + $IP link set dev lo address 52:54:00:6a:c7:5e - $IP link set dummy0 address 52:54:00:6a:c7:5e - $IP link add dummy1 type dummy - $IP link set dummy1 address 52:54:00:6a:c7:5e - $IP link set dev dummy1 up + $IP link set dev veth1 address 52:54:00:6a:c7:5e + ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e + ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e + + # 1. (ns2) redirect lo's egress to veth2's egress + ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel + ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth2 + ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth2 + + # 2. (ns1) redirect veth1's ingress to lo's ingress + $NS_EXEC tc qdisc add dev veth1 ingress + $NS_EXEC tc filter add dev veth1 ingress protocol arp basic \ + action mirred ingress redirect dev lo + $NS_EXEC tc filter add dev veth1 ingress protocol ip basic \ + action mirred ingress redirect dev lo + + # 3. (ns1) redirect lo's egress to veth1's egress + $NS_EXEC tc qdisc add dev lo parent root handle 1: fq_codel + $NS_EXEC tc filter add dev lo parent 1: protocol arp basic \ + action mirred egress redirect dev veth1 + $NS_EXEC tc filter add dev lo parent 1: protocol ip basic \ + action mirred egress redirect dev veth1 + + # 4. (ns2) redirect veth2's ingress to lo's ingress + ip netns exec ns2 tc qdisc add dev veth2 ingress + ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \ + action mirred ingress redirect dev lo + ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \ + action mirred ingress redirect dev lo + $NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1 $NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1 - - $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel - $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo - $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1 + ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1 set +e - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1" + run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1" log_test $? 0 "rp_filter passes local packets" - run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1" + run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1" log_test $? 0 "rp_filter passes loopback packets" cleanup diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 250fbb2d1625209a82768608e3c9764d3fc63e18..881e680c2e9c289c602b3687e6522a6c49396574 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -9,6 +9,7 @@ TEST_PROGS = bridge_igmp.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ gre_multipath.sh \ + ip6_forward_instats_vrf.sh \ ip6gre_inner_v4_multipath.sh \ ip6gre_inner_v6_multipath.sh \ ipip_flat_gre_key.sh \ diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index b802c14d295096d5e656ed4ea06b2da4287d2e6f..e51def39fd801a4a16e6d47a3e6e01e8ec905d96 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -13,6 +13,8 @@ NETIFS[p5]=veth4 NETIFS[p6]=veth5 NETIFS[p7]=veth6 NETIFS[p8]=veth7 +NETIFS[p9]=veth8 +NETIFS[p10]=veth9 # Port that does not have a cable connected. NETIF_NO_CABLE=eth8 @@ -39,3 +41,5 @@ NETIF_CREATE=yes # Timeout (in seconds) before ping exits regardless of how many packets have # been sent or received PING_TIMEOUT=5 +# IPv6 traceroute utility name. +TROUTE6=traceroute6 diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh new file mode 100755 index 0000000000000000000000000000000000000000..9f5b3e2e5e95467cfafbeda4df31331cbff9a72f --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test ipv6 stats on the incoming if when forwarding with VRF + +ALL_TESTS=" + ipv6_ping + ipv6_in_too_big_err + ipv6_in_hdr_err + ipv6_in_addr_err + ipv6_in_discard +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 2001:1:1::2/64 + ip -6 route add vrf v$h1 2001:1:2::/64 via 2001:1:1::1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 2001:1:2::/64 via 2001:1:1::1 + simple_if_fini $h1 2001:1:1::2/64 +} + +router_create() +{ + vrf_create router + __simple_if_init $rtr1 router 2001:1:1::1/64 + __simple_if_init $rtr2 router 2001:1:2::1/64 + mtu_set $rtr2 1280 +} + +router_destroy() +{ + mtu_restore $rtr2 + __simple_if_fini $rtr2 2001:1:2::1/64 + __simple_if_fini $rtr1 2001:1:1::1/64 + vrf_destroy router +} + +h2_create() +{ + simple_if_init $h2 2001:1:2::2/64 + ip -6 route add vrf v$h2 2001:1:1::/64 via 2001:1:2::1 + mtu_set $h2 1280 +} + +h2_destroy() +{ + mtu_restore $h2 + ip -6 route del vrf v$h2 2001:1:1::/64 via 2001:1:2::1 + simple_if_fini $h2 2001:1:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rtr1=${NETIFS[p2]} + + rtr2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + h1_create + router_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + router_destroy + h1_destroy + vrf_cleanup +} + +ipv6_in_too_big_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) + local vrf_name=$(master_name_get $h1) + + # Send too big packets + ip vrf exec $vrf_name \ + $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + + local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InTooBigErrors" +} + +ipv6_in_hdr_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InHdrErrors) + local vrf_name=$(master_name_get $h1) + + # Send packets with hop limit 1, easiest with traceroute6 as some ping6 + # doesn't allow hop limit to be specified + ip vrf exec $vrf_name \ + $TROUTE6 2001:1:2::2 &> /dev/null + + local t1=$(ipv6_stats_get $rtr1 Ip6InHdrErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InHdrErrors" +} + +ipv6_in_addr_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) + local vrf_name=$(master_name_get $h1) + + # Disable forwarding temporary while sending the packet + sysctl -qw net.ipv6.conf.all.forwarding=0 + ip vrf exec $vrf_name \ + $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + sysctl -qw net.ipv6.conf.all.forwarding=1 + + local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InAddrErrors" +} + +ipv6_in_discard() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InDiscards) + local vrf_name=$(master_name_get $h1) + + # Add a policy to discard + ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block + ip vrf exec $vrf_name \ + $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + ip xfrm policy del dst 2001:1:2::2/128 dir fwd + + local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InDiscards" +} +ipv6_ping() +{ + RET=0 + + ping6_test $h1 2001:1:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 927f9ba49e0874743607405e8f268d88ecc5cfc8..be6fa808d2196d91b16d4068f90f2e20ee82cd20 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -674,6 +674,14 @@ qdisc_parent_stats_get() | jq '.[] | select(.parent == "'"$parent"'") | '"$selector" } +ipv6_stats_get() +{ + local dev=$1; shift + local stat=$1; shift + + cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2 +} + humanize() { local speed=$1; shift diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 3367fb5f2feff5bd161d3512362705e20d6c0395..3253fdc780d62909b659229b37b237a2893fa4d6 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -799,7 +799,6 @@ setup_ovs_bridge() { setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip - cleanup for arg do eval setup_${arg} || { echo " ${arg} not supported"; return 1; } done @@ -810,7 +809,7 @@ trace() { for arg do [ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue - ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & + ${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null & tcpdump_pids="${tcpdump_pids} $!" ns_cmd= done @@ -1636,6 +1635,10 @@ run_test() { unset IFS + # Since cleanup() relies on variables modified by this subshell, it + # has to run in this context. + trap cleanup EXIT + if [ "$VERBOSE" = "1" ]; then printf "\n##########################################################################\n\n" fi diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index c66da6ffd6d8ddae81470ed1a4e680b4ffad43b9..7badaf215de288fca3a1ad935b829c7425b32552 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -156,13 +156,13 @@ struct testcase testcases_v4[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, @@ -259,13 +259,13 @@ struct testcase testcases_v6[] = { }, { /* send max number of min sized segments */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .tlen = UDP_MAX_SEGMENTS, .gso_len = 1, - .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6, + .r_num_mss = UDP_MAX_SEGMENTS, }, { /* send max number + 1 of min sized segments: fail */ - .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1, + .tlen = UDP_MAX_SEGMENTS + 1, .gso_len = 1, .tfail = true, }, diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index 76a24052f4b47652dff9e0b9698a81bb0ae2227d..6a193425c367fccdbb18bcc1269f8f4b240acdce 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -293,19 +293,17 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int c; - /* bind to any by default */ - setup_sockaddr(PF_INET6, "::", &cfg_bind_addr); while ((c = getopt(argc, argv, "4b:C:Gl:n:p:rR:S:tv")) != -1) { switch (c) { case '4': cfg_family = PF_INET; cfg_alen = sizeof(struct sockaddr_in); - setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr); break; case 'b': - setup_sockaddr(cfg_family, optarg, &cfg_bind_addr); + bind_addr = optarg; break; case 'C': cfg_connect_timeout_ms = strtoul(optarg, NULL, 0); @@ -341,6 +339,11 @@ static void parse_opts(int argc, char **argv) } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_bind_addr); + if (optind != argc) usage(argv[0]); diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 17512a43885e7a29af80e7930a4debd8421ceee8..f1fdaa270291331e70fb2456003bdd44056b93d5 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -419,6 +419,7 @@ static void usage(const char *filepath) static void parse_opts(int argc, char **argv) { + const char *bind_addr = NULL; int max_len, hdrlen; int c; @@ -446,7 +447,7 @@ static void parse_opts(int argc, char **argv) cfg_cpu = strtol(optarg, NULL, 0); break; case 'D': - setup_sockaddr(cfg_family, optarg, &cfg_dst_addr); + bind_addr = optarg; break; case 'l': cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000; @@ -492,6 +493,11 @@ static void parse_opts(int argc, char **argv) } } + if (!bind_addr) + bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0"; + + setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr); + if (optind != argc) usage(argv[0]); diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index a374e10ef5065ffc70dd397f161c6c8af42ac54e..a56cfc4f2f3fa5cdb7688125d87e2fe65c5694ac 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -4,7 +4,8 @@ TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \ conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ - nft_queue.sh nft_meta.sh + nft_queue.sh nft_meta.sh \ + conntrack_vrf.sh LDLIBS = -lmnl TEST_GEN_FILES = nf-queue diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh new file mode 100755 index 0000000000000000000000000000000000000000..8b5ea923458828247d4264fca22dc31298fcb14e --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh @@ -0,0 +1,241 @@ +#!/bin/sh + +# This script demonstrates interaction of conntrack and vrf. +# The vrf driver calls the netfilter hooks again, with oif/iif +# pointing at the VRF device. +# +# For ingress, this means first iteration has iifname of lower/real +# device. In this script, thats veth0. +# Second iteration is iifname set to vrf device, tvrf in this script. +# +# For egress, this is reversed: first iteration has the vrf device, +# second iteration is done with the lower/real/veth0 device. +# +# test_ct_zone_in demonstrates unexpected change of nftables +# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack +# connection on VRF rcv" +# +# It was possible to assign conntrack zone to a packet (or mark it for +# `notracking`) in the prerouting chain before conntrack, based on real iif. +# +# After the change, the zone assignment is lost and the zone is assigned based +# on the VRF master interface (in case such a rule exists). +# assignment is lost. Instead, assignment based on the `iif` matching +# Thus it is impossible to distinguish packets based on the original +# interface. +# +# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem +# that was supposed to be fixed by the commit mentioned above to make sure +# that any fix to test case 1 won't break masquerade again. + +ksft_skip=4 + +IP0=172.30.30.1 +IP1=172.30.30.2 +PFXL=30 +ret=0 + +sfx=$(mktemp -u "XXXXXXXX") +ns0="ns0-$sfx" +ns1="ns1-$sfx" + +cleanup() +{ + ip netns pids $ns0 | xargs kill 2>/dev/null + ip netns pids $ns1 | xargs kill 2>/dev/null + + ip netns del $ns0 $ns1 +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +ip netns add "$ns0" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $ns0" + exit $ksft_skip +fi +ip netns add "$ns1" + +trap cleanup EXIT + +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 +ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0 + +ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not add veth device" + exit $ksft_skip +fi + +ip -net $ns0 li add tvrf type vrf table 9876 +if [ $? -ne 0 ];then + echo "SKIP: Could not add vrf device" + exit $ksft_skip +fi + +ip -net $ns0 li set lo up + +ip -net $ns0 li set veth0 master tvrf +ip -net $ns0 li set tvrf up +ip -net $ns0 li set veth0 up +ip -net $ns1 li set veth0 up + +ip -net $ns0 addr add $IP0/$PFXL dev veth0 +ip -net $ns1 addr add $IP1/$PFXL dev veth0 + +ip netns exec $ns1 iperf3 -s > /dev/null 2>&1& +if [ $? -ne 0 ];then + echo "SKIP: Could not start iperf3" + exit $ksft_skip +fi + +# test vrf ingress handling. +# The incoming connection should be placed in conntrack zone 1, +# as decided by the first iteration of the ruleset. +test_ct_zone_in() +{ +ip netns exec $ns0 nft -f - < /dev/null + + # should be in zone 1, not zone 2 + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "PASS: entry found in conntrack zone 1" + else + echo "FAIL: entry not found in conntrack zone 1" + count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l) + if [ $count -eq 1 ]; then + echo "FAIL: entry found in zone 2 instead" + else + echo "FAIL: entry not in zone 1 or 2, dumping table" + ip netns exec $ns0 conntrack -L + ip netns exec $ns0 nft list ruleset + fi + fi +} + +# add masq rule that gets evaluated w. outif set to vrf device. +# This tests the first iteration of the packet through conntrack, +# oifname is the vrf device. +test_masquerade_vrf() +{ + local qdisc=$1 + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc add dev tvrf root $qdisc + fi + + ip netns exec $ns0 conntrack -F 2>/dev/null + +ip netns exec $ns0 nft -f - </dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' && + ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)" + else + echo "FAIL: vrf rules have unexpected counter value" + ret=1 + fi + + if [ "$qdisc" != "default" ]; then + tc -net $ns0 qdisc del dev tvrf root + fi +} + +# add masq rule that gets evaluated w. outif set to veth device. +# This tests the 2nd iteration of the packet through conntrack, +# oifname is the lower device (veth0 in this case). +test_masquerade_veth() +{ + ip netns exec $ns0 conntrack -F 2>/dev/null +ip netns exec $ns0 nft -f - < /dev/null + if [ $? -ne 0 ]; then + echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device" + ret=1 + return + fi + + # must also check that nat table was evaluated on second (lower device) iteration. + ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' + if [ $? -eq 0 ]; then + echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device" + else + echo "FAIL: vrf masq rule has unexpected counter value" + ret=1 + fi +} + +test_ct_zone_in +test_masquerade_vrf "default" +test_masquerade_vrf "pfifo" +test_masquerade_veth + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh index 5a4938d6dcf25a3f8137be799091f4f0d16ad7fd..b5eef5ffb58e5f8981085e483409c259929546fc 100755 --- a/tools/testing/selftests/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/netfilter/nft_concat_range.sh @@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add" +BUGS="flush_remove_add reload" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -337,6 +337,23 @@ TYPE_flush_remove_add=" display Add two elements, flush, re-add " +TYPE_reload=" +display net,mac with reload +type_spec ipv4_addr . ether_addr +chain_spec ip daddr . ether saddr +dst addr4 +src mac +start 1 +count 1 +src_delta 2000 +tools sendip nc bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1455,6 +1472,59 @@ test_bug_flush_remove_add() { nft flush ruleset } +# - add ranged element, check that packets match it +# - reload the set, check packets still match +test_bug_reload() { + setup veth send_"${proto}" set || return ${KSELFTEST_SKIP} + rstart=${start} + + range_size=1 + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + add "$(format)" || return 1 + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + # check kernel does allocate pcpu sctrach map + # for reload with no elemet add/delete + ( echo flush set inet filter test ; + nft list set inet filter test ) | nft -f - + + start=${rstart} + range_size=1 + + for i in $(seq "${start}" $((start + count))); do + end=$((start + range_size)) + + # Avoid negative or zero-sized port ranges + if [ $((end / 65534)) -gt $((start / 65534)) ]; then + start=${end} + end=$((end + 1)) + fi + srcstart=$((start + src_delta)) + srcend=$((end + src_delta)) + + for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do + send_match "${j}" $((j + src_delta)) || return 1 + done + + range_size=$((range_size + 1)) + start=$((end + range_size)) + done + + nft flush ruleset +} + test_reported_issues() { eval test_bug_"${subtest}" } @@ -1513,4 +1583,4 @@ for name in ${TESTS}; do done done -[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} +[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0 diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 431296c0f91cf6093c6e7c2fc43c6fba67d354f8..aefe50e0e4a85f91c96c039dc249baec32ea9f6b 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -199,7 +199,6 @@ fi # test basic connectivity if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then echo "ERROR: ns1 cannot reach ns2" 1>&2 - bash exit 1 fi diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 4b93b1417b8626043f220a0ecae479bd82191165..843ba56d8e49ecf35777214fff8690b4f6c688e7 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -5,4 +5,4 @@ TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test include ../lib.mk -$(TEST_GEN_PROGS): helpers.c +$(TEST_GEN_PROGS): helpers.c helpers.h diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h index a6ea27344db2db4aac7b2e7a676dedb4283a84ab..7056340b9339e9f4159999c2797d15f5ddf6d3fd 100644 --- a/tools/testing/selftests/openat2/helpers.h +++ b/tools/testing/selftests/openat2/helpers.h @@ -9,6 +9,7 @@ #define _GNU_SOURCE #include +#include #include #include #include "../kselftest.h" @@ -62,11 +63,12 @@ bool needs_openat2(const struct open_how *how); (similar to chroot(2)). */ #endif /* RESOLVE_IN_ROOT */ -#define E_func(func, ...) \ - do { \ - if (func(__VA_ARGS__) < 0) \ - ksft_exit_fail_msg("%s:%d %s failed\n", \ - __FILE__, __LINE__, #func);\ +#define E_func(func, ...) \ + do { \ + errno = 0; \ + if (func(__VA_ARGS__) < 0) \ + ksft_exit_fail_msg("%s:%d %s failed - errno:%d\n", \ + __FILE__, __LINE__, #func, errno); \ } while (0) #define E_asprintf(...) E_func(asprintf, __VA_ARGS__) diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c index b386367c606b1d9a3fecc4ca359006468e051412..453152b58e7f0098cc28de2bc479fd7631e76496 100644 --- a/tools/testing/selftests/openat2/openat2_test.c +++ b/tools/testing/selftests/openat2/openat2_test.c @@ -244,6 +244,16 @@ void test_openat2_flags(void) unlink(path); fd = sys_openat2(AT_FDCWD, path, &test->how); + if (fd < 0 && fd == -EOPNOTSUPP) { + /* + * Skip the testcase if it failed because not supported + * by FS. (e.g. a valid O_TMPFILE combination on NFS) + */ + ksft_test_result_skip("openat2 with %s fails with %d (%s)\n", + test->name, fd, strerror(-fd)); + goto next; + } + if (test->err >= 0) failed = (fd < 0); else @@ -288,7 +298,7 @@ skip: else resultfn("openat2 with %s fails with %d (%s)\n", test->name, test->err, strerror(-test->err)); - +next: free(fdpath); fflush(stdout); } diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 01f8d3c0cf2cb844ecc233e5cc1f6b7fa1a42c0f..6922d6417e1cf06094c10627e0bd491d292d6fd2 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -68,7 +68,7 @@ #define PIDFD_SKIP 3 #define PIDFD_XFAIL 4 -int wait_for_pid(pid_t pid) +static inline int wait_for_pid(pid_t pid) { int status, ret; @@ -78,13 +78,20 @@ again: if (errno == EINTR) goto again; + ksft_print_msg("waitpid returned -1, errno=%d\n", errno); return -1; } - if (!WIFEXITED(status)) + if (!WIFEXITED(status)) { + ksft_print_msg( + "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n", + WIFSIGNALED(status), WTERMSIG(status)); return -1; + } - return WEXITSTATUS(status); + ret = WEXITSTATUS(status); + ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret); + return ret; } static inline int sys_pidfd_open(pid_t pid, unsigned int flags) diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index 22558524f71c34da86e9e0dacc5a73c523254e72..3fd8e903118f532d43a2ac251ec71531f19ca161 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "pidfd.h" #include "../kselftest.h" @@ -80,7 +81,10 @@ static inline int error_check(struct error *err, const char *test_name) return err->code; } +#define CHILD_STACK_SIZE 8192 + struct child { + char *stack; pid_t pid; int fd; }; @@ -89,17 +93,22 @@ static struct child clone_newns(int (*fn)(void *), void *args, struct error *err) { static int flags = CLONE_PIDFD | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD; - size_t stack_size = 1024; - char *stack[1024] = { 0 }; struct child ret; if (!(flags & CLONE_NEWUSER) && geteuid() != 0) flags |= CLONE_NEWUSER; + ret.stack = mmap(NULL, CHILD_STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + if (ret.stack == MAP_FAILED) { + error_set(err, -1, "mmap of stack failed (errno %d)", errno); + return ret; + } + #ifdef __ia64__ - ret.pid = __clone2(fn, stack, stack_size, flags, args, &ret.fd); + ret.pid = __clone2(fn, ret.stack, CHILD_STACK_SIZE, flags, args, &ret.fd); #else - ret.pid = clone(fn, stack + stack_size, flags, args, &ret.fd); + ret.pid = clone(fn, ret.stack + CHILD_STACK_SIZE, flags, args, &ret.fd); #endif if (ret.pid < 0) { @@ -129,6 +138,11 @@ static inline int child_join(struct child *child, struct error *err) else if (r > 0) error_set(err, r, "child %d reported: %d", child->pid, r); + if (munmap(child->stack, CHILD_STACK_SIZE)) { + error_set(err, -1, "munmap of child stack failed (errno %d)", errno); + r = -1; + } + return r; } diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index 529eb700ac26a39833a4e2c4c25b4caeb57af272..9a2d64901d591f877a7f9db3989df3cb01dce893 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -441,7 +441,6 @@ static void test_pidfd_poll_exec(int use_waitpid) { int pid, pidfd = 0; int status, ret; - pthread_t t1; time_t prog_start = time(NULL); const char *test_name = "pidfd_poll check for premature notification on child thread exec"; @@ -500,13 +499,14 @@ static int child_poll_leader_exit_test(void *args) */ *child_exit_secs = time(NULL); syscall(SYS_exit, 0); + /* Never reached, but appeases compiler thinking we should return. */ + exit(0); } static void test_pidfd_poll_leader_exit(int use_waitpid) { int pid, pidfd = 0; - int status, ret; - time_t prog_start = time(NULL); + int status, ret = 0; const char *test_name = "pidfd_poll check for premature notification on non-empty" "group leader exit"; diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index be2943f072f6088ba77df89ba2b7c942b9534465..17999e082aa715525f013f194b278e1bdf6786ad 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -39,7 +39,7 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, TEST(wait_simple) { - int pidfd = -1, status = 0; + int pidfd = -1; pid_t parent_tid = -1; struct clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), @@ -47,7 +47,6 @@ TEST(wait_simple) .flags = CLONE_PIDFD | CLONE_PARENT_SETTID, .exit_signal = SIGCHLD, }; - int ret; pid_t pid; siginfo_t info = { .si_signo = 0, @@ -88,7 +87,7 @@ TEST(wait_simple) TEST(wait_states) { - int pidfd = -1, status = 0; + int pidfd = -1; pid_t parent_tid = -1; struct clone_args args = { .parent_tid = ptr_to_u64(&parent_tid), diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index adc2b7294e5fddb54150bd797b04efc5825b3c3d..83647b8277e7dfa2efa7a45e9bb327dd30a02208 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -193,7 +193,7 @@ int spectre_v2_test(void) * We are not vulnerable and reporting otherwise, so * missing such a mismatch is safe. */ - if (state == VULNERABLE) + if (miss_percent > 95) return 4; return 1; diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings index ba4d85f74cd6b9a671652d2610ac08bbd386a577..a953c96aa16e1e814867b33d06e894ffb664bb1b 100644 --- a/tools/testing/selftests/rtc/settings +++ b/tools/testing/selftests/rtc/settings @@ -1 +1 @@ -timeout=90 +timeout=180 diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile index 0ebfe8b0e147fa9bd4d3bd895774393cf0d32c28..585f7a0c10cbea67d418366c94b3415cf9e00bfe 100644 --- a/tools/testing/selftests/seccomp/Makefile +++ b/tools/testing/selftests/seccomp/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -Wl,-no-as-needed -Wall +CFLAGS += -Wl,-no-as-needed -Wall -isystem ../../../../usr/include/ LDFLAGS += -lpthread TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index c9404ef9698e2b2d96c96178dc64529cc4014470..426dccc08f90675192fd5e96f3de089e7f4ba6ce 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -1242,6 +1242,48 @@ TEST_F(hmm, anon_teardown) } } +/* + * Test memory snapshot without faulting in pages accessed by the device. + */ +TEST_F(hmm, mixedmap) +{ + struct hmm_buffer *buffer; + unsigned long npages; + unsigned long size; + unsigned char *m; + int ret; + + npages = 1; + size = npages << self->page_shift; + + buffer = malloc(sizeof(*buffer)); + ASSERT_NE(buffer, NULL); + + buffer->fd = -1; + buffer->size = size; + buffer->mirror = malloc(npages); + ASSERT_NE(buffer->mirror, NULL); + + + /* Reserve a range of addresses. */ + buffer->ptr = mmap(NULL, size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE, + self->fd, 0); + ASSERT_NE(buffer->ptr, MAP_FAILED); + + /* Simulate a device snapshotting CPU pagetables. */ + ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages); + ASSERT_EQ(ret, 0); + ASSERT_EQ(buffer->cpages, npages); + + /* Check what the device saw. */ + m = buffer->mirror; + ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ); + + hmm_buffer_free(buffer); +} + /* * Test memory snapshot without faulting in pages accessed by the device. */ diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/vm/map_fixed_noreplace.c index d91bde511268667ab183b98bdd1fa99258173df9..eed44322d1a635e6405bd6bc79eacf434a76697d 100644 --- a/tools/testing/selftests/vm/map_fixed_noreplace.c +++ b/tools/testing/selftests/vm/map_fixed_noreplace.c @@ -17,9 +17,6 @@ #define MAP_FIXED_NOREPLACE 0x100000 #endif -#define BASE_ADDRESS (256ul * 1024 * 1024) - - static void dump_maps(void) { char cmd[32]; @@ -28,18 +25,46 @@ static void dump_maps(void) system(cmd); } +static unsigned long find_base_addr(unsigned long size) +{ + void *addr; + unsigned long flags; + + flags = MAP_PRIVATE | MAP_ANONYMOUS; + addr = mmap(NULL, size, PROT_NONE, flags, -1, 0); + if (addr == MAP_FAILED) { + printf("Error: couldn't map the space we need for the test\n"); + return 0; + } + + if (munmap(addr, size) != 0) { + printf("Error: couldn't map the space we need for the test\n"); + return 0; + } + return (unsigned long)addr; +} + int main(void) { + unsigned long base_addr; unsigned long flags, addr, size, page_size; char *p; page_size = sysconf(_SC_PAGE_SIZE); + //let's find a base addr that is free before we start the tests + size = 5 * page_size; + base_addr = find_base_addr(size); + if (!base_addr) { + printf("Error: couldn't map the space we need for the test\n"); + return 1; + } + flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE; // Check we can map all the areas we need below errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); @@ -60,7 +85,7 @@ int main(void) printf("unmap() successful\n"); errno = 0; - addr = BASE_ADDRESS + page_size; + addr = base_addr + page_size; size = 3 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -80,7 +105,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -101,7 +126,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS + (2 * page_size); + addr = base_addr + (2 * page_size); size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -121,7 +146,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS + (3 * page_size); + addr = base_addr + (3 * page_size); size = 2 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -141,7 +166,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = 2 * page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -161,7 +186,7 @@ int main(void) * +4 | free | */ errno = 0; - addr = BASE_ADDRESS; + addr = base_addr; size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -181,7 +206,7 @@ int main(void) * +4 | free | new */ errno = 0; - addr = BASE_ADDRESS + (4 * page_size); + addr = base_addr + (4 * page_size); size = page_size; p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0); printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p); @@ -192,7 +217,7 @@ int main(void) return 1; } - addr = BASE_ADDRESS; + addr = base_addr; size = 5 * page_size; if (munmap((void *)addr, size) != 0) { dump_maps(); diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 6e342944956612c518e2de332d4c8edab164ee66..a2217f7aa1742843703496ae22cd334e2bbfb8b6 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -413,9 +414,6 @@ static void uffd_test_ctx_init_ext(uint64_t *features) uffd_test_ops->allocate_area((void **)&area_src); uffd_test_ops->allocate_area((void **)&area_dst); - uffd_test_ops->release_pages(area_src); - uffd_test_ops->release_pages(area_dst); - userfaultfd_open(features); count_verify = malloc(nr_pages * sizeof(unsigned long long)); @@ -436,6 +434,26 @@ static void uffd_test_ctx_init_ext(uint64_t *features) *(area_count(area_src, nr) + 1) = 1; } + /* + * After initialization of area_src, we must explicitly release pages + * for area_dst to make sure it's fully empty. Otherwise we could have + * some area_dst pages be errornously initialized with zero pages, + * hence we could hit memory corruption later in the test. + * + * One example is when THP is globally enabled, above allocate_area() + * calls could have the two areas merged into a single VMA (as they + * will have the same VMA flags so they're mergeable). When we + * initialize the area_src above, it's possible that some part of + * area_dst could have been faulted in via one huge THP that will be + * shared between area_src and area_dst. It could cause some of the + * area_dst won't be trapped by missing userfaults. + * + * This release_pages() will guarantee even if that happened, we'll + * proactively split the thp and drop any accidentally initialized + * pages within area_dst. + */ + uffd_test_ops->release_pages(area_dst); + pipefd = malloc(sizeof(int) * nr_cpus * 2); if (!pipefd) err("pipefd"); diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index ebc4ee0fe179ff1c135602b4cb332c05293dd18b..8a9461aa0878a0b6ea74fc9ec48f370846e39397 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -276,7 +276,11 @@ n0 ping -W 1 -c 1 192.168.241.2 n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7 ip2 link del wg0 ip2 link del wg1 -! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel +read _ _ tx_bytes_before < <(n0 wg show wg1 transfer) +! n0 ping -W 1 -c 10 -f 192.168.241.2 || false +sleep 1 +read _ _ tx_bytes_after < <(n0 wg show wg1 transfer) +(( tx_bytes_after - tx_bytes_before < 70000 )) ip0 link del wg1 ip1 link del wg0 @@ -609,6 +613,28 @@ ip0 link set wg0 up kill $ncat_pid ip0 link del wg0 +# Ensure that dst_cache references don't outlive netns lifetime +ip1 link add dev wg0 type wireguard +ip2 link add dev wg0 type wireguard +configure_peers +ip1 link add veth1 type veth peer name veth2 +ip1 link set veth2 netns $netns2 +ip1 addr add fd00:aa::1/64 dev veth1 +ip2 addr add fd00:aa::2/64 dev veth2 +ip1 link set veth1 up +ip2 link set veth2 up +waitiface $netns1 veth1 +waitiface $netns2 veth2 +ip1 -6 route add default dev veth1 via fd00:aa::2 +ip2 -6 route add default dev veth2 via fd00:aa::1 +n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2 +n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1 +n1 ping6 -c 1 fd00::2 +pp ip netns delete $netns1 +pp ip netns delete $netns2 +pp ip netns add $netns1 +pp ip netns add $netns2 + # Ensure there aren't circular reference loops ip1 link add wg1 type wireguard ip2 link add wg2 type wireguard @@ -627,7 +653,7 @@ while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do done < /dev/kmsg alldeleted=1 for object in "${!objects[@]}"; do - if [[ ${objects["$object"]} != *createddestroyed ]]; then + if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then echo "Error: $object: merely ${objects["$object"]}" >&3 alldeleted=0 fi diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index b50c2085c1ac0f1e50abc219a2bdab60b2b790bf..a92c5590e87ca33c85fafcb69fca356d52e98cb9 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -48,7 +48,7 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y CONFIG_TRACE_IRQFLAGS=y CONFIG_DEBUG_BUGVERBOSE=y CONFIG_DEBUG_LIST=y -CONFIG_DEBUG_PI_LIST=y +CONFIG_DEBUG_PLIST=y CONFIG_PROVE_RCU=y CONFIG_SPARSE_RCU_POINTER=y CONFIG_RCU_CPU_STALL_TIMEOUT=21 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 74db83a0aedd8b67be991e0f856c53ec6ec7c9cd..a9b5a520a1d22e7de62729bf27f746993049d595 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -66,6 +66,7 @@ CONFIG_PROC_SYSCTL=y CONFIG_SYSFS=y CONFIG_TMPFS=y CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15 +CONFIG_LOG_BUF_SHIFT=18 CONFIG_PRINTK_TIME=y CONFIG_BLK_DEV_INITRD=y CONFIG_LEGACY_VSYSCALL_NONE=y diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c index bab2f6e06b63d06a01fa03d32623bebd3bc9755b..7e3e09c1abac67b94217cc0fc6a0b1987b7e24e5 100644 --- a/tools/testing/selftests/x86/iopl.c +++ b/tools/testing/selftests/x86/iopl.c @@ -85,48 +85,88 @@ static void expect_gp_outb(unsigned short port) printf("[OK]\toutb to 0x%02hx failed\n", port); } -static bool try_cli(void) +#define RET_FAULTED 0 +#define RET_FAIL 1 +#define RET_EMUL 2 + +static int try_cli(void) { + unsigned long flags; + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); if (sigsetjmp(jmpbuf, 1) != 0) { - return false; + return RET_FAULTED; } else { - asm volatile ("cli"); - return true; + asm volatile("cli; pushf; pop %[flags]" + : [flags] "=rm" (flags)); + + /* X86_FLAGS_IF */ + if (!(flags & (1 << 9))) + return RET_FAIL; + else + return RET_EMUL; } clearhandler(SIGSEGV); } -static bool try_sti(void) +static int try_sti(bool irqs_off) { + unsigned long flags; + sethandler(SIGSEGV, sigsegv, SA_RESETHAND); if (sigsetjmp(jmpbuf, 1) != 0) { - return false; + return RET_FAULTED; } else { - asm volatile ("sti"); - return true; + asm volatile("sti; pushf; pop %[flags]" + : [flags] "=rm" (flags)); + + /* X86_FLAGS_IF */ + if (irqs_off && (flags & (1 << 9))) + return RET_FAIL; + else + return RET_EMUL; } clearhandler(SIGSEGV); } -static void expect_gp_sti(void) +static void expect_gp_sti(bool irqs_off) { - if (try_sti()) { + int ret = try_sti(irqs_off); + + switch (ret) { + case RET_FAULTED: + printf("[OK]\tSTI faulted\n"); + break; + case RET_EMUL: + printf("[OK]\tSTI NOPped\n"); + break; + default: printf("[FAIL]\tSTI worked\n"); nerrs++; - } else { - printf("[OK]\tSTI faulted\n"); } } -static void expect_gp_cli(void) +/* + * Returns whether it managed to disable interrupts. + */ +static bool test_cli(void) { - if (try_cli()) { + int ret = try_cli(); + + switch (ret) { + case RET_FAULTED: + printf("[OK]\tCLI faulted\n"); + break; + case RET_EMUL: + printf("[OK]\tCLI NOPped\n"); + break; + default: printf("[FAIL]\tCLI worked\n"); nerrs++; - } else { - printf("[OK]\tCLI faulted\n"); + return true; } + + return false; } int main(void) @@ -152,8 +192,7 @@ int main(void) } /* Make sure that CLI/STI are blocked even with IOPL level 3 */ - expect_gp_cli(); - expect_gp_sti(); + expect_gp_sti(test_cli()); expect_ok_outb(0x80); /* Establish an I/O bitmap to test the restore */ @@ -204,8 +243,7 @@ int main(void) printf("[RUN]\tparent: write to 0x80 (should fail)\n"); expect_gp_outb(0x80); - expect_gp_cli(); - expect_gp_sti(); + expect_gp_sti(test_cli()); /* Test the capability checks. */ printf("\tiopl(3)\n"); diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index 65c141ebfbbde8ded6f4d9af1ebe879a616b6e95..5b45e6986aeab16d0b72a091969f8265f2479fbe 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -497,7 +497,7 @@ static int test_process_vm_readv(void) } if (vsyscall_map_r) { - if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) { + if (!memcmp(buf, remote.iov_base, sizeof(buf))) { printf("[OK]\tIt worked and read correct data\n"); } else { printf("[FAIL]\tIt worked but returned incorrect data\n"); diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh index 232e958ec454756501f2caa8eaf2133067fe10ac..b0b91d9b0dc2143cf689c9d42be8410057790215 100755 --- a/tools/testing/selftests/zram/zram.sh +++ b/tools/testing/selftests/zram/zram.sh @@ -2,9 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 TCID="zram.sh" -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - . ./zram_lib.sh run_zram () { @@ -18,14 +15,4 @@ echo "" check_prereqs -# check zram module exists -MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko -if [ -f $MODULE_PATH ]; then - run_zram -elif [ -b /dev/zram0 ]; then - run_zram -else - echo "$TCID : No zram.ko module or /dev/zram0 device file not found" - echo "$TCID : CONFIG_ZRAM is not set" - exit $ksft_skip -fi +run_zram diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh index 114863d9fb8768c906732d7ce780e3f6ea73e44c..8f4affe34f3e4c1d379d96515506e048954be073 100755 --- a/tools/testing/selftests/zram/zram01.sh +++ b/tools/testing/selftests/zram/zram01.sh @@ -33,9 +33,7 @@ zram_algs="lzo" zram_fill_fs() { - local mem_free0=$(free -m | awk 'NR==2 {print $4}') - - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "fill zram$i..." local b=0 while [ true ]; do @@ -45,29 +43,17 @@ zram_fill_fs() b=$(($b + 1)) done echo "zram$i can be filled with '$b' KB" - done - local mem_free1=$(free -m | awk 'NR==2 {print $4}') - local used_mem=$(($mem_free0 - $mem_free1)) + local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"` + local v=$((100 * 1024 * $b / $mem_used_total)) + if [ "$v" -lt 100 ]; then + echo "FAIL compression ratio: 0.$v:1" + ERR_CODE=-1 + return + fi - local total_size=0 - for sm in $zram_sizes; do - local s=$(echo $sm | sed 's/M//') - total_size=$(($total_size + $s)) + echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" done - - echo "zram used ${used_mem}M, zram disk sizes ${total_size}M" - - local v=$((100 * $total_size / $used_mem)) - - if [ "$v" -lt 100 ]; then - echo "FAIL compression ratio: 0.$v:1" - ERR_CODE=-1 - zram_cleanup - return - fi - - echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK" } check_prereqs @@ -81,7 +67,6 @@ zram_mount zram_fill_fs zram_cleanup -zram_unload if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh index e83b404807c09a7cf2a3cdefb9ec4eb746319a36..2418b0c4ed136b3b603ade62cd2838ff61af38b8 100755 --- a/tools/testing/selftests/zram/zram02.sh +++ b/tools/testing/selftests/zram/zram02.sh @@ -36,7 +36,6 @@ zram_set_memlimit zram_makeswap zram_swapoff zram_cleanup -zram_unload if [ $ERR_CODE -ne 0 ]; then echo "$TCID : [FAIL]" diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh index 6f872f266fd1151b6771f55147d265468a3ad74d..21ec1966de76ca4047fe50eea82015bc03986c98 100755 --- a/tools/testing/selftests/zram/zram_lib.sh +++ b/tools/testing/selftests/zram/zram_lib.sh @@ -5,12 +5,17 @@ # Author: Alexey Kodanev # Modified: Naresh Kamboju -MODULE=0 dev_makeswap=-1 dev_mounted=-1 - +dev_start=0 +dev_end=-1 +module_load=-1 +sys_control=-1 # Kselftest framework requirement - SKIP code is 4. ksft_skip=4 +kernel_version=`uname -r | cut -d'.' -f1,2` +kernel_major=${kernel_version%.*} +kernel_minor=${kernel_version#*.} trap INT @@ -25,68 +30,104 @@ check_prereqs() fi } +kernel_gte() +{ + major=${1%.*} + minor=${1#*.} + + if [ $kernel_major -gt $major ]; then + return 0 + elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then + return 0 + fi + + return 1 +} + zram_cleanup() { echo "zram cleanup" local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_makeswap); do swapoff /dev/zram$i done - for i in $(seq 0 $dev_mounted); do + for i in $(seq $dev_start $dev_mounted); do umount /dev/zram$i done - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo 1 > /sys/block/zram${i}/reset rm -rf zram$i done -} + if [ $sys_control -eq 1 ]; then + for i in $(seq $dev_start $dev_end); do + echo $i > /sys/class/zram-control/hot_remove + done + fi -zram_unload() -{ - if [ $MODULE -ne 0 ] ; then - echo "zram rmmod zram" + if [ $module_load -eq 1 ]; then rmmod zram > /dev/null 2>&1 fi } zram_load() { - # check zram module exists - MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko - if [ -f $MODULE_PATH ]; then - MODULE=1 - echo "create '$dev_num' zram device(s)" - modprobe zram num_devices=$dev_num - if [ $? -ne 0 ]; then - echo "failed to insert zram module" - exit 1 - fi - - dev_num_created=$(ls /dev/zram* | wc -w) + echo "create '$dev_num' zram device(s)" + + # zram module loaded, new kernel + if [ -d "/sys/class/zram-control" ]; then + echo "zram modules already loaded, kernel supports" \ + "zram-control interface" + dev_start=$(ls /dev/zram* | wc -w) + dev_end=$(($dev_start + $dev_num - 1)) + sys_control=1 + + for i in $(seq $dev_start $dev_end); do + cat /sys/class/zram-control/hot_add > /dev/null + done + + echo "all zram devices (/dev/zram$dev_start~$dev_end" \ + "successfully created" + return 0 + fi - if [ "$dev_num_created" -ne "$dev_num" ]; then - echo "unexpected num of devices: $dev_num_created" - ERR_CODE=-1 + # detect old kernel or built-in + modprobe zram num_devices=$dev_num + if [ ! -d "/sys/class/zram-control" ]; then + if grep -q '^zram' /proc/modules; then + rmmod zram > /dev/null 2>&1 + if [ $? -ne 0 ]; then + echo "zram module is being used on old kernel" \ + "without zram-control interface" + exit $ksft_skip + fi else - echo "zram load module successful" + echo "test needs CONFIG_ZRAM=m on old kernel without" \ + "zram-control interface" + exit $ksft_skip fi - elif [ -b /dev/zram0 ]; then - echo "/dev/zram0 device file found: OK" - else - echo "ERROR: No zram.ko module or no /dev/zram0 device found" - echo "$TCID : CONFIG_ZRAM is not set" - exit 1 + modprobe zram num_devices=$dev_num fi + + module_load=1 + dev_end=$(($dev_num - 1)) + echo "all zram devices (/dev/zram0~$dev_end) successfully created" } zram_max_streams() { echo "set max_comp_streams to zram device(s)" - local i=0 + kernel_gte 4.7 + if [ $? -eq 0 ]; then + echo "The device attribute max_comp_streams was"\ + "deprecated in 4.7" + return 0 + fi + + local i=$dev_start for max_s in $zram_max_streams; do local sys_path="/sys/block/zram${i}/max_comp_streams" echo $max_s > $sys_path || \ @@ -98,7 +139,7 @@ zram_max_streams() echo "FAIL can't set max_streams '$max_s', get $max_stream" i=$(($i + 1)) - echo "$sys_path = '$max_streams' ($i/$dev_num)" + echo "$sys_path = '$max_streams'" done echo "zram max streams: OK" @@ -108,15 +149,16 @@ zram_compress_alg() { echo "test that we can set compression algorithm" - local algs=$(cat /sys/block/zram0/comp_algorithm) + local i=$dev_start + local algs=$(cat /sys/block/zram${i}/comp_algorithm) echo "supported algs: $algs" - local i=0 + for alg in $zram_algs; do local sys_path="/sys/block/zram${i}/comp_algorithm" echo "$alg" > $sys_path || \ echo "FAIL can't set '$alg' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$alg' ($i/$dev_num)" + echo "$sys_path = '$alg'" done echo "zram set compression algorithm: OK" @@ -125,14 +167,14 @@ zram_compress_alg() zram_set_disksizes() { echo "set disk size to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_sizes; do local sys_path="/sys/block/zram${i}/disksize" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done echo "zram set disksizes: OK" @@ -142,14 +184,14 @@ zram_set_memlimit() { echo "set memory limit to zram device(s)" - local i=0 + local i=$dev_start for ds in $zram_mem_limits; do local sys_path="/sys/block/zram${i}/mem_limit" echo "$ds" > $sys_path || \ echo "FAIL can't set '$ds' to $sys_path" i=$(($i + 1)) - echo "$sys_path = '$ds' ($i/$dev_num)" + echo "$sys_path = '$ds'" done echo "zram set memory limit: OK" @@ -158,8 +200,8 @@ zram_set_memlimit() zram_makeswap() { echo "make swap with zram device(s)" - local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + local i=$dev_start + for i in $(seq $dev_start $dev_end); do mkswap /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -182,7 +224,7 @@ zram_makeswap() zram_swapoff() { local i= - for i in $(seq 0 $dev_makeswap); do + for i in $(seq $dev_start $dev_end); do swapoff /dev/zram$i > err.log 2>&1 if [ $? -ne 0 ]; then cat err.log @@ -196,7 +238,7 @@ zram_swapoff() zram_makefs() { - local i=0 + local i=$dev_start for fs in $zram_filesystems; do # if requested fs not supported default it to ext2 which mkfs.$fs > /dev/null 2>&1 || fs=ext2 @@ -215,7 +257,7 @@ zram_makefs() zram_mount() { local i=0 - for i in $(seq 0 $(($dev_num - 1))); do + for i in $(seq $dev_start $dev_end); do echo "mount /dev/zram$i" mkdir zram$i mount /dev/zram$i zram$i > /dev/null || \ diff --git a/tools/usb/testusb.c b/tools/usb/testusb.c index ee8208b2f946024e28367faa3eff0b5f2425324c..69c3ead25313de00f4129c2a24ff7ebd7ee0d0b4 100644 --- a/tools/usb/testusb.c +++ b/tools/usb/testusb.c @@ -265,12 +265,6 @@ nomem: } entry->ifnum = ifnum; - - /* FIXME update USBDEVFS_CONNECTINFO so it tells about high speed etc */ - - fprintf(stderr, "%s speed\t%s\t%u\n", - speed(entry->speed), entry->name, entry->ifnum); - entry->next = testdevs; testdevs = entry; return 0; @@ -299,6 +293,14 @@ static void *handle_testdev (void *arg) return 0; } + status = ioctl(fd, USBDEVFS_GET_SPEED, NULL); + if (status < 0) + fprintf(stderr, "USBDEVFS_GET_SPEED failed %d\n", status); + else + dev->speed = status; + fprintf(stderr, "%s speed\t%s\t%u\n", + speed(dev->speed), dev->name, dev->ifnum); + restart: for (i = 0; i < TEST_CASES; i++) { if (dev->test != -1 && dev->test != i) diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 0517c744b04e8d5d62c540d1c710591a12909b4b..f62f10c988db107c5aba893e31972f0e64a3b0f4 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -1331,7 +1331,7 @@ int main(int argc, char *argv[]) if (opt_list && opt_list_mapcnt) kpagecount_fd = checked_open(PROC_KPAGECOUNT, O_RDONLY); - if (opt_mark_idle && opt_file) + if (opt_mark_idle) page_idle_fd = checked_open(SYS_KERNEL_MM_PAGE_IDLE, O_RDWR); if (opt_list && opt_pid) diff --git a/usr/include/Makefile b/usr/include/Makefile index f6b3c85d900ede51844ca37a4ccd7d3817d23c0e..a8ed689918ce1c0c005fedc0c29438b59477e074 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -12,6 +12,9 @@ UAPI_CFLAGS := -std=c90 -Wall -Werror=implicit-function-declaration # It is here just because CONFIG_CC_CAN_LINK is tested with -m32 or -m64. UAPI_CFLAGS += $(filter -m32 -m64, $(KBUILD_CFLAGS)) +# USERCFLAGS might contain sysroot location for CC. +UAPI_CFLAGS += $(USERCFLAGS) + override c_flags = $(UAPI_CFLAGS) -Wp,-MMD,$(depfile) -I$(objtree)/usr/include # The following are excluded for now because they fail to build. @@ -34,7 +37,6 @@ no-header-test += linux/hdlc/ioctl.h no-header-test += linux/ivtv.h no-header-test += linux/kexec.h no-header-test += linux/matroxfb.h -no-header-test += linux/nfc.h no-header-test += linux/omap3isp.h no-header-test += linux/omapfb.h no-header-test += linux/patchkey.h diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index c2323c27a28b52dd14c6405dcbc8d54b1356efe0..518cd8dc390e2d73a21aeac626865cb5e1a9fccf 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -451,8 +451,8 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) + hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list, + link, srcu_read_lock_held(&kvm->irq_srcu)) if (kian->gsi == gsi) { srcu_read_unlock(&kvm->irq_srcu, idx); return true; @@ -468,8 +468,8 @@ void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) { struct kvm_irq_ack_notifier *kian; - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) + hlist_for_each_entry_srcu(kian, &kvm->irq_ack_notifier_list, + link, srcu_read_lock_held(&kvm->irq_srcu)) if (kian->gsi == gsi) kian->irq_acked(kian); } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4d16d862e39cc661b12e64ea5997a56e272858f4..a07bfa9beda3cb2f39e726383c6baf42826ceae8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1287,7 +1287,8 @@ int __kvm_set_memory_region(struct kvm *kvm, id = (u16)mem->slot; /* General sanity checks */ - if (mem->memory_size & (PAGE_SIZE - 1)) + if ((mem->memory_size & (PAGE_SIZE - 1)) || + (mem->memory_size != (unsigned long)mem->memory_size)) return -EINVAL; if (mem->guest_phys_addr & (PAGE_SIZE - 1)) return -EINVAL; @@ -1680,7 +1681,6 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn { return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn); } -EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_memslot); bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { @@ -2572,7 +2572,8 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, int r; gpa_t gpa = ghc->gpa + offset; - BUG_ON(len + offset > ghc->len); + if (WARN_ON_ONCE(len + offset > ghc->len)) + return -EINVAL; if (slots->generation != ghc->generation) { if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) @@ -2609,7 +2610,8 @@ int kvm_read_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, int r; gpa_t gpa = ghc->gpa + offset; - BUG_ON(len + offset > ghc->len); + if (WARN_ON_ONCE(len + offset > ghc->len)) + return -EINVAL; if (slots->generation != ghc->generation) { if (__kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len)) @@ -2739,15 +2741,19 @@ out: static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu) { - unsigned int old, val, shrink; + unsigned int old, val, shrink, grow_start; old = val = vcpu->halt_poll_ns; shrink = READ_ONCE(halt_poll_ns_shrink); + grow_start = READ_ONCE(halt_poll_ns_grow_start); if (shrink == 0) val = 0; else val /= shrink; + if (val < grow_start) + val = 0; + vcpu->halt_poll_ns = val; trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old); }